{ "best_metric": 1.02767229, "best_model_checkpoint": "/mnt/bn/haiyang-dataset-lq/medical/outputvqa/qwen2-vl-2b-instruct/v2-20241111-134633/checkpoint-20000", "epoch": 49.98828216545582, "eval_steps": 10000, "global_step": 106650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.953125, "epoch": 0.00046871338176704945, "grad_norm": 9.64400863647461, "learning_rate": 0.0, "loss": 0.44107044, "memory(GiB)": 11.82, "step": 1, "train_speed(iter/s)": 0.029637 }, { "acc": 0.91267246, "epoch": 0.002343566908835247, "grad_norm": 15.824323654174805, "learning_rate": 1.8754369027018946e-06, "loss": 0.76212209, "memory(GiB)": 13.7, "step": 5, "train_speed(iter/s)": 0.138313 }, { "acc": 0.9139843, "epoch": 0.004687133817670494, "grad_norm": 9.513165473937988, "learning_rate": 2.683143612841372e-06, "loss": 0.65215297, "memory(GiB)": 13.7, "step": 10, "train_speed(iter/s)": 0.253823 }, { "acc": 0.93444805, "epoch": 0.007030700726505742, "grad_norm": 95.97209930419922, "learning_rate": 3.1556217498538185e-06, "loss": 0.55912886, "memory(GiB)": 13.7, "step": 15, "train_speed(iter/s)": 0.352369 }, { "acc": 0.95495377, "epoch": 0.009374267635340989, "grad_norm": 23.95183753967285, "learning_rate": 3.4908503229808484e-06, "loss": 0.41665173, "memory(GiB)": 13.7, "step": 20, "train_speed(iter/s)": 0.436734 }, { "acc": 0.9349577, "epoch": 0.011717834544176237, "grad_norm": 7.3498616218566895, "learning_rate": 3.750873805403789e-06, "loss": 0.51904373, "memory(GiB)": 13.7, "step": 25, "train_speed(iter/s)": 0.509278 }, { "acc": 0.96130838, "epoch": 0.014061401453011484, "grad_norm": 16.67639923095703, "learning_rate": 3.963328459993295e-06, "loss": 0.38310118, "memory(GiB)": 13.7, "step": 30, "train_speed(iter/s)": 0.573077 }, { "acc": 0.95442772, "epoch": 0.016404968361846732, "grad_norm": 16.329227447509766, "learning_rate": 4.142956310990909e-06, "loss": 0.40951576, "memory(GiB)": 13.7, "step": 35, "train_speed(iter/s)": 0.630888 }, { "acc": 0.94406204, "epoch": 0.018748535270681977, "grad_norm": 19.512208938598633, "learning_rate": 4.298557033120325e-06, "loss": 0.34754143, "memory(GiB)": 13.7, "step": 40, "train_speed(iter/s)": 0.683998 }, { "acc": 0.94656563, "epoch": 0.021092102179517225, "grad_norm": 7.505685806274414, "learning_rate": 4.435806597005741e-06, "loss": 0.53699813, "memory(GiB)": 13.7, "step": 45, "train_speed(iter/s)": 0.731532 }, { "acc": 0.91913939, "epoch": 0.023435669088352474, "grad_norm": 7.174973011016846, "learning_rate": 4.558580515543266e-06, "loss": 0.69415998, "memory(GiB)": 13.7, "step": 50, "train_speed(iter/s)": 0.7733 }, { "acc": 0.93467045, "epoch": 0.02577923599718772, "grad_norm": 11.03010082244873, "learning_rate": 4.669643034343912e-06, "loss": 0.43059707, "memory(GiB)": 13.7, "step": 55, "train_speed(iter/s)": 0.812017 }, { "acc": 0.95404949, "epoch": 0.028122802906022967, "grad_norm": 17.31180191040039, "learning_rate": 4.771035170132772e-06, "loss": 0.39132476, "memory(GiB)": 13.7, "step": 60, "train_speed(iter/s)": 0.844764 }, { "acc": 0.94601717, "epoch": 0.030466369814858216, "grad_norm": 108.04169464111328, "learning_rate": 4.864306893511089e-06, "loss": 0.52508001, "memory(GiB)": 13.7, "step": 65, "train_speed(iter/s)": 0.875055 }, { "acc": 0.94423113, "epoch": 0.032809936723693464, "grad_norm": 10.126409530639648, "learning_rate": 4.9506630211303875e-06, "loss": 0.48019028, "memory(GiB)": 13.7, "step": 70, "train_speed(iter/s)": 0.902706 }, { "acc": 0.91829491, "epoch": 0.035153503632528706, "grad_norm": 26.11595344543457, "learning_rate": 5.031058652555712e-06, "loss": 0.70826035, "memory(GiB)": 13.7, "step": 75, "train_speed(iter/s)": 0.929706 }, { "acc": 0.94070187, "epoch": 0.037497070541363954, "grad_norm": 10.799473762512207, "learning_rate": 5.106263743259802e-06, "loss": 0.47705693, "memory(GiB)": 13.7, "step": 80, "train_speed(iter/s)": 0.953277 }, { "acc": 0.94058418, "epoch": 0.0398406374501992, "grad_norm": 9.009799003601074, "learning_rate": 5.176908067025565e-06, "loss": 0.41230688, "memory(GiB)": 13.7, "step": 85, "train_speed(iter/s)": 0.97299 }, { "acc": 0.97420425, "epoch": 0.04218420435903445, "grad_norm": 6.202676773071289, "learning_rate": 5.243513307145218e-06, "loss": 0.24567184, "memory(GiB)": 13.7, "step": 90, "train_speed(iter/s)": 0.993439 }, { "acc": 0.92257595, "epoch": 0.0445277712678697, "grad_norm": 18.65657615661621, "learning_rate": 5.306516459496381e-06, "loss": 0.68258247, "memory(GiB)": 13.7, "step": 95, "train_speed(iter/s)": 1.011374 }, { "acc": 0.93326387, "epoch": 0.04687133817670495, "grad_norm": 10.520679473876953, "learning_rate": 5.366287225682744e-06, "loss": 0.62276874, "memory(GiB)": 13.7, "step": 100, "train_speed(iter/s)": 1.030089 }, { "acc": 0.92561817, "epoch": 0.04921490508554019, "grad_norm": 13.338568687438965, "learning_rate": 5.423141158142833e-06, "loss": 0.6411911, "memory(GiB)": 13.7, "step": 105, "train_speed(iter/s)": 1.048542 }, { "acc": 0.94637842, "epoch": 0.05155847199437544, "grad_norm": 50.43000030517578, "learning_rate": 5.477349744483389e-06, "loss": 0.35556946, "memory(GiB)": 13.7, "step": 110, "train_speed(iter/s)": 1.0659 }, { "acc": 0.9627943, "epoch": 0.053902038903210686, "grad_norm": 79.94879150390625, "learning_rate": 5.529148248340802e-06, "loss": 0.39782405, "memory(GiB)": 13.7, "step": 115, "train_speed(iter/s)": 1.079593 }, { "acc": 0.93793964, "epoch": 0.056245605812045935, "grad_norm": 88.38262939453125, "learning_rate": 5.578741880272248e-06, "loss": 0.61173015, "memory(GiB)": 13.7, "step": 120, "train_speed(iter/s)": 1.094146 }, { "acc": 0.93801689, "epoch": 0.05858917272088118, "grad_norm": 45.577884674072266, "learning_rate": 5.626310708105684e-06, "loss": 0.46817045, "memory(GiB)": 13.7, "step": 125, "train_speed(iter/s)": 1.107385 }, { "acc": 0.91904535, "epoch": 0.06093273962971643, "grad_norm": 57.07664108276367, "learning_rate": 5.672013603650566e-06, "loss": 0.75130863, "memory(GiB)": 13.7, "step": 130, "train_speed(iter/s)": 1.121727 }, { "acc": 0.9378438, "epoch": 0.06327630653855168, "grad_norm": 9.486706733703613, "learning_rate": 5.715991444157665e-06, "loss": 0.43740759, "memory(GiB)": 13.7, "step": 135, "train_speed(iter/s)": 1.135639 }, { "acc": 0.96066475, "epoch": 0.06561987344738693, "grad_norm": 10.14747142791748, "learning_rate": 5.758369731269862e-06, "loss": 0.31908503, "memory(GiB)": 13.7, "step": 140, "train_speed(iter/s)": 1.145364 }, { "acc": 0.95880413, "epoch": 0.06796344035622218, "grad_norm": 9.716619491577148, "learning_rate": 5.799260750196488e-06, "loss": 0.29555159, "memory(GiB)": 13.7, "step": 145, "train_speed(iter/s)": 1.155906 }, { "acc": 0.92928581, "epoch": 0.07030700726505741, "grad_norm": 18.20058822631836, "learning_rate": 5.838765362695189e-06, "loss": 0.64425688, "memory(GiB)": 13.7, "step": 150, "train_speed(iter/s)": 1.164799 }, { "acc": 0.93890381, "epoch": 0.07265057417389266, "grad_norm": 6.389927387237549, "learning_rate": 5.876974505949613e-06, "loss": 0.5672842, "memory(GiB)": 13.7, "step": 155, "train_speed(iter/s)": 1.17428 }, { "acc": 0.92416458, "epoch": 0.07499414108272791, "grad_norm": 11.73437786102295, "learning_rate": 5.913970453399279e-06, "loss": 0.56795902, "memory(GiB)": 13.7, "step": 160, "train_speed(iter/s)": 1.184735 }, { "acc": 0.91507759, "epoch": 0.07733770799156316, "grad_norm": 6.097660064697266, "learning_rate": 5.9498278814958345e-06, "loss": 0.65721445, "memory(GiB)": 13.7, "step": 165, "train_speed(iter/s)": 1.193824 }, { "acc": 0.90391369, "epoch": 0.0796812749003984, "grad_norm": 13.731201171875, "learning_rate": 5.9846147771650435e-06, "loss": 0.70883646, "memory(GiB)": 13.7, "step": 170, "train_speed(iter/s)": 1.20174 }, { "acc": 0.91414585, "epoch": 0.08202484180923365, "grad_norm": 3.909681797027588, "learning_rate": 6.018393213692805e-06, "loss": 0.66449919, "memory(GiB)": 13.7, "step": 175, "train_speed(iter/s)": 1.210921 }, { "acc": 0.92824402, "epoch": 0.0843684087180689, "grad_norm": 1.5090925693511963, "learning_rate": 6.051220017284696e-06, "loss": 0.5140667, "memory(GiB)": 13.7, "step": 180, "train_speed(iter/s)": 1.219171 }, { "acc": 0.93839836, "epoch": 0.08671197562690415, "grad_norm": 10.515935897827148, "learning_rate": 6.083147342279079e-06, "loss": 0.4410614, "memory(GiB)": 13.7, "step": 185, "train_speed(iter/s)": 1.225102 }, { "acc": 0.93658867, "epoch": 0.0890555425357394, "grad_norm": 10.370787620544434, "learning_rate": 6.114223169635859e-06, "loss": 0.45227637, "memory(GiB)": 13.7, "step": 190, "train_speed(iter/s)": 1.23234 }, { "acc": 0.94556599, "epoch": 0.09139910944457465, "grad_norm": 10.614800453186035, "learning_rate": 6.144491740663014e-06, "loss": 0.5464817, "memory(GiB)": 13.7, "step": 195, "train_speed(iter/s)": 1.239216 }, { "acc": 0.9599432, "epoch": 0.0937426763534099, "grad_norm": 5.3152594566345215, "learning_rate": 6.17399393582222e-06, "loss": 0.37399163, "memory(GiB)": 13.7, "step": 200, "train_speed(iter/s)": 1.244491 }, { "acc": 0.94327631, "epoch": 0.09608624326224514, "grad_norm": 16.009845733642578, "learning_rate": 6.202767606753127e-06, "loss": 0.47419367, "memory(GiB)": 13.7, "step": 205, "train_speed(iter/s)": 1.250355 }, { "acc": 0.94939985, "epoch": 0.09842981017108038, "grad_norm": 13.5386323928833, "learning_rate": 6.23084786828231e-06, "loss": 0.46035404, "memory(GiB)": 13.7, "step": 210, "train_speed(iter/s)": 1.257851 }, { "acc": 0.89467258, "epoch": 0.10077337707991563, "grad_norm": 24.628019332885742, "learning_rate": 6.258267356068121e-06, "loss": 0.76980696, "memory(GiB)": 13.7, "step": 215, "train_speed(iter/s)": 1.263582 }, { "acc": 0.96384926, "epoch": 0.10311694398875088, "grad_norm": 3.371161460876465, "learning_rate": 6.2850564546228665e-06, "loss": 0.32467041, "memory(GiB)": 13.7, "step": 220, "train_speed(iter/s)": 1.269588 }, { "acc": 0.90851393, "epoch": 0.10546051089758612, "grad_norm": 23.517005920410156, "learning_rate": 6.311243499707637e-06, "loss": 0.7794302, "memory(GiB)": 13.7, "step": 225, "train_speed(iter/s)": 1.274623 }, { "acc": 0.93922348, "epoch": 0.10780407780642137, "grad_norm": 52.51816177368164, "learning_rate": 6.336854958480278e-06, "loss": 0.50722022, "memory(GiB)": 13.7, "step": 230, "train_speed(iter/s)": 1.280812 }, { "acc": 0.94232616, "epoch": 0.11014764471525662, "grad_norm": 16.423213958740234, "learning_rate": 6.361915590267854e-06, "loss": 0.49406967, "memory(GiB)": 13.7, "step": 235, "train_speed(iter/s)": 1.285116 }, { "acc": 0.92007103, "epoch": 0.11249121162409187, "grad_norm": 69.34735870361328, "learning_rate": 6.386448590411726e-06, "loss": 0.66728969, "memory(GiB)": 13.7, "step": 240, "train_speed(iter/s)": 1.289543 }, { "acc": 0.92937498, "epoch": 0.11483477853292712, "grad_norm": 28.151479721069336, "learning_rate": 6.410475719279925e-06, "loss": 0.65406895, "memory(GiB)": 13.7, "step": 245, "train_speed(iter/s)": 1.293927 }, { "acc": 0.92235565, "epoch": 0.11717834544176237, "grad_norm": 28.73829460144043, "learning_rate": 6.434017418245161e-06, "loss": 0.66648808, "memory(GiB)": 13.7, "step": 250, "train_speed(iter/s)": 1.297929 }, { "acc": 0.91913691, "epoch": 0.11952191235059761, "grad_norm": 12.44413948059082, "learning_rate": 6.457092914177489e-06, "loss": 0.7049654, "memory(GiB)": 13.7, "step": 255, "train_speed(iter/s)": 1.301855 }, { "acc": 0.94980888, "epoch": 0.12186547925943286, "grad_norm": 8.249322891235352, "learning_rate": 6.479720313790043e-06, "loss": 0.35471454, "memory(GiB)": 13.7, "step": 260, "train_speed(iter/s)": 1.305361 }, { "acc": 0.96967258, "epoch": 0.1242090461682681, "grad_norm": 26.4986515045166, "learning_rate": 6.501916688997754e-06, "loss": 0.28521855, "memory(GiB)": 13.7, "step": 265, "train_speed(iter/s)": 1.309018 }, { "acc": 0.959375, "epoch": 0.12655261307710336, "grad_norm": 11.708891868591309, "learning_rate": 6.523698154297142e-06, "loss": 0.39313886, "memory(GiB)": 13.7, "step": 270, "train_speed(iter/s)": 1.31389 }, { "acc": 0.94966393, "epoch": 0.1288961799859386, "grad_norm": 16.02495765686035, "learning_rate": 6.5450799370458065e-06, "loss": 0.47870007, "memory(GiB)": 13.7, "step": 275, "train_speed(iter/s)": 1.316807 }, { "acc": 0.94462185, "epoch": 0.13123974689477386, "grad_norm": 19.642139434814453, "learning_rate": 6.566076441409341e-06, "loss": 0.51291795, "memory(GiB)": 13.7, "step": 280, "train_speed(iter/s)": 1.320067 }, { "acc": 0.93043156, "epoch": 0.1335833138036091, "grad_norm": 4.865396976470947, "learning_rate": 6.586701306648306e-06, "loss": 0.43843241, "memory(GiB)": 13.7, "step": 285, "train_speed(iter/s)": 1.324103 }, { "acc": 0.92899303, "epoch": 0.13592688071244435, "grad_norm": 23.99181365966797, "learning_rate": 6.606967460335964e-06, "loss": 0.59981699, "memory(GiB)": 13.7, "step": 290, "train_speed(iter/s)": 1.328413 }, { "acc": 0.93604078, "epoch": 0.13827044762127957, "grad_norm": 15.749946594238281, "learning_rate": 6.626887167026808e-06, "loss": 0.52561812, "memory(GiB)": 13.7, "step": 295, "train_speed(iter/s)": 1.330848 }, { "acc": 0.91384201, "epoch": 0.14061401453011482, "grad_norm": 7.7227044105529785, "learning_rate": 6.646472072834666e-06, "loss": 0.77837791, "memory(GiB)": 13.7, "step": 300, "train_speed(iter/s)": 1.334273 }, { "acc": 0.94072933, "epoch": 0.14295758143895007, "grad_norm": 56.41731643676758, "learning_rate": 6.665733246326175e-06, "loss": 0.41867404, "memory(GiB)": 13.7, "step": 305, "train_speed(iter/s)": 1.336856 }, { "acc": 0.94104605, "epoch": 0.14530114834778532, "grad_norm": 18.778913497924805, "learning_rate": 6.684681216089089e-06, "loss": 0.4925662, "memory(GiB)": 13.7, "step": 310, "train_speed(iter/s)": 1.340311 }, { "acc": 0.9509798, "epoch": 0.14764471525662057, "grad_norm": 17.81103515625, "learning_rate": 6.7033260052947565e-06, "loss": 0.39596946, "memory(GiB)": 13.7, "step": 315, "train_speed(iter/s)": 1.343149 }, { "acc": 0.94083328, "epoch": 0.14998828216545582, "grad_norm": 9.39840316772461, "learning_rate": 6.721677163538756e-06, "loss": 0.45275054, "memory(GiB)": 13.7, "step": 320, "train_speed(iter/s)": 1.345851 }, { "acc": 0.92486744, "epoch": 0.15233184907429106, "grad_norm": 21.553722381591797, "learning_rate": 6.739743796212984e-06, "loss": 0.6255559, "memory(GiB)": 13.7, "step": 325, "train_speed(iter/s)": 1.348383 }, { "acc": 0.92512741, "epoch": 0.1546754159831263, "grad_norm": 10.306232452392578, "learning_rate": 6.757534591635312e-06, "loss": 0.71740732, "memory(GiB)": 13.7, "step": 330, "train_speed(iter/s)": 1.349557 }, { "acc": 0.91537933, "epoch": 0.15701898289196156, "grad_norm": 109.26817321777344, "learning_rate": 6.775057846139185e-06, "loss": 0.68918648, "memory(GiB)": 13.7, "step": 335, "train_speed(iter/s)": 1.35284 }, { "acc": 0.95430059, "epoch": 0.1593625498007968, "grad_norm": 3.8184053897857666, "learning_rate": 6.792321487304519e-06, "loss": 0.33101661, "memory(GiB)": 13.7, "step": 340, "train_speed(iter/s)": 1.355782 }, { "acc": 0.9182435, "epoch": 0.16170611670963206, "grad_norm": 25.077871322631836, "learning_rate": 6.809333095492726e-06, "loss": 0.49172106, "memory(GiB)": 13.7, "step": 345, "train_speed(iter/s)": 1.358042 }, { "acc": 0.94647274, "epoch": 0.1640496836184673, "grad_norm": 55.18722915649414, "learning_rate": 6.8260999238322814e-06, "loss": 0.45929823, "memory(GiB)": 13.7, "step": 350, "train_speed(iter/s)": 1.360832 }, { "acc": 0.95144844, "epoch": 0.16639325052730256, "grad_norm": 8.207587242126465, "learning_rate": 6.842628916786747e-06, "loss": 0.50476933, "memory(GiB)": 13.7, "step": 355, "train_speed(iter/s)": 1.363622 }, { "acc": 0.93142014, "epoch": 0.1687368174361378, "grad_norm": 153.62496948242188, "learning_rate": 6.858926727424172e-06, "loss": 0.49843264, "memory(GiB)": 13.7, "step": 360, "train_speed(iter/s)": 1.365122 }, { "acc": 0.9295536, "epoch": 0.17108038434497305, "grad_norm": 137.7303466796875, "learning_rate": 6.874999733495412e-06, "loss": 0.53842278, "memory(GiB)": 13.7, "step": 365, "train_speed(iter/s)": 1.368139 }, { "acc": 0.94353132, "epoch": 0.1734239512538083, "grad_norm": 23.323732376098633, "learning_rate": 6.890854052418556e-06, "loss": 0.39474516, "memory(GiB)": 13.7, "step": 370, "train_speed(iter/s)": 1.371056 }, { "acc": 0.94267426, "epoch": 0.17576751816264355, "grad_norm": 10.554622650146484, "learning_rate": 6.906495555257607e-06, "loss": 0.53215766, "memory(GiB)": 13.7, "step": 375, "train_speed(iter/s)": 1.373455 }, { "acc": 0.93565025, "epoch": 0.1781110850714788, "grad_norm": 13.877140998840332, "learning_rate": 6.921929879775335e-06, "loss": 0.58664808, "memory(GiB)": 13.7, "step": 380, "train_speed(iter/s)": 1.375965 }, { "acc": 0.94248915, "epoch": 0.18045465198031405, "grad_norm": 17.77116584777832, "learning_rate": 6.937162442632928e-06, "loss": 0.47987828, "memory(GiB)": 13.7, "step": 385, "train_speed(iter/s)": 1.377774 }, { "acc": 0.91304626, "epoch": 0.1827982188891493, "grad_norm": 14.801981925964355, "learning_rate": 6.952198450802489e-06, "loss": 0.78061514, "memory(GiB)": 13.7, "step": 390, "train_speed(iter/s)": 1.380399 }, { "acc": 0.95791664, "epoch": 0.18514178579798454, "grad_norm": 11.724641799926758, "learning_rate": 6.967042912252593e-06, "loss": 0.42429113, "memory(GiB)": 13.7, "step": 395, "train_speed(iter/s)": 1.383125 }, { "acc": 0.95688667, "epoch": 0.1874853527068198, "grad_norm": 13.330667495727539, "learning_rate": 6.981700645961697e-06, "loss": 0.29880164, "memory(GiB)": 13.7, "step": 400, "train_speed(iter/s)": 1.384971 }, { "acc": 0.93025646, "epoch": 0.18982891961565504, "grad_norm": 61.05171203613281, "learning_rate": 6.996176291309588e-06, "loss": 0.53117738, "memory(GiB)": 13.7, "step": 405, "train_speed(iter/s)": 1.387243 }, { "acc": 0.94292316, "epoch": 0.1921724865244903, "grad_norm": 22.22393798828125, "learning_rate": 7.010474316892604e-06, "loss": 0.5279984, "memory(GiB)": 13.7, "step": 410, "train_speed(iter/s)": 1.388341 }, { "acc": 0.93440342, "epoch": 0.1945160534333255, "grad_norm": 4.981116771697998, "learning_rate": 7.024599028804561e-06, "loss": 0.56669588, "memory(GiB)": 13.7, "step": 415, "train_speed(iter/s)": 1.390593 }, { "acc": 0.95901489, "epoch": 0.19685962034216076, "grad_norm": 13.733412742614746, "learning_rate": 7.038554578421787e-06, "loss": 0.41162195, "memory(GiB)": 13.7, "step": 420, "train_speed(iter/s)": 1.392802 }, { "acc": 0.92579136, "epoch": 0.199203187250996, "grad_norm": 18.293087005615234, "learning_rate": 7.05234496972746e-06, "loss": 0.50074825, "memory(GiB)": 13.7, "step": 425, "train_speed(iter/s)": 1.394719 }, { "acc": 0.95209827, "epoch": 0.20154675415983125, "grad_norm": 7.398438453674316, "learning_rate": 7.065974066207598e-06, "loss": 0.40505586, "memory(GiB)": 13.7, "step": 430, "train_speed(iter/s)": 1.396921 }, { "acc": 0.93570251, "epoch": 0.2038903210686665, "grad_norm": 52.570556640625, "learning_rate": 7.079445597348412e-06, "loss": 0.38889692, "memory(GiB)": 13.7, "step": 435, "train_speed(iter/s)": 1.3983 }, { "acc": 0.94045563, "epoch": 0.20623388797750175, "grad_norm": 19.73626136779785, "learning_rate": 7.092763164762342e-06, "loss": 0.53413043, "memory(GiB)": 13.7, "step": 440, "train_speed(iter/s)": 1.39945 }, { "acc": 0.9010705, "epoch": 0.208577454886337, "grad_norm": 14.353471755981445, "learning_rate": 7.105930247967991e-06, "loss": 0.76549401, "memory(GiB)": 13.7, "step": 445, "train_speed(iter/s)": 1.401444 }, { "acc": 0.93669643, "epoch": 0.21092102179517225, "grad_norm": 30.272554397583008, "learning_rate": 7.118950209847113e-06, "loss": 0.42661614, "memory(GiB)": 13.7, "step": 450, "train_speed(iter/s)": 1.402818 }, { "acc": 0.93669682, "epoch": 0.2132645887040075, "grad_norm": 18.092199325561523, "learning_rate": 7.131826301800104e-06, "loss": 0.59193163, "memory(GiB)": 13.7, "step": 455, "train_speed(iter/s)": 1.405525 }, { "acc": 0.92135906, "epoch": 0.21560815561284274, "grad_norm": 19.370960235595703, "learning_rate": 7.144561668619756e-06, "loss": 0.59287977, "memory(GiB)": 13.7, "step": 460, "train_speed(iter/s)": 1.407846 }, { "acc": 0.91335402, "epoch": 0.217951722521678, "grad_norm": 20.170787811279297, "learning_rate": 7.157159353101536e-06, "loss": 0.69164958, "memory(GiB)": 13.7, "step": 465, "train_speed(iter/s)": 1.408617 }, { "acc": 0.91824913, "epoch": 0.22029528943051324, "grad_norm": 14.79083251953125, "learning_rate": 7.169622300407332e-06, "loss": 0.61680269, "memory(GiB)": 13.7, "step": 470, "train_speed(iter/s)": 1.409761 }, { "acc": 0.93749866, "epoch": 0.2226388563393485, "grad_norm": 12.71214485168457, "learning_rate": 7.181953362198276e-06, "loss": 0.56734819, "memory(GiB)": 13.7, "step": 475, "train_speed(iter/s)": 1.411473 }, { "acc": 0.96002703, "epoch": 0.22498242324818374, "grad_norm": 14.86760425567627, "learning_rate": 7.194155300551203e-06, "loss": 0.3621547, "memory(GiB)": 13.7, "step": 480, "train_speed(iter/s)": 1.41304 }, { "acc": 0.93871803, "epoch": 0.227325990157019, "grad_norm": 11.031716346740723, "learning_rate": 7.2062307916721445e-06, "loss": 0.43626819, "memory(GiB)": 13.7, "step": 485, "train_speed(iter/s)": 1.414534 }, { "acc": 0.92226763, "epoch": 0.22966955706585424, "grad_norm": 45.30253219604492, "learning_rate": 7.218182429419401e-06, "loss": 0.76675038, "memory(GiB)": 13.7, "step": 490, "train_speed(iter/s)": 1.415475 }, { "acc": 0.91231384, "epoch": 0.23201312397468948, "grad_norm": 36.52009582519531, "learning_rate": 7.230012728647758e-06, "loss": 0.74716158, "memory(GiB)": 13.7, "step": 495, "train_speed(iter/s)": 1.416182 }, { "acc": 0.94873219, "epoch": 0.23435669088352473, "grad_norm": 9.566007614135742, "learning_rate": 7.2417241283846376e-06, "loss": 0.27785511, "memory(GiB)": 13.7, "step": 500, "train_speed(iter/s)": 1.417573 }, { "acc": 0.92172461, "epoch": 0.23670025779235998, "grad_norm": 8.248847007751465, "learning_rate": 7.253318994848236e-06, "loss": 0.613765, "memory(GiB)": 13.7, "step": 505, "train_speed(iter/s)": 1.41963 }, { "acc": 0.92406693, "epoch": 0.23904382470119523, "grad_norm": 15.82424545288086, "learning_rate": 7.264799624316965e-06, "loss": 0.54952898, "memory(GiB)": 13.7, "step": 510, "train_speed(iter/s)": 1.42115 }, { "acc": 0.92580357, "epoch": 0.24138739161003048, "grad_norm": 8.522337913513184, "learning_rate": 7.27616824585893e-06, "loss": 0.61504154, "memory(GiB)": 13.7, "step": 515, "train_speed(iter/s)": 1.422387 }, { "acc": 0.93562603, "epoch": 0.24373095851886573, "grad_norm": 13.539356231689453, "learning_rate": 7.287427023929521e-06, "loss": 0.61969471, "memory(GiB)": 13.7, "step": 520, "train_speed(iter/s)": 1.424372 }, { "acc": 0.933074, "epoch": 0.24607452542770097, "grad_norm": 16.927488327026367, "learning_rate": 7.298578060844728e-06, "loss": 0.45399218, "memory(GiB)": 13.7, "step": 525, "train_speed(iter/s)": 1.426237 }, { "acc": 0.91239853, "epoch": 0.2484180923365362, "grad_norm": 83.28849029541016, "learning_rate": 7.3096233991372305e-06, "loss": 0.70754528, "memory(GiB)": 13.7, "step": 530, "train_speed(iter/s)": 1.427831 }, { "acc": 0.93219471, "epoch": 0.25076165924537147, "grad_norm": 82.86286163330078, "learning_rate": 7.320565023801859e-06, "loss": 0.5424047, "memory(GiB)": 13.7, "step": 535, "train_speed(iter/s)": 1.428407 }, { "acc": 0.93686867, "epoch": 0.2531052261542067, "grad_norm": 7.044783592224121, "learning_rate": 7.331404864436619e-06, "loss": 0.54315419, "memory(GiB)": 13.7, "step": 540, "train_speed(iter/s)": 1.429559 }, { "acc": 0.94280224, "epoch": 0.25544879306304197, "grad_norm": 13.72964859008789, "learning_rate": 7.342144797285042e-06, "loss": 0.4571528, "memory(GiB)": 13.7, "step": 545, "train_speed(iter/s)": 1.430806 }, { "acc": 0.93639336, "epoch": 0.2577923599718772, "grad_norm": 14.994649887084961, "learning_rate": 7.352786647185283e-06, "loss": 0.35833638, "memory(GiB)": 13.7, "step": 550, "train_speed(iter/s)": 1.432159 }, { "acc": 0.92542486, "epoch": 0.26013592688071246, "grad_norm": 45.92943572998047, "learning_rate": 7.363332189431003e-06, "loss": 0.64804773, "memory(GiB)": 13.7, "step": 555, "train_speed(iter/s)": 1.43333 }, { "acc": 0.91932259, "epoch": 0.2624794937895477, "grad_norm": 5.633096694946289, "learning_rate": 7.373783151548816e-06, "loss": 0.56720748, "memory(GiB)": 13.7, "step": 560, "train_speed(iter/s)": 1.434154 }, { "acc": 0.94199829, "epoch": 0.26482306069838296, "grad_norm": 9.260346412658691, "learning_rate": 7.3841412149967375e-06, "loss": 0.47847128, "memory(GiB)": 13.7, "step": 565, "train_speed(iter/s)": 1.435087 }, { "acc": 0.94308872, "epoch": 0.2671666276072182, "grad_norm": 12.471441268920898, "learning_rate": 7.394408016787783e-06, "loss": 0.43882389, "memory(GiB)": 13.7, "step": 570, "train_speed(iter/s)": 1.436258 }, { "acc": 0.91120539, "epoch": 0.26951019451605346, "grad_norm": 66.03192901611328, "learning_rate": 7.404585151042697e-06, "loss": 0.64140396, "memory(GiB)": 13.7, "step": 575, "train_speed(iter/s)": 1.437596 }, { "acc": 0.94149609, "epoch": 0.2718537614248887, "grad_norm": 14.967682838439941, "learning_rate": 7.414674170475442e-06, "loss": 0.48561959, "memory(GiB)": 13.7, "step": 580, "train_speed(iter/s)": 1.439481 }, { "acc": 0.94018984, "epoch": 0.27419732833372396, "grad_norm": 3.0722525119781494, "learning_rate": 7.4246765878149365e-06, "loss": 0.39963131, "memory(GiB)": 13.7, "step": 585, "train_speed(iter/s)": 1.440686 }, { "acc": 0.91085272, "epoch": 0.27654089524255915, "grad_norm": 32.622989654541016, "learning_rate": 7.434593877166285e-06, "loss": 0.63852124, "memory(GiB)": 13.7, "step": 590, "train_speed(iter/s)": 1.44165 }, { "acc": 0.90302324, "epoch": 0.2788844621513944, "grad_norm": 53.7294807434082, "learning_rate": 7.4444274753145805e-06, "loss": 0.85247946, "memory(GiB)": 13.7, "step": 595, "train_speed(iter/s)": 1.442421 }, { "acc": 0.95172663, "epoch": 0.28122802906022965, "grad_norm": 24.86988067626953, "learning_rate": 7.454178782974143e-06, "loss": 0.41565504, "memory(GiB)": 13.7, "step": 600, "train_speed(iter/s)": 1.443441 }, { "acc": 0.94080114, "epoch": 0.2835715959690649, "grad_norm": 35.52899169921875, "learning_rate": 7.463849165985929e-06, "loss": 0.55279884, "memory(GiB)": 13.7, "step": 605, "train_speed(iter/s)": 1.444413 }, { "acc": 0.92430172, "epoch": 0.28591516287790014, "grad_norm": 20.72629737854004, "learning_rate": 7.473439956465651e-06, "loss": 0.62414274, "memory(GiB)": 13.7, "step": 610, "train_speed(iter/s)": 1.445663 }, { "acc": 0.93964691, "epoch": 0.2882587297867354, "grad_norm": 12.276875495910645, "learning_rate": 7.48295245390505e-06, "loss": 0.43076105, "memory(GiB)": 13.7, "step": 615, "train_speed(iter/s)": 1.446431 }, { "acc": 0.93828735, "epoch": 0.29060229669557064, "grad_norm": 39.22727584838867, "learning_rate": 7.492387926228567e-06, "loss": 0.45804334, "memory(GiB)": 13.7, "step": 620, "train_speed(iter/s)": 1.44733 }, { "acc": 0.93639622, "epoch": 0.2929458636044059, "grad_norm": 50.679813385009766, "learning_rate": 7.501747610807578e-06, "loss": 0.50700817, "memory(GiB)": 13.7, "step": 625, "train_speed(iter/s)": 1.448388 }, { "acc": 0.93451681, "epoch": 0.29528943051324114, "grad_norm": 10.626431465148926, "learning_rate": 7.511032715434233e-06, "loss": 0.41069522, "memory(GiB)": 13.7, "step": 630, "train_speed(iter/s)": 1.449923 }, { "acc": 0.93044968, "epoch": 0.2976329974220764, "grad_norm": 70.3131103515625, "learning_rate": 7.520244419256781e-06, "loss": 0.49552631, "memory(GiB)": 13.7, "step": 635, "train_speed(iter/s)": 1.451041 }, { "acc": 0.90376892, "epoch": 0.29997656433091163, "grad_norm": 26.834829330444336, "learning_rate": 7.529383873678233e-06, "loss": 0.67799854, "memory(GiB)": 13.7, "step": 640, "train_speed(iter/s)": 1.451837 }, { "acc": 0.95850201, "epoch": 0.3023201312397469, "grad_norm": 63.649085998535156, "learning_rate": 7.538452203220046e-06, "loss": 0.38767571, "memory(GiB)": 13.7, "step": 645, "train_speed(iter/s)": 1.452284 }, { "acc": 0.93675776, "epoch": 0.30466369814858213, "grad_norm": 15.333565711975098, "learning_rate": 7.5474505063524606e-06, "loss": 0.44747386, "memory(GiB)": 13.7, "step": 650, "train_speed(iter/s)": 1.453148 }, { "acc": 0.92040291, "epoch": 0.3070072650574174, "grad_norm": 20.647167205810547, "learning_rate": 7.5563798562930336e-06, "loss": 0.67334819, "memory(GiB)": 13.7, "step": 655, "train_speed(iter/s)": 1.453342 }, { "acc": 0.91853228, "epoch": 0.3093508319662526, "grad_norm": 5.709941864013672, "learning_rate": 7.5652413017747885e-06, "loss": 0.49667997, "memory(GiB)": 13.7, "step": 660, "train_speed(iter/s)": 1.454171 }, { "acc": 0.92491951, "epoch": 0.3116943988750879, "grad_norm": 24.914522171020508, "learning_rate": 7.574035867785397e-06, "loss": 0.51631827, "memory(GiB)": 13.7, "step": 665, "train_speed(iter/s)": 1.454996 }, { "acc": 0.90987186, "epoch": 0.3140379657839231, "grad_norm": 8.172112464904785, "learning_rate": 7.582764556278662e-06, "loss": 0.55154305, "memory(GiB)": 13.7, "step": 670, "train_speed(iter/s)": 1.455531 }, { "acc": 0.92048359, "epoch": 0.31638153269275837, "grad_norm": 25.06749153137207, "learning_rate": 7.59142834685956e-06, "loss": 0.60502119, "memory(GiB)": 13.7, "step": 675, "train_speed(iter/s)": 1.456446 }, { "acc": 0.95114594, "epoch": 0.3187250996015936, "grad_norm": 7.022165298461914, "learning_rate": 7.600028197443996e-06, "loss": 0.38246336, "memory(GiB)": 13.7, "step": 680, "train_speed(iter/s)": 1.457172 }, { "acc": 0.93819208, "epoch": 0.32106866651042887, "grad_norm": 24.211719512939453, "learning_rate": 7.6085650448944e-06, "loss": 0.37768383, "memory(GiB)": 13.7, "step": 685, "train_speed(iter/s)": 1.458479 }, { "acc": 0.92371483, "epoch": 0.3234122334192641, "grad_norm": 39.94783401489258, "learning_rate": 7.617039805632201e-06, "loss": 0.62169504, "memory(GiB)": 13.7, "step": 690, "train_speed(iter/s)": 1.459056 }, { "acc": 0.9222023, "epoch": 0.32575580032809937, "grad_norm": 47.2852783203125, "learning_rate": 7.625453376228198e-06, "loss": 0.53810873, "memory(GiB)": 13.7, "step": 695, "train_speed(iter/s)": 1.459622 }, { "acc": 0.90160942, "epoch": 0.3280993672369346, "grad_norm": 45.968631744384766, "learning_rate": 7.633806633971758e-06, "loss": 0.69917231, "memory(GiB)": 13.7, "step": 700, "train_speed(iter/s)": 1.460373 }, { "acc": 0.92642355, "epoch": 0.33044293414576986, "grad_norm": 106.01464080810547, "learning_rate": 7.642100437419778e-06, "loss": 0.60224648, "memory(GiB)": 13.7, "step": 705, "train_speed(iter/s)": 1.461087 }, { "acc": 0.92043276, "epoch": 0.3327865010546051, "grad_norm": 39.98884963989258, "learning_rate": 7.650335626926223e-06, "loss": 0.64041529, "memory(GiB)": 13.7, "step": 710, "train_speed(iter/s)": 1.461504 }, { "acc": 0.9365633, "epoch": 0.33513006796344036, "grad_norm": 367.3218078613281, "learning_rate": 7.658513025153106e-06, "loss": 0.34341388, "memory(GiB)": 13.7, "step": 715, "train_speed(iter/s)": 1.462672 }, { "acc": 0.93229675, "epoch": 0.3374736348722756, "grad_norm": 12.069906234741211, "learning_rate": 7.66663343756365e-06, "loss": 0.5711978, "memory(GiB)": 13.7, "step": 720, "train_speed(iter/s)": 1.462979 }, { "acc": 0.95274754, "epoch": 0.33981720178111086, "grad_norm": 8.559541702270508, "learning_rate": 7.674697652898383e-06, "loss": 0.35071921, "memory(GiB)": 13.7, "step": 725, "train_speed(iter/s)": 1.463515 }, { "acc": 0.9111661, "epoch": 0.3421607686899461, "grad_norm": 10.273635864257812, "learning_rate": 7.68270644363489e-06, "loss": 0.65954318, "memory(GiB)": 13.7, "step": 730, "train_speed(iter/s)": 1.464023 }, { "acc": 0.89719992, "epoch": 0.34450433559878135, "grad_norm": 15.306828498840332, "learning_rate": 7.690660566431848e-06, "loss": 0.72256656, "memory(GiB)": 13.7, "step": 735, "train_speed(iter/s)": 1.464454 }, { "acc": 0.88961506, "epoch": 0.3468479025076166, "grad_norm": 21.694114685058594, "learning_rate": 7.698560762558033e-06, "loss": 0.81938362, "memory(GiB)": 13.7, "step": 740, "train_speed(iter/s)": 1.464765 }, { "acc": 0.93970737, "epoch": 0.34919146941645185, "grad_norm": 20.14076042175293, "learning_rate": 7.706407758306863e-06, "loss": 0.50838246, "memory(GiB)": 13.7, "step": 745, "train_speed(iter/s)": 1.464815 }, { "acc": 0.94440479, "epoch": 0.3515350363252871, "grad_norm": 10.389219284057617, "learning_rate": 7.714202265397085e-06, "loss": 0.4367939, "memory(GiB)": 13.7, "step": 750, "train_speed(iter/s)": 1.465596 }, { "acc": 0.92057991, "epoch": 0.35387860323412235, "grad_norm": 38.78284454345703, "learning_rate": 7.721944981360152e-06, "loss": 0.48701458, "memory(GiB)": 13.7, "step": 755, "train_speed(iter/s)": 1.466309 }, { "acc": 0.93340778, "epoch": 0.3562221701429576, "grad_norm": 168.21075439453125, "learning_rate": 7.729636589914814e-06, "loss": 0.57707071, "memory(GiB)": 13.7, "step": 760, "train_speed(iter/s)": 1.466849 }, { "acc": 0.92811861, "epoch": 0.35856573705179284, "grad_norm": 10.903087615966797, "learning_rate": 7.737277761329413e-06, "loss": 0.49227571, "memory(GiB)": 13.7, "step": 765, "train_speed(iter/s)": 1.467137 }, { "acc": 0.92275276, "epoch": 0.3609093039606281, "grad_norm": 49.789886474609375, "learning_rate": 7.744869152772403e-06, "loss": 0.48347993, "memory(GiB)": 13.7, "step": 770, "train_speed(iter/s)": 1.467334 }, { "acc": 0.94684429, "epoch": 0.36325287086946334, "grad_norm": 15.57144832611084, "learning_rate": 7.752411408651508e-06, "loss": 0.4073545, "memory(GiB)": 13.7, "step": 775, "train_speed(iter/s)": 1.468084 }, { "acc": 0.9350812, "epoch": 0.3655964377782986, "grad_norm": 20.29900550842285, "learning_rate": 7.759905160941968e-06, "loss": 0.55926666, "memory(GiB)": 13.7, "step": 780, "train_speed(iter/s)": 1.46837 }, { "acc": 0.90552578, "epoch": 0.36794000468713384, "grad_norm": 53.499610900878906, "learning_rate": 7.767351029504318e-06, "loss": 0.7648242, "memory(GiB)": 13.7, "step": 785, "train_speed(iter/s)": 1.468698 }, { "acc": 0.91478844, "epoch": 0.3702835715959691, "grad_norm": 13.713704109191895, "learning_rate": 7.77474962239207e-06, "loss": 0.70477953, "memory(GiB)": 13.7, "step": 790, "train_speed(iter/s)": 1.46939 }, { "acc": 0.95369053, "epoch": 0.37262713850480433, "grad_norm": 15.852684020996094, "learning_rate": 7.782101536149678e-06, "loss": 0.36244845, "memory(GiB)": 13.7, "step": 795, "train_speed(iter/s)": 1.469624 }, { "acc": 0.93913193, "epoch": 0.3749707054136396, "grad_norm": 9.645682334899902, "learning_rate": 7.789407356101173e-06, "loss": 0.46597791, "memory(GiB)": 13.7, "step": 800, "train_speed(iter/s)": 1.469629 }, { "acc": 0.92397957, "epoch": 0.37731427232247483, "grad_norm": 9.243246078491211, "learning_rate": 7.796667656629818e-06, "loss": 0.48799801, "memory(GiB)": 13.7, "step": 805, "train_speed(iter/s)": 1.470164 }, { "acc": 0.941994, "epoch": 0.3796578392313101, "grad_norm": 28.100919723510742, "learning_rate": 7.803883001449065e-06, "loss": 0.40982046, "memory(GiB)": 13.7, "step": 810, "train_speed(iter/s)": 1.470765 }, { "acc": 0.93828602, "epoch": 0.3820014061401453, "grad_norm": 8.304512023925781, "learning_rate": 7.811053943865229e-06, "loss": 0.39699681, "memory(GiB)": 13.7, "step": 815, "train_speed(iter/s)": 1.470944 }, { "acc": 0.94302082, "epoch": 0.3843449730489806, "grad_norm": 16.90865707397461, "learning_rate": 7.81818102703208e-06, "loss": 0.39008343, "memory(GiB)": 13.7, "step": 820, "train_speed(iter/s)": 1.47133 }, { "acc": 0.92611427, "epoch": 0.38668853995781577, "grad_norm": 27.392288208007812, "learning_rate": 7.825264784197728e-06, "loss": 0.47103567, "memory(GiB)": 13.7, "step": 825, "train_speed(iter/s)": 1.47196 }, { "acc": 0.93820353, "epoch": 0.389032106866651, "grad_norm": 36.91737747192383, "learning_rate": 7.832305738944038e-06, "loss": 0.49320149, "memory(GiB)": 13.7, "step": 830, "train_speed(iter/s)": 1.472418 }, { "acc": 0.93970509, "epoch": 0.39137567377548627, "grad_norm": 75.50379180908203, "learning_rate": 7.839304405418845e-06, "loss": 0.44697189, "memory(GiB)": 13.7, "step": 835, "train_speed(iter/s)": 1.472947 }, { "acc": 0.93725653, "epoch": 0.3937192406843215, "grad_norm": 36.801666259765625, "learning_rate": 7.846261288561265e-06, "loss": 0.4089066, "memory(GiB)": 13.7, "step": 840, "train_speed(iter/s)": 1.47388 }, { "acc": 0.95608912, "epoch": 0.39606280759315676, "grad_norm": 4.901787757873535, "learning_rate": 7.853176884320285e-06, "loss": 0.35820479, "memory(GiB)": 13.7, "step": 845, "train_speed(iter/s)": 1.47417 }, { "acc": 0.93553839, "epoch": 0.398406374501992, "grad_norm": 12.443031311035156, "learning_rate": 7.860051679866938e-06, "loss": 0.4624434, "memory(GiB)": 13.7, "step": 850, "train_speed(iter/s)": 1.474363 }, { "acc": 0.95376568, "epoch": 0.40074994141082726, "grad_norm": 44.634254455566406, "learning_rate": 7.866886153800229e-06, "loss": 0.31047556, "memory(GiB)": 13.7, "step": 855, "train_speed(iter/s)": 1.474624 }, { "acc": 0.93597403, "epoch": 0.4030935083196625, "grad_norm": 23.492469787597656, "learning_rate": 7.873680776347076e-06, "loss": 0.53762007, "memory(GiB)": 13.7, "step": 860, "train_speed(iter/s)": 1.475103 }, { "acc": 0.91557274, "epoch": 0.40543707522849776, "grad_norm": 10.16146183013916, "learning_rate": 7.880436009556444e-06, "loss": 0.75402322, "memory(GiB)": 13.7, "step": 865, "train_speed(iter/s)": 1.475381 }, { "acc": 0.93701773, "epoch": 0.407780642137333, "grad_norm": 40.522830963134766, "learning_rate": 7.887152307487888e-06, "loss": 0.41278152, "memory(GiB)": 13.7, "step": 870, "train_speed(iter/s)": 1.476102 }, { "acc": 0.94872026, "epoch": 0.41012420904616825, "grad_norm": 6.39769172668457, "learning_rate": 7.893830116394698e-06, "loss": 0.33766048, "memory(GiB)": 13.7, "step": 875, "train_speed(iter/s)": 1.476455 }, { "acc": 0.90014477, "epoch": 0.4124677759550035, "grad_norm": 16.423694610595703, "learning_rate": 7.900469874901819e-06, "loss": 0.76210237, "memory(GiB)": 13.7, "step": 880, "train_speed(iter/s)": 1.476889 }, { "acc": 0.92018013, "epoch": 0.41481134286383875, "grad_norm": 33.69840621948242, "learning_rate": 7.907072014178731e-06, "loss": 0.38467078, "memory(GiB)": 13.7, "step": 885, "train_speed(iter/s)": 1.477516 }, { "acc": 0.97008305, "epoch": 0.417154909772674, "grad_norm": 8.302285194396973, "learning_rate": 7.913636958107468e-06, "loss": 0.23028545, "memory(GiB)": 13.7, "step": 890, "train_speed(iter/s)": 1.478091 }, { "acc": 0.93284721, "epoch": 0.41949847668150925, "grad_norm": 17.656442642211914, "learning_rate": 7.92016512344592e-06, "loss": 0.48090515, "memory(GiB)": 13.7, "step": 895, "train_speed(iter/s)": 1.478364 }, { "acc": 0.94673977, "epoch": 0.4218420435903445, "grad_norm": 5.906007289886475, "learning_rate": 7.92665691998659e-06, "loss": 0.30976562, "memory(GiB)": 13.7, "step": 900, "train_speed(iter/s)": 1.478557 }, { "acc": 0.96177492, "epoch": 0.42418561049917974, "grad_norm": 108.13143157958984, "learning_rate": 7.933112750710957e-06, "loss": 0.29988451, "memory(GiB)": 13.7, "step": 905, "train_speed(iter/s)": 1.478636 }, { "acc": 0.91179924, "epoch": 0.426529177408015, "grad_norm": 18.993844985961914, "learning_rate": 7.939533011939583e-06, "loss": 0.62752938, "memory(GiB)": 13.7, "step": 910, "train_speed(iter/s)": 1.479086 }, { "acc": 0.95127468, "epoch": 0.42887274431685024, "grad_norm": 11.264302253723145, "learning_rate": 7.945918093478096e-06, "loss": 0.29745145, "memory(GiB)": 13.7, "step": 915, "train_speed(iter/s)": 1.479364 }, { "acc": 0.91520834, "epoch": 0.4312163112256855, "grad_norm": 32.68629837036133, "learning_rate": 7.952268378759233e-06, "loss": 0.52033029, "memory(GiB)": 13.7, "step": 920, "train_speed(iter/s)": 1.47934 }, { "acc": 0.9252327, "epoch": 0.43355987813452074, "grad_norm": 4.792352676391602, "learning_rate": 7.958584244980973e-06, "loss": 0.48517842, "memory(GiB)": 13.7, "step": 925, "train_speed(iter/s)": 1.479439 }, { "acc": 0.91095715, "epoch": 0.435903445043356, "grad_norm": 78.77403259277344, "learning_rate": 7.964866063241012e-06, "loss": 0.68244591, "memory(GiB)": 13.7, "step": 930, "train_speed(iter/s)": 1.479596 }, { "acc": 0.95828371, "epoch": 0.43824701195219123, "grad_norm": 7.9591851234436035, "learning_rate": 7.971114198667584e-06, "loss": 0.26774719, "memory(GiB)": 13.7, "step": 935, "train_speed(iter/s)": 1.479949 }, { "acc": 0.94174109, "epoch": 0.4405905788610265, "grad_norm": 20.343584060668945, "learning_rate": 7.977329010546809e-06, "loss": 0.37847123, "memory(GiB)": 13.7, "step": 940, "train_speed(iter/s)": 1.480789 }, { "acc": 0.92014885, "epoch": 0.44293414576986173, "grad_norm": 27.112844467163086, "learning_rate": 7.98351085244668e-06, "loss": 0.55382185, "memory(GiB)": 13.7, "step": 945, "train_speed(iter/s)": 1.481052 }, { "acc": 0.88353481, "epoch": 0.445277712678697, "grad_norm": 20.695343017578125, "learning_rate": 7.989660072337753e-06, "loss": 0.83479729, "memory(GiB)": 13.7, "step": 950, "train_speed(iter/s)": 1.481382 }, { "acc": 0.91640396, "epoch": 0.44762127958753223, "grad_norm": 43.99683380126953, "learning_rate": 7.99577701271068e-06, "loss": 0.61567693, "memory(GiB)": 13.7, "step": 955, "train_speed(iter/s)": 1.481586 }, { "acc": 0.92943964, "epoch": 0.4499648464963675, "grad_norm": 20.30463409423828, "learning_rate": 8.00186201069068e-06, "loss": 0.51239276, "memory(GiB)": 13.7, "step": 960, "train_speed(iter/s)": 1.48196 }, { "acc": 0.93325624, "epoch": 0.4523084134052027, "grad_norm": 6.5350260734558105, "learning_rate": 8.007915398149017e-06, "loss": 0.50926361, "memory(GiB)": 13.7, "step": 965, "train_speed(iter/s)": 1.482182 }, { "acc": 0.91809444, "epoch": 0.454651980314038, "grad_norm": 78.84744262695312, "learning_rate": 8.013937501811621e-06, "loss": 0.49619584, "memory(GiB)": 13.7, "step": 970, "train_speed(iter/s)": 1.482751 }, { "acc": 0.91251612, "epoch": 0.4569955472228732, "grad_norm": 53.46773910522461, "learning_rate": 8.019928643364908e-06, "loss": 0.59957423, "memory(GiB)": 13.7, "step": 975, "train_speed(iter/s)": 1.483296 }, { "acc": 0.89870043, "epoch": 0.45933911413170847, "grad_norm": 23.291635513305664, "learning_rate": 8.025889139558878e-06, "loss": 0.62848902, "memory(GiB)": 13.7, "step": 980, "train_speed(iter/s)": 1.483708 }, { "acc": 0.94613476, "epoch": 0.4616826810405437, "grad_norm": 7.685946941375732, "learning_rate": 8.03181930230762e-06, "loss": 0.35615914, "memory(GiB)": 13.7, "step": 985, "train_speed(iter/s)": 1.483921 }, { "acc": 0.92596893, "epoch": 0.46402624794937897, "grad_norm": 55.857666015625, "learning_rate": 8.037719438787236e-06, "loss": 0.47774267, "memory(GiB)": 13.7, "step": 990, "train_speed(iter/s)": 1.484486 }, { "acc": 0.91399517, "epoch": 0.4663698148582142, "grad_norm": 22.032163619995117, "learning_rate": 8.043589851531338e-06, "loss": 0.59088364, "memory(GiB)": 13.7, "step": 995, "train_speed(iter/s)": 1.484742 }, { "acc": 0.94800596, "epoch": 0.46871338176704946, "grad_norm": 11.320577621459961, "learning_rate": 8.049430838524115e-06, "loss": 0.40958524, "memory(GiB)": 13.7, "step": 1000, "train_speed(iter/s)": 1.484939 }, { "acc": 0.90040798, "epoch": 0.4710569486758847, "grad_norm": 29.750516891479492, "learning_rate": 8.05524269329111e-06, "loss": 0.62261567, "memory(GiB)": 13.7, "step": 1005, "train_speed(iter/s)": 1.485213 }, { "acc": 0.93196583, "epoch": 0.47340051558471996, "grad_norm": 16.014204025268555, "learning_rate": 8.061025704987712e-06, "loss": 0.49181185, "memory(GiB)": 13.7, "step": 1010, "train_speed(iter/s)": 1.48586 }, { "acc": 0.92907429, "epoch": 0.4757440824935552, "grad_norm": 14.89755916595459, "learning_rate": 8.066780158485503e-06, "loss": 0.54254503, "memory(GiB)": 13.7, "step": 1015, "train_speed(iter/s)": 1.486252 }, { "acc": 0.90240803, "epoch": 0.47808764940239046, "grad_norm": 33.30708312988281, "learning_rate": 8.072506334456442e-06, "loss": 0.68747888, "memory(GiB)": 13.7, "step": 1020, "train_speed(iter/s)": 1.486708 }, { "acc": 0.96063051, "epoch": 0.4804312163112257, "grad_norm": 35.151004791259766, "learning_rate": 8.078204509455023e-06, "loss": 0.30970826, "memory(GiB)": 13.7, "step": 1025, "train_speed(iter/s)": 1.486813 }, { "acc": 0.95805407, "epoch": 0.48277478322006095, "grad_norm": 16.181930541992188, "learning_rate": 8.083874955998408e-06, "loss": 0.32050173, "memory(GiB)": 13.7, "step": 1030, "train_speed(iter/s)": 1.486877 }, { "acc": 0.94027195, "epoch": 0.4851183501288962, "grad_norm": 12.707511901855469, "learning_rate": 8.089517942644648e-06, "loss": 0.42640738, "memory(GiB)": 13.7, "step": 1035, "train_speed(iter/s)": 1.487148 }, { "acc": 0.92757435, "epoch": 0.48746191703773145, "grad_norm": 22.96230697631836, "learning_rate": 8.095133734068998e-06, "loss": 0.49892807, "memory(GiB)": 13.7, "step": 1040, "train_speed(iter/s)": 1.487396 }, { "acc": 0.92204103, "epoch": 0.4898054839465667, "grad_norm": 11.852816581726074, "learning_rate": 8.100722591138399e-06, "loss": 0.55475016, "memory(GiB)": 13.7, "step": 1045, "train_speed(iter/s)": 1.487701 }, { "acc": 0.9104373, "epoch": 0.49214905085540195, "grad_norm": 27.078508377075195, "learning_rate": 8.106284770984205e-06, "loss": 0.70850334, "memory(GiB)": 13.7, "step": 1050, "train_speed(iter/s)": 1.488106 }, { "acc": 0.97484627, "epoch": 0.4944926177642372, "grad_norm": 9.324882507324219, "learning_rate": 8.11182052707316e-06, "loss": 0.15453634, "memory(GiB)": 13.7, "step": 1055, "train_speed(iter/s)": 1.489027 }, { "acc": 0.95160789, "epoch": 0.4968361846730724, "grad_norm": 8.623847961425781, "learning_rate": 8.117330109276708e-06, "loss": 0.3330318, "memory(GiB)": 13.7, "step": 1060, "train_speed(iter/s)": 1.489705 }, { "acc": 0.89907112, "epoch": 0.49917975158190764, "grad_norm": 22.119449615478516, "learning_rate": 8.12281376393867e-06, "loss": 0.50406332, "memory(GiB)": 13.7, "step": 1065, "train_speed(iter/s)": 1.490324 }, { "acc": 0.90598412, "epoch": 0.5015233184907429, "grad_norm": 84.36983489990234, "learning_rate": 8.128271733941335e-06, "loss": 0.68546486, "memory(GiB)": 13.7, "step": 1070, "train_speed(iter/s)": 1.490904 }, { "acc": 0.93819447, "epoch": 0.5038668853995781, "grad_norm": 35.49376678466797, "learning_rate": 8.133704258770016e-06, "loss": 0.52867217, "memory(GiB)": 13.7, "step": 1075, "train_speed(iter/s)": 1.491434 }, { "acc": 0.93296013, "epoch": 0.5062104523084134, "grad_norm": 9.005691528320312, "learning_rate": 8.139111574576095e-06, "loss": 0.46076174, "memory(GiB)": 13.7, "step": 1080, "train_speed(iter/s)": 1.491602 }, { "acc": 0.92449398, "epoch": 0.5085540192172486, "grad_norm": 26.314306259155273, "learning_rate": 8.144493914238629e-06, "loss": 0.49954882, "memory(GiB)": 13.7, "step": 1085, "train_speed(iter/s)": 1.491801 }, { "acc": 0.91778135, "epoch": 0.5108975861260839, "grad_norm": 16.506481170654297, "learning_rate": 8.14985150742452e-06, "loss": 0.60946512, "memory(GiB)": 13.7, "step": 1090, "train_speed(iter/s)": 1.492225 }, { "acc": 0.92508392, "epoch": 0.5132411530349191, "grad_norm": 20.62222671508789, "learning_rate": 8.155184580647335e-06, "loss": 0.49939475, "memory(GiB)": 13.7, "step": 1095, "train_speed(iter/s)": 1.49252 }, { "acc": 0.90410004, "epoch": 0.5155847199437544, "grad_norm": 10.050755500793457, "learning_rate": 8.16049335732476e-06, "loss": 0.64515128, "memory(GiB)": 13.7, "step": 1100, "train_speed(iter/s)": 1.492912 }, { "acc": 0.91687737, "epoch": 0.5179282868525896, "grad_norm": 76.841064453125, "learning_rate": 8.16577805783476e-06, "loss": 0.6208447, "memory(GiB)": 13.7, "step": 1105, "train_speed(iter/s)": 1.493205 }, { "acc": 0.96101255, "epoch": 0.5202718537614249, "grad_norm": 24.557058334350586, "learning_rate": 8.17103889957048e-06, "loss": 0.27134228, "memory(GiB)": 13.7, "step": 1110, "train_speed(iter/s)": 1.493558 }, { "acc": 0.91825266, "epoch": 0.5226154206702601, "grad_norm": 9.204221725463867, "learning_rate": 8.1762760969939e-06, "loss": 0.57201357, "memory(GiB)": 13.7, "step": 1115, "train_speed(iter/s)": 1.494163 }, { "acc": 0.94040432, "epoch": 0.5249589875790954, "grad_norm": 124.70111083984375, "learning_rate": 8.181489861688294e-06, "loss": 0.3851685, "memory(GiB)": 13.7, "step": 1120, "train_speed(iter/s)": 1.494362 }, { "acc": 0.90793352, "epoch": 0.5273025544879306, "grad_norm": 9.164875030517578, "learning_rate": 8.186680402409532e-06, "loss": 0.56170292, "memory(GiB)": 13.7, "step": 1125, "train_speed(iter/s)": 1.494656 }, { "acc": 0.93147821, "epoch": 0.5296461213967659, "grad_norm": 62.31510543823242, "learning_rate": 8.191847925136214e-06, "loss": 0.42276878, "memory(GiB)": 13.7, "step": 1130, "train_speed(iter/s)": 1.494709 }, { "acc": 0.91682949, "epoch": 0.5319896883056011, "grad_norm": 31.089811325073242, "learning_rate": 8.19699263311874e-06, "loss": 0.57677541, "memory(GiB)": 13.7, "step": 1135, "train_speed(iter/s)": 1.495099 }, { "acc": 0.91048765, "epoch": 0.5343332552144364, "grad_norm": 69.29338836669922, "learning_rate": 8.202114726927259e-06, "loss": 0.64188132, "memory(GiB)": 13.7, "step": 1140, "train_speed(iter/s)": 1.495233 }, { "acc": 0.93715782, "epoch": 0.5366768221232716, "grad_norm": 34.50437545776367, "learning_rate": 8.207214404498604e-06, "loss": 0.37293975, "memory(GiB)": 13.7, "step": 1145, "train_speed(iter/s)": 1.495549 }, { "acc": 0.91422129, "epoch": 0.5390203890321069, "grad_norm": 4.368232727050781, "learning_rate": 8.212291861182173e-06, "loss": 0.51615281, "memory(GiB)": 13.7, "step": 1150, "train_speed(iter/s)": 1.495811 }, { "acc": 0.9346489, "epoch": 0.5413639559409421, "grad_norm": 72.97452545166016, "learning_rate": 8.21734728978485e-06, "loss": 0.41404266, "memory(GiB)": 13.7, "step": 1155, "train_speed(iter/s)": 1.496334 }, { "acc": 0.93531246, "epoch": 0.5437075228497774, "grad_norm": 115.64360809326172, "learning_rate": 8.22238088061492e-06, "loss": 0.42079859, "memory(GiB)": 13.7, "step": 1160, "train_speed(iter/s)": 1.496519 }, { "acc": 0.93360519, "epoch": 0.5460510897586126, "grad_norm": 124.12381744384766, "learning_rate": 8.22739282152506e-06, "loss": 0.53486376, "memory(GiB)": 13.7, "step": 1165, "train_speed(iter/s)": 1.496532 }, { "acc": 0.94594498, "epoch": 0.5483946566674479, "grad_norm": 14.948965072631836, "learning_rate": 8.232383297954413e-06, "loss": 0.36631334, "memory(GiB)": 13.7, "step": 1170, "train_speed(iter/s)": 1.496844 }, { "acc": 0.94387083, "epoch": 0.5507382235762831, "grad_norm": 23.3656063079834, "learning_rate": 8.237352492969749e-06, "loss": 0.34732015, "memory(GiB)": 13.7, "step": 1175, "train_speed(iter/s)": 1.497605 }, { "acc": 0.93777418, "epoch": 0.5530817904851183, "grad_norm": 35.99724197387695, "learning_rate": 8.242300587305761e-06, "loss": 0.41807184, "memory(GiB)": 13.7, "step": 1180, "train_speed(iter/s)": 1.498153 }, { "acc": 0.92744617, "epoch": 0.5554253573939536, "grad_norm": 20.255847930908203, "learning_rate": 8.247227759404516e-06, "loss": 0.47917614, "memory(GiB)": 13.7, "step": 1185, "train_speed(iter/s)": 1.498538 }, { "acc": 0.91813068, "epoch": 0.5577689243027888, "grad_norm": 24.072532653808594, "learning_rate": 8.252134185454057e-06, "loss": 0.49242101, "memory(GiB)": 13.7, "step": 1190, "train_speed(iter/s)": 1.498558 }, { "acc": 0.93940477, "epoch": 0.5601124912116241, "grad_norm": 7.536691188812256, "learning_rate": 8.257020039426215e-06, "loss": 0.53861818, "memory(GiB)": 13.7, "step": 1195, "train_speed(iter/s)": 1.499077 }, { "acc": 0.89883451, "epoch": 0.5624560581204593, "grad_norm": 49.9314079284668, "learning_rate": 8.26188549311362e-06, "loss": 0.7462842, "memory(GiB)": 13.7, "step": 1200, "train_speed(iter/s)": 1.499112 }, { "acc": 0.92346411, "epoch": 0.5647996250292946, "grad_norm": 101.98506164550781, "learning_rate": 8.266730716165946e-06, "loss": 0.46091666, "memory(GiB)": 13.7, "step": 1205, "train_speed(iter/s)": 1.499182 }, { "acc": 0.91325989, "epoch": 0.5671431919381298, "grad_norm": 14.003578186035156, "learning_rate": 8.271555876125406e-06, "loss": 0.56178756, "memory(GiB)": 13.7, "step": 1210, "train_speed(iter/s)": 1.499783 }, { "acc": 0.89998827, "epoch": 0.5694867588469651, "grad_norm": 98.88941955566406, "learning_rate": 8.276361138461512e-06, "loss": 0.7373579, "memory(GiB)": 13.7, "step": 1215, "train_speed(iter/s)": 1.499781 }, { "acc": 0.91211643, "epoch": 0.5718303257558003, "grad_norm": 14.388083457946777, "learning_rate": 8.28114666660513e-06, "loss": 0.64690237, "memory(GiB)": 13.7, "step": 1220, "train_speed(iter/s)": 1.499983 }, { "acc": 0.93509216, "epoch": 0.5741738926646356, "grad_norm": 16.190183639526367, "learning_rate": 8.285912621981818e-06, "loss": 0.50191441, "memory(GiB)": 13.7, "step": 1225, "train_speed(iter/s)": 1.499991 }, { "acc": 0.93305855, "epoch": 0.5765174595734708, "grad_norm": 16.27609634399414, "learning_rate": 8.290659164044527e-06, "loss": 0.47494612, "memory(GiB)": 13.7, "step": 1230, "train_speed(iter/s)": 1.500224 }, { "acc": 0.93027649, "epoch": 0.5788610264823061, "grad_norm": 18.255781173706055, "learning_rate": 8.295386450305576e-06, "loss": 0.50210838, "memory(GiB)": 13.7, "step": 1235, "train_speed(iter/s)": 1.500784 }, { "acc": 0.92161198, "epoch": 0.5812045933911413, "grad_norm": 29.224855422973633, "learning_rate": 8.300094636368044e-06, "loss": 0.54597702, "memory(GiB)": 13.7, "step": 1240, "train_speed(iter/s)": 1.500955 }, { "acc": 0.93518438, "epoch": 0.5835481602999766, "grad_norm": 38.001991271972656, "learning_rate": 8.304783875956485e-06, "loss": 0.42197342, "memory(GiB)": 13.7, "step": 1245, "train_speed(iter/s)": 1.501273 }, { "acc": 0.9512599, "epoch": 0.5858917272088118, "grad_norm": 20.471195220947266, "learning_rate": 8.309454320947055e-06, "loss": 0.32817748, "memory(GiB)": 13.7, "step": 1250, "train_speed(iter/s)": 1.501531 }, { "acc": 0.91452389, "epoch": 0.5882352941176471, "grad_norm": 52.32083511352539, "learning_rate": 8.314106121397027e-06, "loss": 0.5998445, "memory(GiB)": 13.7, "step": 1255, "train_speed(iter/s)": 1.501916 }, { "acc": 0.95196438, "epoch": 0.5905788610264823, "grad_norm": 16.433813095092773, "learning_rate": 8.31873942557371e-06, "loss": 0.37428439, "memory(GiB)": 13.7, "step": 1260, "train_speed(iter/s)": 1.50212 }, { "acc": 0.93784409, "epoch": 0.5929224279353176, "grad_norm": 17.53717803955078, "learning_rate": 8.323354379982819e-06, "loss": 0.55943255, "memory(GiB)": 13.7, "step": 1265, "train_speed(iter/s)": 1.502278 }, { "acc": 0.9298214, "epoch": 0.5952659948441528, "grad_norm": 19.16267967224121, "learning_rate": 8.32795112939626e-06, "loss": 0.40462956, "memory(GiB)": 13.7, "step": 1270, "train_speed(iter/s)": 1.502718 }, { "acc": 0.93188496, "epoch": 0.5976095617529881, "grad_norm": 7.384457111358643, "learning_rate": 8.332529816879384e-06, "loss": 0.35913908, "memory(GiB)": 13.7, "step": 1275, "train_speed(iter/s)": 1.502775 }, { "acc": 0.91298237, "epoch": 0.5999531286618233, "grad_norm": 20.840084075927734, "learning_rate": 8.337090583817709e-06, "loss": 0.62971158, "memory(GiB)": 13.7, "step": 1280, "train_speed(iter/s)": 1.502777 }, { "acc": 0.92358112, "epoch": 0.6022966955706586, "grad_norm": 11.32038402557373, "learning_rate": 8.341633569943115e-06, "loss": 0.51431785, "memory(GiB)": 13.7, "step": 1285, "train_speed(iter/s)": 1.50268 }, { "acc": 0.92295008, "epoch": 0.6046402624794938, "grad_norm": 47.46485137939453, "learning_rate": 8.346158913359522e-06, "loss": 0.44695258, "memory(GiB)": 13.7, "step": 1290, "train_speed(iter/s)": 1.50315 }, { "acc": 0.92858744, "epoch": 0.6069838293883291, "grad_norm": 22.499359130859375, "learning_rate": 8.350666750568093e-06, "loss": 0.37362597, "memory(GiB)": 13.7, "step": 1295, "train_speed(iter/s)": 1.503384 }, { "acc": 0.91662207, "epoch": 0.6093273962971643, "grad_norm": 24.833759307861328, "learning_rate": 8.355157216491938e-06, "loss": 0.48687239, "memory(GiB)": 13.7, "step": 1300, "train_speed(iter/s)": 1.503724 }, { "acc": 0.90821686, "epoch": 0.6116709632059996, "grad_norm": 15.013077735900879, "learning_rate": 8.359630444500335e-06, "loss": 0.66902928, "memory(GiB)": 13.7, "step": 1305, "train_speed(iter/s)": 1.504043 }, { "acc": 0.94637156, "epoch": 0.6140145301148348, "grad_norm": 18.4085636138916, "learning_rate": 8.364086566432511e-06, "loss": 0.43065634, "memory(GiB)": 13.7, "step": 1310, "train_speed(iter/s)": 1.504351 }, { "acc": 0.9346199, "epoch": 0.6163580970236701, "grad_norm": 18.675464630126953, "learning_rate": 8.36852571262095e-06, "loss": 0.39141736, "memory(GiB)": 13.7, "step": 1315, "train_speed(iter/s)": 1.504536 }, { "acc": 0.91323738, "epoch": 0.6187016639325053, "grad_norm": 30.375642776489258, "learning_rate": 8.372948011914266e-06, "loss": 0.65960903, "memory(GiB)": 13.7, "step": 1320, "train_speed(iter/s)": 1.504577 }, { "acc": 0.93377895, "epoch": 0.6210452308413406, "grad_norm": 22.81257438659668, "learning_rate": 8.377353591699648e-06, "loss": 0.37262616, "memory(GiB)": 13.7, "step": 1325, "train_speed(iter/s)": 1.504984 }, { "acc": 0.91848221, "epoch": 0.6233887977501757, "grad_norm": 36.038516998291016, "learning_rate": 8.381742577924874e-06, "loss": 0.43011022, "memory(GiB)": 13.7, "step": 1330, "train_speed(iter/s)": 1.505264 }, { "acc": 0.92825756, "epoch": 0.625732364659011, "grad_norm": 8.823376655578613, "learning_rate": 8.386115095119915e-06, "loss": 0.5129612, "memory(GiB)": 13.7, "step": 1335, "train_speed(iter/s)": 1.505658 }, { "acc": 0.90244408, "epoch": 0.6280759315678462, "grad_norm": 39.74359893798828, "learning_rate": 8.390471266418138e-06, "loss": 0.74906392, "memory(GiB)": 13.7, "step": 1340, "train_speed(iter/s)": 1.505852 }, { "acc": 0.94670439, "epoch": 0.6304194984766816, "grad_norm": 3.1532609462738037, "learning_rate": 8.394811213577116e-06, "loss": 0.40760784, "memory(GiB)": 13.7, "step": 1345, "train_speed(iter/s)": 1.506103 }, { "acc": 0.92553673, "epoch": 0.6327630653855167, "grad_norm": 29.519821166992188, "learning_rate": 8.399135056999037e-06, "loss": 0.53170576, "memory(GiB)": 13.7, "step": 1350, "train_speed(iter/s)": 1.506278 }, { "acc": 0.91027355, "epoch": 0.635106632294352, "grad_norm": 11.462920188903809, "learning_rate": 8.403442915750758e-06, "loss": 0.60995808, "memory(GiB)": 13.7, "step": 1355, "train_speed(iter/s)": 1.506496 }, { "acc": 0.9372694, "epoch": 0.6374501992031872, "grad_norm": 33.00385284423828, "learning_rate": 8.407734907583472e-06, "loss": 0.48167257, "memory(GiB)": 13.7, "step": 1360, "train_speed(iter/s)": 1.506692 }, { "acc": 0.93285713, "epoch": 0.6397937661120225, "grad_norm": 8.19987964630127, "learning_rate": 8.412011148952028e-06, "loss": 0.36827812, "memory(GiB)": 13.7, "step": 1365, "train_speed(iter/s)": 1.506811 }, { "acc": 0.93467255, "epoch": 0.6421373330208577, "grad_norm": 9.330525398254395, "learning_rate": 8.416271755033878e-06, "loss": 0.4590209, "memory(GiB)": 13.7, "step": 1370, "train_speed(iter/s)": 1.507072 }, { "acc": 0.93847361, "epoch": 0.644480899929693, "grad_norm": 9.667359352111816, "learning_rate": 8.4205168397477e-06, "loss": 0.41091919, "memory(GiB)": 13.7, "step": 1375, "train_speed(iter/s)": 1.507184 }, { "acc": 0.90645456, "epoch": 0.6468244668385282, "grad_norm": 15.4938325881958, "learning_rate": 8.424746515771679e-06, "loss": 0.56824059, "memory(GiB)": 13.7, "step": 1380, "train_speed(iter/s)": 1.507412 }, { "acc": 0.91836033, "epoch": 0.6491680337473635, "grad_norm": 12.021004676818848, "learning_rate": 8.428960894561439e-06, "loss": 0.47284503, "memory(GiB)": 13.7, "step": 1385, "train_speed(iter/s)": 1.507508 }, { "acc": 0.91560955, "epoch": 0.6515116006561987, "grad_norm": 22.051895141601562, "learning_rate": 8.433160086367674e-06, "loss": 0.66824656, "memory(GiB)": 13.7, "step": 1390, "train_speed(iter/s)": 1.507914 }, { "acc": 0.92121487, "epoch": 0.653855167565034, "grad_norm": 15.831750869750977, "learning_rate": 8.43734420025346e-06, "loss": 0.63438354, "memory(GiB)": 13.7, "step": 1395, "train_speed(iter/s)": 1.50813 }, { "acc": 0.93644209, "epoch": 0.6561987344738692, "grad_norm": 8.037611961364746, "learning_rate": 8.441513344111235e-06, "loss": 0.36844077, "memory(GiB)": 13.7, "step": 1400, "train_speed(iter/s)": 1.508494 }, { "acc": 0.94505463, "epoch": 0.6585423013827045, "grad_norm": 14.054093360900879, "learning_rate": 8.44566762467951e-06, "loss": 0.45693307, "memory(GiB)": 13.7, "step": 1405, "train_speed(iter/s)": 1.508844 }, { "acc": 0.94426346, "epoch": 0.6608858682915397, "grad_norm": 16.69098472595215, "learning_rate": 8.449807147559254e-06, "loss": 0.40912995, "memory(GiB)": 13.7, "step": 1410, "train_speed(iter/s)": 1.509069 }, { "acc": 0.9360714, "epoch": 0.6632294352003749, "grad_norm": 9.812045097351074, "learning_rate": 8.453932017230008e-06, "loss": 0.38125668, "memory(GiB)": 13.7, "step": 1415, "train_speed(iter/s)": 1.509312 }, { "acc": 0.90432491, "epoch": 0.6655730021092102, "grad_norm": 27.881528854370117, "learning_rate": 8.4580423370657e-06, "loss": 0.63593321, "memory(GiB)": 13.7, "step": 1420, "train_speed(iter/s)": 1.509636 }, { "acc": 0.9229166, "epoch": 0.6679165690180454, "grad_norm": 14.109856605529785, "learning_rate": 8.4621382093502e-06, "loss": 0.50189686, "memory(GiB)": 13.7, "step": 1425, "train_speed(iter/s)": 1.509805 }, { "acc": 0.94191551, "epoch": 0.6702601359268807, "grad_norm": 7.312868595123291, "learning_rate": 8.466219735292583e-06, "loss": 0.36349497, "memory(GiB)": 13.7, "step": 1430, "train_speed(iter/s)": 1.510029 }, { "acc": 0.93350086, "epoch": 0.6726037028357159, "grad_norm": 8.770842552185059, "learning_rate": 8.470287015042142e-06, "loss": 0.47048798, "memory(GiB)": 13.7, "step": 1435, "train_speed(iter/s)": 1.510126 }, { "acc": 0.92140923, "epoch": 0.6749472697445512, "grad_norm": 14.995141983032227, "learning_rate": 8.474340147703126e-06, "loss": 0.48541722, "memory(GiB)": 13.7, "step": 1440, "train_speed(iter/s)": 1.510354 }, { "acc": 0.91855507, "epoch": 0.6772908366533864, "grad_norm": 47.97265625, "learning_rate": 8.478379231349236e-06, "loss": 0.56363382, "memory(GiB)": 13.7, "step": 1445, "train_speed(iter/s)": 1.510757 }, { "acc": 0.93884277, "epoch": 0.6796344035622217, "grad_norm": 6.723696231842041, "learning_rate": 8.48240436303786e-06, "loss": 0.40712438, "memory(GiB)": 13.7, "step": 1450, "train_speed(iter/s)": 1.511013 }, { "acc": 0.92470255, "epoch": 0.6819779704710569, "grad_norm": 15.672677040100098, "learning_rate": 8.486415638824068e-06, "loss": 0.54766207, "memory(GiB)": 13.7, "step": 1455, "train_speed(iter/s)": 1.511193 }, { "acc": 0.92532902, "epoch": 0.6843215373798922, "grad_norm": 33.362701416015625, "learning_rate": 8.490413153774366e-06, "loss": 0.43570251, "memory(GiB)": 13.7, "step": 1460, "train_speed(iter/s)": 1.511369 }, { "acc": 0.93525295, "epoch": 0.6866651042887274, "grad_norm": 25.267017364501953, "learning_rate": 8.494397001980217e-06, "loss": 0.52633414, "memory(GiB)": 13.7, "step": 1465, "train_speed(iter/s)": 1.511475 }, { "acc": 0.89655533, "epoch": 0.6890086711975627, "grad_norm": 40.4747314453125, "learning_rate": 8.498367276571326e-06, "loss": 0.49318991, "memory(GiB)": 13.7, "step": 1470, "train_speed(iter/s)": 1.511767 }, { "acc": 0.94429436, "epoch": 0.6913522381063979, "grad_norm": 5.378089904785156, "learning_rate": 8.502324069728703e-06, "loss": 0.32886729, "memory(GiB)": 13.7, "step": 1475, "train_speed(iter/s)": 1.512063 }, { "acc": 0.94504461, "epoch": 0.6936958050152332, "grad_norm": 7.844405174255371, "learning_rate": 8.50626747269751e-06, "loss": 0.20926044, "memory(GiB)": 13.7, "step": 1480, "train_speed(iter/s)": 1.512141 }, { "acc": 0.96824903, "epoch": 0.6960393719240684, "grad_norm": 22.0076961517334, "learning_rate": 8.510197575799683e-06, "loss": 0.22783265, "memory(GiB)": 13.7, "step": 1485, "train_speed(iter/s)": 1.51234 }, { "acc": 0.92562437, "epoch": 0.6983829388329037, "grad_norm": 10.325016021728516, "learning_rate": 8.514114468446339e-06, "loss": 0.54806948, "memory(GiB)": 13.7, "step": 1490, "train_speed(iter/s)": 1.512476 }, { "acc": 0.91606112, "epoch": 0.7007265057417389, "grad_norm": 5.391423225402832, "learning_rate": 8.518018239149996e-06, "loss": 0.64835777, "memory(GiB)": 13.7, "step": 1495, "train_speed(iter/s)": 1.512724 }, { "acc": 0.9390296, "epoch": 0.7030700726505742, "grad_norm": 19.185495376586914, "learning_rate": 8.521908975536562e-06, "loss": 0.45546322, "memory(GiB)": 13.7, "step": 1500, "train_speed(iter/s)": 1.513001 }, { "acc": 0.93123207, "epoch": 0.7054136395594094, "grad_norm": 58.42115783691406, "learning_rate": 8.525786764357137e-06, "loss": 0.50947337, "memory(GiB)": 13.7, "step": 1505, "train_speed(iter/s)": 1.513236 }, { "acc": 0.9442605, "epoch": 0.7077572064682447, "grad_norm": 9.643159866333008, "learning_rate": 8.52965169149963e-06, "loss": 0.40954189, "memory(GiB)": 13.7, "step": 1510, "train_speed(iter/s)": 1.513569 }, { "acc": 0.91102638, "epoch": 0.7101007733770799, "grad_norm": 19.981122970581055, "learning_rate": 8.533503842000157e-06, "loss": 0.62111177, "memory(GiB)": 13.7, "step": 1515, "train_speed(iter/s)": 1.51365 }, { "acc": 0.91034489, "epoch": 0.7124443402859152, "grad_norm": 91.45709228515625, "learning_rate": 8.53734330005429e-06, "loss": 0.6491312, "memory(GiB)": 13.7, "step": 1520, "train_speed(iter/s)": 1.513812 }, { "acc": 0.93218994, "epoch": 0.7147879071947504, "grad_norm": 36.78562927246094, "learning_rate": 8.54117014902807e-06, "loss": 0.54148984, "memory(GiB)": 13.7, "step": 1525, "train_speed(iter/s)": 1.514006 }, { "acc": 0.96703415, "epoch": 0.7171314741035857, "grad_norm": 12.602206230163574, "learning_rate": 8.544984471468889e-06, "loss": 0.2860791, "memory(GiB)": 13.7, "step": 1530, "train_speed(iter/s)": 1.514227 }, { "acc": 0.91527328, "epoch": 0.7194750410124209, "grad_norm": 46.803714752197266, "learning_rate": 8.548786349116173e-06, "loss": 0.64160533, "memory(GiB)": 13.7, "step": 1535, "train_speed(iter/s)": 1.514186 }, { "acc": 0.92981796, "epoch": 0.7218186079212562, "grad_norm": 22.887636184692383, "learning_rate": 8.552575862911881e-06, "loss": 0.46903028, "memory(GiB)": 13.7, "step": 1540, "train_speed(iter/s)": 1.514361 }, { "acc": 0.93528728, "epoch": 0.7241621748300914, "grad_norm": 18.12618064880371, "learning_rate": 8.556353093010853e-06, "loss": 0.43458495, "memory(GiB)": 13.7, "step": 1545, "train_speed(iter/s)": 1.514328 }, { "acc": 0.90973616, "epoch": 0.7265057417389267, "grad_norm": 18.05149269104004, "learning_rate": 8.560118118790984e-06, "loss": 0.70654774, "memory(GiB)": 13.7, "step": 1550, "train_speed(iter/s)": 1.514395 }, { "acc": 0.93821602, "epoch": 0.7288493086477619, "grad_norm": 22.15313720703125, "learning_rate": 8.563871018863227e-06, "loss": 0.33748341, "memory(GiB)": 13.7, "step": 1555, "train_speed(iter/s)": 1.514862 }, { "acc": 0.90940971, "epoch": 0.7311928755565972, "grad_norm": 22.933677673339844, "learning_rate": 8.567611871081443e-06, "loss": 0.45904098, "memory(GiB)": 13.7, "step": 1560, "train_speed(iter/s)": 1.515045 }, { "acc": 0.9379611, "epoch": 0.7335364424654324, "grad_norm": 20.650470733642578, "learning_rate": 8.5713407525521e-06, "loss": 0.40546865, "memory(GiB)": 13.7, "step": 1565, "train_speed(iter/s)": 1.515205 }, { "acc": 0.94644232, "epoch": 0.7358800093742677, "grad_norm": 79.12849426269531, "learning_rate": 8.575057739643795e-06, "loss": 0.30639493, "memory(GiB)": 13.7, "step": 1570, "train_speed(iter/s)": 1.515537 }, { "acc": 0.9216116, "epoch": 0.7382235762831029, "grad_norm": 15.041722297668457, "learning_rate": 8.578762907996652e-06, "loss": 0.42314706, "memory(GiB)": 13.7, "step": 1575, "train_speed(iter/s)": 1.515827 }, { "acc": 0.90005951, "epoch": 0.7405671431919382, "grad_norm": 10.69088363647461, "learning_rate": 8.582456332531546e-06, "loss": 0.59630113, "memory(GiB)": 13.7, "step": 1580, "train_speed(iter/s)": 1.516105 }, { "acc": 0.92736111, "epoch": 0.7429107101007734, "grad_norm": 8.253706932067871, "learning_rate": 8.586138087459208e-06, "loss": 0.45129733, "memory(GiB)": 13.7, "step": 1585, "train_speed(iter/s)": 1.51636 }, { "acc": 0.91880951, "epoch": 0.7452542770096087, "grad_norm": 25.551145553588867, "learning_rate": 8.589808246289153e-06, "loss": 0.52050781, "memory(GiB)": 13.7, "step": 1590, "train_speed(iter/s)": 1.516549 }, { "acc": 0.94596043, "epoch": 0.7475978439184439, "grad_norm": 181.94863891601562, "learning_rate": 8.593466881838505e-06, "loss": 0.3189023, "memory(GiB)": 13.7, "step": 1595, "train_speed(iter/s)": 1.516822 }, { "acc": 0.91037521, "epoch": 0.7499414108272792, "grad_norm": 29.74203109741211, "learning_rate": 8.59711406624065e-06, "loss": 0.59567108, "memory(GiB)": 13.7, "step": 1600, "train_speed(iter/s)": 1.516667 }, { "acc": 0.9331728, "epoch": 0.7522849777361144, "grad_norm": 5.95013952255249, "learning_rate": 8.600749870953782e-06, "loss": 0.41085272, "memory(GiB)": 13.7, "step": 1605, "train_speed(iter/s)": 1.516572 }, { "acc": 0.93591528, "epoch": 0.7546285446449497, "grad_norm": 3.173762559890747, "learning_rate": 8.604374366769294e-06, "loss": 0.53539391, "memory(GiB)": 13.7, "step": 1610, "train_speed(iter/s)": 1.516478 }, { "acc": 0.93572779, "epoch": 0.7569721115537849, "grad_norm": 32.43599319458008, "learning_rate": 8.607987623820053e-06, "loss": 0.42097144, "memory(GiB)": 13.7, "step": 1615, "train_speed(iter/s)": 1.516747 }, { "acc": 0.90851154, "epoch": 0.7593156784626202, "grad_norm": 6.276418685913086, "learning_rate": 8.61158971158854e-06, "loss": 0.61402187, "memory(GiB)": 13.7, "step": 1620, "train_speed(iter/s)": 1.517089 }, { "acc": 0.9169508, "epoch": 0.7616592453714554, "grad_norm": 61.39243698120117, "learning_rate": 8.61518069891488e-06, "loss": 0.55401154, "memory(GiB)": 13.7, "step": 1625, "train_speed(iter/s)": 1.517093 }, { "acc": 0.88571968, "epoch": 0.7640028122802907, "grad_norm": 50.71084213256836, "learning_rate": 8.618760654004706e-06, "loss": 0.9523613, "memory(GiB)": 13.7, "step": 1630, "train_speed(iter/s)": 1.517249 }, { "acc": 0.92806549, "epoch": 0.7663463791891258, "grad_norm": 13.961421012878418, "learning_rate": 8.622329644436966e-06, "loss": 0.48974376, "memory(GiB)": 13.7, "step": 1635, "train_speed(iter/s)": 1.517404 }, { "acc": 0.92939262, "epoch": 0.7686899460979612, "grad_norm": 13.372129440307617, "learning_rate": 8.625887737171557e-06, "loss": 0.48220005, "memory(GiB)": 13.7, "step": 1640, "train_speed(iter/s)": 1.517566 }, { "acc": 0.94231234, "epoch": 0.7710335130067963, "grad_norm": 22.37149429321289, "learning_rate": 8.629434998556869e-06, "loss": 0.40814328, "memory(GiB)": 13.7, "step": 1645, "train_speed(iter/s)": 1.517607 }, { "acc": 0.93803577, "epoch": 0.7733770799156315, "grad_norm": 30.03834342956543, "learning_rate": 8.632971494337208e-06, "loss": 0.29171069, "memory(GiB)": 13.7, "step": 1650, "train_speed(iter/s)": 1.51777 }, { "acc": 0.9315526, "epoch": 0.7757206468244668, "grad_norm": 11.963541984558105, "learning_rate": 8.636497289660102e-06, "loss": 0.47859583, "memory(GiB)": 13.7, "step": 1655, "train_speed(iter/s)": 1.518101 }, { "acc": 0.92337799, "epoch": 0.778064213733302, "grad_norm": 13.963833808898926, "learning_rate": 8.640012449083514e-06, "loss": 0.45113173, "memory(GiB)": 13.7, "step": 1660, "train_speed(iter/s)": 1.518334 }, { "acc": 0.93003473, "epoch": 0.7804077806421373, "grad_norm": 13.672419548034668, "learning_rate": 8.643517036582927e-06, "loss": 0.53322744, "memory(GiB)": 13.7, "step": 1665, "train_speed(iter/s)": 1.518843 }, { "acc": 0.92062502, "epoch": 0.7827513475509725, "grad_norm": 38.84975051879883, "learning_rate": 8.647011115558322e-06, "loss": 0.48949986, "memory(GiB)": 13.7, "step": 1670, "train_speed(iter/s)": 1.519052 }, { "acc": 0.92119246, "epoch": 0.7850949144598078, "grad_norm": 43.917720794677734, "learning_rate": 8.65049474884108e-06, "loss": 0.46313257, "memory(GiB)": 13.7, "step": 1675, "train_speed(iter/s)": 1.519214 }, { "acc": 0.94259024, "epoch": 0.787438481368643, "grad_norm": 18.392253875732422, "learning_rate": 8.653967998700741e-06, "loss": 0.4404438, "memory(GiB)": 13.7, "step": 1680, "train_speed(iter/s)": 1.5191 }, { "acc": 0.93559284, "epoch": 0.7897820482774783, "grad_norm": 14.390575408935547, "learning_rate": 8.657430926851696e-06, "loss": 0.43067369, "memory(GiB)": 13.7, "step": 1685, "train_speed(iter/s)": 1.519237 }, { "acc": 0.93446102, "epoch": 0.7921256151863135, "grad_norm": 50.593772888183594, "learning_rate": 8.660883594459761e-06, "loss": 0.34944496, "memory(GiB)": 13.7, "step": 1690, "train_speed(iter/s)": 1.519311 }, { "acc": 0.86854486, "epoch": 0.7944691820951488, "grad_norm": 15.51600456237793, "learning_rate": 8.66432606214866e-06, "loss": 1.00398502, "memory(GiB)": 13.7, "step": 1695, "train_speed(iter/s)": 1.519476 }, { "acc": 0.9291996, "epoch": 0.796812749003984, "grad_norm": 22.959278106689453, "learning_rate": 8.667758390006414e-06, "loss": 0.46942954, "memory(GiB)": 13.7, "step": 1700, "train_speed(iter/s)": 1.519504 }, { "acc": 0.96322918, "epoch": 0.7991563159128193, "grad_norm": 8.463815689086914, "learning_rate": 8.67118063759163e-06, "loss": 0.19848368, "memory(GiB)": 13.7, "step": 1705, "train_speed(iter/s)": 1.519509 }, { "acc": 0.95224209, "epoch": 0.8014998828216545, "grad_norm": 9.861981391906738, "learning_rate": 8.674592863939704e-06, "loss": 0.41192784, "memory(GiB)": 13.7, "step": 1710, "train_speed(iter/s)": 1.519683 }, { "acc": 0.90289822, "epoch": 0.8038434497304898, "grad_norm": 19.726198196411133, "learning_rate": 8.67799512756894e-06, "loss": 0.42375278, "memory(GiB)": 13.7, "step": 1715, "train_speed(iter/s)": 1.51978 }, { "acc": 0.95081425, "epoch": 0.806187016639325, "grad_norm": 22.216880798339844, "learning_rate": 8.681387486486554e-06, "loss": 0.25154619, "memory(GiB)": 13.7, "step": 1720, "train_speed(iter/s)": 1.52007 }, { "acc": 0.95205574, "epoch": 0.8085305835481603, "grad_norm": 5.018809795379639, "learning_rate": 8.68476999819462e-06, "loss": 0.31022766, "memory(GiB)": 13.7, "step": 1725, "train_speed(iter/s)": 1.520319 }, { "acc": 0.92538052, "epoch": 0.8108741504569955, "grad_norm": 16.272865295410156, "learning_rate": 8.68814271969592e-06, "loss": 0.57866306, "memory(GiB)": 13.7, "step": 1730, "train_speed(iter/s)": 1.520493 }, { "acc": 0.9389719, "epoch": 0.8132177173658308, "grad_norm": 20.87274742126465, "learning_rate": 8.691505707499704e-06, "loss": 0.40685115, "memory(GiB)": 13.7, "step": 1735, "train_speed(iter/s)": 1.520508 }, { "acc": 0.95117712, "epoch": 0.815561284274666, "grad_norm": 61.7390251159668, "learning_rate": 8.694859017627365e-06, "loss": 0.34886978, "memory(GiB)": 13.7, "step": 1740, "train_speed(iter/s)": 1.52071 }, { "acc": 0.95339689, "epoch": 0.8179048511835013, "grad_norm": 64.35511779785156, "learning_rate": 8.698202705618052e-06, "loss": 0.33312297, "memory(GiB)": 13.7, "step": 1745, "train_speed(iter/s)": 1.520778 }, { "acc": 0.92687502, "epoch": 0.8202484180923365, "grad_norm": 12.099261283874512, "learning_rate": 8.701536826534175e-06, "loss": 0.42658086, "memory(GiB)": 13.7, "step": 1750, "train_speed(iter/s)": 1.521035 }, { "acc": 0.92168722, "epoch": 0.8225919850011718, "grad_norm": 49.458648681640625, "learning_rate": 8.704861434966858e-06, "loss": 0.5262989, "memory(GiB)": 13.7, "step": 1755, "train_speed(iter/s)": 1.521165 }, { "acc": 0.93345919, "epoch": 0.824935551910007, "grad_norm": 127.89656829833984, "learning_rate": 8.708176585041296e-06, "loss": 0.48534446, "memory(GiB)": 13.7, "step": 1760, "train_speed(iter/s)": 1.521116 }, { "acc": 0.92225571, "epoch": 0.8272791188188423, "grad_norm": 20.954551696777344, "learning_rate": 8.711482330422037e-06, "loss": 0.44397569, "memory(GiB)": 13.7, "step": 1765, "train_speed(iter/s)": 1.521102 }, { "acc": 0.94883928, "epoch": 0.8296226857276775, "grad_norm": 14.330496788024902, "learning_rate": 8.714778724318209e-06, "loss": 0.33817337, "memory(GiB)": 13.7, "step": 1770, "train_speed(iter/s)": 1.521221 }, { "acc": 0.93961458, "epoch": 0.8319662526365128, "grad_norm": 15.544912338256836, "learning_rate": 8.718065819488642e-06, "loss": 0.40652962, "memory(GiB)": 13.7, "step": 1775, "train_speed(iter/s)": 1.521511 }, { "acc": 0.95187502, "epoch": 0.834309819545348, "grad_norm": 101.91620635986328, "learning_rate": 8.721343668246945e-06, "loss": 0.38880491, "memory(GiB)": 13.7, "step": 1780, "train_speed(iter/s)": 1.521949 }, { "acc": 0.90962181, "epoch": 0.8366533864541833, "grad_norm": 19.37135887145996, "learning_rate": 8.724612322466504e-06, "loss": 0.68188257, "memory(GiB)": 13.7, "step": 1785, "train_speed(iter/s)": 1.521892 }, { "acc": 0.92479353, "epoch": 0.8389969533630185, "grad_norm": 27.825580596923828, "learning_rate": 8.727871833585397e-06, "loss": 0.49740887, "memory(GiB)": 13.7, "step": 1790, "train_speed(iter/s)": 1.522108 }, { "acc": 0.92761765, "epoch": 0.8413405202718538, "grad_norm": 8.953219413757324, "learning_rate": 8.731122252611258e-06, "loss": 0.5462678, "memory(GiB)": 13.7, "step": 1795, "train_speed(iter/s)": 1.522242 }, { "acc": 0.92247047, "epoch": 0.843684087180689, "grad_norm": 10.34980297088623, "learning_rate": 8.734363630126067e-06, "loss": 0.49403348, "memory(GiB)": 13.7, "step": 1800, "train_speed(iter/s)": 1.522321 }, { "acc": 0.91390629, "epoch": 0.8460276540895243, "grad_norm": 44.719886779785156, "learning_rate": 8.73759601629087e-06, "loss": 0.59074612, "memory(GiB)": 13.7, "step": 1805, "train_speed(iter/s)": 1.522349 }, { "acc": 0.93961782, "epoch": 0.8483712209983595, "grad_norm": 19.866539001464844, "learning_rate": 8.740819460850433e-06, "loss": 0.41565294, "memory(GiB)": 13.7, "step": 1810, "train_speed(iter/s)": 1.522202 }, { "acc": 0.92868242, "epoch": 0.8507147879071948, "grad_norm": 6.049466133117676, "learning_rate": 8.744034013137851e-06, "loss": 0.4670392, "memory(GiB)": 13.7, "step": 1815, "train_speed(iter/s)": 1.522518 }, { "acc": 0.93982582, "epoch": 0.85305835481603, "grad_norm": 34.501731872558594, "learning_rate": 8.747239722079058e-06, "loss": 0.37364531, "memory(GiB)": 13.7, "step": 1820, "train_speed(iter/s)": 1.522608 }, { "acc": 0.92790699, "epoch": 0.8554019217248653, "grad_norm": 42.73512268066406, "learning_rate": 8.750436636197307e-06, "loss": 0.45631084, "memory(GiB)": 13.7, "step": 1825, "train_speed(iter/s)": 1.522728 }, { "acc": 0.93065529, "epoch": 0.8577454886337005, "grad_norm": 10.19678020477295, "learning_rate": 8.753624803617575e-06, "loss": 0.44997911, "memory(GiB)": 13.7, "step": 1830, "train_speed(iter/s)": 1.522689 }, { "acc": 0.9160471, "epoch": 0.8600890555425358, "grad_norm": 23.003576278686523, "learning_rate": 8.756804272070906e-06, "loss": 0.49618616, "memory(GiB)": 13.7, "step": 1835, "train_speed(iter/s)": 1.522686 }, { "acc": 0.90999508, "epoch": 0.862432622451371, "grad_norm": 31.315114974975586, "learning_rate": 8.759975088898709e-06, "loss": 0.60029082, "memory(GiB)": 13.7, "step": 1840, "train_speed(iter/s)": 1.522768 }, { "acc": 0.91919947, "epoch": 0.8647761893602063, "grad_norm": 31.252979278564453, "learning_rate": 8.763137301056974e-06, "loss": 0.51450014, "memory(GiB)": 13.7, "step": 1845, "train_speed(iter/s)": 1.523036 }, { "acc": 0.91015587, "epoch": 0.8671197562690415, "grad_norm": 12.962067604064941, "learning_rate": 8.76629095512045e-06, "loss": 0.72048426, "memory(GiB)": 13.7, "step": 1850, "train_speed(iter/s)": 1.52307 }, { "acc": 0.94734268, "epoch": 0.8694633231778768, "grad_norm": 34.80970001220703, "learning_rate": 8.769436097286768e-06, "loss": 0.36760788, "memory(GiB)": 13.7, "step": 1855, "train_speed(iter/s)": 1.523192 }, { "acc": 0.93060961, "epoch": 0.871806890086712, "grad_norm": 16.468807220458984, "learning_rate": 8.77257277338049e-06, "loss": 0.57106667, "memory(GiB)": 13.7, "step": 1860, "train_speed(iter/s)": 1.523398 }, { "acc": 0.9256134, "epoch": 0.8741504569955473, "grad_norm": 13.180006980895996, "learning_rate": 8.775701028857119e-06, "loss": 0.48057117, "memory(GiB)": 13.7, "step": 1865, "train_speed(iter/s)": 1.523451 }, { "acc": 0.95062504, "epoch": 0.8764940239043825, "grad_norm": 72.94050598144531, "learning_rate": 8.77882090880706e-06, "loss": 0.3065058, "memory(GiB)": 13.7, "step": 1870, "train_speed(iter/s)": 1.523323 }, { "acc": 0.92098541, "epoch": 0.8788375908132178, "grad_norm": 20.979503631591797, "learning_rate": 8.781932457959502e-06, "loss": 0.48742256, "memory(GiB)": 13.7, "step": 1875, "train_speed(iter/s)": 1.523417 }, { "acc": 0.95283728, "epoch": 0.881181157722053, "grad_norm": 18.832996368408203, "learning_rate": 8.785035720686286e-06, "loss": 0.31892138, "memory(GiB)": 13.7, "step": 1880, "train_speed(iter/s)": 1.523641 }, { "acc": 0.93858738, "epoch": 0.8835247246308882, "grad_norm": 44.85096740722656, "learning_rate": 8.788130741005683e-06, "loss": 0.47501125, "memory(GiB)": 13.7, "step": 1885, "train_speed(iter/s)": 1.523745 }, { "acc": 0.89006939, "epoch": 0.8858682915397235, "grad_norm": 12.737997055053711, "learning_rate": 8.791217562586158e-06, "loss": 0.79622698, "memory(GiB)": 13.7, "step": 1890, "train_speed(iter/s)": 1.523974 }, { "acc": 0.88953285, "epoch": 0.8882118584485587, "grad_norm": 24.52825927734375, "learning_rate": 8.794296228750049e-06, "loss": 0.6243784, "memory(GiB)": 13.7, "step": 1895, "train_speed(iter/s)": 1.524253 }, { "acc": 0.9170536, "epoch": 0.890555425357394, "grad_norm": 13.809172630310059, "learning_rate": 8.797366782477231e-06, "loss": 0.48680029, "memory(GiB)": 13.7, "step": 1900, "train_speed(iter/s)": 1.524107 }, { "acc": 0.94277401, "epoch": 0.8928989922662292, "grad_norm": 13.078413963317871, "learning_rate": 8.800429266408705e-06, "loss": 0.34339905, "memory(GiB)": 13.7, "step": 1905, "train_speed(iter/s)": 1.524209 }, { "acc": 0.94204512, "epoch": 0.8952425591750645, "grad_norm": 11.979008674621582, "learning_rate": 8.80348372285016e-06, "loss": 0.39081173, "memory(GiB)": 13.7, "step": 1910, "train_speed(iter/s)": 1.52443 }, { "acc": 0.93425512, "epoch": 0.8975861260838996, "grad_norm": 51.130287170410156, "learning_rate": 8.806530193775466e-06, "loss": 0.41991715, "memory(GiB)": 13.7, "step": 1915, "train_speed(iter/s)": 1.524395 }, { "acc": 0.95404768, "epoch": 0.899929692992735, "grad_norm": 14.212202072143555, "learning_rate": 8.809568720830158e-06, "loss": 0.29350657, "memory(GiB)": 13.7, "step": 1920, "train_speed(iter/s)": 1.524554 }, { "acc": 0.90637131, "epoch": 0.9022732599015701, "grad_norm": 22.89257049560547, "learning_rate": 8.812599345334821e-06, "loss": 0.74808922, "memory(GiB)": 13.7, "step": 1925, "train_speed(iter/s)": 1.524815 }, { "acc": 0.91574402, "epoch": 0.9046168268104054, "grad_norm": 25.0063533782959, "learning_rate": 8.815622108288493e-06, "loss": 0.56822882, "memory(GiB)": 13.7, "step": 1930, "train_speed(iter/s)": 1.525068 }, { "acc": 0.91050634, "epoch": 0.9069603937192406, "grad_norm": 40.500518798828125, "learning_rate": 8.818637050371969e-06, "loss": 0.53737497, "memory(GiB)": 13.7, "step": 1935, "train_speed(iter/s)": 1.525105 }, { "acc": 0.94365482, "epoch": 0.909303960628076, "grad_norm": 11.621325492858887, "learning_rate": 8.821644211951098e-06, "loss": 0.25770993, "memory(GiB)": 13.7, "step": 1940, "train_speed(iter/s)": 1.525035 }, { "acc": 0.88573036, "epoch": 0.9116475275369111, "grad_norm": 16.34398078918457, "learning_rate": 8.824643633080024e-06, "loss": 0.70893736, "memory(GiB)": 13.7, "step": 1945, "train_speed(iter/s)": 1.525074 }, { "acc": 0.9367239, "epoch": 0.9139910944457464, "grad_norm": 5.949017524719238, "learning_rate": 8.827635353504385e-06, "loss": 0.32963018, "memory(GiB)": 13.7, "step": 1950, "train_speed(iter/s)": 1.525326 }, { "acc": 0.90338097, "epoch": 0.9163346613545816, "grad_norm": 49.06780242919922, "learning_rate": 8.830619412664472e-06, "loss": 0.60068073, "memory(GiB)": 13.7, "step": 1955, "train_speed(iter/s)": 1.525397 }, { "acc": 0.93056049, "epoch": 0.9186782282634169, "grad_norm": 14.003350257873535, "learning_rate": 8.833595849698356e-06, "loss": 0.43576593, "memory(GiB)": 13.7, "step": 1960, "train_speed(iter/s)": 1.525482 }, { "acc": 0.93199883, "epoch": 0.9210217951722521, "grad_norm": 7.19831657409668, "learning_rate": 8.836564703444958e-06, "loss": 0.45540075, "memory(GiB)": 13.7, "step": 1965, "train_speed(iter/s)": 1.525675 }, { "acc": 0.90762424, "epoch": 0.9233653620810874, "grad_norm": 20.378374099731445, "learning_rate": 8.839526012447097e-06, "loss": 0.60567951, "memory(GiB)": 13.7, "step": 1970, "train_speed(iter/s)": 1.525967 }, { "acc": 0.96594725, "epoch": 0.9257089289899226, "grad_norm": 18.695096969604492, "learning_rate": 8.842479814954488e-06, "loss": 0.2386035, "memory(GiB)": 13.7, "step": 1975, "train_speed(iter/s)": 1.52598 }, { "acc": 0.92570305, "epoch": 0.9280524958987579, "grad_norm": 17.331878662109375, "learning_rate": 8.845426148926713e-06, "loss": 0.40020213, "memory(GiB)": 13.7, "step": 1980, "train_speed(iter/s)": 1.526075 }, { "acc": 0.97188492, "epoch": 0.9303960628075931, "grad_norm": 15.993607521057129, "learning_rate": 8.848365052036137e-06, "loss": 0.20263913, "memory(GiB)": 13.7, "step": 1985, "train_speed(iter/s)": 1.526207 }, { "acc": 0.93309498, "epoch": 0.9327396297164284, "grad_norm": 16.164018630981445, "learning_rate": 8.851296561670814e-06, "loss": 0.50997715, "memory(GiB)": 13.7, "step": 1990, "train_speed(iter/s)": 1.526045 }, { "acc": 0.93986607, "epoch": 0.9350831966252636, "grad_norm": 7.314716339111328, "learning_rate": 8.854220714937321e-06, "loss": 0.3327601, "memory(GiB)": 13.7, "step": 1995, "train_speed(iter/s)": 1.526018 }, { "acc": 0.92627182, "epoch": 0.9374267635340989, "grad_norm": 15.224157333374023, "learning_rate": 8.85713754866359e-06, "loss": 0.52654562, "memory(GiB)": 13.7, "step": 2000, "train_speed(iter/s)": 1.526147 }, { "acc": 0.92889252, "epoch": 0.9397703304429341, "grad_norm": 14.350348472595215, "learning_rate": 8.860047099401697e-06, "loss": 0.42962079, "memory(GiB)": 13.7, "step": 2005, "train_speed(iter/s)": 1.52611 }, { "acc": 0.95613613, "epoch": 0.9421138973517694, "grad_norm": 13.3577299118042, "learning_rate": 8.862949403430585e-06, "loss": 0.31326857, "memory(GiB)": 13.7, "step": 2010, "train_speed(iter/s)": 1.526321 }, { "acc": 0.96847219, "epoch": 0.9444574642606046, "grad_norm": 2.7915825843811035, "learning_rate": 8.865844496758807e-06, "loss": 0.27831964, "memory(GiB)": 13.7, "step": 2015, "train_speed(iter/s)": 1.526519 }, { "acc": 0.89763508, "epoch": 0.9468010311694399, "grad_norm": 49.541709899902344, "learning_rate": 8.86873241512719e-06, "loss": 0.75131369, "memory(GiB)": 13.7, "step": 2020, "train_speed(iter/s)": 1.526468 }, { "acc": 0.91027536, "epoch": 0.9491445980782751, "grad_norm": 12.850712776184082, "learning_rate": 8.871613194011482e-06, "loss": 0.58103352, "memory(GiB)": 13.7, "step": 2025, "train_speed(iter/s)": 1.526718 }, { "acc": 0.934375, "epoch": 0.9514881649871104, "grad_norm": 5.523793697357178, "learning_rate": 8.87448686862498e-06, "loss": 0.39186506, "memory(GiB)": 13.7, "step": 2030, "train_speed(iter/s)": 1.526956 }, { "acc": 0.9427084, "epoch": 0.9538317318959456, "grad_norm": 21.3807315826416, "learning_rate": 8.877353473921096e-06, "loss": 0.37754793, "memory(GiB)": 13.7, "step": 2035, "train_speed(iter/s)": 1.527134 }, { "acc": 0.9143178, "epoch": 0.9561752988047809, "grad_norm": 28.37403106689453, "learning_rate": 8.88021304459592e-06, "loss": 0.69792404, "memory(GiB)": 13.7, "step": 2040, "train_speed(iter/s)": 1.527147 }, { "acc": 0.93647575, "epoch": 0.9585188657136161, "grad_norm": 39.258514404296875, "learning_rate": 8.88306561509073e-06, "loss": 0.3819463, "memory(GiB)": 13.7, "step": 2045, "train_speed(iter/s)": 1.527488 }, { "acc": 0.91472273, "epoch": 0.9608624326224514, "grad_norm": 27.4164981842041, "learning_rate": 8.885911219594497e-06, "loss": 0.48489256, "memory(GiB)": 13.7, "step": 2050, "train_speed(iter/s)": 1.527578 }, { "acc": 0.95008926, "epoch": 0.9632059995312866, "grad_norm": 10.714810371398926, "learning_rate": 8.888749892046324e-06, "loss": 0.34334447, "memory(GiB)": 13.7, "step": 2055, "train_speed(iter/s)": 1.527662 }, { "acc": 0.90134268, "epoch": 0.9655495664401219, "grad_norm": 21.807300567626953, "learning_rate": 8.891581666137883e-06, "loss": 0.64867916, "memory(GiB)": 13.7, "step": 2060, "train_speed(iter/s)": 1.52794 }, { "acc": 0.91159859, "epoch": 0.9678931333489571, "grad_norm": 13.933619499206543, "learning_rate": 8.894406575315822e-06, "loss": 0.7518724, "memory(GiB)": 13.7, "step": 2065, "train_speed(iter/s)": 1.528019 }, { "acc": 0.94444618, "epoch": 0.9702367002577924, "grad_norm": 31.620691299438477, "learning_rate": 8.897224652784126e-06, "loss": 0.42242336, "memory(GiB)": 13.7, "step": 2070, "train_speed(iter/s)": 1.528287 }, { "acc": 0.93945885, "epoch": 0.9725802671666276, "grad_norm": 28.989953994750977, "learning_rate": 8.900035931506456e-06, "loss": 0.42325511, "memory(GiB)": 13.7, "step": 2075, "train_speed(iter/s)": 1.528381 }, { "acc": 0.94935837, "epoch": 0.9749238340754629, "grad_norm": 12.318251609802246, "learning_rate": 8.902840444208475e-06, "loss": 0.33235447, "memory(GiB)": 13.7, "step": 2080, "train_speed(iter/s)": 1.528435 }, { "acc": 0.91318741, "epoch": 0.9772674009842981, "grad_norm": 8.429622650146484, "learning_rate": 8.90563822338012e-06, "loss": 0.48147955, "memory(GiB)": 13.7, "step": 2085, "train_speed(iter/s)": 1.528668 }, { "acc": 0.91891012, "epoch": 0.9796109678931334, "grad_norm": 18.589054107666016, "learning_rate": 8.908429301277876e-06, "loss": 0.42391553, "memory(GiB)": 13.7, "step": 2090, "train_speed(iter/s)": 1.528721 }, { "acc": 0.94461708, "epoch": 0.9819545348019686, "grad_norm": 10.018814086914062, "learning_rate": 8.911213709926988e-06, "loss": 0.40618944, "memory(GiB)": 13.7, "step": 2095, "train_speed(iter/s)": 1.528822 }, { "acc": 0.91605053, "epoch": 0.9842981017108039, "grad_norm": 11.569275856018066, "learning_rate": 8.91399148112368e-06, "loss": 0.55961409, "memory(GiB)": 13.7, "step": 2100, "train_speed(iter/s)": 1.529061 }, { "acc": 0.94334774, "epoch": 0.9866416686196391, "grad_norm": 84.49092102050781, "learning_rate": 8.916762646437336e-06, "loss": 0.39692192, "memory(GiB)": 13.7, "step": 2105, "train_speed(iter/s)": 1.529159 }, { "acc": 0.9129735, "epoch": 0.9889852355284744, "grad_norm": 40.530330657958984, "learning_rate": 8.919527237212638e-06, "loss": 0.49913044, "memory(GiB)": 13.7, "step": 2110, "train_speed(iter/s)": 1.529309 }, { "acc": 0.95724277, "epoch": 0.9913288024373096, "grad_norm": 10.575186729431152, "learning_rate": 8.922285284571701e-06, "loss": 0.31773424, "memory(GiB)": 13.7, "step": 2115, "train_speed(iter/s)": 1.52927 }, { "acc": 0.92709684, "epoch": 0.9936723693461448, "grad_norm": 21.463001251220703, "learning_rate": 8.925036819416184e-06, "loss": 0.50780897, "memory(GiB)": 13.7, "step": 2120, "train_speed(iter/s)": 1.529465 }, { "acc": 0.92481651, "epoch": 0.9960159362549801, "grad_norm": 54.0821533203125, "learning_rate": 8.927781872429354e-06, "loss": 0.44001846, "memory(GiB)": 13.7, "step": 2125, "train_speed(iter/s)": 1.52949 }, { "acc": 0.94577503, "epoch": 0.9983595031638153, "grad_norm": 11.976089477539062, "learning_rate": 8.930520474078145e-06, "loss": 0.43587275, "memory(GiB)": 13.7, "step": 2130, "train_speed(iter/s)": 1.52959 }, { "acc": 0.93611526, "epoch": 1.0007030700726505, "grad_norm": 103.05160522460938, "learning_rate": 8.93325265461519e-06, "loss": 0.41136551, "memory(GiB)": 13.7, "step": 2135, "train_speed(iter/s)": 1.529338 }, { "acc": 0.96802807, "epoch": 1.0030466369814859, "grad_norm": 7.799630165100098, "learning_rate": 8.935978444080813e-06, "loss": 0.29769025, "memory(GiB)": 13.7, "step": 2140, "train_speed(iter/s)": 1.529296 }, { "acc": 0.91343594, "epoch": 1.005390203890321, "grad_norm": 19.204246520996094, "learning_rate": 8.93869787230503e-06, "loss": 0.48970537, "memory(GiB)": 13.7, "step": 2145, "train_speed(iter/s)": 1.52943 }, { "acc": 0.91635427, "epoch": 1.0077337707991563, "grad_norm": 42.59782409667969, "learning_rate": 8.941410968909494e-06, "loss": 0.58468885, "memory(GiB)": 13.7, "step": 2150, "train_speed(iter/s)": 1.529582 }, { "acc": 0.93170233, "epoch": 1.0100773377079915, "grad_norm": 23.351118087768555, "learning_rate": 8.944117763309435e-06, "loss": 0.45382462, "memory(GiB)": 13.7, "step": 2155, "train_speed(iter/s)": 1.529621 }, { "acc": 0.9569952, "epoch": 1.0124209046168269, "grad_norm": 6.742073059082031, "learning_rate": 8.946818284715573e-06, "loss": 0.37930942, "memory(GiB)": 13.7, "step": 2160, "train_speed(iter/s)": 1.529421 }, { "acc": 0.92630882, "epoch": 1.014764471525662, "grad_norm": 19.659366607666016, "learning_rate": 8.949512562136012e-06, "loss": 0.57119303, "memory(GiB)": 13.7, "step": 2165, "train_speed(iter/s)": 1.529474 }, { "acc": 0.94289446, "epoch": 1.0171080384344973, "grad_norm": 11.758491516113281, "learning_rate": 8.952200624378106e-06, "loss": 0.40521135, "memory(GiB)": 13.7, "step": 2170, "train_speed(iter/s)": 1.529448 }, { "acc": 0.93473511, "epoch": 1.0194516053433325, "grad_norm": 3.871311664581299, "learning_rate": 8.954882500050307e-06, "loss": 0.4781764, "memory(GiB)": 13.7, "step": 2175, "train_speed(iter/s)": 1.52936 }, { "acc": 0.92704887, "epoch": 1.0217951722521679, "grad_norm": 5.6237406730651855, "learning_rate": 8.957558217563998e-06, "loss": 0.42578249, "memory(GiB)": 13.7, "step": 2180, "train_speed(iter/s)": 1.529568 }, { "acc": 0.91741943, "epoch": 1.024138739161003, "grad_norm": 1.2491629123687744, "learning_rate": 8.960227805135289e-06, "loss": 0.67860765, "memory(GiB)": 13.7, "step": 2185, "train_speed(iter/s)": 1.529686 }, { "acc": 0.91805239, "epoch": 1.0264823060698383, "grad_norm": 50.555503845214844, "learning_rate": 8.962891290786815e-06, "loss": 0.60183268, "memory(GiB)": 13.7, "step": 2190, "train_speed(iter/s)": 1.529814 }, { "acc": 0.94906445, "epoch": 1.0288258729786735, "grad_norm": 72.90596771240234, "learning_rate": 8.965548702349483e-06, "loss": 0.28420157, "memory(GiB)": 13.7, "step": 2195, "train_speed(iter/s)": 1.530016 }, { "acc": 0.92462711, "epoch": 1.0311694398875089, "grad_norm": 17.633392333984375, "learning_rate": 8.968200067464236e-06, "loss": 0.4992856, "memory(GiB)": 13.7, "step": 2200, "train_speed(iter/s)": 1.530192 }, { "acc": 0.90916328, "epoch": 1.033513006796344, "grad_norm": 16.35004234313965, "learning_rate": 8.970845413583773e-06, "loss": 0.51639242, "memory(GiB)": 13.7, "step": 2205, "train_speed(iter/s)": 1.530313 }, { "acc": 0.93655758, "epoch": 1.0358565737051793, "grad_norm": 19.28536605834961, "learning_rate": 8.973484767974237e-06, "loss": 0.44016495, "memory(GiB)": 13.7, "step": 2210, "train_speed(iter/s)": 1.530539 }, { "acc": 0.95587788, "epoch": 1.0382001406140144, "grad_norm": 7.306403160095215, "learning_rate": 8.97611815771693e-06, "loss": 0.37250299, "memory(GiB)": 13.7, "step": 2215, "train_speed(iter/s)": 1.530576 }, { "acc": 0.94559526, "epoch": 1.0405437075228499, "grad_norm": 17.60497283935547, "learning_rate": 8.978745609709957e-06, "loss": 0.39602556, "memory(GiB)": 13.7, "step": 2220, "train_speed(iter/s)": 1.53061 }, { "acc": 0.9345089, "epoch": 1.042887274431685, "grad_norm": 17.244007110595703, "learning_rate": 8.981367150669887e-06, "loss": 0.36983366, "memory(GiB)": 13.7, "step": 2225, "train_speed(iter/s)": 1.530797 }, { "acc": 0.89353466, "epoch": 1.0452308413405202, "grad_norm": 17.370370864868164, "learning_rate": 8.983982807133375e-06, "loss": 0.72834764, "memory(GiB)": 13.7, "step": 2230, "train_speed(iter/s)": 1.530862 }, { "acc": 0.90004654, "epoch": 1.0475744082493554, "grad_norm": 46.778690338134766, "learning_rate": 8.986592605458784e-06, "loss": 0.74301505, "memory(GiB)": 13.7, "step": 2235, "train_speed(iter/s)": 1.530921 }, { "acc": 0.96876984, "epoch": 1.0499179751581909, "grad_norm": 5.021106243133545, "learning_rate": 8.989196571827771e-06, "loss": 0.28141093, "memory(GiB)": 13.7, "step": 2240, "train_speed(iter/s)": 1.531134 }, { "acc": 0.92898235, "epoch": 1.052261542067026, "grad_norm": 8.02913761138916, "learning_rate": 8.99179473224686e-06, "loss": 0.41677485, "memory(GiB)": 13.7, "step": 2245, "train_speed(iter/s)": 1.531076 }, { "acc": 0.94583378, "epoch": 1.0546051089758612, "grad_norm": 6.363182544708252, "learning_rate": 8.994387112549007e-06, "loss": 0.29958806, "memory(GiB)": 13.7, "step": 2250, "train_speed(iter/s)": 1.531172 }, { "acc": 0.91555243, "epoch": 1.0569486758846964, "grad_norm": 12.56616497039795, "learning_rate": 8.996973738395144e-06, "loss": 0.53564148, "memory(GiB)": 13.7, "step": 2255, "train_speed(iter/s)": 1.531268 }, { "acc": 0.90625, "epoch": 1.0592922427935318, "grad_norm": 14.691490173339844, "learning_rate": 8.999554635275692e-06, "loss": 0.57946086, "memory(GiB)": 13.7, "step": 2260, "train_speed(iter/s)": 1.531393 }, { "acc": 0.93233461, "epoch": 1.061635809702367, "grad_norm": 21.45215606689453, "learning_rate": 9.002129828512076e-06, "loss": 0.45484495, "memory(GiB)": 13.7, "step": 2265, "train_speed(iter/s)": 1.531454 }, { "acc": 0.92071667, "epoch": 1.0639793766112022, "grad_norm": 4.162153720855713, "learning_rate": 9.004699343258216e-06, "loss": 0.5298727, "memory(GiB)": 13.7, "step": 2270, "train_speed(iter/s)": 1.531569 }, { "acc": 0.92674141, "epoch": 1.0663229435200374, "grad_norm": 9.492595672607422, "learning_rate": 9.007263204501998e-06, "loss": 0.57966986, "memory(GiB)": 13.7, "step": 2275, "train_speed(iter/s)": 1.531605 }, { "acc": 0.94061184, "epoch": 1.0686665104288728, "grad_norm": 15.71415901184082, "learning_rate": 9.009821437066736e-06, "loss": 0.39435291, "memory(GiB)": 13.7, "step": 2280, "train_speed(iter/s)": 1.531675 }, { "acc": 0.9440979, "epoch": 1.071010077337708, "grad_norm": 38.77849197387695, "learning_rate": 9.012374065612604e-06, "loss": 0.32560081, "memory(GiB)": 13.7, "step": 2285, "train_speed(iter/s)": 1.531581 }, { "acc": 0.94389067, "epoch": 1.0733536442465432, "grad_norm": 15.213113784790039, "learning_rate": 9.01492111463808e-06, "loss": 0.43975525, "memory(GiB)": 13.7, "step": 2290, "train_speed(iter/s)": 1.531776 }, { "acc": 0.91295137, "epoch": 1.0756972111553784, "grad_norm": 72.49857330322266, "learning_rate": 9.017462608481336e-06, "loss": 0.50233631, "memory(GiB)": 13.7, "step": 2295, "train_speed(iter/s)": 1.531889 }, { "acc": 0.961973, "epoch": 1.0780407780642138, "grad_norm": 36.90346145629883, "learning_rate": 9.01999857132165e-06, "loss": 0.28551641, "memory(GiB)": 13.7, "step": 2300, "train_speed(iter/s)": 1.532033 }, { "acc": 0.94156656, "epoch": 1.080384344973049, "grad_norm": 14.872834205627441, "learning_rate": 9.02252902718078e-06, "loss": 0.38001003, "memory(GiB)": 13.7, "step": 2305, "train_speed(iter/s)": 1.532047 }, { "acc": 0.93017111, "epoch": 1.0827279118818842, "grad_norm": 19.404600143432617, "learning_rate": 9.025053999924326e-06, "loss": 0.43376355, "memory(GiB)": 13.7, "step": 2310, "train_speed(iter/s)": 1.53219 }, { "acc": 0.89851933, "epoch": 1.0850714787907194, "grad_norm": 63.79338073730469, "learning_rate": 9.02757351326309e-06, "loss": 0.67603006, "memory(GiB)": 13.7, "step": 2315, "train_speed(iter/s)": 1.532336 }, { "acc": 0.94538727, "epoch": 1.0874150456995548, "grad_norm": 7.068861961364746, "learning_rate": 9.030087590754396e-06, "loss": 0.30884809, "memory(GiB)": 13.7, "step": 2320, "train_speed(iter/s)": 1.532555 }, { "acc": 0.95437317, "epoch": 1.08975861260839, "grad_norm": 10.022075653076172, "learning_rate": 9.032596255803431e-06, "loss": 0.27780862, "memory(GiB)": 13.7, "step": 2325, "train_speed(iter/s)": 1.532705 }, { "acc": 0.95746479, "epoch": 1.0921021795172252, "grad_norm": 2.625520944595337, "learning_rate": 9.035099531664536e-06, "loss": 0.2504576, "memory(GiB)": 13.7, "step": 2330, "train_speed(iter/s)": 1.53285 }, { "acc": 0.90843906, "epoch": 1.0944457464260604, "grad_norm": 21.566328048706055, "learning_rate": 9.037597441442515e-06, "loss": 0.53139715, "memory(GiB)": 13.7, "step": 2335, "train_speed(iter/s)": 1.532908 }, { "acc": 0.94256945, "epoch": 1.0967893133348956, "grad_norm": 26.834930419921875, "learning_rate": 9.04009000809389e-06, "loss": 0.43439574, "memory(GiB)": 13.7, "step": 2340, "train_speed(iter/s)": 1.533191 }, { "acc": 0.94076881, "epoch": 1.099132880243731, "grad_norm": 18.975425720214844, "learning_rate": 9.0425772544282e-06, "loss": 0.43362017, "memory(GiB)": 13.7, "step": 2345, "train_speed(iter/s)": 1.533372 }, { "acc": 0.92785378, "epoch": 1.1014764471525662, "grad_norm": 8.10866928100586, "learning_rate": 9.045059203109226e-06, "loss": 0.47224374, "memory(GiB)": 13.7, "step": 2350, "train_speed(iter/s)": 1.533508 }, { "acc": 0.89333286, "epoch": 1.1038200140614014, "grad_norm": 50.39582061767578, "learning_rate": 9.04753587665624e-06, "loss": 0.68760505, "memory(GiB)": 13.7, "step": 2355, "train_speed(iter/s)": 1.533442 }, { "acc": 0.9511404, "epoch": 1.1061635809702368, "grad_norm": 10.269875526428223, "learning_rate": 9.050007297445239e-06, "loss": 0.39089267, "memory(GiB)": 13.7, "step": 2360, "train_speed(iter/s)": 1.533536 }, { "acc": 0.93893547, "epoch": 1.108507147879072, "grad_norm": 3.168337345123291, "learning_rate": 9.052473487710139e-06, "loss": 0.39498634, "memory(GiB)": 13.7, "step": 2365, "train_speed(iter/s)": 1.533613 }, { "acc": 0.96663818, "epoch": 1.1108507147879072, "grad_norm": 3.913578987121582, "learning_rate": 9.054934469543993e-06, "loss": 0.25451174, "memory(GiB)": 13.7, "step": 2370, "train_speed(iter/s)": 1.533671 }, { "acc": 0.95891781, "epoch": 1.1131942816967424, "grad_norm": 7.232490539550781, "learning_rate": 9.057390264900171e-06, "loss": 0.34121048, "memory(GiB)": 13.7, "step": 2375, "train_speed(iter/s)": 1.533843 }, { "acc": 0.93398666, "epoch": 1.1155378486055776, "grad_norm": 18.47295570373535, "learning_rate": 9.059840895593533e-06, "loss": 0.31284373, "memory(GiB)": 13.7, "step": 2380, "train_speed(iter/s)": 1.533922 }, { "acc": 0.92779875, "epoch": 1.117881415514413, "grad_norm": 11.979978561401367, "learning_rate": 9.0622863833016e-06, "loss": 0.3298846, "memory(GiB)": 13.7, "step": 2385, "train_speed(iter/s)": 1.534106 }, { "acc": 0.93364582, "epoch": 1.1202249824232482, "grad_norm": 33.41872787475586, "learning_rate": 9.064726749565692e-06, "loss": 0.38861735, "memory(GiB)": 13.7, "step": 2390, "train_speed(iter/s)": 1.534172 }, { "acc": 0.93514776, "epoch": 1.1225685493320834, "grad_norm": 9.828370094299316, "learning_rate": 9.067162015792078e-06, "loss": 0.45203276, "memory(GiB)": 13.7, "step": 2395, "train_speed(iter/s)": 1.534408 }, { "acc": 0.92223721, "epoch": 1.1249121162409186, "grad_norm": 30.925506591796875, "learning_rate": 9.069592203253098e-06, "loss": 0.53724241, "memory(GiB)": 13.7, "step": 2400, "train_speed(iter/s)": 1.534454 }, { "acc": 0.92689476, "epoch": 1.127255683149754, "grad_norm": 10.414088249206543, "learning_rate": 9.072017333088275e-06, "loss": 0.49748983, "memory(GiB)": 13.7, "step": 2405, "train_speed(iter/s)": 1.534618 }, { "acc": 0.96350451, "epoch": 1.1295992500585892, "grad_norm": 3.559553384780884, "learning_rate": 9.074437426305423e-06, "loss": 0.20221078, "memory(GiB)": 13.7, "step": 2410, "train_speed(iter/s)": 1.534702 }, { "acc": 0.94563503, "epoch": 1.1319428169674244, "grad_norm": 31.153854370117188, "learning_rate": 9.07685250378174e-06, "loss": 0.3816726, "memory(GiB)": 13.7, "step": 2415, "train_speed(iter/s)": 1.534989 }, { "acc": 0.94384632, "epoch": 1.1342863838762596, "grad_norm": 17.84687042236328, "learning_rate": 9.079262586264882e-06, "loss": 0.27404947, "memory(GiB)": 13.7, "step": 2420, "train_speed(iter/s)": 1.535224 }, { "acc": 0.9188858, "epoch": 1.136629950785095, "grad_norm": 24.35040283203125, "learning_rate": 9.081667694374038e-06, "loss": 0.61684093, "memory(GiB)": 13.7, "step": 2425, "train_speed(iter/s)": 1.535423 }, { "acc": 0.90715189, "epoch": 1.1389735176939302, "grad_norm": 26.29848861694336, "learning_rate": 9.084067848600988e-06, "loss": 0.64596624, "memory(GiB)": 13.7, "step": 2430, "train_speed(iter/s)": 1.535352 }, { "acc": 0.90341272, "epoch": 1.1413170846027654, "grad_norm": 90.03340911865234, "learning_rate": 9.086463069311146e-06, "loss": 0.66108055, "memory(GiB)": 13.7, "step": 2435, "train_speed(iter/s)": 1.535364 }, { "acc": 0.94145832, "epoch": 1.1436606515116006, "grad_norm": 6.047617435455322, "learning_rate": 9.088853376744604e-06, "loss": 0.40625248, "memory(GiB)": 13.7, "step": 2440, "train_speed(iter/s)": 1.535556 }, { "acc": 0.93820915, "epoch": 1.146004218420436, "grad_norm": 20.822437286376953, "learning_rate": 9.091238791017153e-06, "loss": 0.36809821, "memory(GiB)": 13.7, "step": 2445, "train_speed(iter/s)": 1.53559 }, { "acc": 0.90200052, "epoch": 1.1483477853292712, "grad_norm": 20.44481086730957, "learning_rate": 9.093619332121296e-06, "loss": 0.56397858, "memory(GiB)": 13.7, "step": 2450, "train_speed(iter/s)": 1.535639 }, { "acc": 0.92420826, "epoch": 1.1506913522381064, "grad_norm": 14.849730491638184, "learning_rate": 9.095995019927265e-06, "loss": 0.50849423, "memory(GiB)": 13.7, "step": 2455, "train_speed(iter/s)": 1.535662 }, { "acc": 0.92798567, "epoch": 1.1530349191469416, "grad_norm": 37.15998840332031, "learning_rate": 9.098365874184004e-06, "loss": 0.58501725, "memory(GiB)": 13.7, "step": 2460, "train_speed(iter/s)": 1.535804 }, { "acc": 0.95645447, "epoch": 1.155378486055777, "grad_norm": 18.585235595703125, "learning_rate": 9.100731914520159e-06, "loss": 0.23903458, "memory(GiB)": 13.7, "step": 2465, "train_speed(iter/s)": 1.535698 }, { "acc": 0.94849358, "epoch": 1.1577220529646122, "grad_norm": 38.91580581665039, "learning_rate": 9.103093160445054e-06, "loss": 0.31348784, "memory(GiB)": 13.7, "step": 2470, "train_speed(iter/s)": 1.53573 }, { "acc": 0.93856211, "epoch": 1.1600656198734474, "grad_norm": 33.6971321105957, "learning_rate": 9.105449631349653e-06, "loss": 0.40101585, "memory(GiB)": 13.7, "step": 2475, "train_speed(iter/s)": 1.535693 }, { "acc": 0.91369305, "epoch": 1.1624091867822826, "grad_norm": 14.015630722045898, "learning_rate": 9.10780134650752e-06, "loss": 0.455267, "memory(GiB)": 13.7, "step": 2480, "train_speed(iter/s)": 1.535696 }, { "acc": 0.93301744, "epoch": 1.164752753691118, "grad_norm": 95.1298828125, "learning_rate": 9.110148325075762e-06, "loss": 0.44265046, "memory(GiB)": 13.7, "step": 2485, "train_speed(iter/s)": 1.535843 }, { "acc": 0.92255754, "epoch": 1.1670963205999532, "grad_norm": 10.597942352294922, "learning_rate": 9.112490586095961e-06, "loss": 0.52193432, "memory(GiB)": 13.7, "step": 2490, "train_speed(iter/s)": 1.535983 }, { "acc": 0.93330364, "epoch": 1.1694398875087884, "grad_norm": 15.886880874633789, "learning_rate": 9.114828148495112e-06, "loss": 0.44166226, "memory(GiB)": 13.7, "step": 2495, "train_speed(iter/s)": 1.536124 }, { "acc": 0.92259836, "epoch": 1.1717834544176235, "grad_norm": 18.72627067565918, "learning_rate": 9.117161031086532e-06, "loss": 0.62005391, "memory(GiB)": 13.7, "step": 2500, "train_speed(iter/s)": 1.536179 }, { "acc": 0.93449831, "epoch": 1.174127021326459, "grad_norm": 10.099165916442871, "learning_rate": 9.119489252570769e-06, "loss": 0.40493236, "memory(GiB)": 13.7, "step": 2505, "train_speed(iter/s)": 1.535995 }, { "acc": 0.92996359, "epoch": 1.1764705882352942, "grad_norm": 21.92999267578125, "learning_rate": 9.121812831536504e-06, "loss": 0.42834229, "memory(GiB)": 13.7, "step": 2510, "train_speed(iter/s)": 1.535621 }, { "acc": 0.92356644, "epoch": 1.1788141551441293, "grad_norm": 36.550296783447266, "learning_rate": 9.124131786461441e-06, "loss": 0.59876118, "memory(GiB)": 13.7, "step": 2515, "train_speed(iter/s)": 1.53565 }, { "acc": 0.926015, "epoch": 1.1811577220529645, "grad_norm": 17.965087890625, "learning_rate": 9.126446135713188e-06, "loss": 0.50213547, "memory(GiB)": 13.7, "step": 2520, "train_speed(iter/s)": 1.535673 }, { "acc": 0.9460516, "epoch": 1.1835012889618, "grad_norm": 25.92168617248535, "learning_rate": 9.12875589755013e-06, "loss": 0.37145262, "memory(GiB)": 13.7, "step": 2525, "train_speed(iter/s)": 1.535741 }, { "acc": 0.8941144, "epoch": 1.1858448558706352, "grad_norm": 37.81193161010742, "learning_rate": 9.131061090122296e-06, "loss": 0.65720482, "memory(GiB)": 13.7, "step": 2530, "train_speed(iter/s)": 1.535651 }, { "acc": 0.93010426, "epoch": 1.1881884227794703, "grad_norm": 10.358652114868164, "learning_rate": 9.133361731472208e-06, "loss": 0.50779939, "memory(GiB)": 13.7, "step": 2535, "train_speed(iter/s)": 1.535733 }, { "acc": 0.94356937, "epoch": 1.1905319896883055, "grad_norm": 6.708917617797852, "learning_rate": 9.135657839535735e-06, "loss": 0.36504834, "memory(GiB)": 13.7, "step": 2540, "train_speed(iter/s)": 1.535744 }, { "acc": 0.92268543, "epoch": 1.1928755565971407, "grad_norm": 3.7334787845611572, "learning_rate": 9.13794943214293e-06, "loss": 0.52750192, "memory(GiB)": 13.7, "step": 2545, "train_speed(iter/s)": 1.535866 }, { "acc": 0.95825405, "epoch": 1.1952191235059761, "grad_norm": 7.091505527496338, "learning_rate": 9.14023652701886e-06, "loss": 0.2416492, "memory(GiB)": 13.7, "step": 2550, "train_speed(iter/s)": 1.535974 }, { "acc": 0.9505868, "epoch": 1.1975626904148113, "grad_norm": 3.964458465576172, "learning_rate": 9.142519141784428e-06, "loss": 0.44220529, "memory(GiB)": 13.7, "step": 2555, "train_speed(iter/s)": 1.536053 }, { "acc": 0.9616724, "epoch": 1.1999062573236465, "grad_norm": 24.364389419555664, "learning_rate": 9.144797293957187e-06, "loss": 0.26118088, "memory(GiB)": 13.7, "step": 2560, "train_speed(iter/s)": 1.536025 }, { "acc": 0.92726688, "epoch": 1.202249824232482, "grad_norm": 37.35346221923828, "learning_rate": 9.147071000952153e-06, "loss": 0.47596884, "memory(GiB)": 13.7, "step": 2565, "train_speed(iter/s)": 1.536065 }, { "acc": 0.93002892, "epoch": 1.2045933911413171, "grad_norm": 12.123969078063965, "learning_rate": 9.149340280082591e-06, "loss": 0.51355696, "memory(GiB)": 13.7, "step": 2570, "train_speed(iter/s)": 1.536074 }, { "acc": 0.91220579, "epoch": 1.2069369580501523, "grad_norm": 19.34387969970703, "learning_rate": 9.151605148560825e-06, "loss": 0.60172906, "memory(GiB)": 13.7, "step": 2575, "train_speed(iter/s)": 1.53607 }, { "acc": 0.92607059, "epoch": 1.2092805249589875, "grad_norm": 260.5047607421875, "learning_rate": 9.153865623498999e-06, "loss": 0.48830004, "memory(GiB)": 13.7, "step": 2580, "train_speed(iter/s)": 1.536028 }, { "acc": 0.93548393, "epoch": 1.2116240918678227, "grad_norm": 16.008092880249023, "learning_rate": 9.156121721909872e-06, "loss": 0.32810128, "memory(GiB)": 13.7, "step": 2585, "train_speed(iter/s)": 1.535956 }, { "acc": 0.93475065, "epoch": 1.2139676587766581, "grad_norm": 10.481362342834473, "learning_rate": 9.15837346070757e-06, "loss": 0.37859659, "memory(GiB)": 13.7, "step": 2590, "train_speed(iter/s)": 1.535972 }, { "acc": 0.93883839, "epoch": 1.2163112256854933, "grad_norm": 19.019296646118164, "learning_rate": 9.160620856708367e-06, "loss": 0.35954375, "memory(GiB)": 13.7, "step": 2595, "train_speed(iter/s)": 1.536088 }, { "acc": 0.9455431, "epoch": 1.2186547925943285, "grad_norm": 8.938689231872559, "learning_rate": 9.162863926631415e-06, "loss": 0.35554001, "memory(GiB)": 13.7, "step": 2600, "train_speed(iter/s)": 1.53614 }, { "acc": 0.94495544, "epoch": 1.220998359503164, "grad_norm": 17.840024948120117, "learning_rate": 9.165102687099507e-06, "loss": 0.3145071, "memory(GiB)": 13.7, "step": 2605, "train_speed(iter/s)": 1.536143 }, { "acc": 0.96260977, "epoch": 1.2233419264119991, "grad_norm": 9.147497177124023, "learning_rate": 9.16733715463981e-06, "loss": 0.23096876, "memory(GiB)": 13.7, "step": 2610, "train_speed(iter/s)": 1.535976 }, { "acc": 0.92356615, "epoch": 1.2256854933208343, "grad_norm": 24.24043846130371, "learning_rate": 9.169567345684602e-06, "loss": 0.43037124, "memory(GiB)": 13.7, "step": 2615, "train_speed(iter/s)": 1.536049 }, { "acc": 0.94422531, "epoch": 1.2280290602296695, "grad_norm": 9.052006721496582, "learning_rate": 9.171793276571987e-06, "loss": 0.29395757, "memory(GiB)": 13.7, "step": 2620, "train_speed(iter/s)": 1.535996 }, { "acc": 0.90090742, "epoch": 1.2303726271385047, "grad_norm": 23.97992706298828, "learning_rate": 9.174014963546622e-06, "loss": 0.58385639, "memory(GiB)": 13.7, "step": 2625, "train_speed(iter/s)": 1.535959 }, { "acc": 0.91998339, "epoch": 1.2327161940473401, "grad_norm": 35.57135772705078, "learning_rate": 9.176232422760427e-06, "loss": 0.46145511, "memory(GiB)": 13.7, "step": 2630, "train_speed(iter/s)": 1.536078 }, { "acc": 0.94512806, "epoch": 1.2350597609561753, "grad_norm": 13.863736152648926, "learning_rate": 9.178445670273283e-06, "loss": 0.32371593, "memory(GiB)": 13.7, "step": 2635, "train_speed(iter/s)": 1.53622 }, { "acc": 0.93700237, "epoch": 1.2374033278650105, "grad_norm": 12.3265962600708, "learning_rate": 9.180654722053742e-06, "loss": 0.35663538, "memory(GiB)": 13.7, "step": 2640, "train_speed(iter/s)": 1.536054 }, { "acc": 0.94506464, "epoch": 1.2397468947738457, "grad_norm": 20.129478454589844, "learning_rate": 9.182859593979709e-06, "loss": 0.33061719, "memory(GiB)": 13.7, "step": 2645, "train_speed(iter/s)": 1.53622 }, { "acc": 0.96545372, "epoch": 1.2420904616826811, "grad_norm": 7.974246978759766, "learning_rate": 9.185060301839125e-06, "loss": 0.18705261, "memory(GiB)": 13.7, "step": 2650, "train_speed(iter/s)": 1.536299 }, { "acc": 0.94155254, "epoch": 1.2444340285915163, "grad_norm": 10.307149887084961, "learning_rate": 9.187256861330654e-06, "loss": 0.34829578, "memory(GiB)": 13.7, "step": 2655, "train_speed(iter/s)": 1.536374 }, { "acc": 0.91969786, "epoch": 1.2467775955003515, "grad_norm": 15.531084060668945, "learning_rate": 9.189449288064351e-06, "loss": 0.66013107, "memory(GiB)": 13.7, "step": 2660, "train_speed(iter/s)": 1.536326 }, { "acc": 0.90911541, "epoch": 1.2491211624091867, "grad_norm": 28.47270965576172, "learning_rate": 9.191637597562322e-06, "loss": 0.50869751, "memory(GiB)": 13.7, "step": 2665, "train_speed(iter/s)": 1.53639 }, { "acc": 0.92425957, "epoch": 1.251464729318022, "grad_norm": 16.38591194152832, "learning_rate": 9.193821805259392e-06, "loss": 0.50232291, "memory(GiB)": 13.7, "step": 2670, "train_speed(iter/s)": 1.536611 }, { "acc": 0.95048075, "epoch": 1.2538082962268573, "grad_norm": 9.87586498260498, "learning_rate": 9.196001926503754e-06, "loss": 0.28575377, "memory(GiB)": 13.7, "step": 2675, "train_speed(iter/s)": 1.536684 }, { "acc": 0.9377346, "epoch": 1.2561518631356925, "grad_norm": 25.62485694885254, "learning_rate": 9.198177976557617e-06, "loss": 0.44694581, "memory(GiB)": 13.7, "step": 2680, "train_speed(iter/s)": 1.536781 }, { "acc": 0.90939026, "epoch": 1.258495430044528, "grad_norm": 91.33905029296875, "learning_rate": 9.200349970597844e-06, "loss": 0.58866429, "memory(GiB)": 13.7, "step": 2685, "train_speed(iter/s)": 1.536975 }, { "acc": 0.92513142, "epoch": 1.260838996953363, "grad_norm": 12.029544830322266, "learning_rate": 9.202517923716594e-06, "loss": 0.55048771, "memory(GiB)": 13.7, "step": 2690, "train_speed(iter/s)": 1.537058 }, { "acc": 0.9309639, "epoch": 1.2631825638621983, "grad_norm": 30.42771339416504, "learning_rate": 9.204681850921941e-06, "loss": 0.43750477, "memory(GiB)": 13.7, "step": 2695, "train_speed(iter/s)": 1.537042 }, { "acc": 0.91782665, "epoch": 1.2655261307710335, "grad_norm": 74.07315826416016, "learning_rate": 9.206841767138513e-06, "loss": 0.52191386, "memory(GiB)": 13.7, "step": 2700, "train_speed(iter/s)": 1.537088 }, { "acc": 0.92210388, "epoch": 1.2678696976798687, "grad_norm": 12.408794403076172, "learning_rate": 9.20899768720809e-06, "loss": 0.53418627, "memory(GiB)": 13.7, "step": 2705, "train_speed(iter/s)": 1.537294 }, { "acc": 0.94731712, "epoch": 1.2702132645887039, "grad_norm": 9.33137035369873, "learning_rate": 9.211149625890234e-06, "loss": 0.27748816, "memory(GiB)": 13.7, "step": 2710, "train_speed(iter/s)": 1.537393 }, { "acc": 0.95197916, "epoch": 1.2725568314975393, "grad_norm": 5.010521411895752, "learning_rate": 9.21329759786288e-06, "loss": 0.25144587, "memory(GiB)": 13.7, "step": 2715, "train_speed(iter/s)": 1.537652 }, { "acc": 0.93933258, "epoch": 1.2749003984063745, "grad_norm": 26.572267532348633, "learning_rate": 9.21544161772295e-06, "loss": 0.41604414, "memory(GiB)": 13.7, "step": 2720, "train_speed(iter/s)": 1.537655 }, { "acc": 0.93402262, "epoch": 1.2772439653152097, "grad_norm": 9.51113510131836, "learning_rate": 9.217581699986938e-06, "loss": 0.39772205, "memory(GiB)": 13.7, "step": 2725, "train_speed(iter/s)": 1.537615 }, { "acc": 0.93724575, "epoch": 1.279587532224045, "grad_norm": 11.293885231018066, "learning_rate": 9.219717859091505e-06, "loss": 0.39118752, "memory(GiB)": 13.7, "step": 2730, "train_speed(iter/s)": 1.537567 }, { "acc": 0.95006714, "epoch": 1.2819310991328803, "grad_norm": 32.36053466796875, "learning_rate": 9.221850109394063e-06, "loss": 0.33367324, "memory(GiB)": 13.7, "step": 2735, "train_speed(iter/s)": 1.537434 }, { "acc": 0.9347188, "epoch": 1.2842746660417155, "grad_norm": 14.777729034423828, "learning_rate": 9.223978465173354e-06, "loss": 0.49950347, "memory(GiB)": 13.7, "step": 2740, "train_speed(iter/s)": 1.537516 }, { "acc": 0.9290699, "epoch": 1.2866182329505507, "grad_norm": 88.62045288085938, "learning_rate": 9.22610294063002e-06, "loss": 0.44952664, "memory(GiB)": 13.7, "step": 2745, "train_speed(iter/s)": 1.537854 }, { "acc": 0.94630947, "epoch": 1.2889617998593859, "grad_norm": 21.08638572692871, "learning_rate": 9.228223549887178e-06, "loss": 0.36379786, "memory(GiB)": 13.7, "step": 2750, "train_speed(iter/s)": 1.538045 }, { "acc": 0.91219883, "epoch": 1.2913053667682213, "grad_norm": 34.34843063354492, "learning_rate": 9.230340306990974e-06, "loss": 0.5493691, "memory(GiB)": 13.7, "step": 2755, "train_speed(iter/s)": 1.538146 }, { "acc": 0.9538826, "epoch": 1.2936489336770565, "grad_norm": 5.991532802581787, "learning_rate": 9.232453225911156e-06, "loss": 0.26483655, "memory(GiB)": 13.7, "step": 2760, "train_speed(iter/s)": 1.538079 }, { "acc": 0.92782345, "epoch": 1.2959925005858917, "grad_norm": 80.34374237060547, "learning_rate": 9.234562320541608e-06, "loss": 0.32973571, "memory(GiB)": 13.7, "step": 2765, "train_speed(iter/s)": 1.538197 }, { "acc": 0.9200942, "epoch": 1.298336067494727, "grad_norm": 8.379515647888184, "learning_rate": 9.236667604700914e-06, "loss": 0.50018125, "memory(GiB)": 13.7, "step": 2770, "train_speed(iter/s)": 1.538348 }, { "acc": 0.94223213, "epoch": 1.3006796344035623, "grad_norm": 6.545696258544922, "learning_rate": 9.238769092132897e-06, "loss": 0.38291895, "memory(GiB)": 13.7, "step": 2775, "train_speed(iter/s)": 1.538314 }, { "acc": 0.93645706, "epoch": 1.3030232013123975, "grad_norm": 20.251529693603516, "learning_rate": 9.240866796507153e-06, "loss": 0.40630994, "memory(GiB)": 13.7, "step": 2780, "train_speed(iter/s)": 1.538368 }, { "acc": 0.93757534, "epoch": 1.3053667682212327, "grad_norm": 14.02149486541748, "learning_rate": 9.242960731419584e-06, "loss": 0.36999702, "memory(GiB)": 13.7, "step": 2785, "train_speed(iter/s)": 1.538305 }, { "acc": 0.93264408, "epoch": 1.3077103351300678, "grad_norm": 17.071449279785156, "learning_rate": 9.245050910392937e-06, "loss": 0.5220046, "memory(GiB)": 13.7, "step": 2790, "train_speed(iter/s)": 1.538298 }, { "acc": 0.92434521, "epoch": 1.3100539020389033, "grad_norm": 9.846770286560059, "learning_rate": 9.247137346877317e-06, "loss": 0.41818628, "memory(GiB)": 13.7, "step": 2795, "train_speed(iter/s)": 1.538325 }, { "acc": 0.89959068, "epoch": 1.3123974689477385, "grad_norm": 33.370811462402344, "learning_rate": 9.249220054250711e-06, "loss": 0.67040577, "memory(GiB)": 13.7, "step": 2800, "train_speed(iter/s)": 1.538359 }, { "acc": 0.95084591, "epoch": 1.3147410358565736, "grad_norm": 7.133469581604004, "learning_rate": 9.251299045819505e-06, "loss": 0.37708836, "memory(GiB)": 13.7, "step": 2805, "train_speed(iter/s)": 1.538294 }, { "acc": 0.92266865, "epoch": 1.317084602765409, "grad_norm": 0.6323421597480774, "learning_rate": 9.253374334818987e-06, "loss": 0.52466807, "memory(GiB)": 13.7, "step": 2810, "train_speed(iter/s)": 1.538304 }, { "acc": 0.95892477, "epoch": 1.3194281696742443, "grad_norm": 10.382954597473145, "learning_rate": 9.255445934413858e-06, "loss": 0.27691703, "memory(GiB)": 13.7, "step": 2815, "train_speed(iter/s)": 1.538482 }, { "acc": 0.93523846, "epoch": 1.3217717365830794, "grad_norm": 56.470672607421875, "learning_rate": 9.257513857698732e-06, "loss": 0.34887478, "memory(GiB)": 13.7, "step": 2820, "train_speed(iter/s)": 1.538392 }, { "acc": 0.94259453, "epoch": 1.3241153034919146, "grad_norm": 40.445404052734375, "learning_rate": 9.259578117698631e-06, "loss": 0.50001822, "memory(GiB)": 13.7, "step": 2825, "train_speed(iter/s)": 1.538504 }, { "acc": 0.92054434, "epoch": 1.3264588704007498, "grad_norm": 61.72869873046875, "learning_rate": 9.261638727369486e-06, "loss": 0.50736217, "memory(GiB)": 13.7, "step": 2830, "train_speed(iter/s)": 1.538524 }, { "acc": 0.92154255, "epoch": 1.3288024373095852, "grad_norm": 22.126386642456055, "learning_rate": 9.263695699598603e-06, "loss": 0.54525046, "memory(GiB)": 13.7, "step": 2835, "train_speed(iter/s)": 1.538575 }, { "acc": 0.9211874, "epoch": 1.3311460042184204, "grad_norm": 44.788631439208984, "learning_rate": 9.265749047205177e-06, "loss": 0.46565876, "memory(GiB)": 13.7, "step": 2840, "train_speed(iter/s)": 1.538641 }, { "acc": 0.94249992, "epoch": 1.3334895711272556, "grad_norm": 7.273321151733398, "learning_rate": 9.267798782940745e-06, "loss": 0.2741565, "memory(GiB)": 13.7, "step": 2845, "train_speed(iter/s)": 1.538797 }, { "acc": 0.9328373, "epoch": 1.335833138036091, "grad_norm": 4.240345001220703, "learning_rate": 9.269844919489676e-06, "loss": 0.45632172, "memory(GiB)": 13.7, "step": 2850, "train_speed(iter/s)": 1.538814 }, { "acc": 0.91540661, "epoch": 1.3381767049449262, "grad_norm": 18.270959854125977, "learning_rate": 9.271887469469639e-06, "loss": 0.63198986, "memory(GiB)": 13.7, "step": 2855, "train_speed(iter/s)": 1.538866 }, { "acc": 0.94529762, "epoch": 1.3405202718537614, "grad_norm": 27.293428421020508, "learning_rate": 9.27392644543206e-06, "loss": 0.39970899, "memory(GiB)": 13.7, "step": 2860, "train_speed(iter/s)": 1.538876 }, { "acc": 0.92885323, "epoch": 1.3428638387625966, "grad_norm": 13.059736251831055, "learning_rate": 9.275961859862605e-06, "loss": 0.43738194, "memory(GiB)": 13.7, "step": 2865, "train_speed(iter/s)": 1.538748 }, { "acc": 0.94570885, "epoch": 1.3452074056714318, "grad_norm": 16.238698959350586, "learning_rate": 9.277993725181619e-06, "loss": 0.27417562, "memory(GiB)": 13.7, "step": 2870, "train_speed(iter/s)": 1.538968 }, { "acc": 0.91324406, "epoch": 1.3475509725802672, "grad_norm": 30.1038761138916, "learning_rate": 9.280022053744592e-06, "loss": 0.48371778, "memory(GiB)": 13.7, "step": 2875, "train_speed(iter/s)": 1.53894 }, { "acc": 0.93234034, "epoch": 1.3498945394891024, "grad_norm": 11.760212898254395, "learning_rate": 9.282046857842603e-06, "loss": 0.45829134, "memory(GiB)": 13.7, "step": 2880, "train_speed(iter/s)": 1.538934 }, { "acc": 0.95597172, "epoch": 1.3522381063979376, "grad_norm": 6.633667469024658, "learning_rate": 9.284068149702776e-06, "loss": 0.26940393, "memory(GiB)": 13.7, "step": 2885, "train_speed(iter/s)": 1.539103 }, { "acc": 0.94348173, "epoch": 1.354581673306773, "grad_norm": 15.799162864685059, "learning_rate": 9.286085941488713e-06, "loss": 0.41331625, "memory(GiB)": 13.7, "step": 2890, "train_speed(iter/s)": 1.539229 }, { "acc": 0.95618153, "epoch": 1.3569252402156082, "grad_norm": 25.583980560302734, "learning_rate": 9.28810024530094e-06, "loss": 0.35203166, "memory(GiB)": 13.7, "step": 2895, "train_speed(iter/s)": 1.539315 }, { "acc": 0.92737694, "epoch": 1.3592688071244434, "grad_norm": 13.34396743774414, "learning_rate": 9.290111073177337e-06, "loss": 0.52407198, "memory(GiB)": 13.7, "step": 2900, "train_speed(iter/s)": 1.53943 }, { "acc": 0.93865356, "epoch": 1.3616123740332786, "grad_norm": 25.538372039794922, "learning_rate": 9.292118437093576e-06, "loss": 0.42456222, "memory(GiB)": 13.7, "step": 2905, "train_speed(iter/s)": 1.539477 }, { "acc": 0.94209824, "epoch": 1.3639559409421138, "grad_norm": 3.038835048675537, "learning_rate": 9.294122348963544e-06, "loss": 0.39506836, "memory(GiB)": 13.7, "step": 2910, "train_speed(iter/s)": 1.539632 }, { "acc": 0.9161499, "epoch": 1.3662995078509492, "grad_norm": 14.505648612976074, "learning_rate": 9.29612282063977e-06, "loss": 0.3724174, "memory(GiB)": 13.7, "step": 2915, "train_speed(iter/s)": 1.539556 }, { "acc": 0.9303896, "epoch": 1.3686430747597844, "grad_norm": 36.1570930480957, "learning_rate": 9.298119863913843e-06, "loss": 0.52807474, "memory(GiB)": 13.7, "step": 2920, "train_speed(iter/s)": 1.5396 }, { "acc": 0.94077454, "epoch": 1.3709866416686196, "grad_norm": 25.979345321655273, "learning_rate": 9.30011349051683e-06, "loss": 0.34496636, "memory(GiB)": 13.7, "step": 2925, "train_speed(iter/s)": 1.539723 }, { "acc": 0.92786112, "epoch": 1.373330208577455, "grad_norm": 30.321428298950195, "learning_rate": 9.302103712119693e-06, "loss": 0.38877473, "memory(GiB)": 13.7, "step": 2930, "train_speed(iter/s)": 1.539794 }, { "acc": 0.95049114, "epoch": 1.3756737754862902, "grad_norm": 13.032219886779785, "learning_rate": 9.304090540333693e-06, "loss": 0.30112314, "memory(GiB)": 13.7, "step": 2935, "train_speed(iter/s)": 1.539855 }, { "acc": 0.88992968, "epoch": 1.3780173423951254, "grad_norm": 20.068817138671875, "learning_rate": 9.306073986710801e-06, "loss": 0.71557531, "memory(GiB)": 13.7, "step": 2940, "train_speed(iter/s)": 1.539905 }, { "acc": 0.93385315, "epoch": 1.3803609093039606, "grad_norm": 8.351790428161621, "learning_rate": 9.3080540627441e-06, "loss": 0.47910566, "memory(GiB)": 13.7, "step": 2945, "train_speed(iter/s)": 1.539959 }, { "acc": 0.90604172, "epoch": 1.3827044762127958, "grad_norm": 13.05016803741455, "learning_rate": 9.31003077986818e-06, "loss": 0.67023664, "memory(GiB)": 13.7, "step": 2950, "train_speed(iter/s)": 1.540117 }, { "acc": 0.95213947, "epoch": 1.385048043121631, "grad_norm": 26.591209411621094, "learning_rate": 9.312004149459542e-06, "loss": 0.36943765, "memory(GiB)": 13.7, "step": 2955, "train_speed(iter/s)": 1.540038 }, { "acc": 0.93356228, "epoch": 1.3873916100304664, "grad_norm": 31.31604766845703, "learning_rate": 9.313974182836986e-06, "loss": 0.34876206, "memory(GiB)": 13.7, "step": 2960, "train_speed(iter/s)": 1.54001 }, { "acc": 0.92625065, "epoch": 1.3897351769393016, "grad_norm": 45.75977325439453, "learning_rate": 9.315940891262004e-06, "loss": 0.46740794, "memory(GiB)": 13.7, "step": 2965, "train_speed(iter/s)": 1.539954 }, { "acc": 0.92192907, "epoch": 1.3920787438481368, "grad_norm": 26.702301025390625, "learning_rate": 9.317904285939158e-06, "loss": 0.5727355, "memory(GiB)": 13.7, "step": 2970, "train_speed(iter/s)": 1.539947 }, { "acc": 0.93468752, "epoch": 1.3944223107569722, "grad_norm": 18.63602066040039, "learning_rate": 9.319864378016474e-06, "loss": 0.45881081, "memory(GiB)": 13.7, "step": 2975, "train_speed(iter/s)": 1.539975 }, { "acc": 0.94585009, "epoch": 1.3967658776658074, "grad_norm": 10.841561317443848, "learning_rate": 9.321821178585816e-06, "loss": 0.31126816, "memory(GiB)": 13.7, "step": 2980, "train_speed(iter/s)": 1.539882 }, { "acc": 0.90806141, "epoch": 1.3991094445746426, "grad_norm": 13.583032608032227, "learning_rate": 9.32377469868326e-06, "loss": 0.51988916, "memory(GiB)": 13.7, "step": 2985, "train_speed(iter/s)": 1.539947 }, { "acc": 0.90605164, "epoch": 1.4014530114834778, "grad_norm": 28.200740814208984, "learning_rate": 9.325724949289474e-06, "loss": 0.63685646, "memory(GiB)": 13.7, "step": 2990, "train_speed(iter/s)": 1.539782 }, { "acc": 0.94406261, "epoch": 1.403796578392313, "grad_norm": 6.717202186584473, "learning_rate": 9.327671941330082e-06, "loss": 0.33063927, "memory(GiB)": 13.7, "step": 2995, "train_speed(iter/s)": 1.539873 }, { "acc": 0.92895212, "epoch": 1.4061401453011484, "grad_norm": 13.828283309936523, "learning_rate": 9.329615685676038e-06, "loss": 0.42580733, "memory(GiB)": 13.7, "step": 3000, "train_speed(iter/s)": 1.54 }, { "acc": 0.92873917, "epoch": 1.4084837122099836, "grad_norm": 6.33258581161499, "learning_rate": 9.331556193143983e-06, "loss": 0.50113316, "memory(GiB)": 13.7, "step": 3005, "train_speed(iter/s)": 1.540077 }, { "acc": 0.93805227, "epoch": 1.4108272791188188, "grad_norm": 77.67060852050781, "learning_rate": 9.333493474496616e-06, "loss": 0.33106542, "memory(GiB)": 13.7, "step": 3010, "train_speed(iter/s)": 1.540161 }, { "acc": 0.94353676, "epoch": 1.4131708460276542, "grad_norm": 106.97830963134766, "learning_rate": 9.335427540443032e-06, "loss": 0.3951211, "memory(GiB)": 13.7, "step": 3015, "train_speed(iter/s)": 1.540165 }, { "acc": 0.95133924, "epoch": 1.4155144129364894, "grad_norm": 31.73031234741211, "learning_rate": 9.337358401639107e-06, "loss": 0.36520112, "memory(GiB)": 13.7, "step": 3020, "train_speed(iter/s)": 1.5403 }, { "acc": 0.91056938, "epoch": 1.4178579798453246, "grad_norm": 19.089916229248047, "learning_rate": 9.339286068687823e-06, "loss": 0.51227298, "memory(GiB)": 13.7, "step": 3025, "train_speed(iter/s)": 1.540358 }, { "acc": 0.91751175, "epoch": 1.4202015467541598, "grad_norm": 95.19351196289062, "learning_rate": 9.341210552139635e-06, "loss": 0.58384476, "memory(GiB)": 13.7, "step": 3030, "train_speed(iter/s)": 1.540322 }, { "acc": 0.92602139, "epoch": 1.422545113662995, "grad_norm": 12.715084075927734, "learning_rate": 9.34313186249281e-06, "loss": 0.44792051, "memory(GiB)": 13.7, "step": 3035, "train_speed(iter/s)": 1.540552 }, { "acc": 0.91595907, "epoch": 1.4248886805718304, "grad_norm": 8.247701644897461, "learning_rate": 9.345050010193765e-06, "loss": 0.38640141, "memory(GiB)": 13.7, "step": 3040, "train_speed(iter/s)": 1.540705 }, { "acc": 0.95937281, "epoch": 1.4272322474806656, "grad_norm": 18.4952392578125, "learning_rate": 9.346965005637426e-06, "loss": 0.26639445, "memory(GiB)": 13.7, "step": 3045, "train_speed(iter/s)": 1.540916 }, { "acc": 0.9364583, "epoch": 1.4295758143895008, "grad_norm": 23.322542190551758, "learning_rate": 9.348876859167545e-06, "loss": 0.40607524, "memory(GiB)": 13.7, "step": 3050, "train_speed(iter/s)": 1.541042 }, { "acc": 0.91833382, "epoch": 1.4319193812983362, "grad_norm": 29.11284828186035, "learning_rate": 9.350785581077049e-06, "loss": 0.62080975, "memory(GiB)": 13.7, "step": 3055, "train_speed(iter/s)": 1.541205 }, { "acc": 0.95632229, "epoch": 1.4342629482071714, "grad_norm": 11.495265007019043, "learning_rate": 9.352691181608365e-06, "loss": 0.35146284, "memory(GiB)": 13.7, "step": 3060, "train_speed(iter/s)": 1.541216 }, { "acc": 0.90731058, "epoch": 1.4366065151160066, "grad_norm": 40.83644104003906, "learning_rate": 9.354593670953759e-06, "loss": 0.62807202, "memory(GiB)": 13.7, "step": 3065, "train_speed(iter/s)": 1.541203 }, { "acc": 0.93194122, "epoch": 1.4389500820248418, "grad_norm": 23.981430053710938, "learning_rate": 9.35649305925565e-06, "loss": 0.52335815, "memory(GiB)": 13.7, "step": 3070, "train_speed(iter/s)": 1.5413 }, { "acc": 0.92340641, "epoch": 1.441293648933677, "grad_norm": 20.71373176574707, "learning_rate": 9.358389356606946e-06, "loss": 0.53212514, "memory(GiB)": 13.7, "step": 3075, "train_speed(iter/s)": 1.541342 }, { "acc": 0.9628355, "epoch": 1.4436372158425124, "grad_norm": 5.475480079650879, "learning_rate": 9.360282573051357e-06, "loss": 0.32209103, "memory(GiB)": 13.7, "step": 3080, "train_speed(iter/s)": 1.541432 }, { "acc": 0.91874199, "epoch": 1.4459807827513476, "grad_norm": 32.38075256347656, "learning_rate": 9.362172718583726e-06, "loss": 0.55955038, "memory(GiB)": 13.7, "step": 3085, "train_speed(iter/s)": 1.541382 }, { "acc": 0.92375002, "epoch": 1.4483243496601828, "grad_norm": 9.809704780578613, "learning_rate": 9.364059803150332e-06, "loss": 0.46487799, "memory(GiB)": 13.7, "step": 3090, "train_speed(iter/s)": 1.541259 }, { "acc": 0.93376045, "epoch": 1.4506679165690182, "grad_norm": 20.892717361450195, "learning_rate": 9.365943836649208e-06, "loss": 0.44829664, "memory(GiB)": 13.7, "step": 3095, "train_speed(iter/s)": 1.541371 }, { "acc": 0.93241339, "epoch": 1.4530114834778534, "grad_norm": 19.142427444458008, "learning_rate": 9.36782482893046e-06, "loss": 0.46436973, "memory(GiB)": 13.7, "step": 3100, "train_speed(iter/s)": 1.541435 }, { "acc": 0.93263016, "epoch": 1.4553550503866886, "grad_norm": 24.644418716430664, "learning_rate": 9.369702789796571e-06, "loss": 0.45555553, "memory(GiB)": 13.7, "step": 3105, "train_speed(iter/s)": 1.541626 }, { "acc": 0.94905643, "epoch": 1.4576986172955237, "grad_norm": 31.893999099731445, "learning_rate": 9.371577729002703e-06, "loss": 0.31253092, "memory(GiB)": 13.7, "step": 3110, "train_speed(iter/s)": 1.541623 }, { "acc": 0.9608036, "epoch": 1.460042184204359, "grad_norm": 3.2581779956817627, "learning_rate": 9.373449656257006e-06, "loss": 0.22560487, "memory(GiB)": 13.7, "step": 3115, "train_speed(iter/s)": 1.541643 }, { "acc": 0.94302483, "epoch": 1.4623857511131944, "grad_norm": 13.145017623901367, "learning_rate": 9.37531858122092e-06, "loss": 0.38357904, "memory(GiB)": 13.7, "step": 3120, "train_speed(iter/s)": 1.54155 }, { "acc": 0.95048294, "epoch": 1.4647293180220295, "grad_norm": 26.473873138427734, "learning_rate": 9.377184513509472e-06, "loss": 0.36346612, "memory(GiB)": 13.7, "step": 3125, "train_speed(iter/s)": 1.541582 }, { "acc": 0.95784225, "epoch": 1.4670728849308647, "grad_norm": 46.255889892578125, "learning_rate": 9.379047462691578e-06, "loss": 0.30441375, "memory(GiB)": 13.7, "step": 3130, "train_speed(iter/s)": 1.541602 }, { "acc": 0.93359623, "epoch": 1.4694164518397002, "grad_norm": 13.561768531799316, "learning_rate": 9.380907438290323e-06, "loss": 0.36066737, "memory(GiB)": 13.7, "step": 3135, "train_speed(iter/s)": 1.54154 }, { "acc": 0.94623833, "epoch": 1.4717600187485353, "grad_norm": 8.518343925476074, "learning_rate": 9.382764449783273e-06, "loss": 0.38200431, "memory(GiB)": 13.7, "step": 3140, "train_speed(iter/s)": 1.5415 }, { "acc": 0.94870777, "epoch": 1.4741035856573705, "grad_norm": 23.396991729736328, "learning_rate": 9.38461850660275e-06, "loss": 0.32838292, "memory(GiB)": 13.7, "step": 3145, "train_speed(iter/s)": 1.541504 }, { "acc": 0.95293655, "epoch": 1.4764471525662057, "grad_norm": 3.108872652053833, "learning_rate": 9.386469618136126e-06, "loss": 0.40246973, "memory(GiB)": 13.7, "step": 3150, "train_speed(iter/s)": 1.541438 }, { "acc": 0.94689484, "epoch": 1.478790719475041, "grad_norm": 1.1878817081451416, "learning_rate": 9.388317793726112e-06, "loss": 0.40892658, "memory(GiB)": 13.7, "step": 3155, "train_speed(iter/s)": 1.541403 }, { "acc": 0.96535711, "epoch": 1.4811342863838763, "grad_norm": 21.771154403686523, "learning_rate": 9.390163042671024e-06, "loss": 0.22706056, "memory(GiB)": 13.7, "step": 3160, "train_speed(iter/s)": 1.541513 }, { "acc": 0.91910944, "epoch": 1.4834778532927115, "grad_norm": 15.002859115600586, "learning_rate": 9.392005374225083e-06, "loss": 0.46272974, "memory(GiB)": 13.7, "step": 3165, "train_speed(iter/s)": 1.54162 }, { "acc": 0.93630953, "epoch": 1.4858214202015467, "grad_norm": 55.87933349609375, "learning_rate": 9.393844797598685e-06, "loss": 0.48625116, "memory(GiB)": 13.7, "step": 3170, "train_speed(iter/s)": 1.541662 }, { "acc": 0.93936195, "epoch": 1.488164987110382, "grad_norm": 42.60429763793945, "learning_rate": 9.395681321958677e-06, "loss": 0.49988136, "memory(GiB)": 13.7, "step": 3175, "train_speed(iter/s)": 1.541586 }, { "acc": 0.93946075, "epoch": 1.4905085540192173, "grad_norm": 29.46053695678711, "learning_rate": 9.39751495642863e-06, "loss": 0.40842948, "memory(GiB)": 13.7, "step": 3180, "train_speed(iter/s)": 1.541709 }, { "acc": 0.91553078, "epoch": 1.4928521209280525, "grad_norm": 203.29412841796875, "learning_rate": 9.39934571008912e-06, "loss": 0.64797382, "memory(GiB)": 13.7, "step": 3185, "train_speed(iter/s)": 1.541812 }, { "acc": 0.93724852, "epoch": 1.4951956878368877, "grad_norm": 10.817401885986328, "learning_rate": 9.401173591977983e-06, "loss": 0.38736105, "memory(GiB)": 13.7, "step": 3190, "train_speed(iter/s)": 1.541667 }, { "acc": 0.92770882, "epoch": 1.497539254745723, "grad_norm": 20.985958099365234, "learning_rate": 9.402998611090594e-06, "loss": 0.4212513, "memory(GiB)": 13.7, "step": 3195, "train_speed(iter/s)": 1.541823 }, { "acc": 0.95406246, "epoch": 1.499882821654558, "grad_norm": 21.52180290222168, "learning_rate": 9.40482077638013e-06, "loss": 0.42181201, "memory(GiB)": 13.7, "step": 3200, "train_speed(iter/s)": 1.5419 }, { "acc": 0.93387241, "epoch": 1.5022263885633935, "grad_norm": 11.1646146774292, "learning_rate": 9.406640096757827e-06, "loss": 0.33825521, "memory(GiB)": 13.7, "step": 3205, "train_speed(iter/s)": 1.542045 }, { "acc": 0.92307453, "epoch": 1.5045699554722287, "grad_norm": 19.84252166748047, "learning_rate": 9.408456581093258e-06, "loss": 0.49603214, "memory(GiB)": 13.7, "step": 3210, "train_speed(iter/s)": 1.541918 }, { "acc": 0.95196838, "epoch": 1.5069135223810641, "grad_norm": 6.66267728805542, "learning_rate": 9.410270238214576e-06, "loss": 0.26525483, "memory(GiB)": 13.7, "step": 3215, "train_speed(iter/s)": 1.542075 }, { "acc": 0.93777294, "epoch": 1.5092570892898993, "grad_norm": 6.683133125305176, "learning_rate": 9.41208107690877e-06, "loss": 0.39870658, "memory(GiB)": 13.7, "step": 3220, "train_speed(iter/s)": 1.542108 }, { "acc": 0.90503244, "epoch": 1.5116006561987345, "grad_norm": 21.65446662902832, "learning_rate": 9.413889105921939e-06, "loss": 0.51299224, "memory(GiB)": 13.7, "step": 3225, "train_speed(iter/s)": 1.542152 }, { "acc": 0.89004116, "epoch": 1.5139442231075697, "grad_norm": 51.054931640625, "learning_rate": 9.41569433395953e-06, "loss": 0.62105155, "memory(GiB)": 13.7, "step": 3230, "train_speed(iter/s)": 1.542097 }, { "acc": 0.95994053, "epoch": 1.516287790016405, "grad_norm": 44.16984558105469, "learning_rate": 9.41749676968659e-06, "loss": 0.24963512, "memory(GiB)": 13.7, "step": 3235, "train_speed(iter/s)": 1.542098 }, { "acc": 0.96236811, "epoch": 1.51863135692524, "grad_norm": 5.805840492248535, "learning_rate": 9.41929642172802e-06, "loss": 0.26584692, "memory(GiB)": 13.7, "step": 3240, "train_speed(iter/s)": 1.542171 }, { "acc": 0.92872763, "epoch": 1.5209749238340755, "grad_norm": 16.518959045410156, "learning_rate": 9.421093298668826e-06, "loss": 0.39178748, "memory(GiB)": 13.7, "step": 3245, "train_speed(iter/s)": 1.542117 }, { "acc": 0.91222725, "epoch": 1.5233184907429107, "grad_norm": 20.480098724365234, "learning_rate": 9.422887409054355e-06, "loss": 0.46833048, "memory(GiB)": 13.7, "step": 3250, "train_speed(iter/s)": 1.542109 }, { "acc": 0.91548958, "epoch": 1.5256620576517461, "grad_norm": 21.852603912353516, "learning_rate": 9.424678761390552e-06, "loss": 0.60723629, "memory(GiB)": 13.7, "step": 3255, "train_speed(iter/s)": 1.542144 }, { "acc": 0.92198715, "epoch": 1.5280056245605813, "grad_norm": 6.338356018066406, "learning_rate": 9.426467364144182e-06, "loss": 0.50867081, "memory(GiB)": 13.7, "step": 3260, "train_speed(iter/s)": 1.542237 }, { "acc": 0.93364868, "epoch": 1.5303491914694165, "grad_norm": 12.269129753112793, "learning_rate": 9.428253225743095e-06, "loss": 0.39254217, "memory(GiB)": 13.7, "step": 3265, "train_speed(iter/s)": 1.542191 }, { "acc": 0.95821428, "epoch": 1.5326927583782517, "grad_norm": 9.478461265563965, "learning_rate": 9.430036354576445e-06, "loss": 0.29336097, "memory(GiB)": 13.7, "step": 3270, "train_speed(iter/s)": 1.542264 }, { "acc": 0.92524357, "epoch": 1.535036325287087, "grad_norm": 16.44774627685547, "learning_rate": 9.431816758994928e-06, "loss": 0.47700448, "memory(GiB)": 13.7, "step": 3275, "train_speed(iter/s)": 1.54232 }, { "acc": 0.92247305, "epoch": 1.537379892195922, "grad_norm": 10.58325481414795, "learning_rate": 9.433594447311036e-06, "loss": 0.44392986, "memory(GiB)": 13.7, "step": 3280, "train_speed(iter/s)": 1.542311 }, { "acc": 0.96161709, "epoch": 1.5397234591047573, "grad_norm": 13.416815757751465, "learning_rate": 9.43536942779926e-06, "loss": 0.27485242, "memory(GiB)": 13.7, "step": 3285, "train_speed(iter/s)": 1.542366 }, { "acc": 0.93649445, "epoch": 1.5420670260135927, "grad_norm": 36.702308654785156, "learning_rate": 9.437141708696347e-06, "loss": 0.4139925, "memory(GiB)": 13.7, "step": 3290, "train_speed(iter/s)": 1.542345 }, { "acc": 0.94105167, "epoch": 1.5444105929224279, "grad_norm": 6.0860772132873535, "learning_rate": 9.438911298201512e-06, "loss": 0.47004743, "memory(GiB)": 13.7, "step": 3295, "train_speed(iter/s)": 1.542361 }, { "acc": 0.94136562, "epoch": 1.5467541598312633, "grad_norm": 14.92050838470459, "learning_rate": 9.440678204476683e-06, "loss": 0.40040989, "memory(GiB)": 13.7, "step": 3300, "train_speed(iter/s)": 1.542326 }, { "acc": 0.92326393, "epoch": 1.5490977267400985, "grad_norm": 3.6044347286224365, "learning_rate": 9.442442435646703e-06, "loss": 0.56690068, "memory(GiB)": 13.7, "step": 3305, "train_speed(iter/s)": 1.542402 }, { "acc": 0.93576126, "epoch": 1.5514412936489337, "grad_norm": 8.216384887695312, "learning_rate": 9.444203999799578e-06, "loss": 0.37620454, "memory(GiB)": 13.7, "step": 3310, "train_speed(iter/s)": 1.542482 }, { "acc": 0.9636795, "epoch": 1.5537848605577689, "grad_norm": 5.727360248565674, "learning_rate": 9.445962904986682e-06, "loss": 0.23363814, "memory(GiB)": 13.7, "step": 3315, "train_speed(iter/s)": 1.542392 }, { "acc": 0.95076981, "epoch": 1.556128427466604, "grad_norm": 12.699637413024902, "learning_rate": 9.447719159222992e-06, "loss": 0.30419421, "memory(GiB)": 13.7, "step": 3320, "train_speed(iter/s)": 1.542386 }, { "acc": 0.95824089, "epoch": 1.5584719943754393, "grad_norm": 9.397107124328613, "learning_rate": 9.449472770487293e-06, "loss": 0.28217092, "memory(GiB)": 13.7, "step": 3325, "train_speed(iter/s)": 1.542565 }, { "acc": 0.92912312, "epoch": 1.5608155612842747, "grad_norm": 15.15665054321289, "learning_rate": 9.451223746722403e-06, "loss": 0.41175365, "memory(GiB)": 13.7, "step": 3330, "train_speed(iter/s)": 1.542622 }, { "acc": 0.93615036, "epoch": 1.5631591281931099, "grad_norm": 13.963147163391113, "learning_rate": 9.452972095835396e-06, "loss": 0.45075369, "memory(GiB)": 13.7, "step": 3335, "train_speed(iter/s)": 1.542541 }, { "acc": 0.92918158, "epoch": 1.5655026951019453, "grad_norm": 16.4451961517334, "learning_rate": 9.4547178256978e-06, "loss": 0.46116509, "memory(GiB)": 13.7, "step": 3340, "train_speed(iter/s)": 1.542477 }, { "acc": 0.93344326, "epoch": 1.5678462620107805, "grad_norm": 6.471035480499268, "learning_rate": 9.456460944145822e-06, "loss": 0.42897587, "memory(GiB)": 13.7, "step": 3345, "train_speed(iter/s)": 1.54248 }, { "acc": 0.930618, "epoch": 1.5701898289196157, "grad_norm": 19.213626861572266, "learning_rate": 9.458201458980555e-06, "loss": 0.4320972, "memory(GiB)": 13.7, "step": 3350, "train_speed(iter/s)": 1.542572 }, { "acc": 0.92778053, "epoch": 1.5725333958284509, "grad_norm": 36.121463775634766, "learning_rate": 9.45993937796819e-06, "loss": 0.46080284, "memory(GiB)": 13.7, "step": 3355, "train_speed(iter/s)": 1.542558 }, { "acc": 0.91165972, "epoch": 1.574876962737286, "grad_norm": 141.37799072265625, "learning_rate": 9.461674708840217e-06, "loss": 0.54897938, "memory(GiB)": 13.7, "step": 3360, "train_speed(iter/s)": 1.542662 }, { "acc": 0.93187504, "epoch": 1.5772205296461212, "grad_norm": 3.3454151153564453, "learning_rate": 9.46340745929364e-06, "loss": 0.41359024, "memory(GiB)": 13.7, "step": 3365, "train_speed(iter/s)": 1.542812 }, { "acc": 0.9359127, "epoch": 1.5795640965549567, "grad_norm": 5.57786226272583, "learning_rate": 9.465137636991172e-06, "loss": 0.38877485, "memory(GiB)": 13.7, "step": 3370, "train_speed(iter/s)": 1.542896 }, { "acc": 0.92189674, "epoch": 1.5819076634637919, "grad_norm": 12.55984115600586, "learning_rate": 9.466865249561453e-06, "loss": 0.45786805, "memory(GiB)": 13.7, "step": 3375, "train_speed(iter/s)": 1.542932 }, { "acc": 0.93227262, "epoch": 1.5842512303726273, "grad_norm": 21.052330017089844, "learning_rate": 9.468590304599238e-06, "loss": 0.56301551, "memory(GiB)": 13.7, "step": 3380, "train_speed(iter/s)": 1.543033 }, { "acc": 0.95378208, "epoch": 1.5865947972814625, "grad_norm": 12.623372077941895, "learning_rate": 9.470312809665602e-06, "loss": 0.32060609, "memory(GiB)": 13.7, "step": 3385, "train_speed(iter/s)": 1.543059 }, { "acc": 0.91807003, "epoch": 1.5889383641902977, "grad_norm": 53.35356903076172, "learning_rate": 9.472032772288139e-06, "loss": 0.44538255, "memory(GiB)": 13.7, "step": 3390, "train_speed(iter/s)": 1.543128 }, { "acc": 0.92753601, "epoch": 1.5912819310991329, "grad_norm": 15.028409957885742, "learning_rate": 9.473750199961159e-06, "loss": 0.43701863, "memory(GiB)": 13.7, "step": 3395, "train_speed(iter/s)": 1.543258 }, { "acc": 0.93450222, "epoch": 1.593625498007968, "grad_norm": 25.523284912109375, "learning_rate": 9.475465100145891e-06, "loss": 0.32506521, "memory(GiB)": 13.7, "step": 3400, "train_speed(iter/s)": 1.543291 }, { "acc": 0.9364357, "epoch": 1.5959690649168032, "grad_norm": 130.09815979003906, "learning_rate": 9.477177480270663e-06, "loss": 0.46326089, "memory(GiB)": 13.7, "step": 3405, "train_speed(iter/s)": 1.543291 }, { "acc": 0.91918468, "epoch": 1.5983126318256387, "grad_norm": 7.4056620597839355, "learning_rate": 9.478887347731107e-06, "loss": 0.47247596, "memory(GiB)": 13.7, "step": 3410, "train_speed(iter/s)": 1.543401 }, { "acc": 0.91814489, "epoch": 1.6006561987344738, "grad_norm": 25.839771270751953, "learning_rate": 9.480594709890345e-06, "loss": 0.51241698, "memory(GiB)": 13.7, "step": 3415, "train_speed(iter/s)": 1.543377 }, { "acc": 0.92880507, "epoch": 1.6029997656433093, "grad_norm": 12.235885620117188, "learning_rate": 9.482299574079182e-06, "loss": 0.45626931, "memory(GiB)": 13.7, "step": 3420, "train_speed(iter/s)": 1.543468 }, { "acc": 0.97606411, "epoch": 1.6053433325521445, "grad_norm": 19.58888053894043, "learning_rate": 9.484001947596296e-06, "loss": 0.15384886, "memory(GiB)": 13.7, "step": 3425, "train_speed(iter/s)": 1.543458 }, { "acc": 0.94709349, "epoch": 1.6076868994609796, "grad_norm": 20.38860321044922, "learning_rate": 9.485701837708416e-06, "loss": 0.29389563, "memory(GiB)": 13.7, "step": 3430, "train_speed(iter/s)": 1.543428 }, { "acc": 0.95453987, "epoch": 1.6100304663698148, "grad_norm": 19.595001220703125, "learning_rate": 9.487399251650525e-06, "loss": 0.2302937, "memory(GiB)": 13.7, "step": 3435, "train_speed(iter/s)": 1.543471 }, { "acc": 0.94975986, "epoch": 1.61237403327865, "grad_norm": 19.7029972076416, "learning_rate": 9.489094196626031e-06, "loss": 0.28341756, "memory(GiB)": 13.7, "step": 3440, "train_speed(iter/s)": 1.543408 }, { "acc": 0.91693974, "epoch": 1.6147176001874852, "grad_norm": 18.64924430847168, "learning_rate": 9.490786679806948e-06, "loss": 0.52298417, "memory(GiB)": 13.7, "step": 3445, "train_speed(iter/s)": 1.543497 }, { "acc": 0.94583969, "epoch": 1.6170611670963206, "grad_norm": 8.366013526916504, "learning_rate": 9.492476708334096e-06, "loss": 0.3139003, "memory(GiB)": 13.7, "step": 3450, "train_speed(iter/s)": 1.543522 }, { "acc": 0.94812183, "epoch": 1.6194047340051558, "grad_norm": 8.468960762023926, "learning_rate": 9.494164289317266e-06, "loss": 0.32044234, "memory(GiB)": 13.7, "step": 3455, "train_speed(iter/s)": 1.543451 }, { "acc": 0.91587791, "epoch": 1.6217483009139912, "grad_norm": 35.51292419433594, "learning_rate": 9.4958494298354e-06, "loss": 0.49238243, "memory(GiB)": 13.7, "step": 3460, "train_speed(iter/s)": 1.543534 }, { "acc": 0.9361578, "epoch": 1.6240918678228264, "grad_norm": 40.27918243408203, "learning_rate": 9.497532136936773e-06, "loss": 0.39187753, "memory(GiB)": 13.7, "step": 3465, "train_speed(iter/s)": 1.543658 }, { "acc": 0.92205811, "epoch": 1.6264354347316616, "grad_norm": 47.35123062133789, "learning_rate": 9.499212417639181e-06, "loss": 0.50511799, "memory(GiB)": 13.7, "step": 3470, "train_speed(iter/s)": 1.543631 }, { "acc": 0.92865524, "epoch": 1.6287790016404968, "grad_norm": 10.992225646972656, "learning_rate": 9.500890278930093e-06, "loss": 0.31721473, "memory(GiB)": 13.7, "step": 3475, "train_speed(iter/s)": 1.543793 }, { "acc": 0.91244936, "epoch": 1.631122568549332, "grad_norm": 46.88095474243164, "learning_rate": 9.502565727766841e-06, "loss": 0.5521657, "memory(GiB)": 13.7, "step": 3480, "train_speed(iter/s)": 1.543856 }, { "acc": 0.94975195, "epoch": 1.6334661354581672, "grad_norm": 57.3482666015625, "learning_rate": 9.504238771076798e-06, "loss": 0.29994788, "memory(GiB)": 13.7, "step": 3485, "train_speed(iter/s)": 1.543952 }, { "acc": 0.92459831, "epoch": 1.6358097023670026, "grad_norm": 43.15546417236328, "learning_rate": 9.505909415757528e-06, "loss": 0.39805825, "memory(GiB)": 13.7, "step": 3490, "train_speed(iter/s)": 1.543957 }, { "acc": 0.93358364, "epoch": 1.6381532692758378, "grad_norm": 11.753393173217773, "learning_rate": 9.507577668676983e-06, "loss": 0.40968189, "memory(GiB)": 13.7, "step": 3495, "train_speed(iter/s)": 1.544087 }, { "acc": 0.9533391, "epoch": 1.6404968361846732, "grad_norm": 12.955971717834473, "learning_rate": 9.509243536673653e-06, "loss": 0.25591073, "memory(GiB)": 13.7, "step": 3500, "train_speed(iter/s)": 1.54398 }, { "acc": 0.96343412, "epoch": 1.6428404030935084, "grad_norm": 13.342907905578613, "learning_rate": 9.510907026556741e-06, "loss": 0.21501379, "memory(GiB)": 13.7, "step": 3505, "train_speed(iter/s)": 1.543978 }, { "acc": 0.93154716, "epoch": 1.6451839700023436, "grad_norm": 20.461299896240234, "learning_rate": 9.512568145106338e-06, "loss": 0.47197456, "memory(GiB)": 13.7, "step": 3510, "train_speed(iter/s)": 1.544062 }, { "acc": 0.95756321, "epoch": 1.6475275369111788, "grad_norm": 4.098404407501221, "learning_rate": 9.514226899073566e-06, "loss": 0.25100489, "memory(GiB)": 13.7, "step": 3515, "train_speed(iter/s)": 1.544024 }, { "acc": 0.90044003, "epoch": 1.649871103820014, "grad_norm": 20.433048248291016, "learning_rate": 9.515883295180774e-06, "loss": 0.61447124, "memory(GiB)": 13.7, "step": 3520, "train_speed(iter/s)": 1.544052 }, { "acc": 0.94963856, "epoch": 1.6522146707288492, "grad_norm": 27.728132247924805, "learning_rate": 9.517537340121673e-06, "loss": 0.35326142, "memory(GiB)": 13.7, "step": 3525, "train_speed(iter/s)": 1.544107 }, { "acc": 0.94360123, "epoch": 1.6545582376376844, "grad_norm": 5.46586275100708, "learning_rate": 9.519189040561516e-06, "loss": 0.2408407, "memory(GiB)": 13.7, "step": 3530, "train_speed(iter/s)": 1.544049 }, { "acc": 0.95735569, "epoch": 1.6569018045465198, "grad_norm": 10.291119575500488, "learning_rate": 9.520838403137252e-06, "loss": 0.28038273, "memory(GiB)": 13.7, "step": 3535, "train_speed(iter/s)": 1.543978 }, { "acc": 0.94945707, "epoch": 1.659245371455355, "grad_norm": 14.041739463806152, "learning_rate": 9.522485434457686e-06, "loss": 0.35282304, "memory(GiB)": 13.7, "step": 3540, "train_speed(iter/s)": 1.543977 }, { "acc": 0.93860207, "epoch": 1.6615889383641904, "grad_norm": 12.913825988769531, "learning_rate": 9.524130141103642e-06, "loss": 0.3733202, "memory(GiB)": 13.7, "step": 3545, "train_speed(iter/s)": 1.543919 }, { "acc": 0.93255424, "epoch": 1.6639325052730256, "grad_norm": 3.6803793907165527, "learning_rate": 9.525772529628117e-06, "loss": 0.40634732, "memory(GiB)": 13.7, "step": 3550, "train_speed(iter/s)": 1.544042 }, { "acc": 0.92974968, "epoch": 1.6662760721818608, "grad_norm": 34.18198013305664, "learning_rate": 9.52741260655644e-06, "loss": 0.3516314, "memory(GiB)": 13.7, "step": 3555, "train_speed(iter/s)": 1.544031 }, { "acc": 0.93842878, "epoch": 1.668619639090696, "grad_norm": 18.698074340820312, "learning_rate": 9.529050378386423e-06, "loss": 0.37224896, "memory(GiB)": 13.7, "step": 3560, "train_speed(iter/s)": 1.543976 }, { "acc": 0.94772873, "epoch": 1.6709632059995312, "grad_norm": 15.040266036987305, "learning_rate": 9.53068585158852e-06, "loss": 0.36022775, "memory(GiB)": 13.7, "step": 3565, "train_speed(iter/s)": 1.544035 }, { "acc": 0.91883926, "epoch": 1.6733067729083664, "grad_norm": 14.38985538482666, "learning_rate": 9.532319032605982e-06, "loss": 0.47626734, "memory(GiB)": 13.7, "step": 3570, "train_speed(iter/s)": 1.544082 }, { "acc": 0.92786036, "epoch": 1.6756503398172018, "grad_norm": 38.710262298583984, "learning_rate": 9.533949927855e-06, "loss": 0.39657204, "memory(GiB)": 13.7, "step": 3575, "train_speed(iter/s)": 1.544034 }, { "acc": 0.95278273, "epoch": 1.677993906726037, "grad_norm": 4.540277004241943, "learning_rate": 9.535578543724873e-06, "loss": 0.26947331, "memory(GiB)": 13.7, "step": 3580, "train_speed(iter/s)": 1.54412 }, { "acc": 0.93622208, "epoch": 1.6803374736348724, "grad_norm": 11.958709716796875, "learning_rate": 9.537204886578139e-06, "loss": 0.43277516, "memory(GiB)": 13.7, "step": 3585, "train_speed(iter/s)": 1.544315 }, { "acc": 0.94415474, "epoch": 1.6826810405437076, "grad_norm": 6.275749683380127, "learning_rate": 9.538828962750735e-06, "loss": 0.43222408, "memory(GiB)": 13.7, "step": 3590, "train_speed(iter/s)": 1.54442 }, { "acc": 0.94847679, "epoch": 1.6850246074525428, "grad_norm": 14.976080894470215, "learning_rate": 9.540450778552146e-06, "loss": 0.35103745, "memory(GiB)": 13.7, "step": 3595, "train_speed(iter/s)": 1.544431 }, { "acc": 0.95502481, "epoch": 1.687368174361378, "grad_norm": 21.096364974975586, "learning_rate": 9.542070340265543e-06, "loss": 0.32812319, "memory(GiB)": 13.7, "step": 3600, "train_speed(iter/s)": 1.544353 }, { "acc": 0.9372776, "epoch": 1.6897117412702132, "grad_norm": 7.183588981628418, "learning_rate": 9.543687654147945e-06, "loss": 0.38951006, "memory(GiB)": 13.7, "step": 3605, "train_speed(iter/s)": 1.544368 }, { "acc": 0.93000002, "epoch": 1.6920553081790484, "grad_norm": 9.198705673217773, "learning_rate": 9.545302726430347e-06, "loss": 0.48288317, "memory(GiB)": 13.7, "step": 3610, "train_speed(iter/s)": 1.544527 }, { "acc": 0.90335226, "epoch": 1.6943988750878838, "grad_norm": 28.924867630004883, "learning_rate": 9.54691556331787e-06, "loss": 0.66324286, "memory(GiB)": 13.7, "step": 3615, "train_speed(iter/s)": 1.54451 }, { "acc": 0.94722137, "epoch": 1.696742441996719, "grad_norm": 11.452778816223145, "learning_rate": 9.548526170989912e-06, "loss": 0.41279359, "memory(GiB)": 13.7, "step": 3620, "train_speed(iter/s)": 1.544434 }, { "acc": 0.96576691, "epoch": 1.6990860089055544, "grad_norm": 11.096539497375488, "learning_rate": 9.550134555600277e-06, "loss": 0.21724787, "memory(GiB)": 13.7, "step": 3625, "train_speed(iter/s)": 1.544521 }, { "acc": 0.92777958, "epoch": 1.7014295758143896, "grad_norm": 12.713263511657715, "learning_rate": 9.55174072327733e-06, "loss": 0.40416837, "memory(GiB)": 13.7, "step": 3630, "train_speed(iter/s)": 1.54465 }, { "acc": 0.92540808, "epoch": 1.7037731427232248, "grad_norm": 9.85625171661377, "learning_rate": 9.55334468012412e-06, "loss": 0.43263741, "memory(GiB)": 13.7, "step": 3635, "train_speed(iter/s)": 1.54474 }, { "acc": 0.91809387, "epoch": 1.70611670963206, "grad_norm": 12.01563549041748, "learning_rate": 9.554946432218534e-06, "loss": 0.49894199, "memory(GiB)": 13.7, "step": 3640, "train_speed(iter/s)": 1.544781 }, { "acc": 0.95075283, "epoch": 1.7084602765408952, "grad_norm": 13.415730476379395, "learning_rate": 9.556545985613435e-06, "loss": 0.34938679, "memory(GiB)": 13.7, "step": 3645, "train_speed(iter/s)": 1.5449 }, { "acc": 0.92851944, "epoch": 1.7108038434497304, "grad_norm": 5.879095554351807, "learning_rate": 9.558143346336785e-06, "loss": 0.44391336, "memory(GiB)": 13.7, "step": 3650, "train_speed(iter/s)": 1.544922 }, { "acc": 0.94463997, "epoch": 1.7131474103585658, "grad_norm": 5.11519718170166, "learning_rate": 9.559738520391793e-06, "loss": 0.35744276, "memory(GiB)": 13.7, "step": 3655, "train_speed(iter/s)": 1.544889 }, { "acc": 0.91475849, "epoch": 1.715490977267401, "grad_norm": 52.10969924926758, "learning_rate": 9.56133151375705e-06, "loss": 0.55789404, "memory(GiB)": 13.7, "step": 3660, "train_speed(iter/s)": 1.544968 }, { "acc": 0.93003464, "epoch": 1.7178345441762364, "grad_norm": 12.813775062561035, "learning_rate": 9.562922332386665e-06, "loss": 0.41649985, "memory(GiB)": 13.7, "step": 3665, "train_speed(iter/s)": 1.545057 }, { "acc": 0.93251419, "epoch": 1.7201781110850716, "grad_norm": 34.94902801513672, "learning_rate": 9.564510982210385e-06, "loss": 0.41552043, "memory(GiB)": 13.7, "step": 3670, "train_speed(iter/s)": 1.545072 }, { "acc": 0.91906309, "epoch": 1.7225216779939068, "grad_norm": 15.802664756774902, "learning_rate": 9.566097469133743e-06, "loss": 0.64610472, "memory(GiB)": 13.7, "step": 3675, "train_speed(iter/s)": 1.545223 }, { "acc": 0.91697693, "epoch": 1.724865244902742, "grad_norm": 15.267062187194824, "learning_rate": 9.567681799038186e-06, "loss": 0.45724363, "memory(GiB)": 13.7, "step": 3680, "train_speed(iter/s)": 1.545195 }, { "acc": 0.94743252, "epoch": 1.7272088118115771, "grad_norm": 29.789304733276367, "learning_rate": 9.569263977781202e-06, "loss": 0.33629346, "memory(GiB)": 13.7, "step": 3685, "train_speed(iter/s)": 1.545161 }, { "acc": 0.92409487, "epoch": 1.7295523787204123, "grad_norm": 7.867985725402832, "learning_rate": 9.57084401119645e-06, "loss": 0.52666349, "memory(GiB)": 13.7, "step": 3690, "train_speed(iter/s)": 1.545214 }, { "acc": 0.93652067, "epoch": 1.7318959456292478, "grad_norm": 16.979055404663086, "learning_rate": 9.572421905093895e-06, "loss": 0.3734467, "memory(GiB)": 13.7, "step": 3695, "train_speed(iter/s)": 1.545245 }, { "acc": 0.95290375, "epoch": 1.734239512538083, "grad_norm": 37.10063552856445, "learning_rate": 9.573997665259928e-06, "loss": 0.2442601, "memory(GiB)": 13.7, "step": 3700, "train_speed(iter/s)": 1.54539 }, { "acc": 0.92857227, "epoch": 1.7365830794469184, "grad_norm": 27.1126708984375, "learning_rate": 9.5755712974575e-06, "loss": 0.43916845, "memory(GiB)": 13.7, "step": 3705, "train_speed(iter/s)": 1.545462 }, { "acc": 0.92874298, "epoch": 1.7389266463557536, "grad_norm": 25.269472122192383, "learning_rate": 9.577142807426246e-06, "loss": 0.45362062, "memory(GiB)": 13.7, "step": 3710, "train_speed(iter/s)": 1.545708 }, { "acc": 0.89950705, "epoch": 1.7412702132645888, "grad_norm": 18.99563217163086, "learning_rate": 9.578712200882608e-06, "loss": 0.83734818, "memory(GiB)": 13.7, "step": 3715, "train_speed(iter/s)": 1.54581 }, { "acc": 0.9168642, "epoch": 1.743613780173424, "grad_norm": 42.846439361572266, "learning_rate": 9.580279483519967e-06, "loss": 0.56597605, "memory(GiB)": 13.7, "step": 3720, "train_speed(iter/s)": 1.545863 }, { "acc": 0.89327793, "epoch": 1.7459573470822591, "grad_norm": 31.230443954467773, "learning_rate": 9.581844661008756e-06, "loss": 0.74132357, "memory(GiB)": 13.7, "step": 3725, "train_speed(iter/s)": 1.545993 }, { "acc": 0.96230612, "epoch": 1.7483009139910943, "grad_norm": 8.650124549865723, "learning_rate": 9.583407738996598e-06, "loss": 0.25422258, "memory(GiB)": 13.7, "step": 3730, "train_speed(iter/s)": 1.545903 }, { "acc": 0.94666662, "epoch": 1.7506444808999297, "grad_norm": 7.19757604598999, "learning_rate": 9.584968723108408e-06, "loss": 0.24745443, "memory(GiB)": 13.7, "step": 3735, "train_speed(iter/s)": 1.545833 }, { "acc": 0.94552078, "epoch": 1.752988047808765, "grad_norm": 4.886751174926758, "learning_rate": 9.586527618946537e-06, "loss": 0.29734161, "memory(GiB)": 13.7, "step": 3740, "train_speed(iter/s)": 1.545935 }, { "acc": 0.93566113, "epoch": 1.7553316147176004, "grad_norm": 10.952622413635254, "learning_rate": 9.588084432090873e-06, "loss": 0.31842203, "memory(GiB)": 13.7, "step": 3745, "train_speed(iter/s)": 1.54597 }, { "acc": 0.94787207, "epoch": 1.7576751816264355, "grad_norm": 10.555068969726562, "learning_rate": 9.58963916809898e-06, "loss": 0.31665649, "memory(GiB)": 13.7, "step": 3750, "train_speed(iter/s)": 1.546 }, { "acc": 0.92900295, "epoch": 1.7600187485352707, "grad_norm": 16.89608383178711, "learning_rate": 9.591191832506194e-06, "loss": 0.47233973, "memory(GiB)": 13.7, "step": 3755, "train_speed(iter/s)": 1.546213 }, { "acc": 0.93682308, "epoch": 1.762362315444106, "grad_norm": 15.57622241973877, "learning_rate": 9.592742430825762e-06, "loss": 0.37438655, "memory(GiB)": 13.7, "step": 3760, "train_speed(iter/s)": 1.546267 }, { "acc": 0.92177887, "epoch": 1.7647058823529411, "grad_norm": 19.574596405029297, "learning_rate": 9.594290968548951e-06, "loss": 0.51120095, "memory(GiB)": 13.7, "step": 3765, "train_speed(iter/s)": 1.546356 }, { "acc": 0.94003496, "epoch": 1.7670494492617763, "grad_norm": 15.791162490844727, "learning_rate": 9.595837451145159e-06, "loss": 0.37714543, "memory(GiB)": 13.7, "step": 3770, "train_speed(iter/s)": 1.546387 }, { "acc": 0.95140686, "epoch": 1.7693930161706115, "grad_norm": 75.54621124267578, "learning_rate": 9.597381884062047e-06, "loss": 0.31157241, "memory(GiB)": 13.7, "step": 3775, "train_speed(iter/s)": 1.54648 }, { "acc": 0.92758923, "epoch": 1.771736583079447, "grad_norm": 31.649587631225586, "learning_rate": 9.598924272725633e-06, "loss": 0.47800694, "memory(GiB)": 13.7, "step": 3780, "train_speed(iter/s)": 1.546662 }, { "acc": 0.94363689, "epoch": 1.7740801499882821, "grad_norm": 100.5798110961914, "learning_rate": 9.600464622540428e-06, "loss": 0.40734558, "memory(GiB)": 13.7, "step": 3785, "train_speed(iter/s)": 1.546747 }, { "acc": 0.92588711, "epoch": 1.7764237168971175, "grad_norm": 8.323132514953613, "learning_rate": 9.602002938889526e-06, "loss": 0.47651544, "memory(GiB)": 13.7, "step": 3790, "train_speed(iter/s)": 1.546903 }, { "acc": 0.91687355, "epoch": 1.7787672838059527, "grad_norm": 11.79304027557373, "learning_rate": 9.603539227134743e-06, "loss": 0.5678936, "memory(GiB)": 13.7, "step": 3795, "train_speed(iter/s)": 1.5469 }, { "acc": 0.91324825, "epoch": 1.781110850714788, "grad_norm": 19.798784255981445, "learning_rate": 9.605073492616708e-06, "loss": 0.42459235, "memory(GiB)": 13.7, "step": 3800, "train_speed(iter/s)": 1.54688 }, { "acc": 0.94285984, "epoch": 1.7834544176236231, "grad_norm": 22.978744506835938, "learning_rate": 9.606605740654983e-06, "loss": 0.34240527, "memory(GiB)": 13.7, "step": 3805, "train_speed(iter/s)": 1.547046 }, { "acc": 0.96025372, "epoch": 1.7857979845324583, "grad_norm": 58.14365005493164, "learning_rate": 9.608135976548182e-06, "loss": 0.29903932, "memory(GiB)": 13.7, "step": 3810, "train_speed(iter/s)": 1.547038 }, { "acc": 0.92661848, "epoch": 1.7881415514412935, "grad_norm": 8.036941528320312, "learning_rate": 9.609664205574058e-06, "loss": 0.56754026, "memory(GiB)": 13.7, "step": 3815, "train_speed(iter/s)": 1.547051 }, { "acc": 0.92745037, "epoch": 1.790485118350129, "grad_norm": 48.3647575378418, "learning_rate": 9.611190432989634e-06, "loss": 0.4451582, "memory(GiB)": 13.7, "step": 3820, "train_speed(iter/s)": 1.547001 }, { "acc": 0.93666668, "epoch": 1.792828685258964, "grad_norm": 12.77093505859375, "learning_rate": 9.612714664031308e-06, "loss": 0.32665544, "memory(GiB)": 13.7, "step": 3825, "train_speed(iter/s)": 1.547031 }, { "acc": 0.93664684, "epoch": 1.7951722521677995, "grad_norm": 15.354454040527344, "learning_rate": 9.614236903914944e-06, "loss": 0.45508661, "memory(GiB)": 13.7, "step": 3830, "train_speed(iter/s)": 1.547076 }, { "acc": 0.92871246, "epoch": 1.7975158190766347, "grad_norm": 16.339765548706055, "learning_rate": 9.615757157836004e-06, "loss": 0.37051065, "memory(GiB)": 13.7, "step": 3835, "train_speed(iter/s)": 1.547086 }, { "acc": 0.9182188, "epoch": 1.79985938598547, "grad_norm": 21.780431747436523, "learning_rate": 9.617275430969632e-06, "loss": 0.43854876, "memory(GiB)": 13.7, "step": 3840, "train_speed(iter/s)": 1.547082 }, { "acc": 0.9385499, "epoch": 1.802202952894305, "grad_norm": 19.52313995361328, "learning_rate": 9.618791728470783e-06, "loss": 0.37335038, "memory(GiB)": 13.7, "step": 3845, "train_speed(iter/s)": 1.547025 }, { "acc": 0.91533241, "epoch": 1.8045465198031403, "grad_norm": 30.47667121887207, "learning_rate": 9.620306055474298e-06, "loss": 0.46096249, "memory(GiB)": 13.7, "step": 3850, "train_speed(iter/s)": 1.547009 }, { "acc": 0.95379467, "epoch": 1.8068900867119755, "grad_norm": 29.57823944091797, "learning_rate": 9.621818417095038e-06, "loss": 0.31910841, "memory(GiB)": 13.7, "step": 3855, "train_speed(iter/s)": 1.54688 }, { "acc": 0.9388134, "epoch": 1.809233653620811, "grad_norm": 39.8218994140625, "learning_rate": 9.62332881842797e-06, "loss": 0.37038798, "memory(GiB)": 13.7, "step": 3860, "train_speed(iter/s)": 1.547039 }, { "acc": 0.93945885, "epoch": 1.811577220529646, "grad_norm": 26.931377410888672, "learning_rate": 9.624837264548274e-06, "loss": 0.27501893, "memory(GiB)": 13.7, "step": 3865, "train_speed(iter/s)": 1.547138 }, { "acc": 0.92526417, "epoch": 1.8139207874384815, "grad_norm": 17.332435607910156, "learning_rate": 9.626343760511444e-06, "loss": 0.48950791, "memory(GiB)": 13.7, "step": 3870, "train_speed(iter/s)": 1.547153 }, { "acc": 0.92520256, "epoch": 1.8162643543473167, "grad_norm": 10.548784255981445, "learning_rate": 9.627848311353403e-06, "loss": 0.46655674, "memory(GiB)": 13.7, "step": 3875, "train_speed(iter/s)": 1.54714 }, { "acc": 0.92436199, "epoch": 1.818607921256152, "grad_norm": 41.672935485839844, "learning_rate": 9.629350922090574e-06, "loss": 0.48510036, "memory(GiB)": 13.7, "step": 3880, "train_speed(iter/s)": 1.547185 }, { "acc": 0.95786934, "epoch": 1.820951488164987, "grad_norm": 10.033368110656738, "learning_rate": 9.630851597720018e-06, "loss": 0.21693003, "memory(GiB)": 13.7, "step": 3885, "train_speed(iter/s)": 1.547081 }, { "acc": 0.9311039, "epoch": 1.8232950550738223, "grad_norm": 12.196885108947754, "learning_rate": 9.632350343219501e-06, "loss": 0.42778769, "memory(GiB)": 13.7, "step": 3890, "train_speed(iter/s)": 1.547144 }, { "acc": 0.9503274, "epoch": 1.8256386219826575, "grad_norm": 15.70168685913086, "learning_rate": 9.633847163547614e-06, "loss": 0.28087468, "memory(GiB)": 13.7, "step": 3895, "train_speed(iter/s)": 1.547185 }, { "acc": 0.94291668, "epoch": 1.8279821888914929, "grad_norm": 8.037769317626953, "learning_rate": 9.63534206364386e-06, "loss": 0.29280634, "memory(GiB)": 13.7, "step": 3900, "train_speed(iter/s)": 1.54727 }, { "acc": 0.95137405, "epoch": 1.830325755800328, "grad_norm": 8.952593803405762, "learning_rate": 9.636835048428763e-06, "loss": 0.35872915, "memory(GiB)": 13.7, "step": 3905, "train_speed(iter/s)": 1.547291 }, { "acc": 0.92196426, "epoch": 1.8326693227091635, "grad_norm": 16.95298957824707, "learning_rate": 9.63832612280395e-06, "loss": 0.48177419, "memory(GiB)": 13.7, "step": 3910, "train_speed(iter/s)": 1.54739 }, { "acc": 0.92654762, "epoch": 1.8350128896179987, "grad_norm": 16.224323272705078, "learning_rate": 9.639815291652258e-06, "loss": 0.37008328, "memory(GiB)": 13.7, "step": 3915, "train_speed(iter/s)": 1.547467 }, { "acc": 0.95076561, "epoch": 1.8373564565268339, "grad_norm": 13.92662239074707, "learning_rate": 9.641302559837833e-06, "loss": 0.37141802, "memory(GiB)": 13.7, "step": 3920, "train_speed(iter/s)": 1.54747 }, { "acc": 0.94223862, "epoch": 1.839700023435669, "grad_norm": 15.090031623840332, "learning_rate": 9.642787932206213e-06, "loss": 0.34948099, "memory(GiB)": 13.7, "step": 3925, "train_speed(iter/s)": 1.547458 }, { "acc": 0.94757624, "epoch": 1.8420435903445043, "grad_norm": 19.83092498779297, "learning_rate": 9.644271413584434e-06, "loss": 0.31343093, "memory(GiB)": 13.7, "step": 3930, "train_speed(iter/s)": 1.54755 }, { "acc": 0.92544146, "epoch": 1.8443871572533395, "grad_norm": 5.70922327041626, "learning_rate": 9.645753008781118e-06, "loss": 0.43403983, "memory(GiB)": 13.7, "step": 3935, "train_speed(iter/s)": 1.547621 }, { "acc": 0.94210224, "epoch": 1.8467307241621749, "grad_norm": 16.450328826904297, "learning_rate": 9.647232722586572e-06, "loss": 0.3246604, "memory(GiB)": 13.7, "step": 3940, "train_speed(iter/s)": 1.547685 }, { "acc": 0.95561333, "epoch": 1.84907429107101, "grad_norm": 7.855712413787842, "learning_rate": 9.648710559772872e-06, "loss": 0.3327461, "memory(GiB)": 13.7, "step": 3945, "train_speed(iter/s)": 1.547654 }, { "acc": 0.93190479, "epoch": 1.8514178579798455, "grad_norm": 34.798946380615234, "learning_rate": 9.650186525093964e-06, "loss": 0.41490021, "memory(GiB)": 13.7, "step": 3950, "train_speed(iter/s)": 1.547699 }, { "acc": 0.93216209, "epoch": 1.8537614248886807, "grad_norm": 22.99726104736328, "learning_rate": 9.651660623285752e-06, "loss": 0.35001397, "memory(GiB)": 13.7, "step": 3955, "train_speed(iter/s)": 1.547644 }, { "acc": 0.93910084, "epoch": 1.8561049917975159, "grad_norm": 21.18451499938965, "learning_rate": 9.65313285906619e-06, "loss": 0.38330719, "memory(GiB)": 13.7, "step": 3960, "train_speed(iter/s)": 1.547729 }, { "acc": 0.93173857, "epoch": 1.858448558706351, "grad_norm": 16.134572982788086, "learning_rate": 9.654603237135368e-06, "loss": 0.40470448, "memory(GiB)": 13.7, "step": 3965, "train_speed(iter/s)": 1.547923 }, { "acc": 0.96519346, "epoch": 1.8607921256151863, "grad_norm": 8.110372543334961, "learning_rate": 9.656071762175616e-06, "loss": 0.29297719, "memory(GiB)": 13.7, "step": 3970, "train_speed(iter/s)": 1.547994 }, { "acc": 0.91865368, "epoch": 1.8631356925240214, "grad_norm": 48.88608169555664, "learning_rate": 9.657538438851572e-06, "loss": 0.57015781, "memory(GiB)": 13.7, "step": 3975, "train_speed(iter/s)": 1.548126 }, { "acc": 0.92735119, "epoch": 1.8654792594328569, "grad_norm": 13.917364120483398, "learning_rate": 9.65900327181029e-06, "loss": 0.5234273, "memory(GiB)": 13.7, "step": 3980, "train_speed(iter/s)": 1.548115 }, { "acc": 0.93556824, "epoch": 1.867822826341692, "grad_norm": 10.235032081604004, "learning_rate": 9.66046626568132e-06, "loss": 0.3371366, "memory(GiB)": 13.7, "step": 3985, "train_speed(iter/s)": 1.548136 }, { "acc": 0.91818867, "epoch": 1.8701663932505272, "grad_norm": 18.069087982177734, "learning_rate": 9.661927425076798e-06, "loss": 0.58636017, "memory(GiB)": 13.7, "step": 3990, "train_speed(iter/s)": 1.548152 }, { "acc": 0.95803261, "epoch": 1.8725099601593627, "grad_norm": 10.935789108276367, "learning_rate": 9.663386754591523e-06, "loss": 0.29188895, "memory(GiB)": 13.7, "step": 3995, "train_speed(iter/s)": 1.548138 }, { "acc": 0.91139879, "epoch": 1.8748535270681979, "grad_norm": 20.736692428588867, "learning_rate": 9.664844258803068e-06, "loss": 0.55028372, "memory(GiB)": 13.7, "step": 4000, "train_speed(iter/s)": 1.548105 }, { "acc": 0.96044388, "epoch": 1.877197093977033, "grad_norm": 10.465744018554688, "learning_rate": 9.666299942271838e-06, "loss": 0.25446324, "memory(GiB)": 13.7, "step": 4005, "train_speed(iter/s)": 1.548121 }, { "acc": 0.95827456, "epoch": 1.8795406608858682, "grad_norm": 12.240138053894043, "learning_rate": 9.667753809541172e-06, "loss": 0.24379809, "memory(GiB)": 13.7, "step": 4010, "train_speed(iter/s)": 1.548185 }, { "acc": 0.92672644, "epoch": 1.8818842277947034, "grad_norm": 22.82806968688965, "learning_rate": 9.669205865137429e-06, "loss": 0.47595377, "memory(GiB)": 13.7, "step": 4015, "train_speed(iter/s)": 1.548141 }, { "acc": 0.93774834, "epoch": 1.8842277947035386, "grad_norm": 16.49411964416504, "learning_rate": 9.670656113570062e-06, "loss": 0.43387299, "memory(GiB)": 13.7, "step": 4020, "train_speed(iter/s)": 1.548272 }, { "acc": 0.91459694, "epoch": 1.886571361612374, "grad_norm": 20.32213020324707, "learning_rate": 9.672104559331713e-06, "loss": 0.50772343, "memory(GiB)": 13.7, "step": 4025, "train_speed(iter/s)": 1.548379 }, { "acc": 0.91760807, "epoch": 1.8889149285212092, "grad_norm": 18.02452850341797, "learning_rate": 9.673551206898285e-06, "loss": 0.55305462, "memory(GiB)": 13.7, "step": 4030, "train_speed(iter/s)": 1.548417 }, { "acc": 0.945644, "epoch": 1.8912584954300447, "grad_norm": 2.0467123985290527, "learning_rate": 9.674996060729039e-06, "loss": 0.41371226, "memory(GiB)": 13.7, "step": 4035, "train_speed(iter/s)": 1.548355 }, { "acc": 0.9377759, "epoch": 1.8936020623388798, "grad_norm": 39.250247955322266, "learning_rate": 9.676439125266667e-06, "loss": 0.29468369, "memory(GiB)": 13.7, "step": 4040, "train_speed(iter/s)": 1.548311 }, { "acc": 0.91532469, "epoch": 1.895945629247715, "grad_norm": 8.245469093322754, "learning_rate": 9.677880404937373e-06, "loss": 0.45339203, "memory(GiB)": 13.7, "step": 4045, "train_speed(iter/s)": 1.548313 }, { "acc": 0.90888758, "epoch": 1.8982891961565502, "grad_norm": 22.657329559326172, "learning_rate": 9.67931990415096e-06, "loss": 0.53528562, "memory(GiB)": 13.7, "step": 4050, "train_speed(iter/s)": 1.548257 }, { "acc": 0.94321661, "epoch": 1.9006327630653854, "grad_norm": 7.647562026977539, "learning_rate": 9.680757627300908e-06, "loss": 0.35680432, "memory(GiB)": 13.7, "step": 4055, "train_speed(iter/s)": 1.548323 }, { "acc": 0.91856079, "epoch": 1.9029763299742206, "grad_norm": 14.554849624633789, "learning_rate": 9.682193578764458e-06, "loss": 0.51773729, "memory(GiB)": 13.7, "step": 4060, "train_speed(iter/s)": 1.548359 }, { "acc": 0.93015728, "epoch": 1.905319896883056, "grad_norm": 13.032387733459473, "learning_rate": 9.683627762902683e-06, "loss": 0.4661437, "memory(GiB)": 13.7, "step": 4065, "train_speed(iter/s)": 1.548373 }, { "acc": 0.93187981, "epoch": 1.9076634637918912, "grad_norm": 12.370738983154297, "learning_rate": 9.685060184060573e-06, "loss": 0.39110658, "memory(GiB)": 13.7, "step": 4070, "train_speed(iter/s)": 1.548324 }, { "acc": 0.96148682, "epoch": 1.9100070307007266, "grad_norm": 3.726677656173706, "learning_rate": 9.686490846567122e-06, "loss": 0.2242981, "memory(GiB)": 13.7, "step": 4075, "train_speed(iter/s)": 1.548232 }, { "acc": 0.93843746, "epoch": 1.9123505976095618, "grad_norm": 18.348636627197266, "learning_rate": 9.687919754735397e-06, "loss": 0.30083303, "memory(GiB)": 13.7, "step": 4080, "train_speed(iter/s)": 1.548257 }, { "acc": 0.92892284, "epoch": 1.914694164518397, "grad_norm": 12.413043022155762, "learning_rate": 9.68934691286261e-06, "loss": 0.46799088, "memory(GiB)": 13.7, "step": 4085, "train_speed(iter/s)": 1.548257 }, { "acc": 0.94986916, "epoch": 1.9170377314272322, "grad_norm": 8.235394477844238, "learning_rate": 9.69077232523021e-06, "loss": 0.23959112, "memory(GiB)": 13.7, "step": 4090, "train_speed(iter/s)": 1.548343 }, { "acc": 0.95372028, "epoch": 1.9193812983360674, "grad_norm": 7.664574146270752, "learning_rate": 9.692195996103951e-06, "loss": 0.29777715, "memory(GiB)": 13.7, "step": 4095, "train_speed(iter/s)": 1.548425 }, { "acc": 0.9295536, "epoch": 1.9217248652449026, "grad_norm": 21.62623405456543, "learning_rate": 9.693617929733974e-06, "loss": 0.4022222, "memory(GiB)": 13.7, "step": 4100, "train_speed(iter/s)": 1.548489 }, { "acc": 0.94639282, "epoch": 1.924068432153738, "grad_norm": 20.952280044555664, "learning_rate": 9.695038130354881e-06, "loss": 0.35595531, "memory(GiB)": 13.7, "step": 4105, "train_speed(iter/s)": 1.548544 }, { "acc": 0.92452612, "epoch": 1.9264119990625732, "grad_norm": 35.09859848022461, "learning_rate": 9.696456602185801e-06, "loss": 0.42269173, "memory(GiB)": 13.7, "step": 4110, "train_speed(iter/s)": 1.548684 }, { "acc": 0.93633308, "epoch": 1.9287555659714086, "grad_norm": 36.316165924072266, "learning_rate": 9.697873349430483e-06, "loss": 0.47742138, "memory(GiB)": 13.7, "step": 4115, "train_speed(iter/s)": 1.548709 }, { "acc": 0.95203323, "epoch": 1.9310991328802438, "grad_norm": 11.615418434143066, "learning_rate": 9.69928837627736e-06, "loss": 0.28695138, "memory(GiB)": 13.7, "step": 4120, "train_speed(iter/s)": 1.548803 }, { "acc": 0.91584177, "epoch": 1.933442699789079, "grad_norm": 14.8284330368042, "learning_rate": 9.700701686899625e-06, "loss": 0.49965038, "memory(GiB)": 13.7, "step": 4125, "train_speed(iter/s)": 1.548788 }, { "acc": 0.95448723, "epoch": 1.9357862666979142, "grad_norm": 10.943206787109375, "learning_rate": 9.702113285455301e-06, "loss": 0.27114673, "memory(GiB)": 13.7, "step": 4130, "train_speed(iter/s)": 1.548901 }, { "acc": 0.95634356, "epoch": 1.9381298336067494, "grad_norm": 5.075706481933594, "learning_rate": 9.703523176087322e-06, "loss": 0.30775161, "memory(GiB)": 13.7, "step": 4135, "train_speed(iter/s)": 1.548952 }, { "acc": 0.91914749, "epoch": 1.9404734005155846, "grad_norm": 42.586856842041016, "learning_rate": 9.704931362923601e-06, "loss": 0.48530006, "memory(GiB)": 13.7, "step": 4140, "train_speed(iter/s)": 1.548932 }, { "acc": 0.94590235, "epoch": 1.94281696742442, "grad_norm": 4.810439586639404, "learning_rate": 9.70633785007711e-06, "loss": 0.32672691, "memory(GiB)": 13.7, "step": 4145, "train_speed(iter/s)": 1.548961 }, { "acc": 0.95976658, "epoch": 1.9451605343332552, "grad_norm": 10.012856483459473, "learning_rate": 9.707742641645933e-06, "loss": 0.23883054, "memory(GiB)": 13.7, "step": 4150, "train_speed(iter/s)": 1.548973 }, { "acc": 0.94092674, "epoch": 1.9475041012420906, "grad_norm": 15.315107345581055, "learning_rate": 9.709145741713362e-06, "loss": 0.29500346, "memory(GiB)": 13.7, "step": 4155, "train_speed(iter/s)": 1.549022 }, { "acc": 0.92648869, "epoch": 1.9498476681509258, "grad_norm": 12.215503692626953, "learning_rate": 9.71054715434795e-06, "loss": 0.41136084, "memory(GiB)": 13.7, "step": 4160, "train_speed(iter/s)": 1.54909 }, { "acc": 0.95067225, "epoch": 1.952191235059761, "grad_norm": 42.21363830566406, "learning_rate": 9.711946883603595e-06, "loss": 0.29314101, "memory(GiB)": 13.7, "step": 4165, "train_speed(iter/s)": 1.549028 }, { "acc": 0.89703751, "epoch": 1.9545348019685962, "grad_norm": 27.828304290771484, "learning_rate": 9.713344933519598e-06, "loss": 0.72524428, "memory(GiB)": 13.7, "step": 4170, "train_speed(iter/s)": 1.549041 }, { "acc": 0.966187, "epoch": 1.9568783688774314, "grad_norm": 11.385687828063965, "learning_rate": 9.71474130812074e-06, "loss": 0.25775638, "memory(GiB)": 13.7, "step": 4175, "train_speed(iter/s)": 1.549012 }, { "acc": 0.97032204, "epoch": 1.9592219357862666, "grad_norm": 0.42739760875701904, "learning_rate": 9.716136011417352e-06, "loss": 0.15206029, "memory(GiB)": 13.7, "step": 4180, "train_speed(iter/s)": 1.54904 }, { "acc": 0.93982716, "epoch": 1.961565502695102, "grad_norm": 25.48752212524414, "learning_rate": 9.717529047405384e-06, "loss": 0.31620631, "memory(GiB)": 13.7, "step": 4185, "train_speed(iter/s)": 1.549105 }, { "acc": 0.92524662, "epoch": 1.9639090696039372, "grad_norm": 16.00632095336914, "learning_rate": 9.718920420066465e-06, "loss": 0.38609419, "memory(GiB)": 13.7, "step": 4190, "train_speed(iter/s)": 1.54904 }, { "acc": 0.95075893, "epoch": 1.9662526365127726, "grad_norm": 9.419489860534668, "learning_rate": 9.720310133367987e-06, "loss": 0.32683065, "memory(GiB)": 13.7, "step": 4195, "train_speed(iter/s)": 1.549063 }, { "acc": 0.89690342, "epoch": 1.9685962034216078, "grad_norm": 84.55511474609375, "learning_rate": 9.72169819126316e-06, "loss": 0.66871128, "memory(GiB)": 13.7, "step": 4200, "train_speed(iter/s)": 1.549173 }, { "acc": 0.93070354, "epoch": 1.970939770330443, "grad_norm": 5.928374290466309, "learning_rate": 9.723084597691082e-06, "loss": 0.4079072, "memory(GiB)": 13.7, "step": 4205, "train_speed(iter/s)": 1.549279 }, { "acc": 0.94604006, "epoch": 1.9732833372392782, "grad_norm": 7.875173091888428, "learning_rate": 9.724469356576815e-06, "loss": 0.33955047, "memory(GiB)": 13.7, "step": 4210, "train_speed(iter/s)": 1.549312 }, { "acc": 0.93778324, "epoch": 1.9756269041481134, "grad_norm": 121.6863021850586, "learning_rate": 9.725852471831434e-06, "loss": 0.49602799, "memory(GiB)": 13.7, "step": 4215, "train_speed(iter/s)": 1.549349 }, { "acc": 0.91284657, "epoch": 1.9779704710569486, "grad_norm": 11.023106575012207, "learning_rate": 9.727233947352114e-06, "loss": 0.41039586, "memory(GiB)": 13.7, "step": 4220, "train_speed(iter/s)": 1.549369 }, { "acc": 0.92072983, "epoch": 1.9803140379657838, "grad_norm": 16.241573333740234, "learning_rate": 9.728613787022178e-06, "loss": 0.61126881, "memory(GiB)": 13.7, "step": 4225, "train_speed(iter/s)": 1.549415 }, { "acc": 0.94836407, "epoch": 1.9826576048746192, "grad_norm": 14.711127281188965, "learning_rate": 9.729991994711179e-06, "loss": 0.28973284, "memory(GiB)": 13.7, "step": 4230, "train_speed(iter/s)": 1.549443 }, { "acc": 0.93526134, "epoch": 1.9850011717834544, "grad_norm": 8.150860786437988, "learning_rate": 9.731368574274944e-06, "loss": 0.36548352, "memory(GiB)": 13.7, "step": 4235, "train_speed(iter/s)": 1.549482 }, { "acc": 0.9194953, "epoch": 1.9873447386922898, "grad_norm": 19.586172103881836, "learning_rate": 9.732743529555661e-06, "loss": 0.41445961, "memory(GiB)": 13.7, "step": 4240, "train_speed(iter/s)": 1.549481 }, { "acc": 0.93093109, "epoch": 1.989688305601125, "grad_norm": 11.43782901763916, "learning_rate": 9.734116864381931e-06, "loss": 0.44831395, "memory(GiB)": 13.7, "step": 4245, "train_speed(iter/s)": 1.549512 }, { "acc": 0.95830355, "epoch": 1.9920318725099602, "grad_norm": 7.065206527709961, "learning_rate": 9.735488582568833e-06, "loss": 0.31629725, "memory(GiB)": 13.7, "step": 4250, "train_speed(iter/s)": 1.549467 }, { "acc": 0.92827816, "epoch": 1.9943754394187954, "grad_norm": 37.292850494384766, "learning_rate": 9.736858687917988e-06, "loss": 0.46858683, "memory(GiB)": 13.7, "step": 4255, "train_speed(iter/s)": 1.549448 }, { "acc": 0.97214346, "epoch": 1.9967190063276306, "grad_norm": 13.751409530639648, "learning_rate": 9.738227184217623e-06, "loss": 0.14583677, "memory(GiB)": 13.7, "step": 4260, "train_speed(iter/s)": 1.549418 }, { "acc": 0.94636908, "epoch": 1.9990625732364657, "grad_norm": 9.216523170471191, "learning_rate": 9.73959407524264e-06, "loss": 0.26428077, "memory(GiB)": 13.7, "step": 4265, "train_speed(iter/s)": 1.549451 }, { "acc": 0.96068449, "epoch": 2.001406140145301, "grad_norm": 6.0600056648254395, "learning_rate": 9.740959364754666e-06, "loss": 0.20680308, "memory(GiB)": 13.7, "step": 4270, "train_speed(iter/s)": 1.549327 }, { "acc": 0.94188585, "epoch": 2.0037497070541366, "grad_norm": 8.617395401000977, "learning_rate": 9.742323056502125e-06, "loss": 0.35313182, "memory(GiB)": 13.7, "step": 4275, "train_speed(iter/s)": 1.549426 }, { "acc": 0.96640873, "epoch": 2.0060932739629718, "grad_norm": 5.315345764160156, "learning_rate": 9.74368515422029e-06, "loss": 0.1647701, "memory(GiB)": 13.7, "step": 4280, "train_speed(iter/s)": 1.549446 }, { "acc": 0.94609709, "epoch": 2.008436840871807, "grad_norm": 18.1451358795166, "learning_rate": 9.74504566163136e-06, "loss": 0.36171913, "memory(GiB)": 13.7, "step": 4285, "train_speed(iter/s)": 1.549371 }, { "acc": 0.92597227, "epoch": 2.010780407780642, "grad_norm": 13.292984962463379, "learning_rate": 9.746404582444508e-06, "loss": 0.50909853, "memory(GiB)": 13.7, "step": 4290, "train_speed(iter/s)": 1.549442 }, { "acc": 0.948631, "epoch": 2.0131239746894773, "grad_norm": 42.876399993896484, "learning_rate": 9.747761920355941e-06, "loss": 0.31488507, "memory(GiB)": 13.7, "step": 4295, "train_speed(iter/s)": 1.549464 }, { "acc": 0.91618614, "epoch": 2.0154675415983125, "grad_norm": 22.267417907714844, "learning_rate": 9.749117679048971e-06, "loss": 0.45180173, "memory(GiB)": 13.7, "step": 4300, "train_speed(iter/s)": 1.549474 }, { "acc": 0.93235617, "epoch": 2.0178111085071477, "grad_norm": 7.579451084136963, "learning_rate": 9.750471862194066e-06, "loss": 0.39235992, "memory(GiB)": 13.7, "step": 4305, "train_speed(iter/s)": 1.549519 }, { "acc": 0.93685217, "epoch": 2.020154675415983, "grad_norm": 19.308740615844727, "learning_rate": 9.751824473448911e-06, "loss": 0.36938212, "memory(GiB)": 13.7, "step": 4310, "train_speed(iter/s)": 1.549506 }, { "acc": 0.94200172, "epoch": 2.0224982423248186, "grad_norm": 17.97163963317871, "learning_rate": 9.753175516458473e-06, "loss": 0.36539202, "memory(GiB)": 13.7, "step": 4315, "train_speed(iter/s)": 1.549466 }, { "acc": 0.95932178, "epoch": 2.0248418092336538, "grad_norm": 7.091568946838379, "learning_rate": 9.754524994855049e-06, "loss": 0.28625898, "memory(GiB)": 13.7, "step": 4320, "train_speed(iter/s)": 1.549493 }, { "acc": 0.92798519, "epoch": 2.027185376142489, "grad_norm": 21.163320541381836, "learning_rate": 9.75587291225834e-06, "loss": 0.34188607, "memory(GiB)": 13.7, "step": 4325, "train_speed(iter/s)": 1.549484 }, { "acc": 0.94250813, "epoch": 2.029528943051324, "grad_norm": 5.546146869659424, "learning_rate": 9.757219272275487e-06, "loss": 0.36073973, "memory(GiB)": 13.7, "step": 4330, "train_speed(iter/s)": 1.54947 }, { "acc": 0.92247581, "epoch": 2.0318725099601593, "grad_norm": 18.294404983520508, "learning_rate": 9.75856407850116e-06, "loss": 0.46803141, "memory(GiB)": 13.7, "step": 4335, "train_speed(iter/s)": 1.549452 }, { "acc": 0.93383579, "epoch": 2.0342160768689945, "grad_norm": 6.277382850646973, "learning_rate": 9.759907334517582e-06, "loss": 0.33692517, "memory(GiB)": 13.7, "step": 4340, "train_speed(iter/s)": 1.549412 }, { "acc": 0.92827206, "epoch": 2.0365596437778297, "grad_norm": 15.196666717529297, "learning_rate": 9.76124904389461e-06, "loss": 0.43832631, "memory(GiB)": 13.7, "step": 4345, "train_speed(iter/s)": 1.549431 }, { "acc": 0.95883932, "epoch": 2.038903210686665, "grad_norm": 15.96358585357666, "learning_rate": 9.762589210189783e-06, "loss": 0.30930882, "memory(GiB)": 13.7, "step": 4350, "train_speed(iter/s)": 1.549459 }, { "acc": 0.96524296, "epoch": 2.0412467775955006, "grad_norm": 12.900349617004395, "learning_rate": 9.76392783694838e-06, "loss": 0.24882996, "memory(GiB)": 13.7, "step": 4355, "train_speed(iter/s)": 1.549467 }, { "acc": 0.94235115, "epoch": 2.0435903445043357, "grad_norm": 13.491178512573242, "learning_rate": 9.765264927703473e-06, "loss": 0.35796263, "memory(GiB)": 13.7, "step": 4360, "train_speed(iter/s)": 1.549476 }, { "acc": 0.95346985, "epoch": 2.045933911413171, "grad_norm": 89.53975677490234, "learning_rate": 9.766600485975991e-06, "loss": 0.2378531, "memory(GiB)": 13.7, "step": 4365, "train_speed(iter/s)": 1.54942 }, { "acc": 0.94036083, "epoch": 2.048277478322006, "grad_norm": 8.10925579071045, "learning_rate": 9.767934515274765e-06, "loss": 0.31357265, "memory(GiB)": 13.7, "step": 4370, "train_speed(iter/s)": 1.549329 }, { "acc": 0.92421627, "epoch": 2.0506210452308413, "grad_norm": 52.951316833496094, "learning_rate": 9.769267019096594e-06, "loss": 0.40678678, "memory(GiB)": 13.7, "step": 4375, "train_speed(iter/s)": 1.549422 }, { "acc": 0.92464285, "epoch": 2.0529646121396765, "grad_norm": 21.27706527709961, "learning_rate": 9.77059800092629e-06, "loss": 0.53775125, "memory(GiB)": 13.7, "step": 4380, "train_speed(iter/s)": 1.549438 }, { "acc": 0.96355419, "epoch": 2.0553081790485117, "grad_norm": 19.42011260986328, "learning_rate": 9.77192746423674e-06, "loss": 0.2277004, "memory(GiB)": 13.7, "step": 4385, "train_speed(iter/s)": 1.549491 }, { "acc": 0.89402399, "epoch": 2.057651745957347, "grad_norm": 20.224464416503906, "learning_rate": 9.773255412488957e-06, "loss": 0.76906605, "memory(GiB)": 13.7, "step": 4390, "train_speed(iter/s)": 1.549538 }, { "acc": 0.9267045, "epoch": 2.0599953128661825, "grad_norm": 15.791003227233887, "learning_rate": 9.77458184913214e-06, "loss": 0.5291152, "memory(GiB)": 13.7, "step": 4395, "train_speed(iter/s)": 1.549551 }, { "acc": 0.93642845, "epoch": 2.0623388797750177, "grad_norm": 23.934879302978516, "learning_rate": 9.775906777603715e-06, "loss": 0.31415606, "memory(GiB)": 13.7, "step": 4400, "train_speed(iter/s)": 1.54963 }, { "acc": 0.94321327, "epoch": 2.064682446683853, "grad_norm": 7.692541599273682, "learning_rate": 9.777230201329397e-06, "loss": 0.35800231, "memory(GiB)": 13.7, "step": 4405, "train_speed(iter/s)": 1.549668 }, { "acc": 0.96269407, "epoch": 2.067026013592688, "grad_norm": 28.568538665771484, "learning_rate": 9.77855212372325e-06, "loss": 0.24604826, "memory(GiB)": 13.7, "step": 4410, "train_speed(iter/s)": 1.549663 }, { "acc": 0.94888573, "epoch": 2.0693695805015233, "grad_norm": 11.667896270751953, "learning_rate": 9.779872548187722e-06, "loss": 0.38852725, "memory(GiB)": 13.7, "step": 4415, "train_speed(iter/s)": 1.549624 }, { "acc": 0.93980722, "epoch": 2.0717131474103585, "grad_norm": 24.04849624633789, "learning_rate": 9.781191478113714e-06, "loss": 0.40079718, "memory(GiB)": 13.7, "step": 4420, "train_speed(iter/s)": 1.549521 }, { "acc": 0.93942461, "epoch": 2.0740567143191937, "grad_norm": 31.3345890045166, "learning_rate": 9.782508916880628e-06, "loss": 0.30277257, "memory(GiB)": 13.7, "step": 4425, "train_speed(iter/s)": 1.549572 }, { "acc": 0.9502121, "epoch": 2.076400281228029, "grad_norm": 15.599456787109375, "learning_rate": 9.783824867856407e-06, "loss": 0.31782732, "memory(GiB)": 13.7, "step": 4430, "train_speed(iter/s)": 1.549605 }, { "acc": 0.91878185, "epoch": 2.078743848136864, "grad_norm": 19.048534393310547, "learning_rate": 9.785139334397607e-06, "loss": 0.53903122, "memory(GiB)": 13.7, "step": 4435, "train_speed(iter/s)": 1.549611 }, { "acc": 0.93507671, "epoch": 2.0810874150456997, "grad_norm": 18.76300811767578, "learning_rate": 9.786452319849435e-06, "loss": 0.39849637, "memory(GiB)": 13.7, "step": 4440, "train_speed(iter/s)": 1.549626 }, { "acc": 0.94006681, "epoch": 2.083430981954535, "grad_norm": 25.213153839111328, "learning_rate": 9.787763827545796e-06, "loss": 0.37426286, "memory(GiB)": 13.7, "step": 4445, "train_speed(iter/s)": 1.549598 }, { "acc": 0.94126358, "epoch": 2.08577454886337, "grad_norm": 40.569095611572266, "learning_rate": 9.789073860809362e-06, "loss": 0.38605399, "memory(GiB)": 13.7, "step": 4450, "train_speed(iter/s)": 1.549491 }, { "acc": 0.950243, "epoch": 2.0881181157722053, "grad_norm": 29.436260223388672, "learning_rate": 9.790382422951607e-06, "loss": 0.27182329, "memory(GiB)": 13.7, "step": 4455, "train_speed(iter/s)": 1.549493 }, { "acc": 0.93418941, "epoch": 2.0904616826810405, "grad_norm": 19.75221824645996, "learning_rate": 9.791689517272853e-06, "loss": 0.44425931, "memory(GiB)": 13.7, "step": 4460, "train_speed(iter/s)": 1.549401 }, { "acc": 0.96205883, "epoch": 2.0928052495898757, "grad_norm": 23.506792068481445, "learning_rate": 9.79299514706234e-06, "loss": 0.2412725, "memory(GiB)": 13.7, "step": 4465, "train_speed(iter/s)": 1.549466 }, { "acc": 0.93148918, "epoch": 2.095148816498711, "grad_norm": 11.413657188415527, "learning_rate": 9.794299315598262e-06, "loss": 0.37918408, "memory(GiB)": 13.7, "step": 4470, "train_speed(iter/s)": 1.549487 }, { "acc": 0.92711802, "epoch": 2.0974923834075465, "grad_norm": 14.551810264587402, "learning_rate": 9.795602026147814e-06, "loss": 0.33986349, "memory(GiB)": 13.7, "step": 4475, "train_speed(iter/s)": 1.549499 }, { "acc": 0.95879755, "epoch": 2.0998359503163817, "grad_norm": 34.043190002441406, "learning_rate": 9.796903281967249e-06, "loss": 0.21922722, "memory(GiB)": 13.7, "step": 4480, "train_speed(iter/s)": 1.549506 }, { "acc": 0.94974651, "epoch": 2.102179517225217, "grad_norm": 12.79688835144043, "learning_rate": 9.798203086301919e-06, "loss": 0.40702963, "memory(GiB)": 13.7, "step": 4485, "train_speed(iter/s)": 1.549578 }, { "acc": 0.9447752, "epoch": 2.104523084134052, "grad_norm": 19.5885066986084, "learning_rate": 9.799501442386338e-06, "loss": 0.30949159, "memory(GiB)": 13.7, "step": 4490, "train_speed(iter/s)": 1.549577 }, { "acc": 0.92785759, "epoch": 2.1068666510428873, "grad_norm": 13.488692283630371, "learning_rate": 9.800798353444208e-06, "loss": 0.42788758, "memory(GiB)": 13.7, "step": 4495, "train_speed(iter/s)": 1.549545 }, { "acc": 0.95032606, "epoch": 2.1092102179517225, "grad_norm": 13.97780704498291, "learning_rate": 9.802093822688485e-06, "loss": 0.29236813, "memory(GiB)": 13.7, "step": 4500, "train_speed(iter/s)": 1.54954 }, { "acc": 0.93456802, "epoch": 2.1115537848605577, "grad_norm": 6.250412464141846, "learning_rate": 9.803387853321426e-06, "loss": 0.45499349, "memory(GiB)": 13.7, "step": 4505, "train_speed(iter/s)": 1.549458 }, { "acc": 0.93516426, "epoch": 2.113897351769393, "grad_norm": 7.474565029144287, "learning_rate": 9.804680448534621e-06, "loss": 0.35616636, "memory(GiB)": 13.7, "step": 4510, "train_speed(iter/s)": 1.549441 }, { "acc": 0.95005264, "epoch": 2.116240918678228, "grad_norm": 21.177106857299805, "learning_rate": 9.805971611509061e-06, "loss": 0.28176847, "memory(GiB)": 13.7, "step": 4515, "train_speed(iter/s)": 1.549457 }, { "acc": 0.90196037, "epoch": 2.1185844855870637, "grad_norm": 15.344156265258789, "learning_rate": 9.807261345415167e-06, "loss": 0.44603252, "memory(GiB)": 13.7, "step": 4520, "train_speed(iter/s)": 1.549434 }, { "acc": 0.92627525, "epoch": 2.120928052495899, "grad_norm": 5.741992950439453, "learning_rate": 9.808549653412852e-06, "loss": 0.30355153, "memory(GiB)": 13.7, "step": 4525, "train_speed(iter/s)": 1.549439 }, { "acc": 0.9426897, "epoch": 2.123271619404734, "grad_norm": 31.852771759033203, "learning_rate": 9.809836538651552e-06, "loss": 0.35421233, "memory(GiB)": 13.7, "step": 4530, "train_speed(iter/s)": 1.5495 }, { "acc": 0.9099823, "epoch": 2.1256151863135693, "grad_norm": 14.997759819030762, "learning_rate": 9.811122004270286e-06, "loss": 0.55954046, "memory(GiB)": 13.7, "step": 4535, "train_speed(iter/s)": 1.549484 }, { "acc": 0.90514259, "epoch": 2.1279587532224045, "grad_norm": 43.91106414794922, "learning_rate": 9.812406053397693e-06, "loss": 0.65988941, "memory(GiB)": 13.7, "step": 4540, "train_speed(iter/s)": 1.549503 }, { "acc": 0.92527781, "epoch": 2.1303023201312397, "grad_norm": 30.033523559570312, "learning_rate": 9.81368868915208e-06, "loss": 0.45176167, "memory(GiB)": 13.7, "step": 4545, "train_speed(iter/s)": 1.549582 }, { "acc": 0.95836582, "epoch": 2.132645887040075, "grad_norm": 9.170270919799805, "learning_rate": 9.814969914641477e-06, "loss": 0.30028157, "memory(GiB)": 13.7, "step": 4550, "train_speed(iter/s)": 1.549584 }, { "acc": 0.9184185, "epoch": 2.13498945394891, "grad_norm": 12.874971389770508, "learning_rate": 9.816249732963656e-06, "loss": 0.60094242, "memory(GiB)": 13.7, "step": 4555, "train_speed(iter/s)": 1.549591 }, { "acc": 0.94231949, "epoch": 2.1373330208577457, "grad_norm": 7.073579788208008, "learning_rate": 9.817528147206214e-06, "loss": 0.38160503, "memory(GiB)": 13.7, "step": 4560, "train_speed(iter/s)": 1.549639 }, { "acc": 0.94677334, "epoch": 2.139676587766581, "grad_norm": 15.27489948272705, "learning_rate": 9.818805160446577e-06, "loss": 0.34761903, "memory(GiB)": 13.7, "step": 4565, "train_speed(iter/s)": 1.549631 }, { "acc": 0.9618948, "epoch": 2.142020154675416, "grad_norm": 9.140786170959473, "learning_rate": 9.820080775752082e-06, "loss": 0.20567427, "memory(GiB)": 13.7, "step": 4570, "train_speed(iter/s)": 1.549624 }, { "acc": 0.93993406, "epoch": 2.1443637215842513, "grad_norm": 22.60311508178711, "learning_rate": 9.821354996179992e-06, "loss": 0.28662331, "memory(GiB)": 13.7, "step": 4575, "train_speed(iter/s)": 1.549573 }, { "acc": 0.93693933, "epoch": 2.1467072884930865, "grad_norm": 12.717187881469727, "learning_rate": 9.822627824777557e-06, "loss": 0.40133848, "memory(GiB)": 13.7, "step": 4580, "train_speed(iter/s)": 1.549572 }, { "acc": 0.91963062, "epoch": 2.1490508554019216, "grad_norm": 13.480738639831543, "learning_rate": 9.82389926458205e-06, "loss": 0.46647034, "memory(GiB)": 13.7, "step": 4585, "train_speed(iter/s)": 1.549592 }, { "acc": 0.9583333, "epoch": 2.151394422310757, "grad_norm": 4.13616943359375, "learning_rate": 9.825169318620814e-06, "loss": 0.30371237, "memory(GiB)": 13.7, "step": 4590, "train_speed(iter/s)": 1.549578 }, { "acc": 0.92184219, "epoch": 2.153737989219592, "grad_norm": 29.833436965942383, "learning_rate": 9.8264379899113e-06, "loss": 0.43061013, "memory(GiB)": 13.7, "step": 4595, "train_speed(iter/s)": 1.549585 }, { "acc": 0.94868288, "epoch": 2.1560815561284277, "grad_norm": 11.017670631408691, "learning_rate": 9.827705281461128e-06, "loss": 0.31639526, "memory(GiB)": 13.7, "step": 4600, "train_speed(iter/s)": 1.549664 }, { "acc": 0.95481644, "epoch": 2.158425123037263, "grad_norm": 6.998105049133301, "learning_rate": 9.828971196268097e-06, "loss": 0.26213908, "memory(GiB)": 13.7, "step": 4605, "train_speed(iter/s)": 1.549746 }, { "acc": 0.90943174, "epoch": 2.160768689946098, "grad_norm": 18.160938262939453, "learning_rate": 9.830235737320258e-06, "loss": 0.63711958, "memory(GiB)": 13.7, "step": 4610, "train_speed(iter/s)": 1.549733 }, { "acc": 0.96434526, "epoch": 2.1631122568549332, "grad_norm": 10.90941047668457, "learning_rate": 9.83149890759594e-06, "loss": 0.25657692, "memory(GiB)": 13.7, "step": 4615, "train_speed(iter/s)": 1.549722 }, { "acc": 0.92439632, "epoch": 2.1654558237637684, "grad_norm": 15.404640197753906, "learning_rate": 9.832760710063804e-06, "loss": 0.42505903, "memory(GiB)": 13.7, "step": 4620, "train_speed(iter/s)": 1.549638 }, { "acc": 0.93962002, "epoch": 2.1677993906726036, "grad_norm": 11.261982917785645, "learning_rate": 9.83402114768287e-06, "loss": 0.32411966, "memory(GiB)": 13.7, "step": 4625, "train_speed(iter/s)": 1.549653 }, { "acc": 0.95455809, "epoch": 2.170142957581439, "grad_norm": 8.682068824768066, "learning_rate": 9.835280223402566e-06, "loss": 0.20989141, "memory(GiB)": 13.7, "step": 4630, "train_speed(iter/s)": 1.549679 }, { "acc": 0.92089672, "epoch": 2.172486524490274, "grad_norm": 17.968996047973633, "learning_rate": 9.836537940162777e-06, "loss": 0.45105462, "memory(GiB)": 13.7, "step": 4635, "train_speed(iter/s)": 1.54976 }, { "acc": 0.9329319, "epoch": 2.1748300913991097, "grad_norm": 9.135053634643555, "learning_rate": 9.837794300893873e-06, "loss": 0.51445093, "memory(GiB)": 13.7, "step": 4640, "train_speed(iter/s)": 1.549817 }, { "acc": 0.95638304, "epoch": 2.177173658307945, "grad_norm": 10.702774047851562, "learning_rate": 9.839049308516758e-06, "loss": 0.28186028, "memory(GiB)": 13.7, "step": 4645, "train_speed(iter/s)": 1.549878 }, { "acc": 0.94108505, "epoch": 2.17951722521678, "grad_norm": 7.195772171020508, "learning_rate": 9.840302965942909e-06, "loss": 0.35132821, "memory(GiB)": 13.7, "step": 4650, "train_speed(iter/s)": 1.549957 }, { "acc": 0.9319643, "epoch": 2.1818607921256152, "grad_norm": 16.272275924682617, "learning_rate": 9.841555276074412e-06, "loss": 0.3987483, "memory(GiB)": 13.7, "step": 4655, "train_speed(iter/s)": 1.549942 }, { "acc": 0.93970242, "epoch": 2.1842043590344504, "grad_norm": 9.861739158630371, "learning_rate": 9.842806241804013e-06, "loss": 0.36519594, "memory(GiB)": 13.7, "step": 4660, "train_speed(iter/s)": 1.549958 }, { "acc": 0.95636215, "epoch": 2.1865479259432856, "grad_norm": 19.388063430786133, "learning_rate": 9.844055866015148e-06, "loss": 0.3209686, "memory(GiB)": 13.7, "step": 4665, "train_speed(iter/s)": 1.550009 }, { "acc": 0.9442461, "epoch": 2.188891492852121, "grad_norm": 9.976675987243652, "learning_rate": 9.845304151581989e-06, "loss": 0.23392937, "memory(GiB)": 13.7, "step": 4670, "train_speed(iter/s)": 1.550034 }, { "acc": 0.93700018, "epoch": 2.191235059760956, "grad_norm": 7.661254405975342, "learning_rate": 9.846551101369477e-06, "loss": 0.4802887, "memory(GiB)": 13.7, "step": 4675, "train_speed(iter/s)": 1.549979 }, { "acc": 0.93301096, "epoch": 2.193578626669791, "grad_norm": 8.718160629272461, "learning_rate": 9.847796718233366e-06, "loss": 0.38843229, "memory(GiB)": 13.7, "step": 4680, "train_speed(iter/s)": 1.550071 }, { "acc": 0.940382, "epoch": 2.195922193578627, "grad_norm": 19.886686325073242, "learning_rate": 9.849041005020268e-06, "loss": 0.38142595, "memory(GiB)": 13.7, "step": 4685, "train_speed(iter/s)": 1.550021 }, { "acc": 0.95791664, "epoch": 2.198265760487462, "grad_norm": 15.0247220993042, "learning_rate": 9.850283964567677e-06, "loss": 0.32230392, "memory(GiB)": 13.7, "step": 4690, "train_speed(iter/s)": 1.550011 }, { "acc": 0.92874451, "epoch": 2.200609327396297, "grad_norm": 18.8386287689209, "learning_rate": 9.851525599704023e-06, "loss": 0.3923219, "memory(GiB)": 13.7, "step": 4695, "train_speed(iter/s)": 1.549971 }, { "acc": 0.92948418, "epoch": 2.2029528943051324, "grad_norm": 11.28355598449707, "learning_rate": 9.852765913248702e-06, "loss": 0.50166283, "memory(GiB)": 13.7, "step": 4700, "train_speed(iter/s)": 1.549951 }, { "acc": 0.91894341, "epoch": 2.2052964612139676, "grad_norm": 9.944426536560059, "learning_rate": 9.85400490801212e-06, "loss": 0.4133152, "memory(GiB)": 13.7, "step": 4705, "train_speed(iter/s)": 1.549984 }, { "acc": 0.95723562, "epoch": 2.207640028122803, "grad_norm": 11.323949813842773, "learning_rate": 9.85524258679572e-06, "loss": 0.29436169, "memory(GiB)": 13.7, "step": 4710, "train_speed(iter/s)": 1.549997 }, { "acc": 0.95441217, "epoch": 2.209983595031638, "grad_norm": 44.7696418762207, "learning_rate": 9.856478952392034e-06, "loss": 0.29571974, "memory(GiB)": 13.7, "step": 4715, "train_speed(iter/s)": 1.550074 }, { "acc": 0.95604706, "epoch": 2.2123271619404736, "grad_norm": 5.566567897796631, "learning_rate": 9.857714007584716e-06, "loss": 0.24137859, "memory(GiB)": 13.7, "step": 4720, "train_speed(iter/s)": 1.550069 }, { "acc": 0.97143583, "epoch": 2.214670728849309, "grad_norm": 10.307550430297852, "learning_rate": 9.858947755148575e-06, "loss": 0.2316622, "memory(GiB)": 13.7, "step": 4725, "train_speed(iter/s)": 1.550083 }, { "acc": 0.94621305, "epoch": 2.217014295758144, "grad_norm": 11.913725852966309, "learning_rate": 9.860180197849617e-06, "loss": 0.24516335, "memory(GiB)": 13.7, "step": 4730, "train_speed(iter/s)": 1.550213 }, { "acc": 0.94026995, "epoch": 2.219357862666979, "grad_norm": 35.23158264160156, "learning_rate": 9.861411338445079e-06, "loss": 0.3561552, "memory(GiB)": 13.7, "step": 4735, "train_speed(iter/s)": 1.550116 }, { "acc": 0.92161942, "epoch": 2.2217014295758144, "grad_norm": 33.28219223022461, "learning_rate": 9.862641179683469e-06, "loss": 0.50257673, "memory(GiB)": 13.7, "step": 4740, "train_speed(iter/s)": 1.550139 }, { "acc": 0.95926743, "epoch": 2.2240449964846496, "grad_norm": 3.9789819717407227, "learning_rate": 9.863869724304608e-06, "loss": 0.2438782, "memory(GiB)": 13.7, "step": 4745, "train_speed(iter/s)": 1.550179 }, { "acc": 0.94604979, "epoch": 2.226388563393485, "grad_norm": 0.9855837225914001, "learning_rate": 9.865096975039648e-06, "loss": 0.32241571, "memory(GiB)": 13.7, "step": 4750, "train_speed(iter/s)": 1.550106 }, { "acc": 0.96603174, "epoch": 2.22873213030232, "grad_norm": 10.187470436096191, "learning_rate": 9.86632293461113e-06, "loss": 0.2553364, "memory(GiB)": 13.7, "step": 4755, "train_speed(iter/s)": 1.550208 }, { "acc": 0.9578537, "epoch": 2.231075697211155, "grad_norm": 10.501839637756348, "learning_rate": 9.867547605733012e-06, "loss": 0.23376799, "memory(GiB)": 13.7, "step": 4760, "train_speed(iter/s)": 1.550325 }, { "acc": 0.94878721, "epoch": 2.233419264119991, "grad_norm": 7.666924476623535, "learning_rate": 9.868770991110694e-06, "loss": 0.32709527, "memory(GiB)": 13.7, "step": 4765, "train_speed(iter/s)": 1.550319 }, { "acc": 0.93198185, "epoch": 2.235762831028826, "grad_norm": 37.57619094848633, "learning_rate": 9.869993093441078e-06, "loss": 0.37085414, "memory(GiB)": 13.7, "step": 4770, "train_speed(iter/s)": 1.550279 }, { "acc": 0.93014069, "epoch": 2.238106397937661, "grad_norm": 179.87037658691406, "learning_rate": 9.871213915412576e-06, "loss": 0.34947128, "memory(GiB)": 13.7, "step": 4775, "train_speed(iter/s)": 1.55033 }, { "acc": 0.95625, "epoch": 2.2404499648464964, "grad_norm": 4.672774314880371, "learning_rate": 9.87243345970517e-06, "loss": 0.18995249, "memory(GiB)": 13.7, "step": 4780, "train_speed(iter/s)": 1.55037 }, { "acc": 0.94454947, "epoch": 2.2427935317553316, "grad_norm": 31.079397201538086, "learning_rate": 9.873651728990428e-06, "loss": 0.32185712, "memory(GiB)": 13.7, "step": 4785, "train_speed(iter/s)": 1.550438 }, { "acc": 0.94538689, "epoch": 2.2451370986641668, "grad_norm": 22.45745849609375, "learning_rate": 9.874868725931558e-06, "loss": 0.27274146, "memory(GiB)": 13.7, "step": 4790, "train_speed(iter/s)": 1.550555 }, { "acc": 0.92929792, "epoch": 2.247480665573002, "grad_norm": 11.097777366638184, "learning_rate": 9.876084453183416e-06, "loss": 0.39037232, "memory(GiB)": 13.7, "step": 4795, "train_speed(iter/s)": 1.550674 }, { "acc": 0.96431541, "epoch": 2.249824232481837, "grad_norm": 31.73798942565918, "learning_rate": 9.877298913392575e-06, "loss": 0.22880964, "memory(GiB)": 13.7, "step": 4800, "train_speed(iter/s)": 1.550647 }, { "acc": 0.9530159, "epoch": 2.252167799390673, "grad_norm": 9.198077201843262, "learning_rate": 9.878512109197331e-06, "loss": 0.36959901, "memory(GiB)": 13.7, "step": 4805, "train_speed(iter/s)": 1.550668 }, { "acc": 0.95790672, "epoch": 2.254511366299508, "grad_norm": 8.658666610717773, "learning_rate": 9.87972404322775e-06, "loss": 0.22012925, "memory(GiB)": 13.7, "step": 4810, "train_speed(iter/s)": 1.550684 }, { "acc": 0.94400892, "epoch": 2.256854933208343, "grad_norm": 33.469242095947266, "learning_rate": 9.880934718105705e-06, "loss": 0.31876931, "memory(GiB)": 13.7, "step": 4815, "train_speed(iter/s)": 1.550714 }, { "acc": 0.90851612, "epoch": 2.2591985001171784, "grad_norm": 54.715087890625, "learning_rate": 9.882144136444899e-06, "loss": 0.63703351, "memory(GiB)": 13.7, "step": 4820, "train_speed(iter/s)": 1.550704 }, { "acc": 0.93870192, "epoch": 2.2615420670260136, "grad_norm": 7.096887588500977, "learning_rate": 9.883352300850912e-06, "loss": 0.39482012, "memory(GiB)": 13.7, "step": 4825, "train_speed(iter/s)": 1.550722 }, { "acc": 0.91604662, "epoch": 2.2638856339348488, "grad_norm": 14.405167579650879, "learning_rate": 9.884559213921218e-06, "loss": 0.53215189, "memory(GiB)": 13.7, "step": 4830, "train_speed(iter/s)": 1.550787 }, { "acc": 0.94122629, "epoch": 2.266229200843684, "grad_norm": 26.78900909423828, "learning_rate": 9.885764878245237e-06, "loss": 0.38959627, "memory(GiB)": 13.7, "step": 4835, "train_speed(iter/s)": 1.550885 }, { "acc": 0.94898758, "epoch": 2.268572767752519, "grad_norm": 6.582890033721924, "learning_rate": 9.886969296404359e-06, "loss": 0.34878097, "memory(GiB)": 13.7, "step": 4840, "train_speed(iter/s)": 1.550901 }, { "acc": 0.9293849, "epoch": 2.2709163346613543, "grad_norm": 19.28904914855957, "learning_rate": 9.888172470971975e-06, "loss": 0.47258229, "memory(GiB)": 13.7, "step": 4845, "train_speed(iter/s)": 1.550936 }, { "acc": 0.92669945, "epoch": 2.27325990157019, "grad_norm": 15.493429183959961, "learning_rate": 9.889374404513516e-06, "loss": 0.51946764, "memory(GiB)": 13.7, "step": 4850, "train_speed(iter/s)": 1.550953 }, { "acc": 0.92704411, "epoch": 2.275603468479025, "grad_norm": 24.78073501586914, "learning_rate": 9.890575099586478e-06, "loss": 0.4200428, "memory(GiB)": 13.7, "step": 4855, "train_speed(iter/s)": 1.550978 }, { "acc": 0.93442545, "epoch": 2.2779470353878604, "grad_norm": 10.821568489074707, "learning_rate": 9.891774558740465e-06, "loss": 0.50078158, "memory(GiB)": 13.7, "step": 4860, "train_speed(iter/s)": 1.551002 }, { "acc": 0.94092245, "epoch": 2.2802906022966956, "grad_norm": 9.519121170043945, "learning_rate": 9.892972784517211e-06, "loss": 0.30660129, "memory(GiB)": 13.7, "step": 4865, "train_speed(iter/s)": 1.551059 }, { "acc": 0.94430447, "epoch": 2.2826341692055307, "grad_norm": 9.727057456970215, "learning_rate": 9.894169779450624e-06, "loss": 0.34405324, "memory(GiB)": 13.7, "step": 4870, "train_speed(iter/s)": 1.551089 }, { "acc": 0.93001671, "epoch": 2.284977736114366, "grad_norm": 22.283872604370117, "learning_rate": 9.895365546066802e-06, "loss": 0.43490334, "memory(GiB)": 13.7, "step": 4875, "train_speed(iter/s)": 1.55112 }, { "acc": 0.92047901, "epoch": 2.287321303023201, "grad_norm": 29.289663314819336, "learning_rate": 9.896560086884083e-06, "loss": 0.4823493, "memory(GiB)": 13.7, "step": 4880, "train_speed(iter/s)": 1.551153 }, { "acc": 0.95585098, "epoch": 2.2896648699320368, "grad_norm": 13.603925704956055, "learning_rate": 9.89775340441306e-06, "loss": 0.27570305, "memory(GiB)": 13.7, "step": 4885, "train_speed(iter/s)": 1.551096 }, { "acc": 0.94830437, "epoch": 2.292008436840872, "grad_norm": 19.271474838256836, "learning_rate": 9.898945501156629e-06, "loss": 0.25919003, "memory(GiB)": 13.7, "step": 4890, "train_speed(iter/s)": 1.551104 }, { "acc": 0.93755207, "epoch": 2.294352003749707, "grad_norm": 23.609031677246094, "learning_rate": 9.900136379610006e-06, "loss": 0.29886739, "memory(GiB)": 13.7, "step": 4895, "train_speed(iter/s)": 1.551143 }, { "acc": 0.94821434, "epoch": 2.2966955706585424, "grad_norm": 59.72626495361328, "learning_rate": 9.901326042260775e-06, "loss": 0.38882408, "memory(GiB)": 13.7, "step": 4900, "train_speed(iter/s)": 1.551126 }, { "acc": 0.96719246, "epoch": 2.2990391375673775, "grad_norm": 11.479090690612793, "learning_rate": 9.90251449158889e-06, "loss": 0.22851396, "memory(GiB)": 13.7, "step": 4905, "train_speed(iter/s)": 1.55111 }, { "acc": 0.95845642, "epoch": 2.3013827044762127, "grad_norm": 7.368232727050781, "learning_rate": 9.903701730066743e-06, "loss": 0.23639524, "memory(GiB)": 13.7, "step": 4910, "train_speed(iter/s)": 1.551095 }, { "acc": 0.93394079, "epoch": 2.303726271385048, "grad_norm": 33.9710807800293, "learning_rate": 9.904887760159168e-06, "loss": 0.45462646, "memory(GiB)": 13.7, "step": 4915, "train_speed(iter/s)": 1.55114 }, { "acc": 0.95314007, "epoch": 2.306069838293883, "grad_norm": 37.97688674926758, "learning_rate": 9.906072584323481e-06, "loss": 0.28650312, "memory(GiB)": 13.7, "step": 4920, "train_speed(iter/s)": 1.551261 }, { "acc": 0.94511356, "epoch": 2.3084134052027183, "grad_norm": 11.092713356018066, "learning_rate": 9.907256205009512e-06, "loss": 0.29199195, "memory(GiB)": 13.7, "step": 4925, "train_speed(iter/s)": 1.551249 }, { "acc": 0.93733587, "epoch": 2.310756972111554, "grad_norm": 8.099349975585938, "learning_rate": 9.908438624659636e-06, "loss": 0.3386054, "memory(GiB)": 13.7, "step": 4930, "train_speed(iter/s)": 1.55131 }, { "acc": 0.9660615, "epoch": 2.313100539020389, "grad_norm": 6.759310722351074, "learning_rate": 9.909619845708792e-06, "loss": 0.21709116, "memory(GiB)": 13.7, "step": 4935, "train_speed(iter/s)": 1.551209 }, { "acc": 0.92833433, "epoch": 2.3154441059292243, "grad_norm": 19.062376022338867, "learning_rate": 9.910799870584531e-06, "loss": 0.47040892, "memory(GiB)": 13.7, "step": 4940, "train_speed(iter/s)": 1.551214 }, { "acc": 0.90727816, "epoch": 2.3177876728380595, "grad_norm": 35.07841873168945, "learning_rate": 9.911978701707031e-06, "loss": 0.67673841, "memory(GiB)": 13.7, "step": 4945, "train_speed(iter/s)": 1.5512 }, { "acc": 0.92713604, "epoch": 2.3201312397468947, "grad_norm": 12.999733924865723, "learning_rate": 9.91315634148913e-06, "loss": 0.52192349, "memory(GiB)": 13.7, "step": 4950, "train_speed(iter/s)": 1.551243 }, { "acc": 0.93065472, "epoch": 2.32247480665573, "grad_norm": 23.251588821411133, "learning_rate": 9.914332792336367e-06, "loss": 0.43854651, "memory(GiB)": 13.7, "step": 4955, "train_speed(iter/s)": 1.551321 }, { "acc": 0.95268307, "epoch": 2.324818373564565, "grad_norm": 21.815187454223633, "learning_rate": 9.915508056646999e-06, "loss": 0.31939197, "memory(GiB)": 13.7, "step": 4960, "train_speed(iter/s)": 1.551365 }, { "acc": 0.93281164, "epoch": 2.3271619404734007, "grad_norm": 6.694558143615723, "learning_rate": 9.916682136812027e-06, "loss": 0.39070799, "memory(GiB)": 13.7, "step": 4965, "train_speed(iter/s)": 1.551431 }, { "acc": 0.94995937, "epoch": 2.329505507382236, "grad_norm": 4.021695137023926, "learning_rate": 9.91785503521524e-06, "loss": 0.24145713, "memory(GiB)": 13.7, "step": 4970, "train_speed(iter/s)": 1.551398 }, { "acc": 0.95485115, "epoch": 2.331849074291071, "grad_norm": 4.581928730010986, "learning_rate": 9.919026754233233e-06, "loss": 0.25944188, "memory(GiB)": 13.7, "step": 4975, "train_speed(iter/s)": 1.551434 }, { "acc": 0.96270332, "epoch": 2.3341926411999063, "grad_norm": 12.455164909362793, "learning_rate": 9.920197296235437e-06, "loss": 0.21080687, "memory(GiB)": 13.7, "step": 4980, "train_speed(iter/s)": 1.551429 }, { "acc": 0.91050472, "epoch": 2.3365362081087415, "grad_norm": 12.336514472961426, "learning_rate": 9.921366663584159e-06, "loss": 0.55963383, "memory(GiB)": 13.7, "step": 4985, "train_speed(iter/s)": 1.551425 }, { "acc": 0.92566423, "epoch": 2.3388797750175767, "grad_norm": 18.991207122802734, "learning_rate": 9.92253485863459e-06, "loss": 0.49231124, "memory(GiB)": 13.7, "step": 4990, "train_speed(iter/s)": 1.551458 }, { "acc": 0.9249157, "epoch": 2.341223341926412, "grad_norm": 41.40108108520508, "learning_rate": 9.92370188373485e-06, "loss": 0.52971678, "memory(GiB)": 13.7, "step": 4995, "train_speed(iter/s)": 1.551486 }, { "acc": 0.94192963, "epoch": 2.343566908835247, "grad_norm": 11.349958419799805, "learning_rate": 9.924867741226011e-06, "loss": 0.34332614, "memory(GiB)": 13.7, "step": 5000, "train_speed(iter/s)": 1.55149 }, { "acc": 0.96248512, "epoch": 2.3459104757440823, "grad_norm": 8.497904777526855, "learning_rate": 9.926032433442121e-06, "loss": 0.26588364, "memory(GiB)": 13.7, "step": 5005, "train_speed(iter/s)": 1.551505 }, { "acc": 0.94998913, "epoch": 2.348254042652918, "grad_norm": 13.38961124420166, "learning_rate": 9.927195962710245e-06, "loss": 0.25386829, "memory(GiB)": 13.7, "step": 5010, "train_speed(iter/s)": 1.551449 }, { "acc": 0.93124504, "epoch": 2.350597609561753, "grad_norm": 66.40021514892578, "learning_rate": 9.928358331350478e-06, "loss": 0.41267147, "memory(GiB)": 13.7, "step": 5015, "train_speed(iter/s)": 1.551426 }, { "acc": 0.92634497, "epoch": 2.3529411764705883, "grad_norm": 22.192344665527344, "learning_rate": 9.929519541675981e-06, "loss": 0.47550735, "memory(GiB)": 13.7, "step": 5020, "train_speed(iter/s)": 1.551441 }, { "acc": 0.93277855, "epoch": 2.3552847433794235, "grad_norm": 161.2303466796875, "learning_rate": 9.930679595993004e-06, "loss": 0.50793438, "memory(GiB)": 13.7, "step": 5025, "train_speed(iter/s)": 1.551514 }, { "acc": 0.96424675, "epoch": 2.3576283102882587, "grad_norm": 6.1240434646606445, "learning_rate": 9.931838496600917e-06, "loss": 0.16163909, "memory(GiB)": 13.7, "step": 5030, "train_speed(iter/s)": 1.551529 }, { "acc": 0.95747976, "epoch": 2.359971877197094, "grad_norm": 13.267778396606445, "learning_rate": 9.93299624579224e-06, "loss": 0.28120217, "memory(GiB)": 13.7, "step": 5035, "train_speed(iter/s)": 1.55157 }, { "acc": 0.90322828, "epoch": 2.362315444105929, "grad_norm": 21.663293838500977, "learning_rate": 9.934152845852665e-06, "loss": 0.49681211, "memory(GiB)": 13.7, "step": 5040, "train_speed(iter/s)": 1.551653 }, { "acc": 0.93321857, "epoch": 2.3646590110147643, "grad_norm": 8.993553161621094, "learning_rate": 9.93530829906108e-06, "loss": 0.44557343, "memory(GiB)": 13.7, "step": 5045, "train_speed(iter/s)": 1.551705 }, { "acc": 0.93250065, "epoch": 2.3670025779236, "grad_norm": 6.133238792419434, "learning_rate": 9.936462607689607e-06, "loss": 0.41267872, "memory(GiB)": 13.7, "step": 5050, "train_speed(iter/s)": 1.551692 }, { "acc": 0.93242378, "epoch": 2.369346144832435, "grad_norm": 24.485599517822266, "learning_rate": 9.937615774003619e-06, "loss": 0.47327089, "memory(GiB)": 13.7, "step": 5055, "train_speed(iter/s)": 1.551697 }, { "acc": 0.94084797, "epoch": 2.3716897117412703, "grad_norm": 7.156787395477295, "learning_rate": 9.938767800261772e-06, "loss": 0.43534498, "memory(GiB)": 13.7, "step": 5060, "train_speed(iter/s)": 1.551717 }, { "acc": 0.94645739, "epoch": 2.3740332786501055, "grad_norm": 36.602542877197266, "learning_rate": 9.939918688716029e-06, "loss": 0.37569695, "memory(GiB)": 13.7, "step": 5065, "train_speed(iter/s)": 1.551719 }, { "acc": 0.96967258, "epoch": 2.3763768455589407, "grad_norm": 8.376718521118164, "learning_rate": 9.941068441611684e-06, "loss": 0.17594758, "memory(GiB)": 13.7, "step": 5070, "train_speed(iter/s)": 1.551701 }, { "acc": 0.94442263, "epoch": 2.378720412467776, "grad_norm": 10.415666580200195, "learning_rate": 9.942217061187396e-06, "loss": 0.3601963, "memory(GiB)": 13.7, "step": 5075, "train_speed(iter/s)": 1.551734 }, { "acc": 0.94586811, "epoch": 2.381063979376611, "grad_norm": 34.355133056640625, "learning_rate": 9.943364549675211e-06, "loss": 0.32251108, "memory(GiB)": 13.7, "step": 5080, "train_speed(iter/s)": 1.551664 }, { "acc": 0.95814142, "epoch": 2.3834075462854463, "grad_norm": 14.30457878112793, "learning_rate": 9.944510909300584e-06, "loss": 0.29689105, "memory(GiB)": 13.7, "step": 5085, "train_speed(iter/s)": 1.551707 }, { "acc": 0.94144344, "epoch": 2.3857511131942815, "grad_norm": 5.7565717697143555, "learning_rate": 9.945656142282409e-06, "loss": 0.38449724, "memory(GiB)": 13.7, "step": 5090, "train_speed(iter/s)": 1.551632 }, { "acc": 0.93173161, "epoch": 2.388094680103117, "grad_norm": 19.576147079467773, "learning_rate": 9.946800250833043e-06, "loss": 0.47296638, "memory(GiB)": 13.7, "step": 5095, "train_speed(iter/s)": 1.551591 }, { "acc": 0.93414478, "epoch": 2.3904382470119523, "grad_norm": 16.15438461303711, "learning_rate": 9.947943237158339e-06, "loss": 0.41784801, "memory(GiB)": 13.7, "step": 5100, "train_speed(iter/s)": 1.551594 }, { "acc": 0.93148594, "epoch": 2.3927818139207875, "grad_norm": 6.475334167480469, "learning_rate": 9.949085103457658e-06, "loss": 0.43187342, "memory(GiB)": 13.7, "step": 5105, "train_speed(iter/s)": 1.551592 }, { "acc": 0.93821201, "epoch": 2.3951253808296227, "grad_norm": 18.167430877685547, "learning_rate": 9.950225851923904e-06, "loss": 0.33264649, "memory(GiB)": 13.7, "step": 5110, "train_speed(iter/s)": 1.551666 }, { "acc": 0.92814951, "epoch": 2.397468947738458, "grad_norm": 10.710617065429688, "learning_rate": 9.951365484743552e-06, "loss": 0.4712635, "memory(GiB)": 13.7, "step": 5115, "train_speed(iter/s)": 1.551649 }, { "acc": 0.92738132, "epoch": 2.399812514647293, "grad_norm": 30.682479858398438, "learning_rate": 9.952504004096664e-06, "loss": 0.50129814, "memory(GiB)": 13.7, "step": 5120, "train_speed(iter/s)": 1.551695 }, { "acc": 0.92611103, "epoch": 2.4021560815561283, "grad_norm": 17.644515991210938, "learning_rate": 9.953641412156918e-06, "loss": 0.43074813, "memory(GiB)": 13.7, "step": 5125, "train_speed(iter/s)": 1.551694 }, { "acc": 0.95514107, "epoch": 2.404499648464964, "grad_norm": 12.745389938354492, "learning_rate": 9.954777711091627e-06, "loss": 0.31419399, "memory(GiB)": 13.7, "step": 5130, "train_speed(iter/s)": 1.551855 }, { "acc": 0.94345245, "epoch": 2.406843215373799, "grad_norm": 50.0371208190918, "learning_rate": 9.955912903061787e-06, "loss": 0.28217421, "memory(GiB)": 13.7, "step": 5135, "train_speed(iter/s)": 1.551992 }, { "acc": 0.92950306, "epoch": 2.4091867822826343, "grad_norm": 7.767352104187012, "learning_rate": 9.95704699022207e-06, "loss": 0.45470495, "memory(GiB)": 13.7, "step": 5140, "train_speed(iter/s)": 1.551986 }, { "acc": 0.96376877, "epoch": 2.4115303491914695, "grad_norm": 7.751644611358643, "learning_rate": 9.958179974720862e-06, "loss": 0.27916956, "memory(GiB)": 13.7, "step": 5145, "train_speed(iter/s)": 1.552033 }, { "acc": 0.96175594, "epoch": 2.4138739161003047, "grad_norm": 11.149489402770996, "learning_rate": 9.959311858700302e-06, "loss": 0.1581259, "memory(GiB)": 13.7, "step": 5150, "train_speed(iter/s)": 1.551973 }, { "acc": 0.92632847, "epoch": 2.41621748300914, "grad_norm": 153.9701690673828, "learning_rate": 9.960442644296279e-06, "loss": 0.40884895, "memory(GiB)": 13.7, "step": 5155, "train_speed(iter/s)": 1.551957 }, { "acc": 0.94793987, "epoch": 2.418561049917975, "grad_norm": 9.37898063659668, "learning_rate": 9.961572333638476e-06, "loss": 0.3644115, "memory(GiB)": 13.7, "step": 5160, "train_speed(iter/s)": 1.551937 }, { "acc": 0.94047852, "epoch": 2.4209046168268102, "grad_norm": 144.03758239746094, "learning_rate": 9.96270092885039e-06, "loss": 0.3613709, "memory(GiB)": 13.7, "step": 5165, "train_speed(iter/s)": 1.551948 }, { "acc": 0.95153732, "epoch": 2.4232481837356454, "grad_norm": 16.992891311645508, "learning_rate": 9.963828432049349e-06, "loss": 0.30600352, "memory(GiB)": 13.7, "step": 5170, "train_speed(iter/s)": 1.551934 }, { "acc": 0.93956795, "epoch": 2.425591750644481, "grad_norm": 7.421746730804443, "learning_rate": 9.964954845346543e-06, "loss": 0.3166852, "memory(GiB)": 13.7, "step": 5175, "train_speed(iter/s)": 1.551921 }, { "acc": 0.91022987, "epoch": 2.4279353175533163, "grad_norm": 5.636128902435303, "learning_rate": 9.966080170847048e-06, "loss": 0.61338053, "memory(GiB)": 13.7, "step": 5180, "train_speed(iter/s)": 1.551982 }, { "acc": 0.92401934, "epoch": 2.4302788844621515, "grad_norm": 14.441650390625, "learning_rate": 9.967204410649844e-06, "loss": 0.56610403, "memory(GiB)": 13.7, "step": 5185, "train_speed(iter/s)": 1.551963 }, { "acc": 0.94455719, "epoch": 2.4326224513709866, "grad_norm": 23.622804641723633, "learning_rate": 9.968327566847845e-06, "loss": 0.35639477, "memory(GiB)": 13.7, "step": 5190, "train_speed(iter/s)": 1.551929 }, { "acc": 0.92882385, "epoch": 2.434966018279822, "grad_norm": 10.76087474822998, "learning_rate": 9.969449641527911e-06, "loss": 0.51181908, "memory(GiB)": 13.7, "step": 5195, "train_speed(iter/s)": 1.551864 }, { "acc": 0.93352547, "epoch": 2.437309585188657, "grad_norm": 13.319668769836426, "learning_rate": 9.970570636770893e-06, "loss": 0.34591112, "memory(GiB)": 13.7, "step": 5200, "train_speed(iter/s)": 1.55188 }, { "acc": 0.92463207, "epoch": 2.4396531520974922, "grad_norm": 10.727113723754883, "learning_rate": 9.971690554651627e-06, "loss": 0.56935892, "memory(GiB)": 13.7, "step": 5205, "train_speed(iter/s)": 1.551784 }, { "acc": 0.9526021, "epoch": 2.441996719006328, "grad_norm": 27.411779403686523, "learning_rate": 9.972809397238986e-06, "loss": 0.29995527, "memory(GiB)": 13.7, "step": 5210, "train_speed(iter/s)": 1.551747 }, { "acc": 0.93181791, "epoch": 2.444340285915163, "grad_norm": 16.423173904418945, "learning_rate": 9.973927166595878e-06, "loss": 0.38346751, "memory(GiB)": 13.7, "step": 5215, "train_speed(iter/s)": 1.551856 }, { "acc": 0.9593338, "epoch": 2.4466838528239983, "grad_norm": 22.29077911376953, "learning_rate": 9.97504386477929e-06, "loss": 0.2447628, "memory(GiB)": 13.7, "step": 5220, "train_speed(iter/s)": 1.551835 }, { "acc": 0.94673262, "epoch": 2.4490274197328334, "grad_norm": 7.32407808303833, "learning_rate": 9.976159493840294e-06, "loss": 0.3489471, "memory(GiB)": 13.7, "step": 5225, "train_speed(iter/s)": 1.551828 }, { "acc": 0.94489584, "epoch": 2.4513709866416686, "grad_norm": 7.554961204528809, "learning_rate": 9.977274055824082e-06, "loss": 0.29425213, "memory(GiB)": 13.7, "step": 5230, "train_speed(iter/s)": 1.551775 }, { "acc": 0.93590775, "epoch": 2.453714553550504, "grad_norm": 7.509471893310547, "learning_rate": 9.978387552769976e-06, "loss": 0.43810415, "memory(GiB)": 13.7, "step": 5235, "train_speed(iter/s)": 1.551734 }, { "acc": 0.95376492, "epoch": 2.456058120459339, "grad_norm": 17.908437728881836, "learning_rate": 9.979499986711464e-06, "loss": 0.30094404, "memory(GiB)": 13.7, "step": 5240, "train_speed(iter/s)": 1.551774 }, { "acc": 0.94630032, "epoch": 2.458401687368174, "grad_norm": 15.476669311523438, "learning_rate": 9.980611359676216e-06, "loss": 0.37058311, "memory(GiB)": 13.7, "step": 5245, "train_speed(iter/s)": 1.551843 }, { "acc": 0.93767862, "epoch": 2.4607452542770094, "grad_norm": 11.19997787475586, "learning_rate": 9.981721673686098e-06, "loss": 0.19985508, "memory(GiB)": 13.7, "step": 5250, "train_speed(iter/s)": 1.551872 }, { "acc": 0.93227673, "epoch": 2.463088821185845, "grad_norm": 7.002888202667236, "learning_rate": 9.982830930757213e-06, "loss": 0.29510956, "memory(GiB)": 13.7, "step": 5255, "train_speed(iter/s)": 1.551697 }, { "acc": 0.94268026, "epoch": 2.4654323880946802, "grad_norm": 30.98567008972168, "learning_rate": 9.983939132899904e-06, "loss": 0.31870012, "memory(GiB)": 13.7, "step": 5260, "train_speed(iter/s)": 1.551638 }, { "acc": 0.9404068, "epoch": 2.4677759550035154, "grad_norm": 7.187641143798828, "learning_rate": 9.985046282118783e-06, "loss": 0.32170873, "memory(GiB)": 13.7, "step": 5265, "train_speed(iter/s)": 1.551615 }, { "acc": 0.91218739, "epoch": 2.4701195219123506, "grad_norm": 8.614727973937988, "learning_rate": 9.986152380412759e-06, "loss": 0.56001844, "memory(GiB)": 13.7, "step": 5270, "train_speed(iter/s)": 1.55153 }, { "acc": 0.94908371, "epoch": 2.472463088821186, "grad_norm": 13.544483184814453, "learning_rate": 9.987257429775053e-06, "loss": 0.36280894, "memory(GiB)": 13.7, "step": 5275, "train_speed(iter/s)": 1.55153 }, { "acc": 0.92961178, "epoch": 2.474806655730021, "grad_norm": 7.881147384643555, "learning_rate": 9.988361432193219e-06, "loss": 0.5087595, "memory(GiB)": 13.7, "step": 5280, "train_speed(iter/s)": 1.55151 }, { "acc": 0.94767351, "epoch": 2.477150222638856, "grad_norm": 20.016544342041016, "learning_rate": 9.989464389649167e-06, "loss": 0.31246128, "memory(GiB)": 13.7, "step": 5285, "train_speed(iter/s)": 1.551415 }, { "acc": 0.95297918, "epoch": 2.4794937895476914, "grad_norm": 10.856407165527344, "learning_rate": 9.990566304119185e-06, "loss": 0.28427274, "memory(GiB)": 13.7, "step": 5290, "train_speed(iter/s)": 1.55147 }, { "acc": 0.93367233, "epoch": 2.481837356456527, "grad_norm": 22.09404754638672, "learning_rate": 9.991667177573962e-06, "loss": 0.45871282, "memory(GiB)": 13.7, "step": 5295, "train_speed(iter/s)": 1.551493 }, { "acc": 0.91940479, "epoch": 2.4841809233653622, "grad_norm": 19.7838077545166, "learning_rate": 9.992767011978601e-06, "loss": 0.64859385, "memory(GiB)": 13.7, "step": 5300, "train_speed(iter/s)": 1.551529 }, { "acc": 0.96010818, "epoch": 2.4865244902741974, "grad_norm": 0.5944722890853882, "learning_rate": 9.993865809292656e-06, "loss": 0.2299232, "memory(GiB)": 13.7, "step": 5305, "train_speed(iter/s)": 1.551537 }, { "acc": 0.96385984, "epoch": 2.4888680571830326, "grad_norm": 24.993328094482422, "learning_rate": 9.994963571470131e-06, "loss": 0.21325245, "memory(GiB)": 13.7, "step": 5310, "train_speed(iter/s)": 1.551536 }, { "acc": 0.94283123, "epoch": 2.491211624091868, "grad_norm": 13.854077339172363, "learning_rate": 9.996060300459525e-06, "loss": 0.27267852, "memory(GiB)": 13.7, "step": 5315, "train_speed(iter/s)": 1.551462 }, { "acc": 0.9507328, "epoch": 2.493555191000703, "grad_norm": 15.783101081848145, "learning_rate": 9.997155998203827e-06, "loss": 0.30192156, "memory(GiB)": 13.7, "step": 5320, "train_speed(iter/s)": 1.551541 }, { "acc": 0.93400297, "epoch": 2.495898757909538, "grad_norm": 21.429519653320312, "learning_rate": 9.998250666640566e-06, "loss": 0.36540096, "memory(GiB)": 13.7, "step": 5325, "train_speed(iter/s)": 1.551578 }, { "acc": 0.93420048, "epoch": 2.4982423248183734, "grad_norm": 13.90503215789795, "learning_rate": 9.999344307701799e-06, "loss": 0.44006238, "memory(GiB)": 13.7, "step": 5330, "train_speed(iter/s)": 1.551622 }, { "acc": 0.91618366, "epoch": 2.5005858917272086, "grad_norm": 55.87800979614258, "learning_rate": 9.999999990386277e-06, "loss": 0.58224583, "memory(GiB)": 13.7, "step": 5335, "train_speed(iter/s)": 1.551657 }, { "acc": 0.93651905, "epoch": 2.502929458636044, "grad_norm": 12.409942626953125, "learning_rate": 9.999999882231877e-06, "loss": 0.29635596, "memory(GiB)": 13.7, "step": 5340, "train_speed(iter/s)": 1.551641 }, { "acc": 0.96418018, "epoch": 2.5052730255448794, "grad_norm": 11.230177879333496, "learning_rate": 9.999999653905925e-06, "loss": 0.23116875, "memory(GiB)": 13.7, "step": 5345, "train_speed(iter/s)": 1.551662 }, { "acc": 0.91321201, "epoch": 2.5076165924537146, "grad_norm": 49.320377349853516, "learning_rate": 9.999999305408425e-06, "loss": 0.49676046, "memory(GiB)": 13.7, "step": 5350, "train_speed(iter/s)": 1.551723 }, { "acc": 0.94405012, "epoch": 2.50996015936255, "grad_norm": 37.70219802856445, "learning_rate": 9.999998836739388e-06, "loss": 0.41280642, "memory(GiB)": 13.7, "step": 5355, "train_speed(iter/s)": 1.551812 }, { "acc": 0.95469694, "epoch": 2.512303726271385, "grad_norm": 8.731870651245117, "learning_rate": 9.999998247898822e-06, "loss": 0.21353076, "memory(GiB)": 13.7, "step": 5360, "train_speed(iter/s)": 1.551857 }, { "acc": 0.96738091, "epoch": 2.51464729318022, "grad_norm": 3.397590160369873, "learning_rate": 9.999997538886745e-06, "loss": 0.20583725, "memory(GiB)": 13.7, "step": 5365, "train_speed(iter/s)": 1.551895 }, { "acc": 0.93306541, "epoch": 2.516990860089056, "grad_norm": 11.677973747253418, "learning_rate": 9.999996709703168e-06, "loss": 0.36996872, "memory(GiB)": 13.7, "step": 5370, "train_speed(iter/s)": 1.55187 }, { "acc": 0.92625999, "epoch": 2.519334426997891, "grad_norm": 15.277714729309082, "learning_rate": 9.999995760348118e-06, "loss": 0.47082539, "memory(GiB)": 13.7, "step": 5375, "train_speed(iter/s)": 1.551861 }, { "acc": 0.95447807, "epoch": 2.521677993906726, "grad_norm": 13.02117919921875, "learning_rate": 9.999994690821613e-06, "loss": 0.3452996, "memory(GiB)": 13.7, "step": 5380, "train_speed(iter/s)": 1.55189 }, { "acc": 0.93593836, "epoch": 2.5240215608155614, "grad_norm": 10.438427925109863, "learning_rate": 9.99999350112368e-06, "loss": 0.40737295, "memory(GiB)": 13.7, "step": 5385, "train_speed(iter/s)": 1.551918 }, { "acc": 0.94178028, "epoch": 2.5263651277243966, "grad_norm": 60.27611541748047, "learning_rate": 9.999992191254351e-06, "loss": 0.35984945, "memory(GiB)": 13.7, "step": 5390, "train_speed(iter/s)": 1.551893 }, { "acc": 0.92638273, "epoch": 2.528708694633232, "grad_norm": 7.288618087768555, "learning_rate": 9.999990761213651e-06, "loss": 0.41316891, "memory(GiB)": 13.7, "step": 5395, "train_speed(iter/s)": 1.55191 }, { "acc": 0.94140482, "epoch": 2.531052261542067, "grad_norm": 9.929224967956543, "learning_rate": 9.99998921100162e-06, "loss": 0.33640349, "memory(GiB)": 13.7, "step": 5400, "train_speed(iter/s)": 1.5519 }, { "acc": 0.91914825, "epoch": 2.533395828450902, "grad_norm": 129.08944702148438, "learning_rate": 9.99998754061829e-06, "loss": 0.55739985, "memory(GiB)": 13.7, "step": 5405, "train_speed(iter/s)": 1.552027 }, { "acc": 0.95622978, "epoch": 2.5357393953597374, "grad_norm": 14.396370887756348, "learning_rate": 9.999985750063707e-06, "loss": 0.19992812, "memory(GiB)": 13.7, "step": 5410, "train_speed(iter/s)": 1.551969 }, { "acc": 0.95849295, "epoch": 2.5380829622685726, "grad_norm": 14.121668815612793, "learning_rate": 9.99998383933791e-06, "loss": 0.26497297, "memory(GiB)": 13.7, "step": 5415, "train_speed(iter/s)": 1.551928 }, { "acc": 0.9341547, "epoch": 2.5404265291774077, "grad_norm": 19.128772735595703, "learning_rate": 9.999981808440944e-06, "loss": 0.40524263, "memory(GiB)": 13.7, "step": 5420, "train_speed(iter/s)": 1.55195 }, { "acc": 0.91329594, "epoch": 2.5427700960862434, "grad_norm": 6.9660139083862305, "learning_rate": 9.999979657372863e-06, "loss": 0.61055212, "memory(GiB)": 13.7, "step": 5425, "train_speed(iter/s)": 1.551977 }, { "acc": 0.92365055, "epoch": 2.5451136629950786, "grad_norm": 16.26067543029785, "learning_rate": 9.999977386133714e-06, "loss": 0.50425148, "memory(GiB)": 13.7, "step": 5430, "train_speed(iter/s)": 1.551994 }, { "acc": 0.96836309, "epoch": 2.5474572299039138, "grad_norm": 2.9962966442108154, "learning_rate": 9.999974994723553e-06, "loss": 0.24373827, "memory(GiB)": 13.7, "step": 5435, "train_speed(iter/s)": 1.552086 }, { "acc": 0.91404581, "epoch": 2.549800796812749, "grad_norm": 33.063316345214844, "learning_rate": 9.999972483142439e-06, "loss": 0.49125462, "memory(GiB)": 13.7, "step": 5440, "train_speed(iter/s)": 1.552163 }, { "acc": 0.95186005, "epoch": 2.552144363721584, "grad_norm": 8.765106201171875, "learning_rate": 9.999969851390431e-06, "loss": 0.28272865, "memory(GiB)": 13.7, "step": 5445, "train_speed(iter/s)": 1.552196 }, { "acc": 0.95481062, "epoch": 2.5544879306304193, "grad_norm": 15.177305221557617, "learning_rate": 9.999967099467591e-06, "loss": 0.2102155, "memory(GiB)": 13.7, "step": 5450, "train_speed(iter/s)": 1.552286 }, { "acc": 0.94232264, "epoch": 2.556831497539255, "grad_norm": 15.446446418762207, "learning_rate": 9.999964227373988e-06, "loss": 0.36168203, "memory(GiB)": 13.7, "step": 5455, "train_speed(iter/s)": 1.552323 }, { "acc": 0.93722849, "epoch": 2.55917506444809, "grad_norm": 9.11079216003418, "learning_rate": 9.999961235109687e-06, "loss": 0.26296587, "memory(GiB)": 13.7, "step": 5460, "train_speed(iter/s)": 1.552399 }, { "acc": 0.95086727, "epoch": 2.5615186313569254, "grad_norm": 11.770450592041016, "learning_rate": 9.999958122674766e-06, "loss": 0.24533849, "memory(GiB)": 13.7, "step": 5465, "train_speed(iter/s)": 1.552435 }, { "acc": 0.95280704, "epoch": 2.5638621982657606, "grad_norm": 10.502599716186523, "learning_rate": 9.999954890069294e-06, "loss": 0.2440764, "memory(GiB)": 13.7, "step": 5470, "train_speed(iter/s)": 1.552444 }, { "acc": 0.94511642, "epoch": 2.5662057651745958, "grad_norm": 6.2253336906433105, "learning_rate": 9.99995153729335e-06, "loss": 0.32531404, "memory(GiB)": 13.7, "step": 5475, "train_speed(iter/s)": 1.552452 }, { "acc": 0.94476509, "epoch": 2.568549332083431, "grad_norm": 15.822288513183594, "learning_rate": 9.999948064347018e-06, "loss": 0.30586958, "memory(GiB)": 13.7, "step": 5480, "train_speed(iter/s)": 1.552479 }, { "acc": 0.95688648, "epoch": 2.570892898992266, "grad_norm": 12.87890338897705, "learning_rate": 9.999944471230378e-06, "loss": 0.28629079, "memory(GiB)": 13.7, "step": 5485, "train_speed(iter/s)": 1.552409 }, { "acc": 0.94168358, "epoch": 2.5732364659011013, "grad_norm": 15.128656387329102, "learning_rate": 9.999940757943516e-06, "loss": 0.3132266, "memory(GiB)": 13.7, "step": 5490, "train_speed(iter/s)": 1.552422 }, { "acc": 0.93413696, "epoch": 2.5755800328099365, "grad_norm": 25.51789093017578, "learning_rate": 9.999936924486523e-06, "loss": 0.35642796, "memory(GiB)": 13.7, "step": 5495, "train_speed(iter/s)": 1.552375 }, { "acc": 0.95515032, "epoch": 2.5779235997187717, "grad_norm": 0.5724906921386719, "learning_rate": 9.999932970859493e-06, "loss": 0.36160247, "memory(GiB)": 13.7, "step": 5500, "train_speed(iter/s)": 1.552442 }, { "acc": 0.93263893, "epoch": 2.5802671666276074, "grad_norm": 15.256494522094727, "learning_rate": 9.999928897062517e-06, "loss": 0.34931941, "memory(GiB)": 13.7, "step": 5505, "train_speed(iter/s)": 1.552535 }, { "acc": 0.93977795, "epoch": 2.5826107335364425, "grad_norm": 18.716339111328125, "learning_rate": 9.999924703095693e-06, "loss": 0.39941931, "memory(GiB)": 13.7, "step": 5510, "train_speed(iter/s)": 1.552516 }, { "acc": 0.92420597, "epoch": 2.5849543004452777, "grad_norm": 58.062477111816406, "learning_rate": 9.999920388959127e-06, "loss": 0.51425939, "memory(GiB)": 13.7, "step": 5515, "train_speed(iter/s)": 1.552556 }, { "acc": 0.9081728, "epoch": 2.587297867354113, "grad_norm": 21.621004104614258, "learning_rate": 9.999915954652918e-06, "loss": 0.69107819, "memory(GiB)": 13.7, "step": 5520, "train_speed(iter/s)": 1.552663 }, { "acc": 0.95833788, "epoch": 2.589641434262948, "grad_norm": 12.137258529663086, "learning_rate": 9.999911400177174e-06, "loss": 0.28025143, "memory(GiB)": 13.7, "step": 5525, "train_speed(iter/s)": 1.552716 }, { "acc": 0.94920387, "epoch": 2.5919850011717833, "grad_norm": 14.747017860412598, "learning_rate": 9.999906725532002e-06, "loss": 0.32726912, "memory(GiB)": 13.7, "step": 5530, "train_speed(iter/s)": 1.552707 }, { "acc": 0.92710638, "epoch": 2.594328568080619, "grad_norm": 8.18014144897461, "learning_rate": 9.999901930717519e-06, "loss": 0.57108622, "memory(GiB)": 13.7, "step": 5535, "train_speed(iter/s)": 1.552717 }, { "acc": 0.91448431, "epoch": 2.596672134989454, "grad_norm": 22.882736206054688, "learning_rate": 9.999897015733838e-06, "loss": 0.61384206, "memory(GiB)": 13.7, "step": 5540, "train_speed(iter/s)": 1.552752 }, { "acc": 0.92288542, "epoch": 2.5990157018982893, "grad_norm": 45.03098678588867, "learning_rate": 9.999891980581074e-06, "loss": 0.50648565, "memory(GiB)": 13.7, "step": 5545, "train_speed(iter/s)": 1.552813 }, { "acc": 0.95682182, "epoch": 2.6013592688071245, "grad_norm": 2.819988965988159, "learning_rate": 9.999886825259354e-06, "loss": 0.31591549, "memory(GiB)": 13.7, "step": 5550, "train_speed(iter/s)": 1.552797 }, { "acc": 0.92292662, "epoch": 2.6037028357159597, "grad_norm": 10.80585765838623, "learning_rate": 9.999881549768798e-06, "loss": 0.49733129, "memory(GiB)": 13.7, "step": 5555, "train_speed(iter/s)": 1.552838 }, { "acc": 0.95065928, "epoch": 2.606046402624795, "grad_norm": 14.825238227844238, "learning_rate": 9.999876154109535e-06, "loss": 0.32393951, "memory(GiB)": 13.7, "step": 5560, "train_speed(iter/s)": 1.552876 }, { "acc": 0.9265913, "epoch": 2.60838996953363, "grad_norm": 7.4141364097595215, "learning_rate": 9.999870638281693e-06, "loss": 0.41916752, "memory(GiB)": 13.7, "step": 5565, "train_speed(iter/s)": 1.552838 }, { "acc": 0.96099701, "epoch": 2.6107335364424653, "grad_norm": 1.0534236431121826, "learning_rate": 9.999865002285405e-06, "loss": 0.22788062, "memory(GiB)": 13.7, "step": 5570, "train_speed(iter/s)": 1.552895 }, { "acc": 0.94448872, "epoch": 2.6130771033513005, "grad_norm": 20.525789260864258, "learning_rate": 9.999859246120807e-06, "loss": 0.23708975, "memory(GiB)": 13.7, "step": 5575, "train_speed(iter/s)": 1.552919 }, { "acc": 0.96823864, "epoch": 2.6154206702601357, "grad_norm": 10.305414199829102, "learning_rate": 9.999853369788036e-06, "loss": 0.22911599, "memory(GiB)": 13.7, "step": 5580, "train_speed(iter/s)": 1.552968 }, { "acc": 0.94576397, "epoch": 2.6177642371689713, "grad_norm": 8.592719078063965, "learning_rate": 9.999847373287234e-06, "loss": 0.33686066, "memory(GiB)": 13.7, "step": 5585, "train_speed(iter/s)": 1.553015 }, { "acc": 0.94584856, "epoch": 2.6201078040778065, "grad_norm": 25.144474029541016, "learning_rate": 9.999841256618546e-06, "loss": 0.37664931, "memory(GiB)": 13.7, "step": 5590, "train_speed(iter/s)": 1.553154 }, { "acc": 0.94218016, "epoch": 2.6224513709866417, "grad_norm": 12.701719284057617, "learning_rate": 9.999835019782117e-06, "loss": 0.29088855, "memory(GiB)": 13.7, "step": 5595, "train_speed(iter/s)": 1.553162 }, { "acc": 0.95228176, "epoch": 2.624794937895477, "grad_norm": 13.260804176330566, "learning_rate": 9.9998286627781e-06, "loss": 0.31446977, "memory(GiB)": 13.7, "step": 5600, "train_speed(iter/s)": 1.553207 }, { "acc": 0.92417622, "epoch": 2.627138504804312, "grad_norm": 5.766970157623291, "learning_rate": 9.999822185606644e-06, "loss": 0.36930633, "memory(GiB)": 13.7, "step": 5605, "train_speed(iter/s)": 1.553271 }, { "acc": 0.91290073, "epoch": 2.6294820717131473, "grad_norm": 5.731888294219971, "learning_rate": 9.999815588267909e-06, "loss": 0.55371056, "memory(GiB)": 13.7, "step": 5610, "train_speed(iter/s)": 1.553302 }, { "acc": 0.92816372, "epoch": 2.631825638621983, "grad_norm": 30.45522689819336, "learning_rate": 9.999808870762049e-06, "loss": 0.45112753, "memory(GiB)": 13.7, "step": 5615, "train_speed(iter/s)": 1.553329 }, { "acc": 0.92077379, "epoch": 2.634169205530818, "grad_norm": 18.552988052368164, "learning_rate": 9.99980203308923e-06, "loss": 0.46624517, "memory(GiB)": 13.7, "step": 5620, "train_speed(iter/s)": 1.553436 }, { "acc": 0.93925152, "epoch": 2.6365127724396533, "grad_norm": 6.90907621383667, "learning_rate": 9.999795075249613e-06, "loss": 0.42022395, "memory(GiB)": 13.7, "step": 5625, "train_speed(iter/s)": 1.553444 }, { "acc": 0.9400198, "epoch": 2.6388563393484885, "grad_norm": 37.263431549072266, "learning_rate": 9.999787997243364e-06, "loss": 0.38162894, "memory(GiB)": 13.7, "step": 5630, "train_speed(iter/s)": 1.553551 }, { "acc": 0.95751324, "epoch": 2.6411999062573237, "grad_norm": 14.920866966247559, "learning_rate": 9.99978079907066e-06, "loss": 0.2916431, "memory(GiB)": 13.7, "step": 5635, "train_speed(iter/s)": 1.553602 }, { "acc": 0.9514185, "epoch": 2.643543473166159, "grad_norm": 15.227550506591797, "learning_rate": 9.999773480731665e-06, "loss": 0.29961421, "memory(GiB)": 13.7, "step": 5640, "train_speed(iter/s)": 1.553645 }, { "acc": 0.93772469, "epoch": 2.645887040074994, "grad_norm": 87.77665710449219, "learning_rate": 9.999766042226562e-06, "loss": 0.33840027, "memory(GiB)": 13.7, "step": 5645, "train_speed(iter/s)": 1.553591 }, { "acc": 0.93479576, "epoch": 2.6482306069838293, "grad_norm": 9.037654876708984, "learning_rate": 9.999758483555528e-06, "loss": 0.32602689, "memory(GiB)": 13.7, "step": 5650, "train_speed(iter/s)": 1.553653 }, { "acc": 0.93849249, "epoch": 2.6505741738926645, "grad_norm": 11.524103164672852, "learning_rate": 9.999750804718742e-06, "loss": 0.42140307, "memory(GiB)": 13.7, "step": 5655, "train_speed(iter/s)": 1.553666 }, { "acc": 0.9418335, "epoch": 2.6529177408014997, "grad_norm": 16.949094772338867, "learning_rate": 9.999743005716391e-06, "loss": 0.35141973, "memory(GiB)": 13.7, "step": 5660, "train_speed(iter/s)": 1.553655 }, { "acc": 0.9255228, "epoch": 2.655261307710335, "grad_norm": 194.17066955566406, "learning_rate": 9.999735086548664e-06, "loss": 0.53068762, "memory(GiB)": 13.7, "step": 5665, "train_speed(iter/s)": 1.553681 }, { "acc": 0.93829098, "epoch": 2.6576048746191705, "grad_norm": 11.996020317077637, "learning_rate": 9.999727047215747e-06, "loss": 0.35030346, "memory(GiB)": 13.7, "step": 5670, "train_speed(iter/s)": 1.553688 }, { "acc": 0.94826889, "epoch": 2.6599484415280057, "grad_norm": 17.41728973388672, "learning_rate": 9.999718887717835e-06, "loss": 0.22601864, "memory(GiB)": 13.7, "step": 5675, "train_speed(iter/s)": 1.553706 }, { "acc": 0.92430534, "epoch": 2.662292008436841, "grad_norm": 11.271665573120117, "learning_rate": 9.999710608055126e-06, "loss": 0.43410411, "memory(GiB)": 13.7, "step": 5680, "train_speed(iter/s)": 1.553723 }, { "acc": 0.91497965, "epoch": 2.664635575345676, "grad_norm": 11.592718124389648, "learning_rate": 9.999702208227818e-06, "loss": 0.46467695, "memory(GiB)": 13.7, "step": 5685, "train_speed(iter/s)": 1.553761 }, { "acc": 0.94433985, "epoch": 2.6669791422545113, "grad_norm": 20.93683624267578, "learning_rate": 9.99969368823611e-06, "loss": 0.4119823, "memory(GiB)": 13.7, "step": 5690, "train_speed(iter/s)": 1.553731 }, { "acc": 0.95880318, "epoch": 2.6693227091633465, "grad_norm": 9.463440895080566, "learning_rate": 9.999685048080212e-06, "loss": 0.30676076, "memory(GiB)": 13.7, "step": 5695, "train_speed(iter/s)": 1.553741 }, { "acc": 0.93799343, "epoch": 2.671666276072182, "grad_norm": 82.02616119384766, "learning_rate": 9.999676287760327e-06, "loss": 0.32815733, "memory(GiB)": 13.7, "step": 5700, "train_speed(iter/s)": 1.553756 }, { "acc": 0.96318359, "epoch": 2.6740098429810173, "grad_norm": 15.666023254394531, "learning_rate": 9.999667407276672e-06, "loss": 0.26420281, "memory(GiB)": 13.7, "step": 5705, "train_speed(iter/s)": 1.55374 }, { "acc": 0.95449162, "epoch": 2.6763534098898525, "grad_norm": 10.334200859069824, "learning_rate": 9.999658406629452e-06, "loss": 0.30457356, "memory(GiB)": 13.7, "step": 5710, "train_speed(iter/s)": 1.55367 }, { "acc": 0.94878616, "epoch": 2.6786969767986877, "grad_norm": 3.5125343799591064, "learning_rate": 9.999649285818887e-06, "loss": 0.3582032, "memory(GiB)": 13.7, "step": 5715, "train_speed(iter/s)": 1.553619 }, { "acc": 0.91499557, "epoch": 2.681040543707523, "grad_norm": 18.655956268310547, "learning_rate": 9.9996400448452e-06, "loss": 0.54460788, "memory(GiB)": 13.7, "step": 5720, "train_speed(iter/s)": 1.553649 }, { "acc": 0.95029764, "epoch": 2.683384110616358, "grad_norm": 20.35218620300293, "learning_rate": 9.999630683708607e-06, "loss": 0.33425083, "memory(GiB)": 13.7, "step": 5725, "train_speed(iter/s)": 1.55363 }, { "acc": 0.96505461, "epoch": 2.6857276775251933, "grad_norm": 14.654104232788086, "learning_rate": 9.999621202409335e-06, "loss": 0.23574474, "memory(GiB)": 13.7, "step": 5730, "train_speed(iter/s)": 1.553696 }, { "acc": 0.93444595, "epoch": 2.6880712444340285, "grad_norm": 18.577476501464844, "learning_rate": 9.999611600947614e-06, "loss": 0.48014956, "memory(GiB)": 13.7, "step": 5735, "train_speed(iter/s)": 1.553679 }, { "acc": 0.94322376, "epoch": 2.6904148113428636, "grad_norm": 11.897527694702148, "learning_rate": 9.999601879323676e-06, "loss": 0.24286454, "memory(GiB)": 13.7, "step": 5740, "train_speed(iter/s)": 1.553746 }, { "acc": 0.92529221, "epoch": 2.692758378251699, "grad_norm": 15.677740097045898, "learning_rate": 9.999592037537749e-06, "loss": 0.4371253, "memory(GiB)": 13.7, "step": 5745, "train_speed(iter/s)": 1.553798 }, { "acc": 0.95499878, "epoch": 2.6951019451605345, "grad_norm": 11.557509422302246, "learning_rate": 9.999582075590073e-06, "loss": 0.27522264, "memory(GiB)": 13.7, "step": 5750, "train_speed(iter/s)": 1.553772 }, { "acc": 0.95665054, "epoch": 2.6974455120693697, "grad_norm": 6.916213512420654, "learning_rate": 9.999571993480887e-06, "loss": 0.30048015, "memory(GiB)": 13.7, "step": 5755, "train_speed(iter/s)": 1.553861 }, { "acc": 0.93124866, "epoch": 2.699789078978205, "grad_norm": 13.170984268188477, "learning_rate": 9.999561791210435e-06, "loss": 0.38699477, "memory(GiB)": 13.7, "step": 5760, "train_speed(iter/s)": 1.553872 }, { "acc": 0.9452282, "epoch": 2.70213264588704, "grad_norm": 7.347804069519043, "learning_rate": 9.999551468778959e-06, "loss": 0.28890162, "memory(GiB)": 13.7, "step": 5765, "train_speed(iter/s)": 1.553918 }, { "acc": 0.9441721, "epoch": 2.7044762127958752, "grad_norm": 9.874876022338867, "learning_rate": 9.99954102618671e-06, "loss": 0.37542312, "memory(GiB)": 13.7, "step": 5770, "train_speed(iter/s)": 1.553902 }, { "acc": 0.94710312, "epoch": 2.7068197797047104, "grad_norm": 8.288105964660645, "learning_rate": 9.99953046343394e-06, "loss": 0.23134189, "memory(GiB)": 13.7, "step": 5775, "train_speed(iter/s)": 1.553876 }, { "acc": 0.93675604, "epoch": 2.709163346613546, "grad_norm": 11.045976638793945, "learning_rate": 9.999519780520896e-06, "loss": 0.56985517, "memory(GiB)": 13.7, "step": 5780, "train_speed(iter/s)": 1.553873 }, { "acc": 0.9538332, "epoch": 2.7115069135223813, "grad_norm": 34.79753112792969, "learning_rate": 9.999508977447844e-06, "loss": 0.30459471, "memory(GiB)": 13.7, "step": 5785, "train_speed(iter/s)": 1.553892 }, { "acc": 0.92895699, "epoch": 2.7138504804312165, "grad_norm": 11.868322372436523, "learning_rate": 9.999498054215036e-06, "loss": 0.39118016, "memory(GiB)": 13.7, "step": 5790, "train_speed(iter/s)": 1.55384 }, { "acc": 0.94517994, "epoch": 2.7161940473400517, "grad_norm": 20.652069091796875, "learning_rate": 9.999487010822739e-06, "loss": 0.26689377, "memory(GiB)": 13.7, "step": 5795, "train_speed(iter/s)": 1.553876 }, { "acc": 0.9353754, "epoch": 2.718537614248887, "grad_norm": 5.185434341430664, "learning_rate": 9.999475847271216e-06, "loss": 0.32805653, "memory(GiB)": 13.7, "step": 5800, "train_speed(iter/s)": 1.55393 }, { "acc": 0.95234785, "epoch": 2.720881181157722, "grad_norm": 9.95887565612793, "learning_rate": 9.999464563560738e-06, "loss": 0.31802392, "memory(GiB)": 13.7, "step": 5805, "train_speed(iter/s)": 1.553945 }, { "acc": 0.94810658, "epoch": 2.7232247480665572, "grad_norm": 11.24233341217041, "learning_rate": 9.999453159691575e-06, "loss": 0.3154578, "memory(GiB)": 13.7, "step": 5810, "train_speed(iter/s)": 1.553939 }, { "acc": 0.97842255, "epoch": 2.7255683149753924, "grad_norm": 5.740185737609863, "learning_rate": 9.999441635664e-06, "loss": 0.08969553, "memory(GiB)": 13.7, "step": 5815, "train_speed(iter/s)": 1.553963 }, { "acc": 0.93187208, "epoch": 2.7279118818842276, "grad_norm": 15.95216178894043, "learning_rate": 9.999429991478291e-06, "loss": 0.36420219, "memory(GiB)": 13.7, "step": 5820, "train_speed(iter/s)": 1.553953 }, { "acc": 0.94990978, "epoch": 2.730255448793063, "grad_norm": 9.883259773254395, "learning_rate": 9.999418227134728e-06, "loss": 0.30105243, "memory(GiB)": 13.7, "step": 5825, "train_speed(iter/s)": 1.554091 }, { "acc": 0.95108137, "epoch": 2.7325990157018984, "grad_norm": 4.940636157989502, "learning_rate": 9.999406342633595e-06, "loss": 0.35879335, "memory(GiB)": 13.7, "step": 5830, "train_speed(iter/s)": 1.554066 }, { "acc": 0.94769344, "epoch": 2.7349425826107336, "grad_norm": 10.499080657958984, "learning_rate": 9.999394337975173e-06, "loss": 0.35926359, "memory(GiB)": 13.7, "step": 5835, "train_speed(iter/s)": 1.554036 }, { "acc": 0.94822922, "epoch": 2.737286149519569, "grad_norm": 11.721025466918945, "learning_rate": 9.999382213159756e-06, "loss": 0.3196836, "memory(GiB)": 13.7, "step": 5840, "train_speed(iter/s)": 1.55399 }, { "acc": 0.9472661, "epoch": 2.739629716428404, "grad_norm": 12.989082336425781, "learning_rate": 9.999369968187632e-06, "loss": 0.24981213, "memory(GiB)": 13.7, "step": 5845, "train_speed(iter/s)": 1.554083 }, { "acc": 0.93372221, "epoch": 2.741973283337239, "grad_norm": 18.203947067260742, "learning_rate": 9.999357603059098e-06, "loss": 0.44607139, "memory(GiB)": 13.7, "step": 5850, "train_speed(iter/s)": 1.554072 }, { "acc": 0.95735836, "epoch": 2.7443168502460744, "grad_norm": 4.464509010314941, "learning_rate": 9.999345117774448e-06, "loss": 0.31792336, "memory(GiB)": 13.7, "step": 5855, "train_speed(iter/s)": 1.554038 }, { "acc": 0.96096725, "epoch": 2.74666041715491, "grad_norm": 15.202982902526855, "learning_rate": 9.999332512333985e-06, "loss": 0.22531075, "memory(GiB)": 13.7, "step": 5860, "train_speed(iter/s)": 1.554066 }, { "acc": 0.93346729, "epoch": 2.7490039840637452, "grad_norm": 8.394481658935547, "learning_rate": 9.99931978673801e-06, "loss": 0.34608216, "memory(GiB)": 13.7, "step": 5865, "train_speed(iter/s)": 1.554044 }, { "acc": 0.91598492, "epoch": 2.7513475509725804, "grad_norm": 18.991802215576172, "learning_rate": 9.99930694098683e-06, "loss": 0.50994191, "memory(GiB)": 13.7, "step": 5870, "train_speed(iter/s)": 1.554072 }, { "acc": 0.93899117, "epoch": 2.7536911178814156, "grad_norm": 32.59332275390625, "learning_rate": 9.999293975080754e-06, "loss": 0.38006771, "memory(GiB)": 13.7, "step": 5875, "train_speed(iter/s)": 1.554052 }, { "acc": 0.93033295, "epoch": 2.756034684790251, "grad_norm": 4.894580364227295, "learning_rate": 9.999280889020094e-06, "loss": 0.43766022, "memory(GiB)": 13.7, "step": 5880, "train_speed(iter/s)": 1.554084 }, { "acc": 0.9338974, "epoch": 2.758378251699086, "grad_norm": 17.33428192138672, "learning_rate": 9.999267682805162e-06, "loss": 0.39068198, "memory(GiB)": 13.7, "step": 5885, "train_speed(iter/s)": 1.554112 }, { "acc": 0.96075077, "epoch": 2.760721818607921, "grad_norm": 7.79929256439209, "learning_rate": 9.999254356436277e-06, "loss": 0.26768785, "memory(GiB)": 13.7, "step": 5890, "train_speed(iter/s)": 1.554137 }, { "acc": 0.95854568, "epoch": 2.7630653855167564, "grad_norm": 6.5753278732299805, "learning_rate": 9.999240909913761e-06, "loss": 0.2274138, "memory(GiB)": 13.7, "step": 5895, "train_speed(iter/s)": 1.554113 }, { "acc": 0.94767599, "epoch": 2.7654089524255916, "grad_norm": 19.433181762695312, "learning_rate": 9.999227343237935e-06, "loss": 0.39491851, "memory(GiB)": 13.7, "step": 5900, "train_speed(iter/s)": 1.554173 }, { "acc": 0.95371113, "epoch": 2.767752519334427, "grad_norm": 72.9825439453125, "learning_rate": 9.999213656409126e-06, "loss": 0.21742992, "memory(GiB)": 13.7, "step": 5905, "train_speed(iter/s)": 1.554153 }, { "acc": 0.92360115, "epoch": 2.770096086243262, "grad_norm": 8.643649101257324, "learning_rate": 9.999199849427662e-06, "loss": 0.40000181, "memory(GiB)": 13.7, "step": 5910, "train_speed(iter/s)": 1.554147 }, { "acc": 0.95786705, "epoch": 2.7724396531520976, "grad_norm": 7.140395641326904, "learning_rate": 9.999185922293876e-06, "loss": 0.2191288, "memory(GiB)": 13.7, "step": 5915, "train_speed(iter/s)": 1.554169 }, { "acc": 0.93487282, "epoch": 2.774783220060933, "grad_norm": 38.7420539855957, "learning_rate": 9.999171875008102e-06, "loss": 0.36721497, "memory(GiB)": 13.7, "step": 5920, "train_speed(iter/s)": 1.554247 }, { "acc": 0.9302496, "epoch": 2.777126786969768, "grad_norm": 6.557816028594971, "learning_rate": 9.99915770757068e-06, "loss": 0.44525356, "memory(GiB)": 13.7, "step": 5925, "train_speed(iter/s)": 1.55427 }, { "acc": 0.9709321, "epoch": 2.779470353878603, "grad_norm": 5.7991943359375, "learning_rate": 9.999143419981946e-06, "loss": 0.20650282, "memory(GiB)": 13.7, "step": 5930, "train_speed(iter/s)": 1.554315 }, { "acc": 0.92696571, "epoch": 2.7818139207874384, "grad_norm": 12.271601676940918, "learning_rate": 9.999129012242247e-06, "loss": 0.50095215, "memory(GiB)": 13.7, "step": 5935, "train_speed(iter/s)": 1.554265 }, { "acc": 0.97590771, "epoch": 2.7841574876962736, "grad_norm": 7.124655723571777, "learning_rate": 9.999114484351929e-06, "loss": 0.19442463, "memory(GiB)": 13.7, "step": 5940, "train_speed(iter/s)": 1.554247 }, { "acc": 0.93261538, "epoch": 2.786501054605109, "grad_norm": 10.309128761291504, "learning_rate": 9.99909983631134e-06, "loss": 0.47776194, "memory(GiB)": 13.7, "step": 5945, "train_speed(iter/s)": 1.554317 }, { "acc": 0.96193457, "epoch": 2.7888446215139444, "grad_norm": 17.546396255493164, "learning_rate": 9.999085068120832e-06, "loss": 0.19172698, "memory(GiB)": 13.7, "step": 5950, "train_speed(iter/s)": 1.554302 }, { "acc": 0.94023819, "epoch": 2.7911881884227796, "grad_norm": 19.531604766845703, "learning_rate": 9.999070179780761e-06, "loss": 0.25471957, "memory(GiB)": 13.7, "step": 5955, "train_speed(iter/s)": 1.554312 }, { "acc": 0.96512146, "epoch": 2.793531755331615, "grad_norm": 13.745462417602539, "learning_rate": 9.999055171291486e-06, "loss": 0.29900122, "memory(GiB)": 13.7, "step": 5960, "train_speed(iter/s)": 1.554374 }, { "acc": 0.95395832, "epoch": 2.79587532224045, "grad_norm": 10.345134735107422, "learning_rate": 9.999040042653364e-06, "loss": 0.30425732, "memory(GiB)": 13.7, "step": 5965, "train_speed(iter/s)": 1.554414 }, { "acc": 0.95432129, "epoch": 2.798218889149285, "grad_norm": 15.279536247253418, "learning_rate": 9.99902479386676e-06, "loss": 0.29536772, "memory(GiB)": 13.7, "step": 5970, "train_speed(iter/s)": 1.554484 }, { "acc": 0.95058393, "epoch": 2.8005624560581204, "grad_norm": 69.63038635253906, "learning_rate": 9.999009424932043e-06, "loss": 0.16830051, "memory(GiB)": 13.7, "step": 5975, "train_speed(iter/s)": 1.554498 }, { "acc": 0.94094162, "epoch": 2.8029060229669556, "grad_norm": 10.223361015319824, "learning_rate": 9.99899393584958e-06, "loss": 0.41220016, "memory(GiB)": 13.7, "step": 5980, "train_speed(iter/s)": 1.554495 }, { "acc": 0.93284836, "epoch": 2.8052495898757908, "grad_norm": 5.258616924285889, "learning_rate": 9.998978326619747e-06, "loss": 0.4343698, "memory(GiB)": 13.7, "step": 5985, "train_speed(iter/s)": 1.554476 }, { "acc": 0.93414497, "epoch": 2.807593156784626, "grad_norm": 18.544614791870117, "learning_rate": 9.998962597242912e-06, "loss": 0.41491218, "memory(GiB)": 13.7, "step": 5990, "train_speed(iter/s)": 1.554427 }, { "acc": 0.97238121, "epoch": 2.8099367236934616, "grad_norm": 7.241886615753174, "learning_rate": 9.99894674771946e-06, "loss": 0.10494225, "memory(GiB)": 13.7, "step": 5995, "train_speed(iter/s)": 1.554449 }, { "acc": 0.92901649, "epoch": 2.812280290602297, "grad_norm": 28.985383987426758, "learning_rate": 9.99893077804977e-06, "loss": 0.40290542, "memory(GiB)": 13.7, "step": 6000, "train_speed(iter/s)": 1.554456 }, { "acc": 0.90413513, "epoch": 2.814623857511132, "grad_norm": 19.75521469116211, "learning_rate": 9.998914688234224e-06, "loss": 0.43189878, "memory(GiB)": 13.7, "step": 6005, "train_speed(iter/s)": 1.554493 }, { "acc": 0.96872025, "epoch": 2.816967424419967, "grad_norm": 9.272529602050781, "learning_rate": 9.99889847827321e-06, "loss": 0.18914412, "memory(GiB)": 13.7, "step": 6010, "train_speed(iter/s)": 1.554595 }, { "acc": 0.95440245, "epoch": 2.8193109913288024, "grad_norm": 10.364670753479004, "learning_rate": 9.998882148167117e-06, "loss": 0.26846626, "memory(GiB)": 13.7, "step": 6015, "train_speed(iter/s)": 1.554638 }, { "acc": 0.94872704, "epoch": 2.8216545582376376, "grad_norm": 15.829972267150879, "learning_rate": 9.998865697916339e-06, "loss": 0.32119336, "memory(GiB)": 13.7, "step": 6020, "train_speed(iter/s)": 1.554609 }, { "acc": 0.93905039, "epoch": 2.823998125146473, "grad_norm": 7.538615703582764, "learning_rate": 9.998849127521272e-06, "loss": 0.37927244, "memory(GiB)": 13.7, "step": 6025, "train_speed(iter/s)": 1.554562 }, { "acc": 0.92522135, "epoch": 2.8263416920553084, "grad_norm": 10.458122253417969, "learning_rate": 9.99883243698231e-06, "loss": 0.41058803, "memory(GiB)": 13.7, "step": 6030, "train_speed(iter/s)": 1.554581 }, { "acc": 0.93767872, "epoch": 2.8286852589641436, "grad_norm": 15.49891471862793, "learning_rate": 9.998815626299859e-06, "loss": 0.35813603, "memory(GiB)": 13.7, "step": 6035, "train_speed(iter/s)": 1.554653 }, { "acc": 0.94224205, "epoch": 2.8310288258729788, "grad_norm": 18.408613204956055, "learning_rate": 9.998798695474319e-06, "loss": 0.41698799, "memory(GiB)": 13.7, "step": 6040, "train_speed(iter/s)": 1.554667 }, { "acc": 0.94885416, "epoch": 2.833372392781814, "grad_norm": 24.487239837646484, "learning_rate": 9.9987816445061e-06, "loss": 0.34983542, "memory(GiB)": 13.7, "step": 6045, "train_speed(iter/s)": 1.554634 }, { "acc": 0.93654327, "epoch": 2.835715959690649, "grad_norm": 12.738755226135254, "learning_rate": 9.998764473395611e-06, "loss": 0.44318066, "memory(GiB)": 13.7, "step": 6050, "train_speed(iter/s)": 1.554667 }, { "acc": 0.94472713, "epoch": 2.8380595265994844, "grad_norm": 30.04566192626953, "learning_rate": 9.998747182143265e-06, "loss": 0.31116703, "memory(GiB)": 13.7, "step": 6055, "train_speed(iter/s)": 1.554658 }, { "acc": 0.93729172, "epoch": 2.8404030935083195, "grad_norm": 14.004118919372559, "learning_rate": 9.998729770749477e-06, "loss": 0.35688148, "memory(GiB)": 13.7, "step": 6060, "train_speed(iter/s)": 1.554704 }, { "acc": 0.94928617, "epoch": 2.8427466604171547, "grad_norm": 7.234675407409668, "learning_rate": 9.998712239214665e-06, "loss": 0.2796854, "memory(GiB)": 13.7, "step": 6065, "train_speed(iter/s)": 1.554651 }, { "acc": 0.95573416, "epoch": 2.84509022732599, "grad_norm": 9.124752044677734, "learning_rate": 9.998694587539254e-06, "loss": 0.28910158, "memory(GiB)": 13.7, "step": 6070, "train_speed(iter/s)": 1.554691 }, { "acc": 0.97418652, "epoch": 2.8474337942348256, "grad_norm": 10.15334701538086, "learning_rate": 9.998676815723663e-06, "loss": 0.19625781, "memory(GiB)": 13.7, "step": 6075, "train_speed(iter/s)": 1.554742 }, { "acc": 0.92020969, "epoch": 2.8497773611436608, "grad_norm": 9.048722267150879, "learning_rate": 9.998658923768321e-06, "loss": 0.51032877, "memory(GiB)": 13.7, "step": 6080, "train_speed(iter/s)": 1.554736 }, { "acc": 0.9398201, "epoch": 2.852120928052496, "grad_norm": 9.72745132446289, "learning_rate": 9.99864091167366e-06, "loss": 0.3631737, "memory(GiB)": 13.7, "step": 6085, "train_speed(iter/s)": 1.554735 }, { "acc": 0.94710045, "epoch": 2.854464494961331, "grad_norm": 8.343494415283203, "learning_rate": 9.998622779440112e-06, "loss": 0.27245176, "memory(GiB)": 13.7, "step": 6090, "train_speed(iter/s)": 1.554774 }, { "acc": 0.94639874, "epoch": 2.8568080618701663, "grad_norm": 7.861966133117676, "learning_rate": 9.998604527068112e-06, "loss": 0.23675828, "memory(GiB)": 13.7, "step": 6095, "train_speed(iter/s)": 1.554778 }, { "acc": 0.95007439, "epoch": 2.8591516287790015, "grad_norm": 5.147767066955566, "learning_rate": 9.998586154558098e-06, "loss": 0.30563002, "memory(GiB)": 13.7, "step": 6100, "train_speed(iter/s)": 1.554711 }, { "acc": 0.95572224, "epoch": 2.8614951956878367, "grad_norm": 13.543646812438965, "learning_rate": 9.998567661910514e-06, "loss": 0.32790122, "memory(GiB)": 13.7, "step": 6105, "train_speed(iter/s)": 1.554717 }, { "acc": 0.95312719, "epoch": 2.8638387625966724, "grad_norm": 72.91788482666016, "learning_rate": 9.998549049125803e-06, "loss": 0.3805213, "memory(GiB)": 13.7, "step": 6110, "train_speed(iter/s)": 1.554758 }, { "acc": 0.95403976, "epoch": 2.8661823295055076, "grad_norm": 42.38298034667969, "learning_rate": 9.998530316204414e-06, "loss": 0.31160421, "memory(GiB)": 13.7, "step": 6115, "train_speed(iter/s)": 1.55479 }, { "acc": 0.95777779, "epoch": 2.8685258964143427, "grad_norm": 20.342273712158203, "learning_rate": 9.998511463146795e-06, "loss": 0.30523338, "memory(GiB)": 13.7, "step": 6120, "train_speed(iter/s)": 1.554768 }, { "acc": 0.95017862, "epoch": 2.870869463323178, "grad_norm": 11.203091621398926, "learning_rate": 9.9984924899534e-06, "loss": 0.37789941, "memory(GiB)": 13.7, "step": 6125, "train_speed(iter/s)": 1.554883 }, { "acc": 0.93161869, "epoch": 2.873213030232013, "grad_norm": 17.3555908203125, "learning_rate": 9.998473396624686e-06, "loss": 0.36321254, "memory(GiB)": 13.7, "step": 6130, "train_speed(iter/s)": 1.55487 }, { "acc": 0.93034801, "epoch": 2.8755565971408483, "grad_norm": 41.02442169189453, "learning_rate": 9.99845418316111e-06, "loss": 0.44103394, "memory(GiB)": 13.7, "step": 6135, "train_speed(iter/s)": 1.554877 }, { "acc": 0.94996719, "epoch": 2.8779001640496835, "grad_norm": 9.635926246643066, "learning_rate": 9.998434849563137e-06, "loss": 0.25273557, "memory(GiB)": 13.7, "step": 6140, "train_speed(iter/s)": 1.554956 }, { "acc": 0.94377766, "epoch": 2.8802437309585187, "grad_norm": 8.349613189697266, "learning_rate": 9.998415395831227e-06, "loss": 0.38091111, "memory(GiB)": 13.7, "step": 6145, "train_speed(iter/s)": 1.554974 }, { "acc": 0.94338684, "epoch": 2.882587297867354, "grad_norm": 19.1554012298584, "learning_rate": 9.998395821965853e-06, "loss": 0.29505758, "memory(GiB)": 13.7, "step": 6150, "train_speed(iter/s)": 1.554974 }, { "acc": 0.94537201, "epoch": 2.884930864776189, "grad_norm": 27.797040939331055, "learning_rate": 9.998376127967481e-06, "loss": 0.33919234, "memory(GiB)": 13.7, "step": 6155, "train_speed(iter/s)": 1.554993 }, { "acc": 0.93637991, "epoch": 2.8872744316850247, "grad_norm": 16.709001541137695, "learning_rate": 9.998356313836587e-06, "loss": 0.38495779, "memory(GiB)": 13.7, "step": 6160, "train_speed(iter/s)": 1.554986 }, { "acc": 0.91344967, "epoch": 2.88961799859386, "grad_norm": 10.695219039916992, "learning_rate": 9.998336379573645e-06, "loss": 0.48191633, "memory(GiB)": 13.7, "step": 6165, "train_speed(iter/s)": 1.554978 }, { "acc": 0.93514347, "epoch": 2.891961565502695, "grad_norm": 46.764915466308594, "learning_rate": 9.998316325179138e-06, "loss": 0.30109267, "memory(GiB)": 13.7, "step": 6170, "train_speed(iter/s)": 1.554929 }, { "acc": 0.97688494, "epoch": 2.8943051324115303, "grad_norm": 12.101093292236328, "learning_rate": 9.998296150653542e-06, "loss": 0.16342221, "memory(GiB)": 13.7, "step": 6175, "train_speed(iter/s)": 1.554953 }, { "acc": 0.95213165, "epoch": 2.8966486993203655, "grad_norm": 6.3268866539001465, "learning_rate": 9.99827585599735e-06, "loss": 0.33047915, "memory(GiB)": 13.7, "step": 6180, "train_speed(iter/s)": 1.554937 }, { "acc": 0.92481022, "epoch": 2.8989922662292007, "grad_norm": 20.021780014038086, "learning_rate": 9.998255441211042e-06, "loss": 0.58774633, "memory(GiB)": 13.7, "step": 6185, "train_speed(iter/s)": 1.554935 }, { "acc": 0.94509726, "epoch": 2.9013358331380363, "grad_norm": 9.092549324035645, "learning_rate": 9.998234906295113e-06, "loss": 0.37483001, "memory(GiB)": 13.7, "step": 6190, "train_speed(iter/s)": 1.554874 }, { "acc": 0.93827362, "epoch": 2.9036794000468715, "grad_norm": 46.91816329956055, "learning_rate": 9.998214251250056e-06, "loss": 0.34374492, "memory(GiB)": 13.7, "step": 6195, "train_speed(iter/s)": 1.554916 }, { "acc": 0.93720541, "epoch": 2.9060229669557067, "grad_norm": 24.784439086914062, "learning_rate": 9.998193476076368e-06, "loss": 0.2936065, "memory(GiB)": 13.7, "step": 6200, "train_speed(iter/s)": 1.554969 }, { "acc": 0.9450346, "epoch": 2.908366533864542, "grad_norm": 11.266094207763672, "learning_rate": 9.998172580774546e-06, "loss": 0.31825867, "memory(GiB)": 13.7, "step": 6205, "train_speed(iter/s)": 1.554946 }, { "acc": 0.93989811, "epoch": 2.910710100773377, "grad_norm": 37.02286148071289, "learning_rate": 9.998151565345095e-06, "loss": 0.38093238, "memory(GiB)": 13.7, "step": 6210, "train_speed(iter/s)": 1.554936 }, { "acc": 0.92493057, "epoch": 2.9130536676822123, "grad_norm": 11.963959693908691, "learning_rate": 9.998130429788518e-06, "loss": 0.51464825, "memory(GiB)": 13.7, "step": 6215, "train_speed(iter/s)": 1.555013 }, { "acc": 0.93761539, "epoch": 2.9153972345910475, "grad_norm": 36.26850891113281, "learning_rate": 9.998109174105325e-06, "loss": 0.42093091, "memory(GiB)": 13.7, "step": 6220, "train_speed(iter/s)": 1.555035 }, { "acc": 0.94899387, "epoch": 2.9177408014998827, "grad_norm": 18.071290969848633, "learning_rate": 9.998087798296024e-06, "loss": 0.26417418, "memory(GiB)": 13.7, "step": 6225, "train_speed(iter/s)": 1.555078 }, { "acc": 0.93639545, "epoch": 2.920084368408718, "grad_norm": 6.169285297393799, "learning_rate": 9.998066302361132e-06, "loss": 0.39531481, "memory(GiB)": 13.7, "step": 6230, "train_speed(iter/s)": 1.555058 }, { "acc": 0.94263391, "epoch": 2.922427935317553, "grad_norm": 15.597792625427246, "learning_rate": 9.998044686301166e-06, "loss": 0.32849, "memory(GiB)": 13.7, "step": 6235, "train_speed(iter/s)": 1.555062 }, { "acc": 0.95854168, "epoch": 2.9247715022263887, "grad_norm": 6.559393882751465, "learning_rate": 9.998022950116642e-06, "loss": 0.26665511, "memory(GiB)": 13.7, "step": 6240, "train_speed(iter/s)": 1.555062 }, { "acc": 0.95732956, "epoch": 2.927115069135224, "grad_norm": 30.32440948486328, "learning_rate": 9.998001093808083e-06, "loss": 0.27164984, "memory(GiB)": 13.7, "step": 6245, "train_speed(iter/s)": 1.555062 }, { "acc": 0.91900005, "epoch": 2.929458636044059, "grad_norm": 60.89857482910156, "learning_rate": 9.99797911737602e-06, "loss": 0.54467983, "memory(GiB)": 13.7, "step": 6250, "train_speed(iter/s)": 1.555041 }, { "acc": 0.96136971, "epoch": 2.9318022029528943, "grad_norm": 4.655951023101807, "learning_rate": 9.997957020820973e-06, "loss": 0.20423508, "memory(GiB)": 13.7, "step": 6255, "train_speed(iter/s)": 1.555003 }, { "acc": 0.9312973, "epoch": 2.9341457698617295, "grad_norm": 26.61594581604004, "learning_rate": 9.997934804143477e-06, "loss": 0.3734767, "memory(GiB)": 13.7, "step": 6260, "train_speed(iter/s)": 1.555024 }, { "acc": 0.95659084, "epoch": 2.9364893367705647, "grad_norm": 66.29957580566406, "learning_rate": 9.99791246734407e-06, "loss": 0.28980894, "memory(GiB)": 13.7, "step": 6265, "train_speed(iter/s)": 1.554979 }, { "acc": 0.96534462, "epoch": 2.9388329036794003, "grad_norm": 4.945909023284912, "learning_rate": 9.997890010423281e-06, "loss": 0.16743727, "memory(GiB)": 13.7, "step": 6270, "train_speed(iter/s)": 1.555029 }, { "acc": 0.94326391, "epoch": 2.9411764705882355, "grad_norm": 7.271257400512695, "learning_rate": 9.997867433381656e-06, "loss": 0.31865971, "memory(GiB)": 13.7, "step": 6275, "train_speed(iter/s)": 1.554999 }, { "acc": 0.95526791, "epoch": 2.9435200374970707, "grad_norm": 8.099864959716797, "learning_rate": 9.997844736219735e-06, "loss": 0.23576853, "memory(GiB)": 13.7, "step": 6280, "train_speed(iter/s)": 1.55501 }, { "acc": 0.9363183, "epoch": 2.945863604405906, "grad_norm": 30.359939575195312, "learning_rate": 9.997821918938064e-06, "loss": 0.43060036, "memory(GiB)": 13.7, "step": 6285, "train_speed(iter/s)": 1.554983 }, { "acc": 0.95915337, "epoch": 2.948207171314741, "grad_norm": 9.17224407196045, "learning_rate": 9.997798981537192e-06, "loss": 0.24107866, "memory(GiB)": 13.7, "step": 6290, "train_speed(iter/s)": 1.554987 }, { "acc": 0.94718609, "epoch": 2.9505507382235763, "grad_norm": 9.732553482055664, "learning_rate": 9.99777592401767e-06, "loss": 0.28936694, "memory(GiB)": 13.7, "step": 6295, "train_speed(iter/s)": 1.555096 }, { "acc": 0.94549103, "epoch": 2.9528943051324115, "grad_norm": 9.037803649902344, "learning_rate": 9.997752746380052e-06, "loss": 0.37544086, "memory(GiB)": 13.7, "step": 6300, "train_speed(iter/s)": 1.555083 }, { "acc": 0.94131899, "epoch": 2.9552378720412467, "grad_norm": 21.354820251464844, "learning_rate": 9.997729448624895e-06, "loss": 0.28204596, "memory(GiB)": 13.7, "step": 6305, "train_speed(iter/s)": 1.555112 }, { "acc": 0.9644455, "epoch": 2.957581438950082, "grad_norm": 9.836956977844238, "learning_rate": 9.99770603075276e-06, "loss": 0.23880029, "memory(GiB)": 13.7, "step": 6310, "train_speed(iter/s)": 1.555142 }, { "acc": 0.94985571, "epoch": 2.959925005858917, "grad_norm": 7.729393005371094, "learning_rate": 9.997682492764209e-06, "loss": 0.30674071, "memory(GiB)": 13.7, "step": 6315, "train_speed(iter/s)": 1.555181 }, { "acc": 0.94819984, "epoch": 2.9622685727677527, "grad_norm": 21.381105422973633, "learning_rate": 9.997658834659806e-06, "loss": 0.3792912, "memory(GiB)": 13.7, "step": 6320, "train_speed(iter/s)": 1.555188 }, { "acc": 0.9361805, "epoch": 2.964612139676588, "grad_norm": 5.540212154388428, "learning_rate": 9.997635056440122e-06, "loss": 0.26351676, "memory(GiB)": 13.7, "step": 6325, "train_speed(iter/s)": 1.555129 }, { "acc": 0.95855799, "epoch": 2.966955706585423, "grad_norm": 6.718682289123535, "learning_rate": 9.997611158105729e-06, "loss": 0.31427493, "memory(GiB)": 13.7, "step": 6330, "train_speed(iter/s)": 1.555092 }, { "acc": 0.93983946, "epoch": 2.9692992734942583, "grad_norm": 11.73001766204834, "learning_rate": 9.997587139657202e-06, "loss": 0.27742095, "memory(GiB)": 13.7, "step": 6335, "train_speed(iter/s)": 1.555181 }, { "acc": 0.9268898, "epoch": 2.9716428404030935, "grad_norm": 15.747381210327148, "learning_rate": 9.997563001095115e-06, "loss": 0.48440514, "memory(GiB)": 13.7, "step": 6340, "train_speed(iter/s)": 1.555149 }, { "acc": 0.94751987, "epoch": 2.9739864073119286, "grad_norm": 13.675532341003418, "learning_rate": 9.99753874242005e-06, "loss": 0.39384804, "memory(GiB)": 13.7, "step": 6345, "train_speed(iter/s)": 1.555179 }, { "acc": 0.9239583, "epoch": 2.976329974220764, "grad_norm": 13.535879135131836, "learning_rate": 9.99751436363259e-06, "loss": 0.4162787, "memory(GiB)": 13.7, "step": 6350, "train_speed(iter/s)": 1.555243 }, { "acc": 0.95273848, "epoch": 2.9786735411295995, "grad_norm": 7.91370153427124, "learning_rate": 9.997489864733322e-06, "loss": 0.29283299, "memory(GiB)": 13.7, "step": 6355, "train_speed(iter/s)": 1.555263 }, { "acc": 0.9379715, "epoch": 2.9810171080384347, "grad_norm": 2.9167656898498535, "learning_rate": 9.997465245722835e-06, "loss": 0.38639545, "memory(GiB)": 13.7, "step": 6360, "train_speed(iter/s)": 1.555169 }, { "acc": 0.9541748, "epoch": 2.98336067494727, "grad_norm": 4.401777744293213, "learning_rate": 9.997440506601716e-06, "loss": 0.24523482, "memory(GiB)": 13.7, "step": 6365, "train_speed(iter/s)": 1.555104 }, { "acc": 0.95385447, "epoch": 2.985704241856105, "grad_norm": 14.287004470825195, "learning_rate": 9.997415647370567e-06, "loss": 0.36566868, "memory(GiB)": 13.7, "step": 6370, "train_speed(iter/s)": 1.555082 }, { "acc": 0.95784988, "epoch": 2.9880478087649402, "grad_norm": 7.367951393127441, "learning_rate": 9.997390668029982e-06, "loss": 0.27864676, "memory(GiB)": 13.7, "step": 6375, "train_speed(iter/s)": 1.555148 }, { "acc": 0.9521368, "epoch": 2.9903913756737754, "grad_norm": 26.058725357055664, "learning_rate": 9.997365568580559e-06, "loss": 0.2954226, "memory(GiB)": 13.7, "step": 6380, "train_speed(iter/s)": 1.555104 }, { "acc": 0.93827381, "epoch": 2.9927349425826106, "grad_norm": 87.31898498535156, "learning_rate": 9.997340349022905e-06, "loss": 0.36841838, "memory(GiB)": 13.7, "step": 6385, "train_speed(iter/s)": 1.555139 }, { "acc": 0.94444313, "epoch": 2.995078509491446, "grad_norm": 9.692107200622559, "learning_rate": 9.997315009357626e-06, "loss": 0.3185658, "memory(GiB)": 13.7, "step": 6390, "train_speed(iter/s)": 1.555158 }, { "acc": 0.94572544, "epoch": 2.997422076400281, "grad_norm": 28.683012008666992, "learning_rate": 9.997289549585329e-06, "loss": 0.27898107, "memory(GiB)": 13.7, "step": 6395, "train_speed(iter/s)": 1.555193 }, { "acc": 0.94071226, "epoch": 2.999765643309116, "grad_norm": 18.909427642822266, "learning_rate": 9.997263969706627e-06, "loss": 0.31149268, "memory(GiB)": 13.7, "step": 6400, "train_speed(iter/s)": 1.555157 }, { "acc": 0.88671007, "epoch": 3.002109210217952, "grad_norm": 48.90134811401367, "learning_rate": 9.997238269722135e-06, "loss": 0.64463296, "memory(GiB)": 13.7, "step": 6405, "train_speed(iter/s)": 1.554997 }, { "acc": 0.93037205, "epoch": 3.004452777126787, "grad_norm": 6.636682987213135, "learning_rate": 9.997212449632469e-06, "loss": 0.41659789, "memory(GiB)": 13.7, "step": 6410, "train_speed(iter/s)": 1.555003 }, { "acc": 0.95064144, "epoch": 3.0067963440356222, "grad_norm": 5.812780380249023, "learning_rate": 9.997186509438254e-06, "loss": 0.2547761, "memory(GiB)": 13.7, "step": 6415, "train_speed(iter/s)": 1.555035 }, { "acc": 0.95434484, "epoch": 3.0091399109444574, "grad_norm": 5.733876705169678, "learning_rate": 9.99716044914011e-06, "loss": 0.25655482, "memory(GiB)": 13.7, "step": 6420, "train_speed(iter/s)": 1.555016 }, { "acc": 0.96258345, "epoch": 3.0114834778532926, "grad_norm": 1.9219439029693604, "learning_rate": 9.997134268738662e-06, "loss": 0.20239229, "memory(GiB)": 13.7, "step": 6425, "train_speed(iter/s)": 1.554993 }, { "acc": 0.9350893, "epoch": 3.013827044762128, "grad_norm": 34.67985153198242, "learning_rate": 9.997107968234541e-06, "loss": 0.30814748, "memory(GiB)": 13.7, "step": 6430, "train_speed(iter/s)": 1.555006 }, { "acc": 0.95315247, "epoch": 3.016170611670963, "grad_norm": 13.902237892150879, "learning_rate": 9.997081547628384e-06, "loss": 0.31143689, "memory(GiB)": 13.7, "step": 6435, "train_speed(iter/s)": 1.555044 }, { "acc": 0.95084438, "epoch": 3.0185141785797986, "grad_norm": 4.899942874908447, "learning_rate": 9.997055006920818e-06, "loss": 0.31430917, "memory(GiB)": 13.7, "step": 6440, "train_speed(iter/s)": 1.554998 }, { "acc": 0.92782173, "epoch": 3.020857745488634, "grad_norm": 14.823246955871582, "learning_rate": 9.997028346112485e-06, "loss": 0.40843883, "memory(GiB)": 13.7, "step": 6445, "train_speed(iter/s)": 1.554998 }, { "acc": 0.96460228, "epoch": 3.023201312397469, "grad_norm": 15.76479434967041, "learning_rate": 9.997001565204027e-06, "loss": 0.31466255, "memory(GiB)": 13.7, "step": 6450, "train_speed(iter/s)": 1.554989 }, { "acc": 0.93744221, "epoch": 3.0255448793063042, "grad_norm": 6.721680164337158, "learning_rate": 9.996974664196084e-06, "loss": 0.48446178, "memory(GiB)": 13.7, "step": 6455, "train_speed(iter/s)": 1.555086 }, { "acc": 0.92561274, "epoch": 3.0278884462151394, "grad_norm": 61.28119659423828, "learning_rate": 9.996947643089307e-06, "loss": 0.51054182, "memory(GiB)": 13.7, "step": 6460, "train_speed(iter/s)": 1.555073 }, { "acc": 0.92797022, "epoch": 3.0302320131239746, "grad_norm": 26.4671573638916, "learning_rate": 9.996920501884342e-06, "loss": 0.39993958, "memory(GiB)": 13.7, "step": 6465, "train_speed(iter/s)": 1.55509 }, { "acc": 0.91366806, "epoch": 3.03257558003281, "grad_norm": 30.18863296508789, "learning_rate": 9.996893240581841e-06, "loss": 0.50593624, "memory(GiB)": 13.7, "step": 6470, "train_speed(iter/s)": 1.555128 }, { "acc": 0.93647404, "epoch": 3.034919146941645, "grad_norm": 8.5145263671875, "learning_rate": 9.996865859182464e-06, "loss": 0.31646359, "memory(GiB)": 13.7, "step": 6475, "train_speed(iter/s)": 1.55511 }, { "acc": 0.96249886, "epoch": 3.0372627138504806, "grad_norm": 4.480012893676758, "learning_rate": 9.996838357686865e-06, "loss": 0.22128239, "memory(GiB)": 13.7, "step": 6480, "train_speed(iter/s)": 1.555075 }, { "acc": 0.93091307, "epoch": 3.039606280759316, "grad_norm": 10.4728364944458, "learning_rate": 9.996810736095705e-06, "loss": 0.38828821, "memory(GiB)": 13.7, "step": 6485, "train_speed(iter/s)": 1.555048 }, { "acc": 0.94385824, "epoch": 3.041949847668151, "grad_norm": 8.464871406555176, "learning_rate": 9.996782994409651e-06, "loss": 0.31353357, "memory(GiB)": 13.7, "step": 6490, "train_speed(iter/s)": 1.555061 }, { "acc": 0.93885899, "epoch": 3.044293414576986, "grad_norm": 24.07598876953125, "learning_rate": 9.996755132629367e-06, "loss": 0.38142309, "memory(GiB)": 13.7, "step": 6495, "train_speed(iter/s)": 1.555061 }, { "acc": 0.92929497, "epoch": 3.0466369814858214, "grad_norm": 9.862593650817871, "learning_rate": 9.996727150755523e-06, "loss": 0.33870416, "memory(GiB)": 13.7, "step": 6500, "train_speed(iter/s)": 1.555069 }, { "acc": 0.94973221, "epoch": 3.0489805483946566, "grad_norm": 13.349555969238281, "learning_rate": 9.996699048788792e-06, "loss": 0.29860775, "memory(GiB)": 13.7, "step": 6505, "train_speed(iter/s)": 1.555079 }, { "acc": 0.97297621, "epoch": 3.051324115303492, "grad_norm": 4.400337219238281, "learning_rate": 9.99667082672985e-06, "loss": 0.22223654, "memory(GiB)": 13.7, "step": 6510, "train_speed(iter/s)": 1.555115 }, { "acc": 0.9514782, "epoch": 3.053667682212327, "grad_norm": 12.045110702514648, "learning_rate": 9.996642484579377e-06, "loss": 0.25538926, "memory(GiB)": 13.7, "step": 6515, "train_speed(iter/s)": 1.555138 }, { "acc": 0.93505955, "epoch": 3.0560112491211626, "grad_norm": 6.016844272613525, "learning_rate": 9.99661402233805e-06, "loss": 0.40539474, "memory(GiB)": 13.7, "step": 6520, "train_speed(iter/s)": 1.555125 }, { "acc": 0.95606842, "epoch": 3.058354816029998, "grad_norm": 8.462370872497559, "learning_rate": 9.996585440006556e-06, "loss": 0.23200946, "memory(GiB)": 13.7, "step": 6525, "train_speed(iter/s)": 1.555109 }, { "acc": 0.90659952, "epoch": 3.060698382938833, "grad_norm": 49.546905517578125, "learning_rate": 9.99655673758558e-06, "loss": 0.65232711, "memory(GiB)": 13.7, "step": 6530, "train_speed(iter/s)": 1.555158 }, { "acc": 0.9710146, "epoch": 3.063041949847668, "grad_norm": 13.26839542388916, "learning_rate": 9.996527915075816e-06, "loss": 0.25947809, "memory(GiB)": 13.7, "step": 6535, "train_speed(iter/s)": 1.55513 }, { "acc": 0.95027924, "epoch": 3.0653855167565034, "grad_norm": 17.969026565551758, "learning_rate": 9.996498972477952e-06, "loss": 0.31926932, "memory(GiB)": 13.7, "step": 6540, "train_speed(iter/s)": 1.555082 }, { "acc": 0.94705105, "epoch": 3.0677290836653386, "grad_norm": 26.17026138305664, "learning_rate": 9.996469909792688e-06, "loss": 0.33528206, "memory(GiB)": 13.7, "step": 6545, "train_speed(iter/s)": 1.555112 }, { "acc": 0.97661457, "epoch": 3.070072650574174, "grad_norm": 7.166118621826172, "learning_rate": 9.996440727020719e-06, "loss": 0.14577806, "memory(GiB)": 13.7, "step": 6550, "train_speed(iter/s)": 1.555098 }, { "acc": 0.95180187, "epoch": 3.072416217483009, "grad_norm": 19.059377670288086, "learning_rate": 9.996411424162746e-06, "loss": 0.24660628, "memory(GiB)": 13.7, "step": 6555, "train_speed(iter/s)": 1.555121 }, { "acc": 0.93738098, "epoch": 3.074759784391844, "grad_norm": 10.349313735961914, "learning_rate": 9.996382001219477e-06, "loss": 0.42992773, "memory(GiB)": 13.7, "step": 6560, "train_speed(iter/s)": 1.555174 }, { "acc": 0.96613102, "epoch": 3.07710335130068, "grad_norm": 7.228046894073486, "learning_rate": 9.996352458191619e-06, "loss": 0.1652064, "memory(GiB)": 13.7, "step": 6565, "train_speed(iter/s)": 1.555174 }, { "acc": 0.96294641, "epoch": 3.079446918209515, "grad_norm": 4.602513790130615, "learning_rate": 9.99632279507988e-06, "loss": 0.19724287, "memory(GiB)": 13.7, "step": 6570, "train_speed(iter/s)": 1.555135 }, { "acc": 0.92762852, "epoch": 3.08179048511835, "grad_norm": 7.308422565460205, "learning_rate": 9.996293011884972e-06, "loss": 0.40766678, "memory(GiB)": 13.7, "step": 6575, "train_speed(iter/s)": 1.555195 }, { "acc": 0.94838476, "epoch": 3.0841340520271854, "grad_norm": 17.075284957885742, "learning_rate": 9.996263108607613e-06, "loss": 0.30802875, "memory(GiB)": 13.7, "step": 6580, "train_speed(iter/s)": 1.555178 }, { "acc": 0.96071911, "epoch": 3.0864776189360206, "grad_norm": 6.398344993591309, "learning_rate": 9.996233085248521e-06, "loss": 0.27180676, "memory(GiB)": 13.7, "step": 6585, "train_speed(iter/s)": 1.555193 }, { "acc": 0.95183582, "epoch": 3.0888211858448558, "grad_norm": 18.817394256591797, "learning_rate": 9.996202941808419e-06, "loss": 0.31704116, "memory(GiB)": 13.7, "step": 6590, "train_speed(iter/s)": 1.55517 }, { "acc": 0.92729692, "epoch": 3.091164752753691, "grad_norm": 10.375032424926758, "learning_rate": 9.99617267828803e-06, "loss": 0.57389965, "memory(GiB)": 13.7, "step": 6595, "train_speed(iter/s)": 1.55521 }, { "acc": 0.9466403, "epoch": 3.093508319662526, "grad_norm": 8.387781143188477, "learning_rate": 9.996142294688082e-06, "loss": 0.35655932, "memory(GiB)": 13.7, "step": 6600, "train_speed(iter/s)": 1.555216 }, { "acc": 0.96086311, "epoch": 3.095851886571362, "grad_norm": 8.96748161315918, "learning_rate": 9.996111791009305e-06, "loss": 0.18899906, "memory(GiB)": 13.7, "step": 6605, "train_speed(iter/s)": 1.555202 }, { "acc": 0.94170094, "epoch": 3.098195453480197, "grad_norm": 28.602750778198242, "learning_rate": 9.996081167252431e-06, "loss": 0.32544217, "memory(GiB)": 13.7, "step": 6610, "train_speed(iter/s)": 1.555261 }, { "acc": 0.96083755, "epoch": 3.100539020389032, "grad_norm": 14.598017692565918, "learning_rate": 9.996050423418198e-06, "loss": 0.25823622, "memory(GiB)": 13.7, "step": 6615, "train_speed(iter/s)": 1.555274 }, { "acc": 0.94313946, "epoch": 3.1028825872978674, "grad_norm": 27.379610061645508, "learning_rate": 9.996019559507346e-06, "loss": 0.34840164, "memory(GiB)": 13.7, "step": 6620, "train_speed(iter/s)": 1.55526 }, { "acc": 0.95403767, "epoch": 3.1052261542067026, "grad_norm": 9.357771873474121, "learning_rate": 9.995988575520615e-06, "loss": 0.2490057, "memory(GiB)": 13.7, "step": 6625, "train_speed(iter/s)": 1.555277 }, { "acc": 0.95404339, "epoch": 3.1075697211155378, "grad_norm": 4.803847312927246, "learning_rate": 9.995957471458748e-06, "loss": 0.30395842, "memory(GiB)": 13.7, "step": 6630, "train_speed(iter/s)": 1.555201 }, { "acc": 0.95031252, "epoch": 3.109913288024373, "grad_norm": 47.1729736328125, "learning_rate": 9.995926247322499e-06, "loss": 0.19705113, "memory(GiB)": 13.7, "step": 6635, "train_speed(iter/s)": 1.555241 }, { "acc": 0.96914654, "epoch": 3.112256854933208, "grad_norm": 8.960881233215332, "learning_rate": 9.995894903112609e-06, "loss": 0.22505989, "memory(GiB)": 13.7, "step": 6640, "train_speed(iter/s)": 1.555237 }, { "acc": 0.94289532, "epoch": 3.1146004218420438, "grad_norm": 2.313004732131958, "learning_rate": 9.995863438829839e-06, "loss": 0.35262423, "memory(GiB)": 13.7, "step": 6645, "train_speed(iter/s)": 1.555262 }, { "acc": 0.95101795, "epoch": 3.116943988750879, "grad_norm": 8.310992240905762, "learning_rate": 9.995831854474945e-06, "loss": 0.3013309, "memory(GiB)": 13.7, "step": 6650, "train_speed(iter/s)": 1.555288 }, { "acc": 0.95505486, "epoch": 3.119287555659714, "grad_norm": 23.773351669311523, "learning_rate": 9.995800150048682e-06, "loss": 0.24908068, "memory(GiB)": 13.7, "step": 6655, "train_speed(iter/s)": 1.555227 }, { "acc": 0.95801592, "epoch": 3.1216311225685494, "grad_norm": 10.015145301818848, "learning_rate": 9.995768325551815e-06, "loss": 0.3289881, "memory(GiB)": 13.7, "step": 6660, "train_speed(iter/s)": 1.555197 }, { "acc": 0.94032793, "epoch": 3.1239746894773845, "grad_norm": 17.7419376373291, "learning_rate": 9.995736380985108e-06, "loss": 0.41038804, "memory(GiB)": 13.7, "step": 6665, "train_speed(iter/s)": 1.555178 }, { "acc": 0.94147692, "epoch": 3.1263182563862197, "grad_norm": 15.05077838897705, "learning_rate": 9.995704316349329e-06, "loss": 0.48112335, "memory(GiB)": 13.7, "step": 6670, "train_speed(iter/s)": 1.555193 }, { "acc": 0.94415188, "epoch": 3.128661823295055, "grad_norm": 14.594313621520996, "learning_rate": 9.995672131645247e-06, "loss": 0.29013772, "memory(GiB)": 13.7, "step": 6675, "train_speed(iter/s)": 1.555232 }, { "acc": 0.94382038, "epoch": 3.13100539020389, "grad_norm": 32.04849624633789, "learning_rate": 9.99563982687364e-06, "loss": 0.52380795, "memory(GiB)": 13.7, "step": 6680, "train_speed(iter/s)": 1.555207 }, { "acc": 0.92765923, "epoch": 3.1333489571127258, "grad_norm": 13.937979698181152, "learning_rate": 9.995607402035281e-06, "loss": 0.38441408, "memory(GiB)": 13.7, "step": 6685, "train_speed(iter/s)": 1.555215 }, { "acc": 0.94950294, "epoch": 3.135692524021561, "grad_norm": 17.502214431762695, "learning_rate": 9.995574857130949e-06, "loss": 0.18695402, "memory(GiB)": 13.7, "step": 6690, "train_speed(iter/s)": 1.55524 }, { "acc": 0.92411366, "epoch": 3.138036090930396, "grad_norm": 14.709664344787598, "learning_rate": 9.995542192161431e-06, "loss": 0.50501871, "memory(GiB)": 13.7, "step": 6695, "train_speed(iter/s)": 1.555208 }, { "acc": 0.94495411, "epoch": 3.1403796578392313, "grad_norm": 10.275245666503906, "learning_rate": 9.995509407127505e-06, "loss": 0.264324, "memory(GiB)": 13.7, "step": 6700, "train_speed(iter/s)": 1.555287 }, { "acc": 0.94837303, "epoch": 3.1427232247480665, "grad_norm": 38.964717864990234, "learning_rate": 9.995476502029964e-06, "loss": 0.31074142, "memory(GiB)": 13.7, "step": 6705, "train_speed(iter/s)": 1.555253 }, { "acc": 0.95010147, "epoch": 3.1450667916569017, "grad_norm": 25.823883056640625, "learning_rate": 9.995443476869597e-06, "loss": 0.26876979, "memory(GiB)": 13.7, "step": 6710, "train_speed(iter/s)": 1.555233 }, { "acc": 0.96330929, "epoch": 3.147410358565737, "grad_norm": 5.775352478027344, "learning_rate": 9.995410331647199e-06, "loss": 0.22744606, "memory(GiB)": 13.7, "step": 6715, "train_speed(iter/s)": 1.555276 }, { "acc": 0.93274307, "epoch": 3.149753925474572, "grad_norm": 15.270330429077148, "learning_rate": 9.995377066363567e-06, "loss": 0.40292206, "memory(GiB)": 13.7, "step": 6720, "train_speed(iter/s)": 1.555272 }, { "acc": 0.93777599, "epoch": 3.1520974923834078, "grad_norm": 23.51432991027832, "learning_rate": 9.995343681019499e-06, "loss": 0.5123981, "memory(GiB)": 13.7, "step": 6725, "train_speed(iter/s)": 1.555256 }, { "acc": 0.96077385, "epoch": 3.154441059292243, "grad_norm": 9.61791706085205, "learning_rate": 9.995310175615798e-06, "loss": 0.21172185, "memory(GiB)": 13.7, "step": 6730, "train_speed(iter/s)": 1.555305 }, { "acc": 0.9604167, "epoch": 3.156784626201078, "grad_norm": 5.79518461227417, "learning_rate": 9.995276550153271e-06, "loss": 0.24952037, "memory(GiB)": 13.7, "step": 6735, "train_speed(iter/s)": 1.55529 }, { "acc": 0.95947781, "epoch": 3.1591281931099133, "grad_norm": 7.312999725341797, "learning_rate": 9.995242804632723e-06, "loss": 0.26055734, "memory(GiB)": 13.7, "step": 6740, "train_speed(iter/s)": 1.555273 }, { "acc": 0.97463522, "epoch": 3.1614717600187485, "grad_norm": 2.6204347610473633, "learning_rate": 9.995208939054968e-06, "loss": 0.15685252, "memory(GiB)": 13.7, "step": 6745, "train_speed(iter/s)": 1.555307 }, { "acc": 0.92601194, "epoch": 3.1638153269275837, "grad_norm": 10.571784019470215, "learning_rate": 9.995174953420818e-06, "loss": 0.42306046, "memory(GiB)": 13.7, "step": 6750, "train_speed(iter/s)": 1.555313 }, { "acc": 0.94938383, "epoch": 3.166158893836419, "grad_norm": 3.3197152614593506, "learning_rate": 9.995140847731093e-06, "loss": 0.35093236, "memory(GiB)": 13.7, "step": 6755, "train_speed(iter/s)": 1.555279 }, { "acc": 0.93000002, "epoch": 3.168502460745254, "grad_norm": 34.056400299072266, "learning_rate": 9.995106621986608e-06, "loss": 0.3435112, "memory(GiB)": 13.7, "step": 6760, "train_speed(iter/s)": 1.555199 }, { "acc": 0.96106501, "epoch": 3.1708460276540897, "grad_norm": 7.232293128967285, "learning_rate": 9.995072276188191e-06, "loss": 0.18973536, "memory(GiB)": 13.7, "step": 6765, "train_speed(iter/s)": 1.555144 }, { "acc": 0.9584547, "epoch": 3.173189594562925, "grad_norm": 12.466383934020996, "learning_rate": 9.995037810336662e-06, "loss": 0.40939927, "memory(GiB)": 13.7, "step": 6770, "train_speed(iter/s)": 1.555181 }, { "acc": 0.95396652, "epoch": 3.17553316147176, "grad_norm": 14.682823181152344, "learning_rate": 9.995003224432855e-06, "loss": 0.3561147, "memory(GiB)": 13.7, "step": 6775, "train_speed(iter/s)": 1.55523 }, { "acc": 0.93351269, "epoch": 3.1778767283805953, "grad_norm": 14.199426651000977, "learning_rate": 9.994968518477597e-06, "loss": 0.36175106, "memory(GiB)": 13.7, "step": 6780, "train_speed(iter/s)": 1.555274 }, { "acc": 0.9624898, "epoch": 3.1802202952894305, "grad_norm": 6.202935695648193, "learning_rate": 9.994933692471727e-06, "loss": 0.21148133, "memory(GiB)": 13.7, "step": 6785, "train_speed(iter/s)": 1.555342 }, { "acc": 0.95891552, "epoch": 3.1825638621982657, "grad_norm": 16.103086471557617, "learning_rate": 9.994898746416075e-06, "loss": 0.27060535, "memory(GiB)": 13.7, "step": 6790, "train_speed(iter/s)": 1.555359 }, { "acc": 0.95303526, "epoch": 3.184907429107101, "grad_norm": 5.3719353675842285, "learning_rate": 9.994863680311488e-06, "loss": 0.29178646, "memory(GiB)": 13.7, "step": 6795, "train_speed(iter/s)": 1.555352 }, { "acc": 0.94987555, "epoch": 3.187250996015936, "grad_norm": 10.476934432983398, "learning_rate": 9.994828494158804e-06, "loss": 0.23990936, "memory(GiB)": 13.7, "step": 6800, "train_speed(iter/s)": 1.555325 }, { "acc": 0.96060877, "epoch": 3.1895945629247713, "grad_norm": 19.603504180908203, "learning_rate": 9.994793187958871e-06, "loss": 0.25266819, "memory(GiB)": 13.7, "step": 6805, "train_speed(iter/s)": 1.555382 }, { "acc": 0.93442001, "epoch": 3.191938129833607, "grad_norm": 11.302990913391113, "learning_rate": 9.994757761712539e-06, "loss": 0.27950807, "memory(GiB)": 13.7, "step": 6810, "train_speed(iter/s)": 1.555395 }, { "acc": 0.97267857, "epoch": 3.194281696742442, "grad_norm": 4.807924270629883, "learning_rate": 9.994722215420654e-06, "loss": 0.18608036, "memory(GiB)": 13.7, "step": 6815, "train_speed(iter/s)": 1.555413 }, { "acc": 0.93857689, "epoch": 3.1966252636512773, "grad_norm": 14.445274353027344, "learning_rate": 9.994686549084078e-06, "loss": 0.36125104, "memory(GiB)": 13.7, "step": 6820, "train_speed(iter/s)": 1.555426 }, { "acc": 0.95133438, "epoch": 3.1989688305601125, "grad_norm": 9.056671142578125, "learning_rate": 9.994650762703661e-06, "loss": 0.36553307, "memory(GiB)": 13.7, "step": 6825, "train_speed(iter/s)": 1.555406 }, { "acc": 0.94691658, "epoch": 3.2013123974689477, "grad_norm": 6.8322014808654785, "learning_rate": 9.994614856280271e-06, "loss": 0.37863498, "memory(GiB)": 13.7, "step": 6830, "train_speed(iter/s)": 1.555441 }, { "acc": 0.93971272, "epoch": 3.203655964377783, "grad_norm": 10.342216491699219, "learning_rate": 9.994578829814764e-06, "loss": 0.36524959, "memory(GiB)": 13.7, "step": 6835, "train_speed(iter/s)": 1.555436 }, { "acc": 0.95723209, "epoch": 3.205999531286618, "grad_norm": 21.665584564208984, "learning_rate": 9.994542683308008e-06, "loss": 0.17361505, "memory(GiB)": 13.7, "step": 6840, "train_speed(iter/s)": 1.555427 }, { "acc": 0.95911694, "epoch": 3.2083430981954537, "grad_norm": 11.13305377960205, "learning_rate": 9.994506416760875e-06, "loss": 0.16965513, "memory(GiB)": 13.7, "step": 6845, "train_speed(iter/s)": 1.555484 }, { "acc": 0.95358696, "epoch": 3.210686665104289, "grad_norm": 5.275207996368408, "learning_rate": 9.994470030174233e-06, "loss": 0.25889897, "memory(GiB)": 13.7, "step": 6850, "train_speed(iter/s)": 1.555437 }, { "acc": 0.97149954, "epoch": 3.213030232013124, "grad_norm": 3.3612537384033203, "learning_rate": 9.994433523548958e-06, "loss": 0.19121706, "memory(GiB)": 13.7, "step": 6855, "train_speed(iter/s)": 1.555469 }, { "acc": 0.93420868, "epoch": 3.2153737989219593, "grad_norm": 10.128812789916992, "learning_rate": 9.994396896885928e-06, "loss": 0.28836722, "memory(GiB)": 13.7, "step": 6860, "train_speed(iter/s)": 1.555539 }, { "acc": 0.96029758, "epoch": 3.2177173658307945, "grad_norm": 7.610215187072754, "learning_rate": 9.994360150186024e-06, "loss": 0.2582767, "memory(GiB)": 13.7, "step": 6865, "train_speed(iter/s)": 1.555594 }, { "acc": 0.95028849, "epoch": 3.2200609327396297, "grad_norm": 16.63969612121582, "learning_rate": 9.994323283450124e-06, "loss": 0.17968729, "memory(GiB)": 13.7, "step": 6870, "train_speed(iter/s)": 1.555614 }, { "acc": 0.96061068, "epoch": 3.222404499648465, "grad_norm": 6.203117847442627, "learning_rate": 9.994286296679123e-06, "loss": 0.25718412, "memory(GiB)": 13.7, "step": 6875, "train_speed(iter/s)": 1.555634 }, { "acc": 0.96418991, "epoch": 3.2247480665573, "grad_norm": 6.7097907066345215, "learning_rate": 9.994249189873903e-06, "loss": 0.25034859, "memory(GiB)": 13.7, "step": 6880, "train_speed(iter/s)": 1.555677 }, { "acc": 0.94882393, "epoch": 3.2270916334661353, "grad_norm": 21.14051055908203, "learning_rate": 9.994211963035358e-06, "loss": 0.3492903, "memory(GiB)": 13.7, "step": 6885, "train_speed(iter/s)": 1.555718 }, { "acc": 0.94260378, "epoch": 3.229435200374971, "grad_norm": 18.315460205078125, "learning_rate": 9.994174616164382e-06, "loss": 0.33659792, "memory(GiB)": 13.7, "step": 6890, "train_speed(iter/s)": 1.555666 }, { "acc": 0.95217056, "epoch": 3.231778767283806, "grad_norm": 18.216108322143555, "learning_rate": 9.994137149261875e-06, "loss": 0.27185116, "memory(GiB)": 13.7, "step": 6895, "train_speed(iter/s)": 1.55564 }, { "acc": 0.92243462, "epoch": 3.2341223341926413, "grad_norm": 11.034706115722656, "learning_rate": 9.994099562328737e-06, "loss": 0.55962286, "memory(GiB)": 13.7, "step": 6900, "train_speed(iter/s)": 1.55569 }, { "acc": 0.94930477, "epoch": 3.2364659011014765, "grad_norm": 21.47245216369629, "learning_rate": 9.99406185536587e-06, "loss": 0.34608207, "memory(GiB)": 13.7, "step": 6905, "train_speed(iter/s)": 1.555647 }, { "acc": 0.95626898, "epoch": 3.2388094680103117, "grad_norm": 11.202547073364258, "learning_rate": 9.994024028374183e-06, "loss": 0.23797278, "memory(GiB)": 13.7, "step": 6910, "train_speed(iter/s)": 1.555664 }, { "acc": 0.93165188, "epoch": 3.241153034919147, "grad_norm": 15.378278732299805, "learning_rate": 9.99398608135458e-06, "loss": 0.45180268, "memory(GiB)": 13.7, "step": 6915, "train_speed(iter/s)": 1.555674 }, { "acc": 0.93460484, "epoch": 3.243496601827982, "grad_norm": 15.999361038208008, "learning_rate": 9.993948014307978e-06, "loss": 0.41773691, "memory(GiB)": 13.7, "step": 6920, "train_speed(iter/s)": 1.555592 }, { "acc": 0.92904634, "epoch": 3.2458401687368172, "grad_norm": 11.796830177307129, "learning_rate": 9.993909827235293e-06, "loss": 0.41338234, "memory(GiB)": 13.7, "step": 6925, "train_speed(iter/s)": 1.555602 }, { "acc": 0.94592524, "epoch": 3.248183735645653, "grad_norm": 25.746646881103516, "learning_rate": 9.993871520137436e-06, "loss": 0.31274726, "memory(GiB)": 13.7, "step": 6930, "train_speed(iter/s)": 1.555577 }, { "acc": 0.94701567, "epoch": 3.250527302554488, "grad_norm": 12.343482971191406, "learning_rate": 9.993833093015333e-06, "loss": 0.32578225, "memory(GiB)": 13.7, "step": 6935, "train_speed(iter/s)": 1.555516 }, { "acc": 0.93363266, "epoch": 3.2528708694633233, "grad_norm": 19.602914810180664, "learning_rate": 9.99379454586991e-06, "loss": 0.48918009, "memory(GiB)": 13.7, "step": 6940, "train_speed(iter/s)": 1.555528 }, { "acc": 0.93276567, "epoch": 3.2552144363721585, "grad_norm": 8.709884643554688, "learning_rate": 9.993755878702087e-06, "loss": 0.43628116, "memory(GiB)": 13.7, "step": 6945, "train_speed(iter/s)": 1.555554 }, { "acc": 0.95935936, "epoch": 3.2575580032809937, "grad_norm": 9.260605812072754, "learning_rate": 9.993717091512798e-06, "loss": 0.2806098, "memory(GiB)": 13.7, "step": 6950, "train_speed(iter/s)": 1.555602 }, { "acc": 0.94298601, "epoch": 3.259901570189829, "grad_norm": 87.45747375488281, "learning_rate": 9.993678184302975e-06, "loss": 0.40193291, "memory(GiB)": 13.7, "step": 6955, "train_speed(iter/s)": 1.55559 }, { "acc": 0.9353899, "epoch": 3.262245137098664, "grad_norm": 30.50124168395996, "learning_rate": 9.993639157073551e-06, "loss": 0.35912528, "memory(GiB)": 13.7, "step": 6960, "train_speed(iter/s)": 1.555551 }, { "acc": 0.92599621, "epoch": 3.2645887040074992, "grad_norm": 36.35224533081055, "learning_rate": 9.993600009825466e-06, "loss": 0.52028713, "memory(GiB)": 13.7, "step": 6965, "train_speed(iter/s)": 1.555619 }, { "acc": 0.94043608, "epoch": 3.2669322709163344, "grad_norm": 11.028350830078125, "learning_rate": 9.99356074255966e-06, "loss": 0.36512403, "memory(GiB)": 13.7, "step": 6970, "train_speed(iter/s)": 1.555711 }, { "acc": 0.94638805, "epoch": 3.26927583782517, "grad_norm": 38.00587844848633, "learning_rate": 9.993521355277078e-06, "loss": 0.27271338, "memory(GiB)": 13.7, "step": 6975, "train_speed(iter/s)": 1.555758 }, { "acc": 0.92558889, "epoch": 3.2716194047340053, "grad_norm": 19.944786071777344, "learning_rate": 9.993481847978664e-06, "loss": 0.5157917, "memory(GiB)": 13.7, "step": 6980, "train_speed(iter/s)": 1.555732 }, { "acc": 0.97652779, "epoch": 3.2739629716428404, "grad_norm": 7.8209004402160645, "learning_rate": 9.99344222066537e-06, "loss": 0.15275576, "memory(GiB)": 13.7, "step": 6985, "train_speed(iter/s)": 1.555723 }, { "acc": 0.96683979, "epoch": 3.2763065385516756, "grad_norm": 7.601544380187988, "learning_rate": 9.99340247333815e-06, "loss": 0.26740954, "memory(GiB)": 13.7, "step": 6990, "train_speed(iter/s)": 1.555743 }, { "acc": 0.95286465, "epoch": 3.278650105460511, "grad_norm": 18.63364028930664, "learning_rate": 9.993362605997958e-06, "loss": 0.30666003, "memory(GiB)": 13.7, "step": 6995, "train_speed(iter/s)": 1.555787 }, { "acc": 0.96276779, "epoch": 3.280993672369346, "grad_norm": 29.855314254760742, "learning_rate": 9.99332261864575e-06, "loss": 0.17650084, "memory(GiB)": 13.7, "step": 7000, "train_speed(iter/s)": 1.555747 }, { "acc": 0.94076681, "epoch": 3.283337239278181, "grad_norm": 12.300317764282227, "learning_rate": 9.99328251128249e-06, "loss": 0.30596166, "memory(GiB)": 13.7, "step": 7005, "train_speed(iter/s)": 1.555738 }, { "acc": 0.92239704, "epoch": 3.285680806187017, "grad_norm": 7.336281776428223, "learning_rate": 9.99324228390914e-06, "loss": 0.53066158, "memory(GiB)": 13.7, "step": 7010, "train_speed(iter/s)": 1.555768 }, { "acc": 0.9564106, "epoch": 3.288024373095852, "grad_norm": 9.85887622833252, "learning_rate": 9.993201936526668e-06, "loss": 0.25796723, "memory(GiB)": 13.7, "step": 7015, "train_speed(iter/s)": 1.555812 }, { "acc": 0.96989269, "epoch": 3.2903679400046872, "grad_norm": 6.194490909576416, "learning_rate": 9.993161469136045e-06, "loss": 0.19481652, "memory(GiB)": 13.7, "step": 7020, "train_speed(iter/s)": 1.555845 }, { "acc": 0.94388142, "epoch": 3.2927115069135224, "grad_norm": 9.560114860534668, "learning_rate": 9.993120881738242e-06, "loss": 0.29530575, "memory(GiB)": 13.7, "step": 7025, "train_speed(iter/s)": 1.555821 }, { "acc": 0.93282738, "epoch": 3.2950550738223576, "grad_norm": 4.298898220062256, "learning_rate": 9.993080174334234e-06, "loss": 0.3982785, "memory(GiB)": 13.7, "step": 7030, "train_speed(iter/s)": 1.555857 }, { "acc": 0.92437906, "epoch": 3.297398640731193, "grad_norm": 14.272817611694336, "learning_rate": 9.993039346924999e-06, "loss": 0.49423838, "memory(GiB)": 13.7, "step": 7035, "train_speed(iter/s)": 1.555903 }, { "acc": 0.9529459, "epoch": 3.299742207640028, "grad_norm": 8.958892822265625, "learning_rate": 9.992998399511523e-06, "loss": 0.33219419, "memory(GiB)": 13.7, "step": 7040, "train_speed(iter/s)": 1.555926 }, { "acc": 0.94529581, "epoch": 3.302085774548863, "grad_norm": 11.873414039611816, "learning_rate": 9.992957332094786e-06, "loss": 0.33303771, "memory(GiB)": 13.7, "step": 7045, "train_speed(iter/s)": 1.555967 }, { "acc": 0.95800381, "epoch": 3.3044293414576984, "grad_norm": 2.050703287124634, "learning_rate": 9.992916144675778e-06, "loss": 0.25453334, "memory(GiB)": 13.7, "step": 7050, "train_speed(iter/s)": 1.555996 }, { "acc": 0.94881954, "epoch": 3.306772908366534, "grad_norm": 6.95504093170166, "learning_rate": 9.992874837255484e-06, "loss": 0.24382377, "memory(GiB)": 13.7, "step": 7055, "train_speed(iter/s)": 1.555973 }, { "acc": 0.94546337, "epoch": 3.3091164752753692, "grad_norm": 15.93572998046875, "learning_rate": 9.992833409834902e-06, "loss": 0.30728302, "memory(GiB)": 13.7, "step": 7060, "train_speed(iter/s)": 1.555976 }, { "acc": 0.94409904, "epoch": 3.3114600421842044, "grad_norm": 6.459467887878418, "learning_rate": 9.992791862415024e-06, "loss": 0.27553203, "memory(GiB)": 13.7, "step": 7065, "train_speed(iter/s)": 1.556 }, { "acc": 0.95655022, "epoch": 3.3138036090930396, "grad_norm": 22.616220474243164, "learning_rate": 9.992750194996853e-06, "loss": 0.23385172, "memory(GiB)": 13.7, "step": 7070, "train_speed(iter/s)": 1.555979 }, { "acc": 0.95021553, "epoch": 3.316147176001875, "grad_norm": 0.4129790663719177, "learning_rate": 9.992708407581387e-06, "loss": 0.17753696, "memory(GiB)": 13.7, "step": 7075, "train_speed(iter/s)": 1.555981 }, { "acc": 0.9502634, "epoch": 3.31849074291071, "grad_norm": 17.313156127929688, "learning_rate": 9.992666500169632e-06, "loss": 0.26969459, "memory(GiB)": 13.7, "step": 7080, "train_speed(iter/s)": 1.555992 }, { "acc": 0.95282316, "epoch": 3.320834309819545, "grad_norm": 4.077566146850586, "learning_rate": 9.992624472762595e-06, "loss": 0.32493501, "memory(GiB)": 13.7, "step": 7085, "train_speed(iter/s)": 1.555956 }, { "acc": 0.95403271, "epoch": 3.323177876728381, "grad_norm": 14.476713180541992, "learning_rate": 9.992582325361287e-06, "loss": 0.23977964, "memory(GiB)": 13.7, "step": 7090, "train_speed(iter/s)": 1.555954 }, { "acc": 0.96689377, "epoch": 3.325521443637216, "grad_norm": 7.803499221801758, "learning_rate": 9.992540057966716e-06, "loss": 0.14905362, "memory(GiB)": 13.7, "step": 7095, "train_speed(iter/s)": 1.555943 }, { "acc": 0.95865564, "epoch": 3.327865010546051, "grad_norm": 3.820547103881836, "learning_rate": 9.992497670579906e-06, "loss": 0.3688602, "memory(GiB)": 13.7, "step": 7100, "train_speed(iter/s)": 1.555922 }, { "acc": 0.95515242, "epoch": 3.3302085774548864, "grad_norm": 3.9665281772613525, "learning_rate": 9.992455163201871e-06, "loss": 0.19822279, "memory(GiB)": 13.7, "step": 7105, "train_speed(iter/s)": 1.556019 }, { "acc": 0.95086803, "epoch": 3.3325521443637216, "grad_norm": 9.43240737915039, "learning_rate": 9.992412535833633e-06, "loss": 0.39509926, "memory(GiB)": 13.7, "step": 7110, "train_speed(iter/s)": 1.55598 }, { "acc": 0.96534719, "epoch": 3.334895711272557, "grad_norm": 102.46112060546875, "learning_rate": 9.992369788476218e-06, "loss": 0.28924713, "memory(GiB)": 13.7, "step": 7115, "train_speed(iter/s)": 1.555992 }, { "acc": 0.94070911, "epoch": 3.337239278181392, "grad_norm": 16.391437530517578, "learning_rate": 9.992326921130655e-06, "loss": 0.38314586, "memory(GiB)": 13.7, "step": 7120, "train_speed(iter/s)": 1.556034 }, { "acc": 0.9457901, "epoch": 3.339582845090227, "grad_norm": 18.331432342529297, "learning_rate": 9.992283933797969e-06, "loss": 0.45685878, "memory(GiB)": 13.7, "step": 7125, "train_speed(iter/s)": 1.556021 }, { "acc": 0.94456844, "epoch": 3.3419264119990624, "grad_norm": 9.413476943969727, "learning_rate": 9.992240826479198e-06, "loss": 0.32266693, "memory(GiB)": 13.7, "step": 7130, "train_speed(iter/s)": 1.556035 }, { "acc": 0.94224224, "epoch": 3.344269978907898, "grad_norm": 6.641750335693359, "learning_rate": 9.992197599175377e-06, "loss": 0.42981672, "memory(GiB)": 13.7, "step": 7135, "train_speed(iter/s)": 1.556016 }, { "acc": 0.93421125, "epoch": 3.346613545816733, "grad_norm": 17.27361297607422, "learning_rate": 9.992154251887544e-06, "loss": 0.50081291, "memory(GiB)": 13.7, "step": 7140, "train_speed(iter/s)": 1.555995 }, { "acc": 0.94427929, "epoch": 3.3489571127255684, "grad_norm": 24.905412673950195, "learning_rate": 9.99211078461674e-06, "loss": 0.30848346, "memory(GiB)": 13.7, "step": 7145, "train_speed(iter/s)": 1.555992 }, { "acc": 0.92086, "epoch": 3.3513006796344036, "grad_norm": 15.211112976074219, "learning_rate": 9.992067197364013e-06, "loss": 0.46341457, "memory(GiB)": 13.7, "step": 7150, "train_speed(iter/s)": 1.555981 }, { "acc": 0.93939981, "epoch": 3.353644246543239, "grad_norm": 45.75273895263672, "learning_rate": 9.99202349013041e-06, "loss": 0.32784219, "memory(GiB)": 13.7, "step": 7155, "train_speed(iter/s)": 1.556011 }, { "acc": 0.92996292, "epoch": 3.355987813452074, "grad_norm": 4.107438087463379, "learning_rate": 9.991979662916981e-06, "loss": 0.43284674, "memory(GiB)": 13.7, "step": 7160, "train_speed(iter/s)": 1.555967 }, { "acc": 0.95043736, "epoch": 3.358331380360909, "grad_norm": 14.676185607910156, "learning_rate": 9.991935715724777e-06, "loss": 0.27695847, "memory(GiB)": 13.7, "step": 7165, "train_speed(iter/s)": 1.55601 }, { "acc": 0.96212521, "epoch": 3.3606749472697444, "grad_norm": 13.832722663879395, "learning_rate": 9.99189164855486e-06, "loss": 0.29497886, "memory(GiB)": 13.7, "step": 7170, "train_speed(iter/s)": 1.555998 }, { "acc": 0.95723305, "epoch": 3.36301851417858, "grad_norm": 6.371514797210693, "learning_rate": 9.991847461408282e-06, "loss": 0.31512139, "memory(GiB)": 13.7, "step": 7175, "train_speed(iter/s)": 1.555999 }, { "acc": 0.96460972, "epoch": 3.365362081087415, "grad_norm": 46.6375617980957, "learning_rate": 9.99180315428611e-06, "loss": 0.28077581, "memory(GiB)": 13.7, "step": 7180, "train_speed(iter/s)": 1.555965 }, { "acc": 0.97802086, "epoch": 3.3677056479962504, "grad_norm": 3.901151180267334, "learning_rate": 9.991758727189408e-06, "loss": 0.22700117, "memory(GiB)": 13.7, "step": 7185, "train_speed(iter/s)": 1.555982 }, { "acc": 0.95244055, "epoch": 3.3700492149050856, "grad_norm": 12.4163179397583, "learning_rate": 9.991714180119245e-06, "loss": 0.42401581, "memory(GiB)": 13.7, "step": 7190, "train_speed(iter/s)": 1.555991 }, { "acc": 0.96911707, "epoch": 3.3723927818139208, "grad_norm": 2.985799789428711, "learning_rate": 9.991669513076689e-06, "loss": 0.18937445, "memory(GiB)": 13.7, "step": 7195, "train_speed(iter/s)": 1.555986 }, { "acc": 0.94137554, "epoch": 3.374736348722756, "grad_norm": 10.763205528259277, "learning_rate": 9.991624726062815e-06, "loss": 0.36813087, "memory(GiB)": 13.7, "step": 7200, "train_speed(iter/s)": 1.556001 }, { "acc": 0.9445982, "epoch": 3.377079915631591, "grad_norm": 6.161069869995117, "learning_rate": 9.991579819078702e-06, "loss": 0.2828424, "memory(GiB)": 13.7, "step": 7205, "train_speed(iter/s)": 1.556072 }, { "acc": 0.9489583, "epoch": 3.3794234825404263, "grad_norm": 8.467489242553711, "learning_rate": 9.991534792125424e-06, "loss": 0.29173486, "memory(GiB)": 13.7, "step": 7210, "train_speed(iter/s)": 1.55606 }, { "acc": 0.96486473, "epoch": 3.3817670494492615, "grad_norm": 10.103063583374023, "learning_rate": 9.991489645204068e-06, "loss": 0.24983356, "memory(GiB)": 13.7, "step": 7215, "train_speed(iter/s)": 1.556111 }, { "acc": 0.97342262, "epoch": 3.384110616358097, "grad_norm": 5.850557327270508, "learning_rate": 9.991444378315718e-06, "loss": 0.15424492, "memory(GiB)": 13.7, "step": 7220, "train_speed(iter/s)": 1.556093 }, { "acc": 0.96541538, "epoch": 3.3864541832669324, "grad_norm": 10.912637710571289, "learning_rate": 9.991398991461462e-06, "loss": 0.16239436, "memory(GiB)": 13.7, "step": 7225, "train_speed(iter/s)": 1.556079 }, { "acc": 0.95870228, "epoch": 3.3887977501757676, "grad_norm": 3.2453627586364746, "learning_rate": 9.99135348464239e-06, "loss": 0.3172647, "memory(GiB)": 13.7, "step": 7230, "train_speed(iter/s)": 1.556156 }, { "acc": 0.95156517, "epoch": 3.3911413170846028, "grad_norm": 25.878538131713867, "learning_rate": 9.991307857859595e-06, "loss": 0.38382406, "memory(GiB)": 13.7, "step": 7235, "train_speed(iter/s)": 1.55615 }, { "acc": 0.97240086, "epoch": 3.393484883993438, "grad_norm": 3.3696272373199463, "learning_rate": 9.991262111114176e-06, "loss": 0.12690804, "memory(GiB)": 13.7, "step": 7240, "train_speed(iter/s)": 1.556148 }, { "acc": 0.92919445, "epoch": 3.395828450902273, "grad_norm": 12.126562118530273, "learning_rate": 9.991216244407232e-06, "loss": 0.42415409, "memory(GiB)": 13.7, "step": 7245, "train_speed(iter/s)": 1.55615 }, { "acc": 0.95544643, "epoch": 3.3981720178111083, "grad_norm": 8.111949920654297, "learning_rate": 9.991170257739866e-06, "loss": 0.23790405, "memory(GiB)": 13.7, "step": 7250, "train_speed(iter/s)": 1.556189 }, { "acc": 0.96954231, "epoch": 3.400515584719944, "grad_norm": 9.846674919128418, "learning_rate": 9.991124151113182e-06, "loss": 0.2035871, "memory(GiB)": 13.7, "step": 7255, "train_speed(iter/s)": 1.556211 }, { "acc": 0.93910332, "epoch": 3.402859151628779, "grad_norm": 8.63251781463623, "learning_rate": 9.99107792452829e-06, "loss": 0.44517708, "memory(GiB)": 13.7, "step": 7260, "train_speed(iter/s)": 1.556219 }, { "acc": 0.94335566, "epoch": 3.4052027185376144, "grad_norm": 7.296109199523926, "learning_rate": 9.991031577986299e-06, "loss": 0.34207656, "memory(GiB)": 13.7, "step": 7265, "train_speed(iter/s)": 1.556249 }, { "acc": 0.94136677, "epoch": 3.4075462854464496, "grad_norm": 20.747146606445312, "learning_rate": 9.990985111488323e-06, "loss": 0.45291061, "memory(GiB)": 13.7, "step": 7270, "train_speed(iter/s)": 1.556272 }, { "acc": 0.9383234, "epoch": 3.4098898523552847, "grad_norm": 21.402484893798828, "learning_rate": 9.990938525035482e-06, "loss": 0.36267231, "memory(GiB)": 13.7, "step": 7275, "train_speed(iter/s)": 1.556261 }, { "acc": 0.93486986, "epoch": 3.41223341926412, "grad_norm": 9.687506675720215, "learning_rate": 9.990891818628892e-06, "loss": 0.4640666, "memory(GiB)": 13.7, "step": 7280, "train_speed(iter/s)": 1.556282 }, { "acc": 0.94294682, "epoch": 3.414576986172955, "grad_norm": 16.55972671508789, "learning_rate": 9.990844992269678e-06, "loss": 0.40928473, "memory(GiB)": 13.7, "step": 7285, "train_speed(iter/s)": 1.556284 }, { "acc": 0.94267864, "epoch": 3.4169205530817903, "grad_norm": 13.894211769104004, "learning_rate": 9.990798045958965e-06, "loss": 0.37333417, "memory(GiB)": 13.7, "step": 7290, "train_speed(iter/s)": 1.55627 }, { "acc": 0.9515645, "epoch": 3.4192641199906255, "grad_norm": 11.90516471862793, "learning_rate": 9.990750979697883e-06, "loss": 0.23853569, "memory(GiB)": 13.7, "step": 7295, "train_speed(iter/s)": 1.556309 }, { "acc": 0.96275206, "epoch": 3.421607686899461, "grad_norm": 13.412803649902344, "learning_rate": 9.990703793487559e-06, "loss": 0.2337172, "memory(GiB)": 13.7, "step": 7300, "train_speed(iter/s)": 1.556344 }, { "acc": 0.95223484, "epoch": 3.4239512538082963, "grad_norm": 8.82390022277832, "learning_rate": 9.990656487329131e-06, "loss": 0.15316955, "memory(GiB)": 13.7, "step": 7305, "train_speed(iter/s)": 1.556367 }, { "acc": 0.97202721, "epoch": 3.4262948207171315, "grad_norm": 3.875462770462036, "learning_rate": 9.990609061223735e-06, "loss": 0.12285485, "memory(GiB)": 13.7, "step": 7310, "train_speed(iter/s)": 1.556396 }, { "acc": 0.94711113, "epoch": 3.4286383876259667, "grad_norm": 29.4096622467041, "learning_rate": 9.990561515172512e-06, "loss": 0.29681859, "memory(GiB)": 13.7, "step": 7315, "train_speed(iter/s)": 1.556447 }, { "acc": 0.94077129, "epoch": 3.430981954534802, "grad_norm": 13.727685928344727, "learning_rate": 9.990513849176602e-06, "loss": 0.33105562, "memory(GiB)": 13.7, "step": 7320, "train_speed(iter/s)": 1.556437 }, { "acc": 0.95258579, "epoch": 3.433325521443637, "grad_norm": 16.99050521850586, "learning_rate": 9.990466063237153e-06, "loss": 0.29771867, "memory(GiB)": 13.7, "step": 7325, "train_speed(iter/s)": 1.55643 }, { "acc": 0.95454445, "epoch": 3.4356690883524723, "grad_norm": 21.32193946838379, "learning_rate": 9.990418157355312e-06, "loss": 0.29223514, "memory(GiB)": 13.7, "step": 7330, "train_speed(iter/s)": 1.55646 }, { "acc": 0.9556385, "epoch": 3.438012655261308, "grad_norm": 9.649971008300781, "learning_rate": 9.990370131532234e-06, "loss": 0.25286973, "memory(GiB)": 13.7, "step": 7335, "train_speed(iter/s)": 1.556456 }, { "acc": 0.96805553, "epoch": 3.440356222170143, "grad_norm": 2.080064296722412, "learning_rate": 9.99032198576907e-06, "loss": 0.20933328, "memory(GiB)": 13.7, "step": 7340, "train_speed(iter/s)": 1.556433 }, { "acc": 0.94107819, "epoch": 3.4426997890789783, "grad_norm": 21.243310928344727, "learning_rate": 9.990273720066979e-06, "loss": 0.38764465, "memory(GiB)": 13.7, "step": 7345, "train_speed(iter/s)": 1.55641 }, { "acc": 0.9676733, "epoch": 3.4450433559878135, "grad_norm": 9.833715438842773, "learning_rate": 9.99022533442712e-06, "loss": 0.22551031, "memory(GiB)": 13.7, "step": 7350, "train_speed(iter/s)": 1.556387 }, { "acc": 0.95907745, "epoch": 3.4473869228966487, "grad_norm": 12.187207221984863, "learning_rate": 9.990176828850656e-06, "loss": 0.19618082, "memory(GiB)": 13.7, "step": 7355, "train_speed(iter/s)": 1.556408 }, { "acc": 0.9337615, "epoch": 3.449730489805484, "grad_norm": 178.22213745117188, "learning_rate": 9.990128203338752e-06, "loss": 0.44918966, "memory(GiB)": 13.7, "step": 7360, "train_speed(iter/s)": 1.556451 }, { "acc": 0.94297924, "epoch": 3.452074056714319, "grad_norm": 4.698665142059326, "learning_rate": 9.990079457892581e-06, "loss": 0.27994173, "memory(GiB)": 13.7, "step": 7365, "train_speed(iter/s)": 1.556488 }, { "acc": 0.94936419, "epoch": 3.4544176236231543, "grad_norm": 13.293583869934082, "learning_rate": 9.99003059251331e-06, "loss": 0.25862834, "memory(GiB)": 13.7, "step": 7370, "train_speed(iter/s)": 1.55653 }, { "acc": 0.9539732, "epoch": 3.4567611905319895, "grad_norm": 16.17930793762207, "learning_rate": 9.989981607202116e-06, "loss": 0.27058105, "memory(GiB)": 13.7, "step": 7375, "train_speed(iter/s)": 1.556519 }, { "acc": 0.94398184, "epoch": 3.459104757440825, "grad_norm": 25.1085262298584, "learning_rate": 9.989932501960176e-06, "loss": 0.28980761, "memory(GiB)": 13.7, "step": 7380, "train_speed(iter/s)": 1.556509 }, { "acc": 0.95550518, "epoch": 3.4614483243496603, "grad_norm": 3.754875659942627, "learning_rate": 9.989883276788672e-06, "loss": 0.24546468, "memory(GiB)": 13.7, "step": 7385, "train_speed(iter/s)": 1.556477 }, { "acc": 0.947752, "epoch": 3.4637918912584955, "grad_norm": 7.675051212310791, "learning_rate": 9.989833931688783e-06, "loss": 0.30974259, "memory(GiB)": 13.7, "step": 7390, "train_speed(iter/s)": 1.556465 }, { "acc": 0.96436157, "epoch": 3.4661354581673307, "grad_norm": 9.590437889099121, "learning_rate": 9.989784466661698e-06, "loss": 0.18444247, "memory(GiB)": 13.7, "step": 7395, "train_speed(iter/s)": 1.556428 }, { "acc": 0.95291348, "epoch": 3.468479025076166, "grad_norm": 7.032813549041748, "learning_rate": 9.989734881708606e-06, "loss": 0.25184636, "memory(GiB)": 13.7, "step": 7400, "train_speed(iter/s)": 1.556434 }, { "acc": 0.95850811, "epoch": 3.470822591985001, "grad_norm": 27.783071517944336, "learning_rate": 9.989685176830699e-06, "loss": 0.22513754, "memory(GiB)": 13.7, "step": 7405, "train_speed(iter/s)": 1.556423 }, { "acc": 0.9420969, "epoch": 3.4731661588938363, "grad_norm": 3.716146230697632, "learning_rate": 9.98963535202917e-06, "loss": 0.27628031, "memory(GiB)": 13.7, "step": 7410, "train_speed(iter/s)": 1.556413 }, { "acc": 0.95410013, "epoch": 3.4755097258026715, "grad_norm": 3.8845198154449463, "learning_rate": 9.989585407305217e-06, "loss": 0.21756575, "memory(GiB)": 13.7, "step": 7415, "train_speed(iter/s)": 1.556393 }, { "acc": 0.94683609, "epoch": 3.477853292711507, "grad_norm": 130.90025329589844, "learning_rate": 9.989535342660042e-06, "loss": 0.31767554, "memory(GiB)": 13.7, "step": 7420, "train_speed(iter/s)": 1.556355 }, { "acc": 0.93298025, "epoch": 3.4801968596203423, "grad_norm": 20.01125144958496, "learning_rate": 9.989485158094848e-06, "loss": 0.36557834, "memory(GiB)": 13.7, "step": 7425, "train_speed(iter/s)": 1.556381 }, { "acc": 0.95416794, "epoch": 3.4825404265291775, "grad_norm": 3.5290634632110596, "learning_rate": 9.989434853610842e-06, "loss": 0.24416101, "memory(GiB)": 13.7, "step": 7430, "train_speed(iter/s)": 1.556345 }, { "acc": 0.95282688, "epoch": 3.4848839934380127, "grad_norm": 11.715051651000977, "learning_rate": 9.989384429209229e-06, "loss": 0.26720159, "memory(GiB)": 13.7, "step": 7435, "train_speed(iter/s)": 1.556326 }, { "acc": 0.94349432, "epoch": 3.487227560346848, "grad_norm": 10.343969345092773, "learning_rate": 9.989333884891225e-06, "loss": 0.34774408, "memory(GiB)": 13.7, "step": 7440, "train_speed(iter/s)": 1.556307 }, { "acc": 0.93727245, "epoch": 3.489571127255683, "grad_norm": 12.521062850952148, "learning_rate": 9.989283220658047e-06, "loss": 0.32536998, "memory(GiB)": 13.7, "step": 7445, "train_speed(iter/s)": 1.556241 }, { "acc": 0.96631756, "epoch": 3.4919146941645183, "grad_norm": 11.096390724182129, "learning_rate": 9.989232436510908e-06, "loss": 0.18358634, "memory(GiB)": 13.7, "step": 7450, "train_speed(iter/s)": 1.556217 }, { "acc": 0.94948864, "epoch": 3.4942582610733535, "grad_norm": 5.211429119110107, "learning_rate": 9.989181532451028e-06, "loss": 0.2247443, "memory(GiB)": 13.7, "step": 7455, "train_speed(iter/s)": 1.556286 }, { "acc": 0.95790176, "epoch": 3.4966018279821887, "grad_norm": 6.692792892456055, "learning_rate": 9.989130508479636e-06, "loss": 0.21731243, "memory(GiB)": 13.7, "step": 7460, "train_speed(iter/s)": 1.556334 }, { "acc": 0.95690098, "epoch": 3.4989453948910243, "grad_norm": 9.703324317932129, "learning_rate": 9.989079364597954e-06, "loss": 0.2544337, "memory(GiB)": 13.7, "step": 7465, "train_speed(iter/s)": 1.556322 }, { "acc": 0.95347099, "epoch": 3.5012889617998595, "grad_norm": 17.47481346130371, "learning_rate": 9.989028100807214e-06, "loss": 0.21153665, "memory(GiB)": 13.7, "step": 7470, "train_speed(iter/s)": 1.556274 }, { "acc": 0.95001736, "epoch": 3.5036325287086947, "grad_norm": 14.314044952392578, "learning_rate": 9.988976717108645e-06, "loss": 0.35145671, "memory(GiB)": 13.7, "step": 7475, "train_speed(iter/s)": 1.556294 }, { "acc": 0.9370779, "epoch": 3.50597609561753, "grad_norm": 14.477071762084961, "learning_rate": 9.988925213503487e-06, "loss": 0.40808935, "memory(GiB)": 13.7, "step": 7480, "train_speed(iter/s)": 1.556278 }, { "acc": 0.92898102, "epoch": 3.508319662526365, "grad_norm": 34.6147346496582, "learning_rate": 9.988873589992974e-06, "loss": 0.28166494, "memory(GiB)": 13.7, "step": 7485, "train_speed(iter/s)": 1.556322 }, { "acc": 0.94764624, "epoch": 3.5106632294352003, "grad_norm": 11.630359649658203, "learning_rate": 9.988821846578347e-06, "loss": 0.31534827, "memory(GiB)": 13.7, "step": 7490, "train_speed(iter/s)": 1.556321 }, { "acc": 0.92997026, "epoch": 3.513006796344036, "grad_norm": 60.052249908447266, "learning_rate": 9.988769983260851e-06, "loss": 0.36988556, "memory(GiB)": 13.7, "step": 7495, "train_speed(iter/s)": 1.556274 }, { "acc": 0.93773317, "epoch": 3.515350363252871, "grad_norm": 7.752779006958008, "learning_rate": 9.988718000041734e-06, "loss": 0.41013584, "memory(GiB)": 13.7, "step": 7500, "train_speed(iter/s)": 1.556328 }, { "acc": 0.94823322, "epoch": 3.5176939301617063, "grad_norm": 8.202313423156738, "learning_rate": 9.988665896922242e-06, "loss": 0.31431532, "memory(GiB)": 13.7, "step": 7505, "train_speed(iter/s)": 1.556314 }, { "acc": 0.93561497, "epoch": 3.5200374970705415, "grad_norm": 13.675561904907227, "learning_rate": 9.98861367390363e-06, "loss": 0.36887901, "memory(GiB)": 13.7, "step": 7510, "train_speed(iter/s)": 1.556404 }, { "acc": 0.94649258, "epoch": 3.5223810639793767, "grad_norm": 7.703769207000732, "learning_rate": 9.988561330987151e-06, "loss": 0.3995101, "memory(GiB)": 13.7, "step": 7515, "train_speed(iter/s)": 1.556389 }, { "acc": 0.9449297, "epoch": 3.524724630888212, "grad_norm": 3.445309638977051, "learning_rate": 9.988508868174066e-06, "loss": 0.31800587, "memory(GiB)": 13.7, "step": 7520, "train_speed(iter/s)": 1.556414 }, { "acc": 0.95652771, "epoch": 3.527068197797047, "grad_norm": 18.579322814941406, "learning_rate": 9.988456285465635e-06, "loss": 0.27360716, "memory(GiB)": 13.7, "step": 7525, "train_speed(iter/s)": 1.556357 }, { "acc": 0.92579708, "epoch": 3.5294117647058822, "grad_norm": 24.0922908782959, "learning_rate": 9.988403582863123e-06, "loss": 0.418507, "memory(GiB)": 13.7, "step": 7530, "train_speed(iter/s)": 1.556373 }, { "acc": 0.96285725, "epoch": 3.5317553316147174, "grad_norm": 13.867951393127441, "learning_rate": 9.988350760367793e-06, "loss": 0.25160551, "memory(GiB)": 13.7, "step": 7535, "train_speed(iter/s)": 1.556344 }, { "acc": 0.95048647, "epoch": 3.5340988985235526, "grad_norm": 7.155215740203857, "learning_rate": 9.988297817980917e-06, "loss": 0.27889252, "memory(GiB)": 13.7, "step": 7540, "train_speed(iter/s)": 1.556376 }, { "acc": 0.94793148, "epoch": 3.536442465432388, "grad_norm": 8.377474784851074, "learning_rate": 9.988244755703769e-06, "loss": 0.28471036, "memory(GiB)": 13.7, "step": 7545, "train_speed(iter/s)": 1.55639 }, { "acc": 0.9609787, "epoch": 3.5387860323412235, "grad_norm": 7.557427883148193, "learning_rate": 9.988191573537622e-06, "loss": 0.23088527, "memory(GiB)": 13.7, "step": 7550, "train_speed(iter/s)": 1.556442 }, { "acc": 0.94891672, "epoch": 3.5411295992500587, "grad_norm": 3.656031847000122, "learning_rate": 9.988138271483755e-06, "loss": 0.26774874, "memory(GiB)": 13.7, "step": 7555, "train_speed(iter/s)": 1.556481 }, { "acc": 0.94739552, "epoch": 3.543473166158894, "grad_norm": 17.9091854095459, "learning_rate": 9.988084849543451e-06, "loss": 0.2323638, "memory(GiB)": 13.7, "step": 7560, "train_speed(iter/s)": 1.55649 }, { "acc": 0.96383181, "epoch": 3.545816733067729, "grad_norm": 73.2594985961914, "learning_rate": 9.988031307717993e-06, "loss": 0.28514256, "memory(GiB)": 13.7, "step": 7565, "train_speed(iter/s)": 1.55654 }, { "acc": 0.93602276, "epoch": 3.5481602999765642, "grad_norm": 6.965292453765869, "learning_rate": 9.987977646008666e-06, "loss": 0.37317619, "memory(GiB)": 13.7, "step": 7570, "train_speed(iter/s)": 1.556581 }, { "acc": 0.94450893, "epoch": 3.5505038668853994, "grad_norm": 9.41700553894043, "learning_rate": 9.987923864416763e-06, "loss": 0.31756444, "memory(GiB)": 13.7, "step": 7575, "train_speed(iter/s)": 1.556657 }, { "acc": 0.94086676, "epoch": 3.552847433794235, "grad_norm": 9.219010353088379, "learning_rate": 9.987869962943575e-06, "loss": 0.31341386, "memory(GiB)": 13.7, "step": 7580, "train_speed(iter/s)": 1.55666 }, { "acc": 0.94012251, "epoch": 3.5551910007030703, "grad_norm": 26.72408103942871, "learning_rate": 9.987815941590397e-06, "loss": 0.36736207, "memory(GiB)": 13.7, "step": 7585, "train_speed(iter/s)": 1.556642 }, { "acc": 0.95888395, "epoch": 3.5575345676119055, "grad_norm": 8.779577255249023, "learning_rate": 9.98776180035853e-06, "loss": 0.21748214, "memory(GiB)": 13.7, "step": 7590, "train_speed(iter/s)": 1.556622 }, { "acc": 0.96581163, "epoch": 3.5598781345207406, "grad_norm": 9.796586036682129, "learning_rate": 9.987707539249272e-06, "loss": 0.23428423, "memory(GiB)": 13.7, "step": 7595, "train_speed(iter/s)": 1.556613 }, { "acc": 0.95696964, "epoch": 3.562221701429576, "grad_norm": 10.291820526123047, "learning_rate": 9.987653158263928e-06, "loss": 0.30785279, "memory(GiB)": 13.7, "step": 7600, "train_speed(iter/s)": 1.556579 }, { "acc": 0.9674387, "epoch": 3.564565268338411, "grad_norm": 4.697143077850342, "learning_rate": 9.987598657403809e-06, "loss": 0.13447517, "memory(GiB)": 13.7, "step": 7605, "train_speed(iter/s)": 1.556633 }, { "acc": 0.9635498, "epoch": 3.5669088352472462, "grad_norm": 7.378899097442627, "learning_rate": 9.987544036670222e-06, "loss": 0.20081484, "memory(GiB)": 13.7, "step": 7610, "train_speed(iter/s)": 1.556728 }, { "acc": 0.92973671, "epoch": 3.5692524021560814, "grad_norm": 26.18071937561035, "learning_rate": 9.987489296064477e-06, "loss": 0.42344699, "memory(GiB)": 13.7, "step": 7615, "train_speed(iter/s)": 1.556744 }, { "acc": 0.93403854, "epoch": 3.5715959690649166, "grad_norm": 20.35898780822754, "learning_rate": 9.987434435587896e-06, "loss": 0.3636251, "memory(GiB)": 13.7, "step": 7620, "train_speed(iter/s)": 1.556756 }, { "acc": 0.93702383, "epoch": 3.573939535973752, "grad_norm": 7.420248031616211, "learning_rate": 9.987379455241794e-06, "loss": 0.3541641, "memory(GiB)": 13.7, "step": 7625, "train_speed(iter/s)": 1.556772 }, { "acc": 0.96782608, "epoch": 3.5762831028825874, "grad_norm": 11.295764923095703, "learning_rate": 9.987324355027491e-06, "loss": 0.22617416, "memory(GiB)": 13.7, "step": 7630, "train_speed(iter/s)": 1.55678 }, { "acc": 0.96104164, "epoch": 3.5786266697914226, "grad_norm": 8.656214714050293, "learning_rate": 9.987269134946316e-06, "loss": 0.19932171, "memory(GiB)": 13.7, "step": 7635, "train_speed(iter/s)": 1.556829 }, { "acc": 0.94508934, "epoch": 3.580970236700258, "grad_norm": 10.688053131103516, "learning_rate": 9.987213794999592e-06, "loss": 0.40528121, "memory(GiB)": 13.7, "step": 7640, "train_speed(iter/s)": 1.556861 }, { "acc": 0.93362751, "epoch": 3.583313803609093, "grad_norm": 8.508455276489258, "learning_rate": 9.987158335188652e-06, "loss": 0.38554173, "memory(GiB)": 13.7, "step": 7645, "train_speed(iter/s)": 1.556872 }, { "acc": 0.95778275, "epoch": 3.585657370517928, "grad_norm": 36.840492248535156, "learning_rate": 9.987102755514829e-06, "loss": 0.30911388, "memory(GiB)": 13.7, "step": 7650, "train_speed(iter/s)": 1.556957 }, { "acc": 0.92129288, "epoch": 3.5880009374267634, "grad_norm": 7.127768039703369, "learning_rate": 9.987047055979457e-06, "loss": 0.4610342, "memory(GiB)": 13.7, "step": 7655, "train_speed(iter/s)": 1.557008 }, { "acc": 0.95503731, "epoch": 3.590344504335599, "grad_norm": 11.034863471984863, "learning_rate": 9.986991236583874e-06, "loss": 0.26472495, "memory(GiB)": 13.7, "step": 7660, "train_speed(iter/s)": 1.556978 }, { "acc": 0.93449574, "epoch": 3.5926880712444342, "grad_norm": 18.458786010742188, "learning_rate": 9.986935297329425e-06, "loss": 0.45123267, "memory(GiB)": 13.7, "step": 7665, "train_speed(iter/s)": 1.557016 }, { "acc": 0.93223209, "epoch": 3.5950316381532694, "grad_norm": 34.164390563964844, "learning_rate": 9.986879238217454e-06, "loss": 0.48535209, "memory(GiB)": 13.7, "step": 7670, "train_speed(iter/s)": 1.557069 }, { "acc": 0.94929295, "epoch": 3.5973752050621046, "grad_norm": 11.404995918273926, "learning_rate": 9.986823059249306e-06, "loss": 0.3655709, "memory(GiB)": 13.7, "step": 7675, "train_speed(iter/s)": 1.557098 }, { "acc": 0.93322086, "epoch": 3.59971877197094, "grad_norm": 15.05374526977539, "learning_rate": 9.986766760426335e-06, "loss": 0.4649158, "memory(GiB)": 13.7, "step": 7680, "train_speed(iter/s)": 1.557105 }, { "acc": 0.95706348, "epoch": 3.602062338879775, "grad_norm": 6.748859882354736, "learning_rate": 9.98671034174989e-06, "loss": 0.28277628, "memory(GiB)": 13.7, "step": 7685, "train_speed(iter/s)": 1.557093 }, { "acc": 0.9547411, "epoch": 3.60440590578861, "grad_norm": 30.166662216186523, "learning_rate": 9.986653803221332e-06, "loss": 0.29831011, "memory(GiB)": 13.7, "step": 7690, "train_speed(iter/s)": 1.557103 }, { "acc": 0.95538282, "epoch": 3.6067494726974454, "grad_norm": 4.655599594116211, "learning_rate": 9.986597144842016e-06, "loss": 0.22659435, "memory(GiB)": 13.7, "step": 7695, "train_speed(iter/s)": 1.557151 }, { "acc": 0.91679173, "epoch": 3.6090930396062806, "grad_norm": 9.944583892822266, "learning_rate": 9.986540366613306e-06, "loss": 0.39523125, "memory(GiB)": 13.7, "step": 7700, "train_speed(iter/s)": 1.557174 }, { "acc": 0.96546059, "epoch": 3.6114366065151158, "grad_norm": 6.934338092803955, "learning_rate": 9.986483468536565e-06, "loss": 0.26372135, "memory(GiB)": 13.7, "step": 7705, "train_speed(iter/s)": 1.557188 }, { "acc": 0.95979576, "epoch": 3.6137801734239514, "grad_norm": 6.7092132568359375, "learning_rate": 9.986426450613162e-06, "loss": 0.27485352, "memory(GiB)": 13.7, "step": 7710, "train_speed(iter/s)": 1.557244 }, { "acc": 0.92899799, "epoch": 3.6161237403327866, "grad_norm": 10.270439147949219, "learning_rate": 9.986369312844467e-06, "loss": 0.33253636, "memory(GiB)": 13.7, "step": 7715, "train_speed(iter/s)": 1.557315 }, { "acc": 0.95643845, "epoch": 3.618467307241622, "grad_norm": 12.249202728271484, "learning_rate": 9.986312055231853e-06, "loss": 0.26613326, "memory(GiB)": 13.7, "step": 7720, "train_speed(iter/s)": 1.557332 }, { "acc": 0.93922977, "epoch": 3.620810874150457, "grad_norm": 12.627117156982422, "learning_rate": 9.986254677776698e-06, "loss": 0.36917505, "memory(GiB)": 13.7, "step": 7725, "train_speed(iter/s)": 1.557287 }, { "acc": 0.94727936, "epoch": 3.623154441059292, "grad_norm": 34.01533126831055, "learning_rate": 9.98619718048038e-06, "loss": 0.23716507, "memory(GiB)": 13.7, "step": 7730, "train_speed(iter/s)": 1.557291 }, { "acc": 0.96163692, "epoch": 3.6254980079681274, "grad_norm": 1.2257189750671387, "learning_rate": 9.986139563344279e-06, "loss": 0.19804912, "memory(GiB)": 13.7, "step": 7735, "train_speed(iter/s)": 1.557314 }, { "acc": 0.95355167, "epoch": 3.6278415748769626, "grad_norm": 11.797004699707031, "learning_rate": 9.986081826369784e-06, "loss": 0.33632331, "memory(GiB)": 13.7, "step": 7740, "train_speed(iter/s)": 1.55735 }, { "acc": 0.95906658, "epoch": 3.630185141785798, "grad_norm": 176.3705291748047, "learning_rate": 9.986023969558281e-06, "loss": 0.27126117, "memory(GiB)": 13.7, "step": 7745, "train_speed(iter/s)": 1.557379 }, { "acc": 0.92379999, "epoch": 3.6325287086946334, "grad_norm": 6.116888999938965, "learning_rate": 9.98596599291116e-06, "loss": 0.52520723, "memory(GiB)": 13.7, "step": 7750, "train_speed(iter/s)": 1.557384 }, { "acc": 0.94007702, "epoch": 3.6348722756034686, "grad_norm": 93.22689819335938, "learning_rate": 9.985907896429814e-06, "loss": 0.2786294, "memory(GiB)": 13.7, "step": 7755, "train_speed(iter/s)": 1.557392 }, { "acc": 0.94898796, "epoch": 3.637215842512304, "grad_norm": 27.250837326049805, "learning_rate": 9.985849680115643e-06, "loss": 0.23964, "memory(GiB)": 13.7, "step": 7760, "train_speed(iter/s)": 1.557375 }, { "acc": 0.96330271, "epoch": 3.639559409421139, "grad_norm": 5.450591564178467, "learning_rate": 9.98579134397004e-06, "loss": 0.25115361, "memory(GiB)": 13.7, "step": 7765, "train_speed(iter/s)": 1.557392 }, { "acc": 0.92422409, "epoch": 3.641902976329974, "grad_norm": 9.708312034606934, "learning_rate": 9.985732887994414e-06, "loss": 0.45592155, "memory(GiB)": 13.7, "step": 7770, "train_speed(iter/s)": 1.557391 }, { "acc": 0.94900255, "epoch": 3.6442465432388094, "grad_norm": 19.780567169189453, "learning_rate": 9.985674312190166e-06, "loss": 0.35817623, "memory(GiB)": 13.7, "step": 7775, "train_speed(iter/s)": 1.557378 }, { "acc": 0.95541592, "epoch": 3.6465901101476446, "grad_norm": 12.78069019317627, "learning_rate": 9.985615616558705e-06, "loss": 0.346348, "memory(GiB)": 13.7, "step": 7780, "train_speed(iter/s)": 1.55737 }, { "acc": 0.95302467, "epoch": 3.6489336770564798, "grad_norm": 17.177370071411133, "learning_rate": 9.985556801101443e-06, "loss": 0.33112864, "memory(GiB)": 13.7, "step": 7785, "train_speed(iter/s)": 1.557412 }, { "acc": 0.93331938, "epoch": 3.651277243965315, "grad_norm": 16.34337615966797, "learning_rate": 9.985497865819794e-06, "loss": 0.3520895, "memory(GiB)": 13.7, "step": 7790, "train_speed(iter/s)": 1.557469 }, { "acc": 0.95926018, "epoch": 3.6536208108741506, "grad_norm": 8.449729919433594, "learning_rate": 9.98543881071517e-06, "loss": 0.29427457, "memory(GiB)": 13.7, "step": 7795, "train_speed(iter/s)": 1.557472 }, { "acc": 0.92432175, "epoch": 3.6559643777829858, "grad_norm": 8.220274925231934, "learning_rate": 9.985379635788995e-06, "loss": 0.42802892, "memory(GiB)": 13.7, "step": 7800, "train_speed(iter/s)": 1.557543 }, { "acc": 0.94296513, "epoch": 3.658307944691821, "grad_norm": 15.029058456420898, "learning_rate": 9.98532034104269e-06, "loss": 0.20711117, "memory(GiB)": 13.7, "step": 7805, "train_speed(iter/s)": 1.557603 }, { "acc": 0.93761368, "epoch": 3.660651511600656, "grad_norm": 38.64278793334961, "learning_rate": 9.985260926477682e-06, "loss": 0.24454064, "memory(GiB)": 13.7, "step": 7810, "train_speed(iter/s)": 1.557669 }, { "acc": 0.96058445, "epoch": 3.6629950785094914, "grad_norm": 16.475242614746094, "learning_rate": 9.985201392095397e-06, "loss": 0.21402502, "memory(GiB)": 13.7, "step": 7815, "train_speed(iter/s)": 1.55767 }, { "acc": 0.94511909, "epoch": 3.6653386454183265, "grad_norm": 13.351364135742188, "learning_rate": 9.985141737897264e-06, "loss": 0.34093063, "memory(GiB)": 13.7, "step": 7820, "train_speed(iter/s)": 1.557676 }, { "acc": 0.93613987, "epoch": 3.667682212327162, "grad_norm": 11.741955757141113, "learning_rate": 9.985081963884724e-06, "loss": 0.43243275, "memory(GiB)": 13.7, "step": 7825, "train_speed(iter/s)": 1.557668 }, { "acc": 0.96057072, "epoch": 3.6700257792359974, "grad_norm": 19.91680908203125, "learning_rate": 9.985022070059207e-06, "loss": 0.26521382, "memory(GiB)": 13.7, "step": 7830, "train_speed(iter/s)": 1.557665 }, { "acc": 0.96179924, "epoch": 3.6723693461448326, "grad_norm": 20.838300704956055, "learning_rate": 9.984962056422153e-06, "loss": 0.23393674, "memory(GiB)": 13.7, "step": 7835, "train_speed(iter/s)": 1.557726 }, { "acc": 0.9588541, "epoch": 3.6747129130536678, "grad_norm": 8.081332206726074, "learning_rate": 9.984901922975007e-06, "loss": 0.28479877, "memory(GiB)": 13.7, "step": 7840, "train_speed(iter/s)": 1.557744 }, { "acc": 0.94016323, "epoch": 3.677056479962503, "grad_norm": 14.360787391662598, "learning_rate": 9.984841669719214e-06, "loss": 0.34970245, "memory(GiB)": 13.7, "step": 7845, "train_speed(iter/s)": 1.557779 }, { "acc": 0.95433264, "epoch": 3.679400046871338, "grad_norm": 5.15763521194458, "learning_rate": 9.984781296656223e-06, "loss": 0.18335204, "memory(GiB)": 13.7, "step": 7850, "train_speed(iter/s)": 1.557747 }, { "acc": 0.94300594, "epoch": 3.6817436137801733, "grad_norm": 3.8830833435058594, "learning_rate": 9.984720803787484e-06, "loss": 0.28957698, "memory(GiB)": 13.7, "step": 7855, "train_speed(iter/s)": 1.557745 }, { "acc": 0.95403566, "epoch": 3.6840871806890085, "grad_norm": 4.679279804229736, "learning_rate": 9.984660191114452e-06, "loss": 0.29143322, "memory(GiB)": 13.7, "step": 7860, "train_speed(iter/s)": 1.557759 }, { "acc": 0.96143579, "epoch": 3.6864307475978437, "grad_norm": 13.736980438232422, "learning_rate": 9.984599458638581e-06, "loss": 0.3020925, "memory(GiB)": 13.7, "step": 7865, "train_speed(iter/s)": 1.557734 }, { "acc": 0.96833935, "epoch": 3.688774314506679, "grad_norm": 4.670040607452393, "learning_rate": 9.984538606361335e-06, "loss": 0.20829368, "memory(GiB)": 13.7, "step": 7870, "train_speed(iter/s)": 1.557751 }, { "acc": 0.97160721, "epoch": 3.6911178814155146, "grad_norm": 8.212174415588379, "learning_rate": 9.984477634284176e-06, "loss": 0.15215496, "memory(GiB)": 13.7, "step": 7875, "train_speed(iter/s)": 1.557709 }, { "acc": 0.96385822, "epoch": 3.6934614483243497, "grad_norm": 3.4720797538757324, "learning_rate": 9.984416542408565e-06, "loss": 0.15906312, "memory(GiB)": 13.7, "step": 7880, "train_speed(iter/s)": 1.557721 }, { "acc": 0.95968256, "epoch": 3.695805015233185, "grad_norm": 11.298526763916016, "learning_rate": 9.984355330735975e-06, "loss": 0.29541264, "memory(GiB)": 13.7, "step": 7885, "train_speed(iter/s)": 1.55774 }, { "acc": 0.95523806, "epoch": 3.69814858214202, "grad_norm": 10.025370597839355, "learning_rate": 9.984293999267878e-06, "loss": 0.33657024, "memory(GiB)": 13.7, "step": 7890, "train_speed(iter/s)": 1.557712 }, { "acc": 0.94627333, "epoch": 3.7004921490508553, "grad_norm": 10.268925666809082, "learning_rate": 9.984232548005744e-06, "loss": 0.38759754, "memory(GiB)": 13.7, "step": 7895, "train_speed(iter/s)": 1.55772 }, { "acc": 0.96612015, "epoch": 3.7028357159596905, "grad_norm": 4.67874002456665, "learning_rate": 9.984170976951053e-06, "loss": 0.21426635, "memory(GiB)": 13.7, "step": 7900, "train_speed(iter/s)": 1.557745 }, { "acc": 0.94778271, "epoch": 3.705179282868526, "grad_norm": 7.954425811767578, "learning_rate": 9.984109286105284e-06, "loss": 0.33469591, "memory(GiB)": 13.7, "step": 7905, "train_speed(iter/s)": 1.557744 }, { "acc": 0.94184532, "epoch": 3.7075228497773614, "grad_norm": 10.808732032775879, "learning_rate": 9.984047475469922e-06, "loss": 0.33048186, "memory(GiB)": 13.7, "step": 7910, "train_speed(iter/s)": 1.557743 }, { "acc": 0.95337753, "epoch": 3.7098664166861965, "grad_norm": 9.827835083007812, "learning_rate": 9.983985545046449e-06, "loss": 0.21922412, "memory(GiB)": 13.7, "step": 7915, "train_speed(iter/s)": 1.557705 }, { "acc": 0.92231455, "epoch": 3.7122099835950317, "grad_norm": 14.309925079345703, "learning_rate": 9.983923494836355e-06, "loss": 0.43661318, "memory(GiB)": 13.7, "step": 7920, "train_speed(iter/s)": 1.557646 }, { "acc": 0.93706264, "epoch": 3.714553550503867, "grad_norm": 12.438130378723145, "learning_rate": 9.983861324841133e-06, "loss": 0.38830261, "memory(GiB)": 13.7, "step": 7925, "train_speed(iter/s)": 1.557684 }, { "acc": 0.96142368, "epoch": 3.716897117412702, "grad_norm": 4.856544017791748, "learning_rate": 9.983799035062276e-06, "loss": 0.28687611, "memory(GiB)": 13.7, "step": 7930, "train_speed(iter/s)": 1.557751 }, { "acc": 0.96238966, "epoch": 3.7192406843215373, "grad_norm": 6.152478218078613, "learning_rate": 9.983736625501284e-06, "loss": 0.24723127, "memory(GiB)": 13.7, "step": 7935, "train_speed(iter/s)": 1.557816 }, { "acc": 0.95849171, "epoch": 3.7215842512303725, "grad_norm": 5.966937065124512, "learning_rate": 9.983674096159652e-06, "loss": 0.20322297, "memory(GiB)": 13.7, "step": 7940, "train_speed(iter/s)": 1.557869 }, { "acc": 0.95298615, "epoch": 3.7239278181392077, "grad_norm": 16.968408584594727, "learning_rate": 9.983611447038887e-06, "loss": 0.28273969, "memory(GiB)": 13.7, "step": 7945, "train_speed(iter/s)": 1.557842 }, { "acc": 0.94946423, "epoch": 3.726271385048043, "grad_norm": 7.613863945007324, "learning_rate": 9.983548678140491e-06, "loss": 0.28127832, "memory(GiB)": 13.7, "step": 7950, "train_speed(iter/s)": 1.557828 }, { "acc": 0.92374506, "epoch": 3.7286149519568785, "grad_norm": 7.300922393798828, "learning_rate": 9.983485789465978e-06, "loss": 0.37039387, "memory(GiB)": 13.7, "step": 7955, "train_speed(iter/s)": 1.557826 }, { "acc": 0.96352959, "epoch": 3.7309585188657137, "grad_norm": 4.949718475341797, "learning_rate": 9.983422781016858e-06, "loss": 0.24159377, "memory(GiB)": 13.7, "step": 7960, "train_speed(iter/s)": 1.557821 }, { "acc": 0.95492601, "epoch": 3.733302085774549, "grad_norm": 6.400862216949463, "learning_rate": 9.983359652794641e-06, "loss": 0.27085257, "memory(GiB)": 13.7, "step": 7965, "train_speed(iter/s)": 1.557845 }, { "acc": 0.95867786, "epoch": 3.735645652683384, "grad_norm": 8.413444519042969, "learning_rate": 9.983296404800851e-06, "loss": 0.22357321, "memory(GiB)": 13.7, "step": 7970, "train_speed(iter/s)": 1.557792 }, { "acc": 0.93529415, "epoch": 3.7379892195922193, "grad_norm": 29.9193115234375, "learning_rate": 9.983233037037005e-06, "loss": 0.43196683, "memory(GiB)": 13.7, "step": 7975, "train_speed(iter/s)": 1.557797 }, { "acc": 0.94903755, "epoch": 3.7403327865010545, "grad_norm": 22.63701629638672, "learning_rate": 9.983169549504625e-06, "loss": 0.32395456, "memory(GiB)": 13.7, "step": 7980, "train_speed(iter/s)": 1.557811 }, { "acc": 0.97402382, "epoch": 3.7426763534098897, "grad_norm": 5.671073913574219, "learning_rate": 9.983105942205239e-06, "loss": 0.17276269, "memory(GiB)": 13.7, "step": 7985, "train_speed(iter/s)": 1.557763 }, { "acc": 0.95245848, "epoch": 3.7450199203187253, "grad_norm": 10.577126502990723, "learning_rate": 9.983042215140374e-06, "loss": 0.26028695, "memory(GiB)": 13.7, "step": 7990, "train_speed(iter/s)": 1.557825 }, { "acc": 0.95975275, "epoch": 3.7473634872275605, "grad_norm": 7.6038126945495605, "learning_rate": 9.982978368311564e-06, "loss": 0.21797571, "memory(GiB)": 13.7, "step": 7995, "train_speed(iter/s)": 1.55785 }, { "acc": 0.93088675, "epoch": 3.7497070541363957, "grad_norm": 24.469257354736328, "learning_rate": 9.982914401720345e-06, "loss": 0.39969459, "memory(GiB)": 13.7, "step": 8000, "train_speed(iter/s)": 1.557807 }, { "acc": 0.95799561, "epoch": 3.752050621045231, "grad_norm": 6.793664932250977, "learning_rate": 9.98285031536825e-06, "loss": 0.21520228, "memory(GiB)": 13.7, "step": 8005, "train_speed(iter/s)": 1.557821 }, { "acc": 0.94297924, "epoch": 3.754394187954066, "grad_norm": 19.957866668701172, "learning_rate": 9.98278610925682e-06, "loss": 0.28109109, "memory(GiB)": 13.7, "step": 8010, "train_speed(iter/s)": 1.557826 }, { "acc": 0.91897182, "epoch": 3.7567377548629013, "grad_norm": 11.425292015075684, "learning_rate": 9.982721783387602e-06, "loss": 0.50594473, "memory(GiB)": 13.7, "step": 8015, "train_speed(iter/s)": 1.557798 }, { "acc": 0.95110693, "epoch": 3.7590813217717365, "grad_norm": 204.84310913085938, "learning_rate": 9.98265733776214e-06, "loss": 0.30109222, "memory(GiB)": 13.7, "step": 8020, "train_speed(iter/s)": 1.557744 }, { "acc": 0.95907211, "epoch": 3.7614248886805717, "grad_norm": 6.939612865447998, "learning_rate": 9.982592772381983e-06, "loss": 0.22978568, "memory(GiB)": 13.7, "step": 8025, "train_speed(iter/s)": 1.557677 }, { "acc": 0.93155031, "epoch": 3.763768455589407, "grad_norm": 14.487546920776367, "learning_rate": 9.982528087248684e-06, "loss": 0.39182196, "memory(GiB)": 13.7, "step": 8030, "train_speed(iter/s)": 1.557668 }, { "acc": 0.95661392, "epoch": 3.766112022498242, "grad_norm": 4.381590366363525, "learning_rate": 9.982463282363797e-06, "loss": 0.28482342, "memory(GiB)": 13.7, "step": 8035, "train_speed(iter/s)": 1.557695 }, { "acc": 0.97557068, "epoch": 3.7684555894070777, "grad_norm": 3.5898120403289795, "learning_rate": 9.982398357728879e-06, "loss": 0.15639144, "memory(GiB)": 13.7, "step": 8040, "train_speed(iter/s)": 1.557649 }, { "acc": 0.9741951, "epoch": 3.770799156315913, "grad_norm": 5.894969940185547, "learning_rate": 9.982333313345491e-06, "loss": 0.20987134, "memory(GiB)": 13.7, "step": 8045, "train_speed(iter/s)": 1.557684 }, { "acc": 0.96858435, "epoch": 3.773142723224748, "grad_norm": 11.405261039733887, "learning_rate": 9.982268149215198e-06, "loss": 0.23000443, "memory(GiB)": 13.7, "step": 8050, "train_speed(iter/s)": 1.557589 }, { "acc": 0.95352678, "epoch": 3.7754862901335833, "grad_norm": 6.282966613769531, "learning_rate": 9.982202865339565e-06, "loss": 0.20068457, "memory(GiB)": 13.7, "step": 8055, "train_speed(iter/s)": 1.557562 }, { "acc": 0.9444705, "epoch": 3.7778298570424185, "grad_norm": 7.378444194793701, "learning_rate": 9.98213746172016e-06, "loss": 0.32714601, "memory(GiB)": 13.7, "step": 8060, "train_speed(iter/s)": 1.557536 }, { "acc": 0.96458292, "epoch": 3.7801734239512537, "grad_norm": 10.576299667358398, "learning_rate": 9.982071938358557e-06, "loss": 0.24744303, "memory(GiB)": 13.7, "step": 8065, "train_speed(iter/s)": 1.557558 }, { "acc": 0.92875299, "epoch": 3.7825169908600893, "grad_norm": 21.852087020874023, "learning_rate": 9.98200629525633e-06, "loss": 0.38390331, "memory(GiB)": 13.7, "step": 8070, "train_speed(iter/s)": 1.557621 }, { "acc": 0.96426964, "epoch": 3.7848605577689245, "grad_norm": 14.372570037841797, "learning_rate": 9.981940532415056e-06, "loss": 0.22872365, "memory(GiB)": 13.7, "step": 8075, "train_speed(iter/s)": 1.557593 }, { "acc": 0.94608536, "epoch": 3.7872041246777597, "grad_norm": 9.257746696472168, "learning_rate": 9.98187464983632e-06, "loss": 0.31330378, "memory(GiB)": 13.7, "step": 8080, "train_speed(iter/s)": 1.557604 }, { "acc": 0.95158939, "epoch": 3.789547691586595, "grad_norm": 8.427720069885254, "learning_rate": 9.9818086475217e-06, "loss": 0.29510758, "memory(GiB)": 13.7, "step": 8085, "train_speed(iter/s)": 1.557592 }, { "acc": 0.96584311, "epoch": 3.79189125849543, "grad_norm": 7.471869945526123, "learning_rate": 9.981742525472787e-06, "loss": 0.18373456, "memory(GiB)": 13.7, "step": 8090, "train_speed(iter/s)": 1.557543 }, { "acc": 0.94559031, "epoch": 3.7942348254042653, "grad_norm": 39.48638153076172, "learning_rate": 9.981676283691167e-06, "loss": 0.26796598, "memory(GiB)": 13.7, "step": 8095, "train_speed(iter/s)": 1.557531 }, { "acc": 0.92901611, "epoch": 3.7965783923131005, "grad_norm": 15.36499309539795, "learning_rate": 9.981609922178433e-06, "loss": 0.42749329, "memory(GiB)": 13.7, "step": 8100, "train_speed(iter/s)": 1.557563 }, { "acc": 0.94161758, "epoch": 3.7989219592219357, "grad_norm": 15.746758460998535, "learning_rate": 9.981543440936182e-06, "loss": 0.39510386, "memory(GiB)": 13.7, "step": 8105, "train_speed(iter/s)": 1.557507 }, { "acc": 0.94670458, "epoch": 3.801265526130771, "grad_norm": 17.52196502685547, "learning_rate": 9.98147683996601e-06, "loss": 0.34185884, "memory(GiB)": 13.7, "step": 8110, "train_speed(iter/s)": 1.55756 }, { "acc": 0.9527976, "epoch": 3.803609093039606, "grad_norm": 28.99175262451172, "learning_rate": 9.98141011926952e-06, "loss": 0.23612795, "memory(GiB)": 13.7, "step": 8115, "train_speed(iter/s)": 1.557595 }, { "acc": 0.92753477, "epoch": 3.8059526599484417, "grad_norm": 32.64483642578125, "learning_rate": 9.981343278848313e-06, "loss": 0.48811197, "memory(GiB)": 13.7, "step": 8120, "train_speed(iter/s)": 1.557597 }, { "acc": 0.95965672, "epoch": 3.808296226857277, "grad_norm": 10.811062812805176, "learning_rate": 9.981276318703995e-06, "loss": 0.28402176, "memory(GiB)": 13.7, "step": 8125, "train_speed(iter/s)": 1.557619 }, { "acc": 0.9541666, "epoch": 3.810639793766112, "grad_norm": 0.5127199292182922, "learning_rate": 9.98120923883818e-06, "loss": 0.33040369, "memory(GiB)": 13.7, "step": 8130, "train_speed(iter/s)": 1.557664 }, { "acc": 0.96763887, "epoch": 3.8129833606749473, "grad_norm": 8.439996719360352, "learning_rate": 9.981142039252476e-06, "loss": 0.20284195, "memory(GiB)": 13.7, "step": 8135, "train_speed(iter/s)": 1.557674 }, { "acc": 0.94445887, "epoch": 3.8153269275837824, "grad_norm": 23.67483901977539, "learning_rate": 9.981074719948502e-06, "loss": 0.39566784, "memory(GiB)": 13.7, "step": 8140, "train_speed(iter/s)": 1.557648 }, { "acc": 0.95456848, "epoch": 3.8176704944926176, "grad_norm": 8.325599670410156, "learning_rate": 9.981007280927873e-06, "loss": 0.26342061, "memory(GiB)": 13.7, "step": 8145, "train_speed(iter/s)": 1.557634 }, { "acc": 0.94511471, "epoch": 3.8200140614014533, "grad_norm": 10.55848217010498, "learning_rate": 9.980939722192213e-06, "loss": 0.27400043, "memory(GiB)": 13.7, "step": 8150, "train_speed(iter/s)": 1.557637 }, { "acc": 0.95018997, "epoch": 3.8223576283102885, "grad_norm": 11.506936073303223, "learning_rate": 9.980872043743141e-06, "loss": 0.27503524, "memory(GiB)": 13.7, "step": 8155, "train_speed(iter/s)": 1.557597 }, { "acc": 0.95734043, "epoch": 3.8247011952191237, "grad_norm": 4.3660759925842285, "learning_rate": 9.980804245582289e-06, "loss": 0.19123083, "memory(GiB)": 13.7, "step": 8160, "train_speed(iter/s)": 1.557603 }, { "acc": 0.94415684, "epoch": 3.827044762127959, "grad_norm": 38.83077621459961, "learning_rate": 9.980736327711285e-06, "loss": 0.38300614, "memory(GiB)": 13.7, "step": 8165, "train_speed(iter/s)": 1.557602 }, { "acc": 0.9607336, "epoch": 3.829388329036794, "grad_norm": 11.002098083496094, "learning_rate": 9.98066829013176e-06, "loss": 0.17888238, "memory(GiB)": 13.7, "step": 8170, "train_speed(iter/s)": 1.557587 }, { "acc": 0.93586197, "epoch": 3.8317318959456292, "grad_norm": 65.14949035644531, "learning_rate": 9.98060013284535e-06, "loss": 0.33235672, "memory(GiB)": 13.7, "step": 8175, "train_speed(iter/s)": 1.557583 }, { "acc": 0.97327385, "epoch": 3.8340754628544644, "grad_norm": 3.851285219192505, "learning_rate": 9.980531855853693e-06, "loss": 0.22596235, "memory(GiB)": 13.7, "step": 8180, "train_speed(iter/s)": 1.557587 }, { "acc": 0.96692257, "epoch": 3.8364190297632996, "grad_norm": 6.592456817626953, "learning_rate": 9.980463459158433e-06, "loss": 0.22101741, "memory(GiB)": 13.7, "step": 8185, "train_speed(iter/s)": 1.557565 }, { "acc": 0.96244869, "epoch": 3.838762596672135, "grad_norm": 12.440536499023438, "learning_rate": 9.980394942761211e-06, "loss": 0.16778858, "memory(GiB)": 13.7, "step": 8190, "train_speed(iter/s)": 1.557521 }, { "acc": 0.9562582, "epoch": 3.84110616358097, "grad_norm": 13.469730377197266, "learning_rate": 9.980326306663675e-06, "loss": 0.29351501, "memory(GiB)": 13.7, "step": 8195, "train_speed(iter/s)": 1.557487 }, { "acc": 0.94018297, "epoch": 3.8434497304898056, "grad_norm": 15.004159927368164, "learning_rate": 9.980257550867475e-06, "loss": 0.34954228, "memory(GiB)": 13.7, "step": 8200, "train_speed(iter/s)": 1.557522 }, { "acc": 0.95909004, "epoch": 3.845793297398641, "grad_norm": 13.200839042663574, "learning_rate": 9.98018867537426e-06, "loss": 0.21681743, "memory(GiB)": 13.7, "step": 8205, "train_speed(iter/s)": 1.557514 }, { "acc": 0.93788109, "epoch": 3.848136864307476, "grad_norm": 12.403609275817871, "learning_rate": 9.980119680185693e-06, "loss": 0.3158958, "memory(GiB)": 13.7, "step": 8210, "train_speed(iter/s)": 1.557523 }, { "acc": 0.94182529, "epoch": 3.8504804312163112, "grad_norm": 26.906381607055664, "learning_rate": 9.980050565303427e-06, "loss": 0.42183151, "memory(GiB)": 13.7, "step": 8215, "train_speed(iter/s)": 1.557489 }, { "acc": 0.91815701, "epoch": 3.8528239981251464, "grad_norm": 11.424653053283691, "learning_rate": 9.979981330729124e-06, "loss": 0.45035248, "memory(GiB)": 13.7, "step": 8220, "train_speed(iter/s)": 1.557486 }, { "acc": 0.95375319, "epoch": 3.8551675650339816, "grad_norm": 10.304549217224121, "learning_rate": 9.979911976464448e-06, "loss": 0.301701, "memory(GiB)": 13.7, "step": 8225, "train_speed(iter/s)": 1.557502 }, { "acc": 0.96433029, "epoch": 3.857511131942817, "grad_norm": 13.302889823913574, "learning_rate": 9.979842502511068e-06, "loss": 0.28234849, "memory(GiB)": 13.7, "step": 8230, "train_speed(iter/s)": 1.557502 }, { "acc": 0.97367382, "epoch": 3.8598546988516524, "grad_norm": 8.088586807250977, "learning_rate": 9.979772908870653e-06, "loss": 0.17227468, "memory(GiB)": 13.7, "step": 8235, "train_speed(iter/s)": 1.557506 }, { "acc": 0.95125008, "epoch": 3.8621982657604876, "grad_norm": 5.718100547790527, "learning_rate": 9.979703195544873e-06, "loss": 0.18868804, "memory(GiB)": 13.7, "step": 8240, "train_speed(iter/s)": 1.557496 }, { "acc": 0.95189781, "epoch": 3.864541832669323, "grad_norm": 24.323333740234375, "learning_rate": 9.97963336253541e-06, "loss": 0.33519657, "memory(GiB)": 13.7, "step": 8245, "train_speed(iter/s)": 1.557486 }, { "acc": 0.94920835, "epoch": 3.866885399578158, "grad_norm": 6.669947147369385, "learning_rate": 9.979563409843934e-06, "loss": 0.38764954, "memory(GiB)": 13.7, "step": 8250, "train_speed(iter/s)": 1.557495 }, { "acc": 0.94706135, "epoch": 3.869228966486993, "grad_norm": 17.794742584228516, "learning_rate": 9.979493337472133e-06, "loss": 0.35888443, "memory(GiB)": 13.7, "step": 8255, "train_speed(iter/s)": 1.557503 }, { "acc": 0.95336838, "epoch": 3.8715725333958284, "grad_norm": 9.168024063110352, "learning_rate": 9.979423145421692e-06, "loss": 0.25759988, "memory(GiB)": 13.7, "step": 8260, "train_speed(iter/s)": 1.557517 }, { "acc": 0.94556789, "epoch": 3.8739161003046636, "grad_norm": 20.639047622680664, "learning_rate": 9.979352833694293e-06, "loss": 0.3231144, "memory(GiB)": 13.7, "step": 8265, "train_speed(iter/s)": 1.557527 }, { "acc": 0.96310673, "epoch": 3.876259667213499, "grad_norm": 5.8163557052612305, "learning_rate": 9.979282402291629e-06, "loss": 0.24918582, "memory(GiB)": 13.7, "step": 8270, "train_speed(iter/s)": 1.557548 }, { "acc": 0.95317993, "epoch": 3.878603234122334, "grad_norm": 17.174287796020508, "learning_rate": 9.979211851215392e-06, "loss": 0.30892897, "memory(GiB)": 13.7, "step": 8275, "train_speed(iter/s)": 1.557514 }, { "acc": 0.95646458, "epoch": 3.880946801031169, "grad_norm": 25.035263061523438, "learning_rate": 9.97914118046728e-06, "loss": 0.2870044, "memory(GiB)": 13.7, "step": 8280, "train_speed(iter/s)": 1.55755 }, { "acc": 0.97399855, "epoch": 3.883290367940005, "grad_norm": 6.6571831703186035, "learning_rate": 9.979070390048991e-06, "loss": 0.13921826, "memory(GiB)": 13.7, "step": 8285, "train_speed(iter/s)": 1.55757 }, { "acc": 0.95607147, "epoch": 3.88563393484884, "grad_norm": 8.33663558959961, "learning_rate": 9.978999479962222e-06, "loss": 0.22953107, "memory(GiB)": 13.7, "step": 8290, "train_speed(iter/s)": 1.55757 }, { "acc": 0.96770344, "epoch": 3.887977501757675, "grad_norm": 11.22694206237793, "learning_rate": 9.978928450208683e-06, "loss": 0.18033159, "memory(GiB)": 13.7, "step": 8295, "train_speed(iter/s)": 1.557557 }, { "acc": 0.94347477, "epoch": 3.8903210686665104, "grad_norm": 24.596210479736328, "learning_rate": 9.978857300790081e-06, "loss": 0.32224786, "memory(GiB)": 13.7, "step": 8300, "train_speed(iter/s)": 1.557534 }, { "acc": 0.94410448, "epoch": 3.8926646355753456, "grad_norm": 12.481832504272461, "learning_rate": 9.978786031708123e-06, "loss": 0.33857069, "memory(GiB)": 13.7, "step": 8305, "train_speed(iter/s)": 1.557587 }, { "acc": 0.94587383, "epoch": 3.895008202484181, "grad_norm": 7.453958034515381, "learning_rate": 9.978714642964523e-06, "loss": 0.31425724, "memory(GiB)": 13.7, "step": 8310, "train_speed(iter/s)": 1.557631 }, { "acc": 0.93729801, "epoch": 3.8973517693930164, "grad_norm": 13.021306991577148, "learning_rate": 9.978643134561e-06, "loss": 0.36157756, "memory(GiB)": 13.7, "step": 8315, "train_speed(iter/s)": 1.557601 }, { "acc": 0.98410711, "epoch": 3.8996953363018516, "grad_norm": 9.410930633544922, "learning_rate": 9.97857150649927e-06, "loss": 0.06467183, "memory(GiB)": 13.7, "step": 8320, "train_speed(iter/s)": 1.557571 }, { "acc": 0.9326313, "epoch": 3.902038903210687, "grad_norm": 26.92484474182129, "learning_rate": 9.978499758781054e-06, "loss": 0.43566837, "memory(GiB)": 13.7, "step": 8325, "train_speed(iter/s)": 1.557591 }, { "acc": 0.94721003, "epoch": 3.904382470119522, "grad_norm": 22.505325317382812, "learning_rate": 9.978427891408078e-06, "loss": 0.34050465, "memory(GiB)": 13.7, "step": 8330, "train_speed(iter/s)": 1.557608 }, { "acc": 0.95424213, "epoch": 3.906726037028357, "grad_norm": 31.15131950378418, "learning_rate": 9.97835590438207e-06, "loss": 0.26382716, "memory(GiB)": 13.7, "step": 8335, "train_speed(iter/s)": 1.55758 }, { "acc": 0.96310101, "epoch": 3.9090696039371924, "grad_norm": 6.161809921264648, "learning_rate": 9.978283797704758e-06, "loss": 0.2128273, "memory(GiB)": 13.7, "step": 8340, "train_speed(iter/s)": 1.557525 }, { "acc": 0.9695281, "epoch": 3.9114131708460276, "grad_norm": 6.004411220550537, "learning_rate": 9.97821157137788e-06, "loss": 0.201068, "memory(GiB)": 13.7, "step": 8345, "train_speed(iter/s)": 1.557493 }, { "acc": 0.95533638, "epoch": 3.9137567377548628, "grad_norm": 5.496346473693848, "learning_rate": 9.978139225403165e-06, "loss": 0.23648243, "memory(GiB)": 13.7, "step": 8350, "train_speed(iter/s)": 1.55748 }, { "acc": 0.94732895, "epoch": 3.916100304663698, "grad_norm": 5.96008825302124, "learning_rate": 9.978066759782357e-06, "loss": 0.26647129, "memory(GiB)": 13.7, "step": 8355, "train_speed(iter/s)": 1.557461 }, { "acc": 0.96172085, "epoch": 3.918443871572533, "grad_norm": 7.228713035583496, "learning_rate": 9.977994174517199e-06, "loss": 0.25590553, "memory(GiB)": 13.7, "step": 8360, "train_speed(iter/s)": 1.557489 }, { "acc": 0.94093246, "epoch": 3.920787438481369, "grad_norm": 21.735946655273438, "learning_rate": 9.97792146960943e-06, "loss": 0.43398056, "memory(GiB)": 13.7, "step": 8365, "train_speed(iter/s)": 1.557531 }, { "acc": 0.94853725, "epoch": 3.923131005390204, "grad_norm": 10.990867614746094, "learning_rate": 9.977848645060805e-06, "loss": 0.26657734, "memory(GiB)": 13.7, "step": 8370, "train_speed(iter/s)": 1.557555 }, { "acc": 0.9398798, "epoch": 3.925474572299039, "grad_norm": 13.468832969665527, "learning_rate": 9.977775700873066e-06, "loss": 0.43336697, "memory(GiB)": 13.7, "step": 8375, "train_speed(iter/s)": 1.55754 }, { "acc": 0.93650932, "epoch": 3.9278181392078744, "grad_norm": 24.202014923095703, "learning_rate": 9.977702637047974e-06, "loss": 0.34561865, "memory(GiB)": 13.7, "step": 8380, "train_speed(iter/s)": 1.557533 }, { "acc": 0.95269213, "epoch": 3.9301617061167096, "grad_norm": 6.9537272453308105, "learning_rate": 9.97762945358728e-06, "loss": 0.39129915, "memory(GiB)": 13.7, "step": 8385, "train_speed(iter/s)": 1.55753 }, { "acc": 0.93670673, "epoch": 3.9325052730255448, "grad_norm": 11.540855407714844, "learning_rate": 9.977556150492746e-06, "loss": 0.35860903, "memory(GiB)": 13.7, "step": 8390, "train_speed(iter/s)": 1.557535 }, { "acc": 0.95380135, "epoch": 3.9348488399343804, "grad_norm": 20.741798400878906, "learning_rate": 9.977482727766134e-06, "loss": 0.31914008, "memory(GiB)": 13.7, "step": 8395, "train_speed(iter/s)": 1.55749 }, { "acc": 0.95760069, "epoch": 3.9371924068432156, "grad_norm": 7.1618876457214355, "learning_rate": 9.977409185409205e-06, "loss": 0.28556309, "memory(GiB)": 13.7, "step": 8400, "train_speed(iter/s)": 1.557538 }, { "acc": 0.95185108, "epoch": 3.939535973752051, "grad_norm": 13.173321723937988, "learning_rate": 9.977335523423733e-06, "loss": 0.24714079, "memory(GiB)": 13.7, "step": 8405, "train_speed(iter/s)": 1.557528 }, { "acc": 0.93660717, "epoch": 3.941879540660886, "grad_norm": 8.704888343811035, "learning_rate": 9.977261741811484e-06, "loss": 0.32382381, "memory(GiB)": 13.7, "step": 8410, "train_speed(iter/s)": 1.557547 }, { "acc": 0.96419716, "epoch": 3.944223107569721, "grad_norm": 9.357342720031738, "learning_rate": 9.977187840574232e-06, "loss": 0.19583579, "memory(GiB)": 13.7, "step": 8415, "train_speed(iter/s)": 1.557535 }, { "acc": 0.95425596, "epoch": 3.9465666744785564, "grad_norm": 15.074176788330078, "learning_rate": 9.977113819713755e-06, "loss": 0.23483765, "memory(GiB)": 13.7, "step": 8420, "train_speed(iter/s)": 1.557496 }, { "acc": 0.95762978, "epoch": 3.9489102413873916, "grad_norm": 8.759973526000977, "learning_rate": 9.977039679231829e-06, "loss": 0.19223082, "memory(GiB)": 13.7, "step": 8425, "train_speed(iter/s)": 1.55755 }, { "acc": 0.95986195, "epoch": 3.9512538082962267, "grad_norm": 8.923524856567383, "learning_rate": 9.97696541913024e-06, "loss": 0.20814106, "memory(GiB)": 13.7, "step": 8430, "train_speed(iter/s)": 1.557523 }, { "acc": 0.96796875, "epoch": 3.953597375205062, "grad_norm": 16.60121726989746, "learning_rate": 9.976891039410772e-06, "loss": 0.16074512, "memory(GiB)": 13.7, "step": 8435, "train_speed(iter/s)": 1.557524 }, { "acc": 0.94955387, "epoch": 3.955940942113897, "grad_norm": 37.6981086730957, "learning_rate": 9.97681654007521e-06, "loss": 0.35412054, "memory(GiB)": 13.7, "step": 8440, "train_speed(iter/s)": 1.557511 }, { "acc": 0.95912209, "epoch": 3.9582845090227323, "grad_norm": 6.005030632019043, "learning_rate": 9.976741921125348e-06, "loss": 0.23905902, "memory(GiB)": 13.7, "step": 8445, "train_speed(iter/s)": 1.557522 }, { "acc": 0.9393589, "epoch": 3.960628075931568, "grad_norm": 8.474737167358398, "learning_rate": 9.97666718256298e-06, "loss": 0.42076187, "memory(GiB)": 13.7, "step": 8450, "train_speed(iter/s)": 1.557572 }, { "acc": 0.96349697, "epoch": 3.962971642840403, "grad_norm": 9.86483383178711, "learning_rate": 9.976592324389899e-06, "loss": 0.14452085, "memory(GiB)": 13.7, "step": 8455, "train_speed(iter/s)": 1.557578 }, { "acc": 0.93258228, "epoch": 3.9653152097492383, "grad_norm": 8.457627296447754, "learning_rate": 9.976517346607906e-06, "loss": 0.45211143, "memory(GiB)": 13.7, "step": 8460, "train_speed(iter/s)": 1.557632 }, { "acc": 0.92964411, "epoch": 3.9676587766580735, "grad_norm": 10.651090621948242, "learning_rate": 9.976442249218804e-06, "loss": 0.47508421, "memory(GiB)": 13.7, "step": 8465, "train_speed(iter/s)": 1.557665 }, { "acc": 0.9666851, "epoch": 3.9700023435669087, "grad_norm": 7.139288902282715, "learning_rate": 9.976367032224397e-06, "loss": 0.19548273, "memory(GiB)": 13.7, "step": 8470, "train_speed(iter/s)": 1.557705 }, { "acc": 0.97764616, "epoch": 3.972345910475744, "grad_norm": 2.4417524337768555, "learning_rate": 9.976291695626496e-06, "loss": 0.15598292, "memory(GiB)": 13.7, "step": 8475, "train_speed(iter/s)": 1.55771 }, { "acc": 0.95158138, "epoch": 3.9746894773845796, "grad_norm": 12.782565116882324, "learning_rate": 9.976216239426908e-06, "loss": 0.26536577, "memory(GiB)": 13.7, "step": 8480, "train_speed(iter/s)": 1.557735 }, { "acc": 0.9637455, "epoch": 3.9770330442934148, "grad_norm": 13.382307052612305, "learning_rate": 9.976140663627446e-06, "loss": 0.2007457, "memory(GiB)": 13.7, "step": 8485, "train_speed(iter/s)": 1.557712 }, { "acc": 0.94690351, "epoch": 3.97937661120225, "grad_norm": 24.623327255249023, "learning_rate": 9.97606496822993e-06, "loss": 0.37074199, "memory(GiB)": 13.7, "step": 8490, "train_speed(iter/s)": 1.557676 }, { "acc": 0.94267864, "epoch": 3.981720178111085, "grad_norm": 42.44209289550781, "learning_rate": 9.975989153236179e-06, "loss": 0.27653041, "memory(GiB)": 13.7, "step": 8495, "train_speed(iter/s)": 1.557722 }, { "acc": 0.96329365, "epoch": 3.9840637450199203, "grad_norm": 8.6561861038208, "learning_rate": 9.975913218648016e-06, "loss": 0.30928273, "memory(GiB)": 13.7, "step": 8500, "train_speed(iter/s)": 1.557762 }, { "acc": 0.96352005, "epoch": 3.9864073119287555, "grad_norm": 17.533254623413086, "learning_rate": 9.97583716446726e-06, "loss": 0.27731862, "memory(GiB)": 13.7, "step": 8505, "train_speed(iter/s)": 1.557777 }, { "acc": 0.9770834, "epoch": 3.9887508788375907, "grad_norm": 6.28672981262207, "learning_rate": 9.975760990695748e-06, "loss": 0.15138087, "memory(GiB)": 13.7, "step": 8510, "train_speed(iter/s)": 1.557703 }, { "acc": 0.94647961, "epoch": 3.991094445746426, "grad_norm": 28.207408905029297, "learning_rate": 9.975684697335305e-06, "loss": 0.41680059, "memory(GiB)": 13.7, "step": 8515, "train_speed(iter/s)": 1.557736 }, { "acc": 0.94522839, "epoch": 3.993438012655261, "grad_norm": 6.638087272644043, "learning_rate": 9.975608284387768e-06, "loss": 0.33362999, "memory(GiB)": 13.7, "step": 8520, "train_speed(iter/s)": 1.557742 }, { "acc": 0.94831848, "epoch": 3.9957815795640963, "grad_norm": 5.704758644104004, "learning_rate": 9.975531751854971e-06, "loss": 0.28758507, "memory(GiB)": 13.7, "step": 8525, "train_speed(iter/s)": 1.557763 }, { "acc": 0.93826637, "epoch": 3.998125146472932, "grad_norm": 9.694072723388672, "learning_rate": 9.975455099738756e-06, "loss": 0.44933925, "memory(GiB)": 13.7, "step": 8530, "train_speed(iter/s)": 1.557745 }, { "acc": 0.96322918, "epoch": 4.000468713381767, "grad_norm": 8.19826602935791, "learning_rate": 9.975378328040964e-06, "loss": 0.19754186, "memory(GiB)": 13.7, "step": 8535, "train_speed(iter/s)": 1.557676 }, { "acc": 0.95471725, "epoch": 4.002812280290602, "grad_norm": 10.830920219421387, "learning_rate": 9.975301436763439e-06, "loss": 0.23297067, "memory(GiB)": 13.7, "step": 8540, "train_speed(iter/s)": 1.557661 }, { "acc": 0.95065975, "epoch": 4.005155847199438, "grad_norm": 21.685033798217773, "learning_rate": 9.975224425908034e-06, "loss": 0.25850396, "memory(GiB)": 13.7, "step": 8545, "train_speed(iter/s)": 1.557691 }, { "acc": 0.94374676, "epoch": 4.007499414108273, "grad_norm": 12.82726001739502, "learning_rate": 9.975147295476594e-06, "loss": 0.34573092, "memory(GiB)": 13.7, "step": 8550, "train_speed(iter/s)": 1.557655 }, { "acc": 0.94693336, "epoch": 4.009842981017108, "grad_norm": 9.168801307678223, "learning_rate": 9.975070045470978e-06, "loss": 0.22844739, "memory(GiB)": 13.7, "step": 8555, "train_speed(iter/s)": 1.55768 }, { "acc": 0.96937504, "epoch": 4.0121865479259435, "grad_norm": 11.103073120117188, "learning_rate": 9.974992675893039e-06, "loss": 0.11025553, "memory(GiB)": 13.7, "step": 8560, "train_speed(iter/s)": 1.557671 }, { "acc": 0.95517864, "epoch": 4.014530114834779, "grad_norm": 10.46323299407959, "learning_rate": 9.974915186744642e-06, "loss": 0.25420239, "memory(GiB)": 13.7, "step": 8565, "train_speed(iter/s)": 1.557675 }, { "acc": 0.96109028, "epoch": 4.016873681743614, "grad_norm": 11.406269073486328, "learning_rate": 9.974837578027642e-06, "loss": 0.20151515, "memory(GiB)": 13.7, "step": 8570, "train_speed(iter/s)": 1.557679 }, { "acc": 0.94584723, "epoch": 4.019217248652449, "grad_norm": 3.5771477222442627, "learning_rate": 9.974759849743911e-06, "loss": 0.45129347, "memory(GiB)": 13.7, "step": 8575, "train_speed(iter/s)": 1.557636 }, { "acc": 0.95531397, "epoch": 4.021560815561284, "grad_norm": 21.567432403564453, "learning_rate": 9.974682001895314e-06, "loss": 0.25595479, "memory(GiB)": 13.7, "step": 8580, "train_speed(iter/s)": 1.557649 }, { "acc": 0.93798523, "epoch": 4.0239043824701195, "grad_norm": 10.266214370727539, "learning_rate": 9.974604034483723e-06, "loss": 0.34731493, "memory(GiB)": 13.7, "step": 8585, "train_speed(iter/s)": 1.557674 }, { "acc": 0.95195513, "epoch": 4.026247949378955, "grad_norm": 7.158225059509277, "learning_rate": 9.974525947511015e-06, "loss": 0.22550285, "memory(GiB)": 13.7, "step": 8590, "train_speed(iter/s)": 1.557678 }, { "acc": 0.94416351, "epoch": 4.02859151628779, "grad_norm": 19.215051651000977, "learning_rate": 9.974447740979061e-06, "loss": 0.34081264, "memory(GiB)": 13.7, "step": 8595, "train_speed(iter/s)": 1.557673 }, { "acc": 0.90668802, "epoch": 4.030935083196625, "grad_norm": 12.273200035095215, "learning_rate": 9.974369414889745e-06, "loss": 0.50327716, "memory(GiB)": 13.7, "step": 8600, "train_speed(iter/s)": 1.557689 }, { "acc": 0.95371876, "epoch": 4.03327865010546, "grad_norm": 9.317025184631348, "learning_rate": 9.97429096924495e-06, "loss": 0.16978709, "memory(GiB)": 13.7, "step": 8605, "train_speed(iter/s)": 1.557677 }, { "acc": 0.94008284, "epoch": 4.0356222170142955, "grad_norm": 37.68052673339844, "learning_rate": 9.974212404046559e-06, "loss": 0.36378074, "memory(GiB)": 13.7, "step": 8610, "train_speed(iter/s)": 1.557707 }, { "acc": 0.94003925, "epoch": 4.037965783923131, "grad_norm": 9.703641891479492, "learning_rate": 9.97413371929646e-06, "loss": 0.31697702, "memory(GiB)": 13.7, "step": 8615, "train_speed(iter/s)": 1.557711 }, { "acc": 0.96707258, "epoch": 4.040309350831966, "grad_norm": 12.87386417388916, "learning_rate": 9.974054914996547e-06, "loss": 0.18217156, "memory(GiB)": 13.7, "step": 8620, "train_speed(iter/s)": 1.557717 }, { "acc": 0.96427746, "epoch": 4.042652917740802, "grad_norm": 11.963872909545898, "learning_rate": 9.973975991148715e-06, "loss": 0.19174269, "memory(GiB)": 13.7, "step": 8625, "train_speed(iter/s)": 1.557701 }, { "acc": 0.9647419, "epoch": 4.044996484649637, "grad_norm": 7.7021484375, "learning_rate": 9.973896947754857e-06, "loss": 0.20769978, "memory(GiB)": 13.7, "step": 8630, "train_speed(iter/s)": 1.557708 }, { "acc": 0.9619174, "epoch": 4.047340051558472, "grad_norm": 14.065817832946777, "learning_rate": 9.973817784816879e-06, "loss": 0.23569674, "memory(GiB)": 13.7, "step": 8635, "train_speed(iter/s)": 1.557712 }, { "acc": 0.96608315, "epoch": 4.0496836184673075, "grad_norm": 118.14168548583984, "learning_rate": 9.973738502336677e-06, "loss": 0.19979427, "memory(GiB)": 13.7, "step": 8640, "train_speed(iter/s)": 1.557723 }, { "acc": 0.9602541, "epoch": 4.052027185376143, "grad_norm": 9.762447357177734, "learning_rate": 9.973659100316159e-06, "loss": 0.22536814, "memory(GiB)": 13.7, "step": 8645, "train_speed(iter/s)": 1.557747 }, { "acc": 0.9781662, "epoch": 4.054370752284978, "grad_norm": 3.071154832839966, "learning_rate": 9.973579578757236e-06, "loss": 0.12026713, "memory(GiB)": 13.7, "step": 8650, "train_speed(iter/s)": 1.557767 }, { "acc": 0.93889885, "epoch": 4.056714319193813, "grad_norm": 7.595761299133301, "learning_rate": 9.973499937661818e-06, "loss": 0.32239213, "memory(GiB)": 13.7, "step": 8655, "train_speed(iter/s)": 1.557836 }, { "acc": 0.9770834, "epoch": 4.059057886102648, "grad_norm": 4.8660383224487305, "learning_rate": 9.973420177031819e-06, "loss": 0.16137984, "memory(GiB)": 13.7, "step": 8660, "train_speed(iter/s)": 1.557803 }, { "acc": 0.95576048, "epoch": 4.0614014530114835, "grad_norm": 16.101781845092773, "learning_rate": 9.973340296869155e-06, "loss": 0.29557672, "memory(GiB)": 13.7, "step": 8665, "train_speed(iter/s)": 1.557767 }, { "acc": 0.95092258, "epoch": 4.063745019920319, "grad_norm": 28.428142547607422, "learning_rate": 9.973260297175747e-06, "loss": 0.18287351, "memory(GiB)": 13.7, "step": 8670, "train_speed(iter/s)": 1.55775 }, { "acc": 0.96599121, "epoch": 4.066088586829154, "grad_norm": 6.73831844329834, "learning_rate": 9.97318017795352e-06, "loss": 0.15221027, "memory(GiB)": 13.7, "step": 8675, "train_speed(iter/s)": 1.557759 }, { "acc": 0.96778412, "epoch": 4.068432153737989, "grad_norm": 10.673234939575195, "learning_rate": 9.973099939204398e-06, "loss": 0.21811814, "memory(GiB)": 13.7, "step": 8680, "train_speed(iter/s)": 1.557766 }, { "acc": 0.94633398, "epoch": 4.070775720646824, "grad_norm": 6.262928009033203, "learning_rate": 9.97301958093031e-06, "loss": 0.23852963, "memory(GiB)": 13.7, "step": 8685, "train_speed(iter/s)": 1.557786 }, { "acc": 0.94434032, "epoch": 4.073119287555659, "grad_norm": 7.320801734924316, "learning_rate": 9.972939103133184e-06, "loss": 0.34113674, "memory(GiB)": 13.7, "step": 8690, "train_speed(iter/s)": 1.557828 }, { "acc": 0.94793158, "epoch": 4.075462854464495, "grad_norm": 6.332176208496094, "learning_rate": 9.972858505814961e-06, "loss": 0.31718771, "memory(GiB)": 13.7, "step": 8695, "train_speed(iter/s)": 1.557807 }, { "acc": 0.95717258, "epoch": 4.07780642137333, "grad_norm": 13.147871971130371, "learning_rate": 9.972777788977574e-06, "loss": 0.32462399, "memory(GiB)": 13.7, "step": 8700, "train_speed(iter/s)": 1.557763 }, { "acc": 0.94023323, "epoch": 4.080149988282165, "grad_norm": 54.35652160644531, "learning_rate": 9.972696952622964e-06, "loss": 0.33894529, "memory(GiB)": 13.7, "step": 8705, "train_speed(iter/s)": 1.557733 }, { "acc": 0.96330357, "epoch": 4.082493555191001, "grad_norm": 6.033565521240234, "learning_rate": 9.972615996753074e-06, "loss": 0.16213211, "memory(GiB)": 13.7, "step": 8710, "train_speed(iter/s)": 1.557722 }, { "acc": 0.96526985, "epoch": 4.084837122099836, "grad_norm": 6.243305206298828, "learning_rate": 9.972534921369852e-06, "loss": 0.24962444, "memory(GiB)": 13.7, "step": 8715, "train_speed(iter/s)": 1.557744 }, { "acc": 0.96087303, "epoch": 4.0871806890086715, "grad_norm": 11.825312614440918, "learning_rate": 9.972453726475242e-06, "loss": 0.35708256, "memory(GiB)": 13.7, "step": 8720, "train_speed(iter/s)": 1.557793 }, { "acc": 0.94725304, "epoch": 4.089524255917507, "grad_norm": 9.918643951416016, "learning_rate": 9.972372412071201e-06, "loss": 0.26058538, "memory(GiB)": 13.7, "step": 8725, "train_speed(iter/s)": 1.557808 }, { "acc": 0.95322914, "epoch": 4.091867822826342, "grad_norm": 6.673951148986816, "learning_rate": 9.972290978159679e-06, "loss": 0.23256307, "memory(GiB)": 13.7, "step": 8730, "train_speed(iter/s)": 1.557781 }, { "acc": 0.97026482, "epoch": 4.094211389735177, "grad_norm": 8.790481567382812, "learning_rate": 9.972209424742637e-06, "loss": 0.21866291, "memory(GiB)": 13.7, "step": 8735, "train_speed(iter/s)": 1.557795 }, { "acc": 0.95931225, "epoch": 4.096554956644012, "grad_norm": 10.017904281616211, "learning_rate": 9.972127751822032e-06, "loss": 0.19711914, "memory(GiB)": 13.7, "step": 8740, "train_speed(iter/s)": 1.55782 }, { "acc": 0.93341484, "epoch": 4.0988985235528475, "grad_norm": 22.813385009765625, "learning_rate": 9.972045959399832e-06, "loss": 0.46294527, "memory(GiB)": 13.7, "step": 8745, "train_speed(iter/s)": 1.55786 }, { "acc": 0.94726191, "epoch": 4.101242090461683, "grad_norm": 22.92144012451172, "learning_rate": 9.971964047477996e-06, "loss": 0.31165123, "memory(GiB)": 13.7, "step": 8750, "train_speed(iter/s)": 1.557862 }, { "acc": 0.9345933, "epoch": 4.103585657370518, "grad_norm": 14.244552612304688, "learning_rate": 9.9718820160585e-06, "loss": 0.45318623, "memory(GiB)": 13.7, "step": 8755, "train_speed(iter/s)": 1.557903 }, { "acc": 0.96404591, "epoch": 4.105929224279353, "grad_norm": 7.625492095947266, "learning_rate": 9.971799865143312e-06, "loss": 0.27052412, "memory(GiB)": 13.7, "step": 8760, "train_speed(iter/s)": 1.557966 }, { "acc": 0.9703927, "epoch": 4.108272791188188, "grad_norm": 6.085126876831055, "learning_rate": 9.971717594734405e-06, "loss": 0.18795592, "memory(GiB)": 13.7, "step": 8765, "train_speed(iter/s)": 1.557941 }, { "acc": 0.9441, "epoch": 4.110616358097023, "grad_norm": 12.105391502380371, "learning_rate": 9.971635204833761e-06, "loss": 0.37940354, "memory(GiB)": 13.7, "step": 8770, "train_speed(iter/s)": 1.557938 }, { "acc": 0.97517853, "epoch": 4.112959925005859, "grad_norm": 7.898726463317871, "learning_rate": 9.971552695443359e-06, "loss": 0.13102994, "memory(GiB)": 13.7, "step": 8775, "train_speed(iter/s)": 1.557936 }, { "acc": 0.95940933, "epoch": 4.115303491914694, "grad_norm": 18.868247985839844, "learning_rate": 9.971470066565179e-06, "loss": 0.22193203, "memory(GiB)": 13.7, "step": 8780, "train_speed(iter/s)": 1.557943 }, { "acc": 0.97460222, "epoch": 4.117647058823529, "grad_norm": 8.967595100402832, "learning_rate": 9.971387318201212e-06, "loss": 0.18501618, "memory(GiB)": 13.7, "step": 8785, "train_speed(iter/s)": 1.557943 }, { "acc": 0.93222847, "epoch": 4.119990625732365, "grad_norm": 11.026995658874512, "learning_rate": 9.97130445035344e-06, "loss": 0.31293204, "memory(GiB)": 13.7, "step": 8790, "train_speed(iter/s)": 1.557981 }, { "acc": 0.96080008, "epoch": 4.1223341926412, "grad_norm": 12.330119132995605, "learning_rate": 9.971221463023863e-06, "loss": 0.30729451, "memory(GiB)": 13.7, "step": 8795, "train_speed(iter/s)": 1.557989 }, { "acc": 0.93769274, "epoch": 4.1246777595500355, "grad_norm": 15.444450378417969, "learning_rate": 9.971138356214472e-06, "loss": 0.3538795, "memory(GiB)": 13.7, "step": 8800, "train_speed(iter/s)": 1.558051 }, { "acc": 0.94375877, "epoch": 4.127021326458871, "grad_norm": 12.899019241333008, "learning_rate": 9.971055129927265e-06, "loss": 0.30725451, "memory(GiB)": 13.7, "step": 8805, "train_speed(iter/s)": 1.558077 }, { "acc": 0.9529438, "epoch": 4.129364893367706, "grad_norm": 33.14741516113281, "learning_rate": 9.97097178416424e-06, "loss": 0.37606378, "memory(GiB)": 13.7, "step": 8810, "train_speed(iter/s)": 1.558097 }, { "acc": 0.94451485, "epoch": 4.131708460276541, "grad_norm": 3.776658058166504, "learning_rate": 9.970888318927405e-06, "loss": 0.22389183, "memory(GiB)": 13.7, "step": 8815, "train_speed(iter/s)": 1.558069 }, { "acc": 0.96140871, "epoch": 4.134052027185376, "grad_norm": 6.308600425720215, "learning_rate": 9.970804734218762e-06, "loss": 0.22801027, "memory(GiB)": 13.7, "step": 8820, "train_speed(iter/s)": 1.55808 }, { "acc": 0.95738106, "epoch": 4.136395594094211, "grad_norm": 11.069939613342285, "learning_rate": 9.970721030040323e-06, "loss": 0.21474102, "memory(GiB)": 13.7, "step": 8825, "train_speed(iter/s)": 1.558065 }, { "acc": 0.94538155, "epoch": 4.138739161003047, "grad_norm": 5.111324310302734, "learning_rate": 9.970637206394098e-06, "loss": 0.3232651, "memory(GiB)": 13.7, "step": 8830, "train_speed(iter/s)": 1.558073 }, { "acc": 0.94497032, "epoch": 4.141082727911882, "grad_norm": 10.174749374389648, "learning_rate": 9.970553263282103e-06, "loss": 0.36592522, "memory(GiB)": 13.7, "step": 8835, "train_speed(iter/s)": 1.558096 }, { "acc": 0.93607216, "epoch": 4.143426294820717, "grad_norm": 20.69565773010254, "learning_rate": 9.970469200706357e-06, "loss": 0.30929849, "memory(GiB)": 13.7, "step": 8840, "train_speed(iter/s)": 1.558118 }, { "acc": 0.9442853, "epoch": 4.145769861729552, "grad_norm": 2.1301002502441406, "learning_rate": 9.970385018668876e-06, "loss": 0.28298755, "memory(GiB)": 13.7, "step": 8845, "train_speed(iter/s)": 1.558104 }, { "acc": 0.95313492, "epoch": 4.148113428638387, "grad_norm": 5.867552280426025, "learning_rate": 9.97030071717169e-06, "loss": 0.24125612, "memory(GiB)": 13.7, "step": 8850, "train_speed(iter/s)": 1.558148 }, { "acc": 0.9555582, "epoch": 4.150456995547223, "grad_norm": 27.093242645263672, "learning_rate": 9.970216296216819e-06, "loss": 0.22973926, "memory(GiB)": 13.7, "step": 8855, "train_speed(iter/s)": 1.558133 }, { "acc": 0.97264223, "epoch": 4.152800562456058, "grad_norm": 5.118619441986084, "learning_rate": 9.970131755806297e-06, "loss": 0.22279978, "memory(GiB)": 13.7, "step": 8860, "train_speed(iter/s)": 1.558073 }, { "acc": 0.96182156, "epoch": 4.155144129364893, "grad_norm": 7.485621452331543, "learning_rate": 9.970047095942153e-06, "loss": 0.22187548, "memory(GiB)": 13.7, "step": 8865, "train_speed(iter/s)": 1.558054 }, { "acc": 0.95377674, "epoch": 4.157487696273728, "grad_norm": 4.926938056945801, "learning_rate": 9.969962316626422e-06, "loss": 0.22543666, "memory(GiB)": 13.7, "step": 8870, "train_speed(iter/s)": 1.558072 }, { "acc": 0.95007963, "epoch": 4.159831263182564, "grad_norm": 16.65636444091797, "learning_rate": 9.969877417861146e-06, "loss": 0.3717063, "memory(GiB)": 13.7, "step": 8875, "train_speed(iter/s)": 1.558096 }, { "acc": 0.96267242, "epoch": 4.162174830091399, "grad_norm": 169.4681396484375, "learning_rate": 9.96979239964836e-06, "loss": 0.23185818, "memory(GiB)": 13.7, "step": 8880, "train_speed(iter/s)": 1.558149 }, { "acc": 0.96251869, "epoch": 4.164518397000235, "grad_norm": 38.743404388427734, "learning_rate": 9.96970726199011e-06, "loss": 0.19390569, "memory(GiB)": 13.7, "step": 8885, "train_speed(iter/s)": 1.558138 }, { "acc": 0.95484848, "epoch": 4.16686196390907, "grad_norm": 12.127245903015137, "learning_rate": 9.969622004888443e-06, "loss": 0.30735064, "memory(GiB)": 13.7, "step": 8890, "train_speed(iter/s)": 1.55813 }, { "acc": 0.96500502, "epoch": 4.169205530817905, "grad_norm": 19.074872970581055, "learning_rate": 9.969536628345408e-06, "loss": 0.16777823, "memory(GiB)": 13.7, "step": 8895, "train_speed(iter/s)": 1.558128 }, { "acc": 0.95344925, "epoch": 4.17154909772674, "grad_norm": 7.504146575927734, "learning_rate": 9.969451132363058e-06, "loss": 0.37954316, "memory(GiB)": 13.7, "step": 8900, "train_speed(iter/s)": 1.558106 }, { "acc": 0.9552083, "epoch": 4.173892664635575, "grad_norm": 3.7076892852783203, "learning_rate": 9.969365516943446e-06, "loss": 0.30367231, "memory(GiB)": 13.7, "step": 8905, "train_speed(iter/s)": 1.558096 }, { "acc": 0.95904217, "epoch": 4.176236231544411, "grad_norm": 7.04235315322876, "learning_rate": 9.969279782088633e-06, "loss": 0.27393279, "memory(GiB)": 13.7, "step": 8910, "train_speed(iter/s)": 1.558057 }, { "acc": 0.94389687, "epoch": 4.178579798453246, "grad_norm": 13.093605041503906, "learning_rate": 9.969193927800675e-06, "loss": 0.29033294, "memory(GiB)": 13.7, "step": 8915, "train_speed(iter/s)": 1.558078 }, { "acc": 0.96087208, "epoch": 4.180923365362081, "grad_norm": 40.1126708984375, "learning_rate": 9.969107954081639e-06, "loss": 0.26475654, "memory(GiB)": 13.7, "step": 8920, "train_speed(iter/s)": 1.558094 }, { "acc": 0.95608025, "epoch": 4.183266932270916, "grad_norm": 11.886431694030762, "learning_rate": 9.969021860933591e-06, "loss": 0.22255356, "memory(GiB)": 13.7, "step": 8925, "train_speed(iter/s)": 1.558091 }, { "acc": 0.96367407, "epoch": 4.185610499179751, "grad_norm": 9.242759704589844, "learning_rate": 9.968935648358599e-06, "loss": 0.18981264, "memory(GiB)": 13.7, "step": 8930, "train_speed(iter/s)": 1.558124 }, { "acc": 0.95696115, "epoch": 4.187954066088587, "grad_norm": 28.501544952392578, "learning_rate": 9.968849316358738e-06, "loss": 0.27399836, "memory(GiB)": 13.7, "step": 8935, "train_speed(iter/s)": 1.558133 }, { "acc": 0.96895828, "epoch": 4.190297632997422, "grad_norm": 14.681329727172852, "learning_rate": 9.968762864936084e-06, "loss": 0.16651957, "memory(GiB)": 13.7, "step": 8940, "train_speed(iter/s)": 1.558121 }, { "acc": 0.9746726, "epoch": 4.192641199906257, "grad_norm": 11.697785377502441, "learning_rate": 9.96867629409271e-06, "loss": 0.15538654, "memory(GiB)": 13.7, "step": 8945, "train_speed(iter/s)": 1.558081 }, { "acc": 0.95844574, "epoch": 4.194984766815093, "grad_norm": 77.59296417236328, "learning_rate": 9.9685896038307e-06, "loss": 0.25815659, "memory(GiB)": 13.7, "step": 8950, "train_speed(iter/s)": 1.558118 }, { "acc": 0.9669611, "epoch": 4.197328333723928, "grad_norm": 10.381855010986328, "learning_rate": 9.968502794152137e-06, "loss": 0.26587524, "memory(GiB)": 13.7, "step": 8955, "train_speed(iter/s)": 1.558088 }, { "acc": 0.9458478, "epoch": 4.199671900632763, "grad_norm": 15.72500228881836, "learning_rate": 9.96841586505911e-06, "loss": 0.31492114, "memory(GiB)": 13.7, "step": 8960, "train_speed(iter/s)": 1.558089 }, { "acc": 0.95147972, "epoch": 4.202015467541599, "grad_norm": 11.364391326904297, "learning_rate": 9.968328816553704e-06, "loss": 0.27822859, "memory(GiB)": 13.7, "step": 8965, "train_speed(iter/s)": 1.558112 }, { "acc": 0.97044601, "epoch": 4.204359034450434, "grad_norm": 4.160978317260742, "learning_rate": 9.968241648638015e-06, "loss": 0.17285986, "memory(GiB)": 13.7, "step": 8970, "train_speed(iter/s)": 1.558149 }, { "acc": 0.9410677, "epoch": 4.206702601359269, "grad_norm": 21.038190841674805, "learning_rate": 9.968154361314136e-06, "loss": 0.46553898, "memory(GiB)": 13.7, "step": 8975, "train_speed(iter/s)": 1.558166 }, { "acc": 0.97093754, "epoch": 4.209046168268104, "grad_norm": 16.366191864013672, "learning_rate": 9.968066954584169e-06, "loss": 0.14663126, "memory(GiB)": 13.7, "step": 8980, "train_speed(iter/s)": 1.558168 }, { "acc": 0.95873775, "epoch": 4.211389735176939, "grad_norm": 19.55923843383789, "learning_rate": 9.967979428450208e-06, "loss": 0.26031361, "memory(GiB)": 13.7, "step": 8985, "train_speed(iter/s)": 1.558152 }, { "acc": 0.94180527, "epoch": 4.213733302085775, "grad_norm": 6.4826765060424805, "learning_rate": 9.967891782914364e-06, "loss": 0.28993835, "memory(GiB)": 13.7, "step": 8990, "train_speed(iter/s)": 1.558143 }, { "acc": 0.96710072, "epoch": 4.21607686899461, "grad_norm": 29.888683319091797, "learning_rate": 9.967804017978739e-06, "loss": 0.18387917, "memory(GiB)": 13.7, "step": 8995, "train_speed(iter/s)": 1.558112 }, { "acc": 0.95675592, "epoch": 4.218420435903445, "grad_norm": 7.885878562927246, "learning_rate": 9.967716133645446e-06, "loss": 0.22856069, "memory(GiB)": 13.7, "step": 9000, "train_speed(iter/s)": 1.55811 }, { "acc": 0.95493422, "epoch": 4.22076400281228, "grad_norm": 26.371583938598633, "learning_rate": 9.967628129916596e-06, "loss": 0.30614786, "memory(GiB)": 13.7, "step": 9005, "train_speed(iter/s)": 1.558101 }, { "acc": 0.96422625, "epoch": 4.223107569721115, "grad_norm": 27.36546516418457, "learning_rate": 9.967540006794302e-06, "loss": 0.24278989, "memory(GiB)": 13.7, "step": 9010, "train_speed(iter/s)": 1.5581 }, { "acc": 0.96333332, "epoch": 4.2254511366299505, "grad_norm": 4.455219268798828, "learning_rate": 9.967451764280686e-06, "loss": 0.19581724, "memory(GiB)": 13.7, "step": 9015, "train_speed(iter/s)": 1.558109 }, { "acc": 0.98359203, "epoch": 4.227794703538786, "grad_norm": 3.40023136138916, "learning_rate": 9.967363402377866e-06, "loss": 0.09182311, "memory(GiB)": 13.7, "step": 9020, "train_speed(iter/s)": 1.558097 }, { "acc": 0.96176958, "epoch": 4.230138270447621, "grad_norm": 6.676788806915283, "learning_rate": 9.967274921087966e-06, "loss": 0.19153165, "memory(GiB)": 13.7, "step": 9025, "train_speed(iter/s)": 1.558093 }, { "acc": 0.95936117, "epoch": 4.232481837356456, "grad_norm": 73.51971435546875, "learning_rate": 9.967186320413116e-06, "loss": 0.24914365, "memory(GiB)": 13.7, "step": 9030, "train_speed(iter/s)": 1.558089 }, { "acc": 0.93335085, "epoch": 4.234825404265292, "grad_norm": 3.432870626449585, "learning_rate": 9.967097600355445e-06, "loss": 0.40344238, "memory(GiB)": 13.7, "step": 9035, "train_speed(iter/s)": 1.558067 }, { "acc": 0.97872601, "epoch": 4.237168971174127, "grad_norm": 5.966979503631592, "learning_rate": 9.967008760917083e-06, "loss": 0.14164972, "memory(GiB)": 13.7, "step": 9040, "train_speed(iter/s)": 1.558074 }, { "acc": 0.96568975, "epoch": 4.239512538082963, "grad_norm": 6.950619697570801, "learning_rate": 9.966919802100163e-06, "loss": 0.18552151, "memory(GiB)": 13.7, "step": 9045, "train_speed(iter/s)": 1.558071 }, { "acc": 0.95684528, "epoch": 4.241856104991798, "grad_norm": 6.729736804962158, "learning_rate": 9.966830723906832e-06, "loss": 0.22765117, "memory(GiB)": 13.7, "step": 9050, "train_speed(iter/s)": 1.558065 }, { "acc": 0.95706358, "epoch": 4.244199671900633, "grad_norm": 15.511008262634277, "learning_rate": 9.966741526339224e-06, "loss": 0.37706118, "memory(GiB)": 13.7, "step": 9055, "train_speed(iter/s)": 1.558126 }, { "acc": 0.98593063, "epoch": 4.246543238809468, "grad_norm": 10.313962936401367, "learning_rate": 9.966652209399485e-06, "loss": 0.09273647, "memory(GiB)": 13.7, "step": 9060, "train_speed(iter/s)": 1.558145 }, { "acc": 0.93945932, "epoch": 4.248886805718303, "grad_norm": 18.062877655029297, "learning_rate": 9.966562773089763e-06, "loss": 0.32685013, "memory(GiB)": 13.7, "step": 9065, "train_speed(iter/s)": 1.558169 }, { "acc": 0.95958061, "epoch": 4.2512303726271385, "grad_norm": 6.67808723449707, "learning_rate": 9.966473217412205e-06, "loss": 0.26205597, "memory(GiB)": 13.7, "step": 9070, "train_speed(iter/s)": 1.55815 }, { "acc": 0.93955116, "epoch": 4.253573939535974, "grad_norm": 8.579763412475586, "learning_rate": 9.966383542368967e-06, "loss": 0.40171828, "memory(GiB)": 13.7, "step": 9075, "train_speed(iter/s)": 1.558144 }, { "acc": 0.95307541, "epoch": 4.255917506444809, "grad_norm": 10.428237915039062, "learning_rate": 9.9662937479622e-06, "loss": 0.27047768, "memory(GiB)": 13.7, "step": 9080, "train_speed(iter/s)": 1.558118 }, { "acc": 0.96848145, "epoch": 4.258261073353644, "grad_norm": 10.075825691223145, "learning_rate": 9.966203834194068e-06, "loss": 0.18657625, "memory(GiB)": 13.7, "step": 9085, "train_speed(iter/s)": 1.558125 }, { "acc": 0.98712215, "epoch": 4.260604640262479, "grad_norm": 12.681378364562988, "learning_rate": 9.96611380106673e-06, "loss": 0.13716469, "memory(GiB)": 13.7, "step": 9090, "train_speed(iter/s)": 1.558102 }, { "acc": 0.95977182, "epoch": 4.2629482071713145, "grad_norm": 14.502508163452148, "learning_rate": 9.966023648582349e-06, "loss": 0.30769854, "memory(GiB)": 13.7, "step": 9095, "train_speed(iter/s)": 1.558114 }, { "acc": 0.94432182, "epoch": 4.26529177408015, "grad_norm": 5.54386043548584, "learning_rate": 9.965933376743092e-06, "loss": 0.29093549, "memory(GiB)": 13.7, "step": 9100, "train_speed(iter/s)": 1.558136 }, { "acc": 0.96296587, "epoch": 4.267635340988985, "grad_norm": 5.6589765548706055, "learning_rate": 9.965842985551127e-06, "loss": 0.20099144, "memory(GiB)": 13.7, "step": 9105, "train_speed(iter/s)": 1.558144 }, { "acc": 0.95246534, "epoch": 4.26997890789782, "grad_norm": 17.765949249267578, "learning_rate": 9.96575247500863e-06, "loss": 0.27851577, "memory(GiB)": 13.7, "step": 9110, "train_speed(iter/s)": 1.558127 }, { "acc": 0.95388813, "epoch": 4.272322474806655, "grad_norm": 4.893039226531982, "learning_rate": 9.965661845117777e-06, "loss": 0.31220913, "memory(GiB)": 13.7, "step": 9115, "train_speed(iter/s)": 1.558152 }, { "acc": 0.9606575, "epoch": 4.274666041715491, "grad_norm": 8.143577575683594, "learning_rate": 9.965571095880745e-06, "loss": 0.30228276, "memory(GiB)": 13.7, "step": 9120, "train_speed(iter/s)": 1.558152 }, { "acc": 0.94344063, "epoch": 4.2770096086243266, "grad_norm": 11.573471069335938, "learning_rate": 9.965480227299716e-06, "loss": 0.29945333, "memory(GiB)": 13.7, "step": 9125, "train_speed(iter/s)": 1.558153 }, { "acc": 0.94664564, "epoch": 4.279353175533162, "grad_norm": 7.532103061676025, "learning_rate": 9.965389239376872e-06, "loss": 0.34014587, "memory(GiB)": 13.7, "step": 9130, "train_speed(iter/s)": 1.558171 }, { "acc": 0.95242586, "epoch": 4.281696742441997, "grad_norm": 6.809294700622559, "learning_rate": 9.965298132114404e-06, "loss": 0.20802259, "memory(GiB)": 13.7, "step": 9135, "train_speed(iter/s)": 1.558159 }, { "acc": 0.96290178, "epoch": 4.284040309350832, "grad_norm": 11.806683540344238, "learning_rate": 9.965206905514498e-06, "loss": 0.25123246, "memory(GiB)": 13.7, "step": 9140, "train_speed(iter/s)": 1.558128 }, { "acc": 0.95695419, "epoch": 4.286383876259667, "grad_norm": 7.1020636558532715, "learning_rate": 9.965115559579348e-06, "loss": 0.32436836, "memory(GiB)": 13.7, "step": 9145, "train_speed(iter/s)": 1.5581 }, { "acc": 0.97724705, "epoch": 4.2887274431685025, "grad_norm": 5.523575305938721, "learning_rate": 9.96502409431115e-06, "loss": 0.20933871, "memory(GiB)": 13.7, "step": 9150, "train_speed(iter/s)": 1.558107 }, { "acc": 0.95775299, "epoch": 4.291071010077338, "grad_norm": 3.3120899200439453, "learning_rate": 9.964932509712102e-06, "loss": 0.28712192, "memory(GiB)": 13.7, "step": 9155, "train_speed(iter/s)": 1.558106 }, { "acc": 0.9681345, "epoch": 4.293414576986173, "grad_norm": 74.85761260986328, "learning_rate": 9.964840805784407e-06, "loss": 0.22841656, "memory(GiB)": 13.7, "step": 9160, "train_speed(iter/s)": 1.55815 }, { "acc": 0.97012806, "epoch": 4.295758143895008, "grad_norm": 3.1332743167877197, "learning_rate": 9.964748982530268e-06, "loss": 0.20210586, "memory(GiB)": 13.7, "step": 9165, "train_speed(iter/s)": 1.558131 }, { "acc": 0.9513361, "epoch": 4.298101710803843, "grad_norm": 5.349430084228516, "learning_rate": 9.964657039951892e-06, "loss": 0.24300256, "memory(GiB)": 13.7, "step": 9170, "train_speed(iter/s)": 1.558123 }, { "acc": 0.94798613, "epoch": 4.3004452777126785, "grad_norm": 8.904172897338867, "learning_rate": 9.964564978051488e-06, "loss": 0.28630571, "memory(GiB)": 13.7, "step": 9175, "train_speed(iter/s)": 1.558149 }, { "acc": 0.9408699, "epoch": 4.302788844621514, "grad_norm": 16.646326065063477, "learning_rate": 9.964472796831272e-06, "loss": 0.36948814, "memory(GiB)": 13.7, "step": 9180, "train_speed(iter/s)": 1.558191 }, { "acc": 0.94095097, "epoch": 4.305132411530349, "grad_norm": 2.9344329833984375, "learning_rate": 9.964380496293457e-06, "loss": 0.37658985, "memory(GiB)": 13.7, "step": 9185, "train_speed(iter/s)": 1.558206 }, { "acc": 0.95233784, "epoch": 4.307475978439184, "grad_norm": 8.085441589355469, "learning_rate": 9.964288076440264e-06, "loss": 0.27194951, "memory(GiB)": 13.7, "step": 9190, "train_speed(iter/s)": 1.558217 }, { "acc": 0.94782848, "epoch": 4.30981954534802, "grad_norm": 14.2593994140625, "learning_rate": 9.964195537273911e-06, "loss": 0.27904649, "memory(GiB)": 13.7, "step": 9195, "train_speed(iter/s)": 1.558214 }, { "acc": 0.94377804, "epoch": 4.312163112256855, "grad_norm": 11.294477462768555, "learning_rate": 9.964102878796624e-06, "loss": 0.29717784, "memory(GiB)": 13.7, "step": 9200, "train_speed(iter/s)": 1.558192 }, { "acc": 0.94322309, "epoch": 4.3145066791656905, "grad_norm": 11.217281341552734, "learning_rate": 9.964010101010633e-06, "loss": 0.34520035, "memory(GiB)": 13.7, "step": 9205, "train_speed(iter/s)": 1.558178 }, { "acc": 0.94618559, "epoch": 4.316850246074526, "grad_norm": 15.974347114562988, "learning_rate": 9.963917203918164e-06, "loss": 0.32582738, "memory(GiB)": 13.7, "step": 9210, "train_speed(iter/s)": 1.5582 }, { "acc": 0.93810501, "epoch": 4.319193812983361, "grad_norm": 8.196094512939453, "learning_rate": 9.963824187521451e-06, "loss": 0.33228402, "memory(GiB)": 13.7, "step": 9215, "train_speed(iter/s)": 1.558168 }, { "acc": 0.96842804, "epoch": 4.321537379892196, "grad_norm": 3.525683641433716, "learning_rate": 9.963731051822732e-06, "loss": 0.19875023, "memory(GiB)": 13.7, "step": 9220, "train_speed(iter/s)": 1.558167 }, { "acc": 0.96952877, "epoch": 4.323880946801031, "grad_norm": 2.163649320602417, "learning_rate": 9.963637796824243e-06, "loss": 0.1785224, "memory(GiB)": 13.7, "step": 9225, "train_speed(iter/s)": 1.558164 }, { "acc": 0.9416666, "epoch": 4.3262245137098665, "grad_norm": 6.036410808563232, "learning_rate": 9.963544422528226e-06, "loss": 0.29040632, "memory(GiB)": 13.7, "step": 9230, "train_speed(iter/s)": 1.558152 }, { "acc": 0.94260149, "epoch": 4.328568080618702, "grad_norm": 27.336509704589844, "learning_rate": 9.963450928936927e-06, "loss": 0.31789393, "memory(GiB)": 13.7, "step": 9235, "train_speed(iter/s)": 1.558145 }, { "acc": 0.94455357, "epoch": 4.330911647527537, "grad_norm": 10.048020362854004, "learning_rate": 9.963357316052591e-06, "loss": 0.26157506, "memory(GiB)": 13.7, "step": 9240, "train_speed(iter/s)": 1.558181 }, { "acc": 0.96429939, "epoch": 4.333255214436372, "grad_norm": 8.983186721801758, "learning_rate": 9.96326358387747e-06, "loss": 0.20912504, "memory(GiB)": 13.7, "step": 9245, "train_speed(iter/s)": 1.558187 }, { "acc": 0.98311014, "epoch": 4.335598781345207, "grad_norm": 2.665618419647217, "learning_rate": 9.963169732413816e-06, "loss": 0.06853112, "memory(GiB)": 13.7, "step": 9250, "train_speed(iter/s)": 1.558183 }, { "acc": 0.94381151, "epoch": 4.3379423482540425, "grad_norm": 13.997095108032227, "learning_rate": 9.963075761663886e-06, "loss": 0.41156311, "memory(GiB)": 13.7, "step": 9255, "train_speed(iter/s)": 1.55823 }, { "acc": 0.92966633, "epoch": 4.340285915162878, "grad_norm": 15.093661308288574, "learning_rate": 9.962981671629938e-06, "loss": 0.30143259, "memory(GiB)": 13.7, "step": 9260, "train_speed(iter/s)": 1.558214 }, { "acc": 0.95826464, "epoch": 4.342629482071713, "grad_norm": 13.27857494354248, "learning_rate": 9.962887462314232e-06, "loss": 0.21777215, "memory(GiB)": 13.7, "step": 9265, "train_speed(iter/s)": 1.558184 }, { "acc": 0.96068459, "epoch": 4.344973048980548, "grad_norm": 10.696944236755371, "learning_rate": 9.962793133719038e-06, "loss": 0.25156786, "memory(GiB)": 13.7, "step": 9270, "train_speed(iter/s)": 1.55821 }, { "acc": 0.94809666, "epoch": 4.347316615889383, "grad_norm": 5.83935022354126, "learning_rate": 9.962698685846614e-06, "loss": 0.34642282, "memory(GiB)": 13.7, "step": 9275, "train_speed(iter/s)": 1.558188 }, { "acc": 0.95458336, "epoch": 4.349660182798219, "grad_norm": 8.767643928527832, "learning_rate": 9.96260411869924e-06, "loss": 0.30920615, "memory(GiB)": 13.7, "step": 9280, "train_speed(iter/s)": 1.558237 }, { "acc": 0.95754528, "epoch": 4.3520037497070545, "grad_norm": 81.01089477539062, "learning_rate": 9.962509432279183e-06, "loss": 0.26385524, "memory(GiB)": 13.7, "step": 9285, "train_speed(iter/s)": 1.558241 }, { "acc": 0.96095238, "epoch": 4.35434731661589, "grad_norm": 8.155451774597168, "learning_rate": 9.962414626588723e-06, "loss": 0.15362697, "memory(GiB)": 13.7, "step": 9290, "train_speed(iter/s)": 1.558226 }, { "acc": 0.95902033, "epoch": 4.356690883524725, "grad_norm": 6.6356425285339355, "learning_rate": 9.962319701630133e-06, "loss": 0.220768, "memory(GiB)": 13.7, "step": 9295, "train_speed(iter/s)": 1.558252 }, { "acc": 0.96042862, "epoch": 4.35903445043356, "grad_norm": 2.872788906097412, "learning_rate": 9.962224657405701e-06, "loss": 0.33693848, "memory(GiB)": 13.7, "step": 9300, "train_speed(iter/s)": 1.558263 }, { "acc": 0.9303669, "epoch": 4.361378017342395, "grad_norm": 27.959604263305664, "learning_rate": 9.962129493917705e-06, "loss": 0.3489789, "memory(GiB)": 13.7, "step": 9305, "train_speed(iter/s)": 1.558266 }, { "acc": 0.9727335, "epoch": 4.3637215842512305, "grad_norm": 5.656519412994385, "learning_rate": 9.96203421116844e-06, "loss": 0.16519727, "memory(GiB)": 13.7, "step": 9310, "train_speed(iter/s)": 1.558278 }, { "acc": 0.97196426, "epoch": 4.366065151160066, "grad_norm": 11.863686561584473, "learning_rate": 9.96193880916019e-06, "loss": 0.14966639, "memory(GiB)": 13.7, "step": 9315, "train_speed(iter/s)": 1.558311 }, { "acc": 0.95608587, "epoch": 4.368408718068901, "grad_norm": 7.238772869110107, "learning_rate": 9.961843287895253e-06, "loss": 0.17651556, "memory(GiB)": 13.7, "step": 9320, "train_speed(iter/s)": 1.558279 }, { "acc": 0.9485836, "epoch": 4.370752284977736, "grad_norm": 5.261313438415527, "learning_rate": 9.96174764737592e-06, "loss": 0.32361436, "memory(GiB)": 13.7, "step": 9325, "train_speed(iter/s)": 1.558237 }, { "acc": 0.94889374, "epoch": 4.373095851886571, "grad_norm": 9.05029296875, "learning_rate": 9.961651887604495e-06, "loss": 0.27932878, "memory(GiB)": 13.7, "step": 9330, "train_speed(iter/s)": 1.558244 }, { "acc": 0.96974697, "epoch": 4.375439418795406, "grad_norm": 0.06927665323019028, "learning_rate": 9.961556008583277e-06, "loss": 0.22936144, "memory(GiB)": 13.7, "step": 9335, "train_speed(iter/s)": 1.558267 }, { "acc": 0.94777088, "epoch": 4.377782985704242, "grad_norm": 11.423491477966309, "learning_rate": 9.96146001031457e-06, "loss": 0.35954411, "memory(GiB)": 13.7, "step": 9340, "train_speed(iter/s)": 1.558278 }, { "acc": 0.95359936, "epoch": 4.380126552613077, "grad_norm": 7.212821006774902, "learning_rate": 9.961363892800682e-06, "loss": 0.17874825, "memory(GiB)": 13.7, "step": 9345, "train_speed(iter/s)": 1.558254 }, { "acc": 0.94624996, "epoch": 4.382470119521912, "grad_norm": 64.09770202636719, "learning_rate": 9.961267656043923e-06, "loss": 0.21884561, "memory(GiB)": 13.7, "step": 9350, "train_speed(iter/s)": 1.558247 }, { "acc": 0.95449657, "epoch": 4.384813686430747, "grad_norm": 151.62550354003906, "learning_rate": 9.96117130004661e-06, "loss": 0.34130447, "memory(GiB)": 13.7, "step": 9355, "train_speed(iter/s)": 1.558218 }, { "acc": 0.94472179, "epoch": 4.387157253339582, "grad_norm": 15.725936889648438, "learning_rate": 9.961074824811054e-06, "loss": 0.22752798, "memory(GiB)": 13.7, "step": 9360, "train_speed(iter/s)": 1.558216 }, { "acc": 0.93377686, "epoch": 4.3895008202484185, "grad_norm": 12.589905738830566, "learning_rate": 9.960978230339575e-06, "loss": 0.24295943, "memory(GiB)": 13.7, "step": 9365, "train_speed(iter/s)": 1.558208 }, { "acc": 0.97810049, "epoch": 4.391844387157254, "grad_norm": 1.3299790620803833, "learning_rate": 9.960881516634498e-06, "loss": 0.09071797, "memory(GiB)": 13.7, "step": 9370, "train_speed(iter/s)": 1.558218 }, { "acc": 0.97519665, "epoch": 4.394187954066089, "grad_norm": 7.684833526611328, "learning_rate": 9.960784683698143e-06, "loss": 0.1491575, "memory(GiB)": 13.7, "step": 9375, "train_speed(iter/s)": 1.558178 }, { "acc": 0.95841217, "epoch": 4.396531520974924, "grad_norm": 8.927494049072266, "learning_rate": 9.960687731532843e-06, "loss": 0.3033566, "memory(GiB)": 13.7, "step": 9380, "train_speed(iter/s)": 1.558154 }, { "acc": 0.96878576, "epoch": 4.398875087883759, "grad_norm": 25.641265869140625, "learning_rate": 9.960590660140923e-06, "loss": 0.14587843, "memory(GiB)": 13.7, "step": 9385, "train_speed(iter/s)": 1.558153 }, { "acc": 0.98101196, "epoch": 4.401218654792594, "grad_norm": 7.002072334289551, "learning_rate": 9.96049346952472e-06, "loss": 0.12630527, "memory(GiB)": 13.7, "step": 9390, "train_speed(iter/s)": 1.558166 }, { "acc": 0.97178974, "epoch": 4.40356222170143, "grad_norm": 18.5343074798584, "learning_rate": 9.960396159686569e-06, "loss": 0.15820123, "memory(GiB)": 13.7, "step": 9395, "train_speed(iter/s)": 1.558167 }, { "acc": 0.94627419, "epoch": 4.405905788610265, "grad_norm": 17.014266967773438, "learning_rate": 9.960298730628807e-06, "loss": 0.3156863, "memory(GiB)": 13.7, "step": 9400, "train_speed(iter/s)": 1.558168 }, { "acc": 0.94575577, "epoch": 4.4082493555191, "grad_norm": 12.506475448608398, "learning_rate": 9.960201182353779e-06, "loss": 0.34180443, "memory(GiB)": 13.7, "step": 9405, "train_speed(iter/s)": 1.558138 }, { "acc": 0.95524797, "epoch": 4.410592922427935, "grad_norm": 3.2441318035125732, "learning_rate": 9.960103514863828e-06, "loss": 0.17957035, "memory(GiB)": 13.7, "step": 9410, "train_speed(iter/s)": 1.558137 }, { "acc": 0.95084438, "epoch": 4.41293648933677, "grad_norm": 6.802974700927734, "learning_rate": 9.960005728161302e-06, "loss": 0.33195701, "memory(GiB)": 13.7, "step": 9415, "train_speed(iter/s)": 1.558162 }, { "acc": 0.94709511, "epoch": 4.415280056245606, "grad_norm": 8.725379943847656, "learning_rate": 9.959907822248553e-06, "loss": 0.3680644, "memory(GiB)": 13.7, "step": 9420, "train_speed(iter/s)": 1.558185 }, { "acc": 0.96428528, "epoch": 4.417623623154441, "grad_norm": 12.260334014892578, "learning_rate": 9.959809797127931e-06, "loss": 0.23000472, "memory(GiB)": 13.7, "step": 9425, "train_speed(iter/s)": 1.55815 }, { "acc": 0.95009918, "epoch": 4.419967190063276, "grad_norm": 5.93626070022583, "learning_rate": 9.959711652801796e-06, "loss": 0.3149426, "memory(GiB)": 13.7, "step": 9430, "train_speed(iter/s)": 1.558125 }, { "acc": 0.95103502, "epoch": 4.422310756972111, "grad_norm": 37.882625579833984, "learning_rate": 9.959613389272503e-06, "loss": 0.3761888, "memory(GiB)": 13.7, "step": 9435, "train_speed(iter/s)": 1.55813 }, { "acc": 0.94922771, "epoch": 4.424654323880947, "grad_norm": 8.116162300109863, "learning_rate": 9.959515006542418e-06, "loss": 0.27469139, "memory(GiB)": 13.7, "step": 9440, "train_speed(iter/s)": 1.558183 }, { "acc": 0.9585887, "epoch": 4.4269978907897825, "grad_norm": 22.32216453552246, "learning_rate": 9.959416504613902e-06, "loss": 0.17781456, "memory(GiB)": 13.7, "step": 9445, "train_speed(iter/s)": 1.558184 }, { "acc": 0.96875896, "epoch": 4.429341457698618, "grad_norm": 11.913063049316406, "learning_rate": 9.959317883489326e-06, "loss": 0.20766623, "memory(GiB)": 13.7, "step": 9450, "train_speed(iter/s)": 1.558218 }, { "acc": 0.95806847, "epoch": 4.431685024607453, "grad_norm": 4.3337507247924805, "learning_rate": 9.959219143171057e-06, "loss": 0.20468895, "memory(GiB)": 13.7, "step": 9455, "train_speed(iter/s)": 1.558247 }, { "acc": 0.95394573, "epoch": 4.434028591516288, "grad_norm": 43.38298416137695, "learning_rate": 9.959120283661474e-06, "loss": 0.35561094, "memory(GiB)": 13.7, "step": 9460, "train_speed(iter/s)": 1.55832 }, { "acc": 0.97892857, "epoch": 4.436372158425123, "grad_norm": 8.038971900939941, "learning_rate": 9.959021304962947e-06, "loss": 0.09382989, "memory(GiB)": 13.7, "step": 9465, "train_speed(iter/s)": 1.558273 }, { "acc": 0.95840778, "epoch": 4.438715725333958, "grad_norm": 22.380144119262695, "learning_rate": 9.958922207077859e-06, "loss": 0.29402447, "memory(GiB)": 13.7, "step": 9470, "train_speed(iter/s)": 1.558269 }, { "acc": 0.93957176, "epoch": 4.441059292242794, "grad_norm": 17.705270767211914, "learning_rate": 9.958822990008589e-06, "loss": 0.29002926, "memory(GiB)": 13.7, "step": 9475, "train_speed(iter/s)": 1.558278 }, { "acc": 0.94199934, "epoch": 4.443402859151629, "grad_norm": 7.2689032554626465, "learning_rate": 9.958723653757526e-06, "loss": 0.26161833, "memory(GiB)": 13.7, "step": 9480, "train_speed(iter/s)": 1.558278 }, { "acc": 0.97836304, "epoch": 4.445746426060464, "grad_norm": 2.569932699203491, "learning_rate": 9.958624198327054e-06, "loss": 0.10040791, "memory(GiB)": 13.7, "step": 9485, "train_speed(iter/s)": 1.558271 }, { "acc": 0.95468206, "epoch": 4.448089992969299, "grad_norm": 5.010261058807373, "learning_rate": 9.958524623719567e-06, "loss": 0.25707717, "memory(GiB)": 13.7, "step": 9490, "train_speed(iter/s)": 1.55828 }, { "acc": 0.96707697, "epoch": 4.450433559878134, "grad_norm": 3.663703441619873, "learning_rate": 9.958424929937454e-06, "loss": 0.23026586, "memory(GiB)": 13.7, "step": 9495, "train_speed(iter/s)": 1.558337 }, { "acc": 0.96510639, "epoch": 4.45277712678697, "grad_norm": 12.762689590454102, "learning_rate": 9.958325116983115e-06, "loss": 0.18153358, "memory(GiB)": 13.7, "step": 9500, "train_speed(iter/s)": 1.558342 }, { "acc": 0.95430346, "epoch": 4.455120693695805, "grad_norm": 7.2080979347229, "learning_rate": 9.958225184858948e-06, "loss": 0.25399804, "memory(GiB)": 13.7, "step": 9505, "train_speed(iter/s)": 1.558346 }, { "acc": 0.93935966, "epoch": 4.45746426060464, "grad_norm": 6.936193943023682, "learning_rate": 9.958125133567354e-06, "loss": 0.32735939, "memory(GiB)": 13.7, "step": 9510, "train_speed(iter/s)": 1.558324 }, { "acc": 0.95732088, "epoch": 4.459807827513475, "grad_norm": 4.606472492218018, "learning_rate": 9.958024963110738e-06, "loss": 0.19098916, "memory(GiB)": 13.7, "step": 9515, "train_speed(iter/s)": 1.558373 }, { "acc": 0.9518589, "epoch": 4.46215139442231, "grad_norm": 13.170248031616211, "learning_rate": 9.95792467349151e-06, "loss": 0.27186596, "memory(GiB)": 13.7, "step": 9520, "train_speed(iter/s)": 1.55835 }, { "acc": 0.94327164, "epoch": 4.464494961331146, "grad_norm": 5.982778549194336, "learning_rate": 9.957824264712079e-06, "loss": 0.1897746, "memory(GiB)": 13.7, "step": 9525, "train_speed(iter/s)": 1.558371 }, { "acc": 0.97934523, "epoch": 4.466838528239982, "grad_norm": 30.830007553100586, "learning_rate": 9.957723736774859e-06, "loss": 0.1947576, "memory(GiB)": 13.7, "step": 9530, "train_speed(iter/s)": 1.5584 }, { "acc": 0.93922434, "epoch": 4.469182095148817, "grad_norm": 18.946910858154297, "learning_rate": 9.957623089682267e-06, "loss": 0.45199709, "memory(GiB)": 13.7, "step": 9535, "train_speed(iter/s)": 1.558395 }, { "acc": 0.95655832, "epoch": 4.471525662057652, "grad_norm": 12.456330299377441, "learning_rate": 9.95752232343672e-06, "loss": 0.22313488, "memory(GiB)": 13.7, "step": 9540, "train_speed(iter/s)": 1.558382 }, { "acc": 0.93869858, "epoch": 4.473869228966487, "grad_norm": 111.28740692138672, "learning_rate": 9.95742143804064e-06, "loss": 0.45665975, "memory(GiB)": 13.7, "step": 9545, "train_speed(iter/s)": 1.558403 }, { "acc": 0.96764183, "epoch": 4.476212795875322, "grad_norm": 10.630617141723633, "learning_rate": 9.957320433496456e-06, "loss": 0.22971127, "memory(GiB)": 13.7, "step": 9550, "train_speed(iter/s)": 1.558385 }, { "acc": 0.94871693, "epoch": 4.478556362784158, "grad_norm": 59.91331100463867, "learning_rate": 9.957219309806592e-06, "loss": 0.28758831, "memory(GiB)": 13.7, "step": 9555, "train_speed(iter/s)": 1.558358 }, { "acc": 0.945644, "epoch": 4.480899929692993, "grad_norm": 12.320244789123535, "learning_rate": 9.957118066973483e-06, "loss": 0.33614125, "memory(GiB)": 13.7, "step": 9560, "train_speed(iter/s)": 1.55833 }, { "acc": 0.97227993, "epoch": 4.483243496601828, "grad_norm": 9.138496398925781, "learning_rate": 9.957016704999554e-06, "loss": 0.19806094, "memory(GiB)": 13.7, "step": 9565, "train_speed(iter/s)": 1.558335 }, { "acc": 0.93175192, "epoch": 4.485587063510663, "grad_norm": 52.978092193603516, "learning_rate": 9.95691522388725e-06, "loss": 0.32054725, "memory(GiB)": 13.7, "step": 9570, "train_speed(iter/s)": 1.558373 }, { "acc": 0.92881632, "epoch": 4.487930630419498, "grad_norm": 14.926582336425781, "learning_rate": 9.956813623639007e-06, "loss": 0.38152771, "memory(GiB)": 13.7, "step": 9575, "train_speed(iter/s)": 1.558338 }, { "acc": 0.95747585, "epoch": 4.4902741973283335, "grad_norm": 15.572964668273926, "learning_rate": 9.956711904257268e-06, "loss": 0.23218961, "memory(GiB)": 13.7, "step": 9580, "train_speed(iter/s)": 1.558333 }, { "acc": 0.96546803, "epoch": 4.492617764237169, "grad_norm": 11.764777183532715, "learning_rate": 9.956610065744476e-06, "loss": 0.22936242, "memory(GiB)": 13.7, "step": 9585, "train_speed(iter/s)": 1.558295 }, { "acc": 0.96769352, "epoch": 4.494961331146004, "grad_norm": 5.471004009246826, "learning_rate": 9.956508108103078e-06, "loss": 0.13409352, "memory(GiB)": 13.7, "step": 9590, "train_speed(iter/s)": 1.558326 }, { "acc": 0.96864586, "epoch": 4.497304898054839, "grad_norm": 9.005623817443848, "learning_rate": 9.95640603133553e-06, "loss": 0.19203732, "memory(GiB)": 13.7, "step": 9595, "train_speed(iter/s)": 1.558333 }, { "acc": 0.96582794, "epoch": 4.499648464963674, "grad_norm": 10.204246520996094, "learning_rate": 9.956303835444283e-06, "loss": 0.17693363, "memory(GiB)": 13.7, "step": 9600, "train_speed(iter/s)": 1.558329 }, { "acc": 0.95804224, "epoch": 4.5019920318725095, "grad_norm": 4.77137565612793, "learning_rate": 9.95620152043179e-06, "loss": 0.30282867, "memory(GiB)": 13.7, "step": 9605, "train_speed(iter/s)": 1.558341 }, { "acc": 0.96056557, "epoch": 4.504335598781346, "grad_norm": 9.502673149108887, "learning_rate": 9.956099086300514e-06, "loss": 0.22520411, "memory(GiB)": 13.7, "step": 9610, "train_speed(iter/s)": 1.558362 }, { "acc": 0.9645833, "epoch": 4.506679165690181, "grad_norm": 20.470947265625, "learning_rate": 9.955996533052918e-06, "loss": 0.19536247, "memory(GiB)": 13.7, "step": 9615, "train_speed(iter/s)": 1.558378 }, { "acc": 0.95427837, "epoch": 4.509022732599016, "grad_norm": 16.742473602294922, "learning_rate": 9.955893860691463e-06, "loss": 0.29446664, "memory(GiB)": 13.7, "step": 9620, "train_speed(iter/s)": 1.558371 }, { "acc": 0.97290726, "epoch": 4.511366299507851, "grad_norm": 9.494538307189941, "learning_rate": 9.95579106921862e-06, "loss": 0.13151513, "memory(GiB)": 13.7, "step": 9625, "train_speed(iter/s)": 1.558356 }, { "acc": 0.95717258, "epoch": 4.513709866416686, "grad_norm": 0.12729115784168243, "learning_rate": 9.955688158636861e-06, "loss": 0.31533136, "memory(GiB)": 13.7, "step": 9630, "train_speed(iter/s)": 1.558388 }, { "acc": 0.964536, "epoch": 4.516053433325522, "grad_norm": 35.828773498535156, "learning_rate": 9.955585128948656e-06, "loss": 0.17070169, "memory(GiB)": 13.7, "step": 9635, "train_speed(iter/s)": 1.558384 }, { "acc": 0.94343815, "epoch": 4.518397000234357, "grad_norm": 98.07598114013672, "learning_rate": 9.955481980156483e-06, "loss": 0.34039831, "memory(GiB)": 13.7, "step": 9640, "train_speed(iter/s)": 1.558414 }, { "acc": 0.97185898, "epoch": 4.520740567143192, "grad_norm": 8.297832489013672, "learning_rate": 9.955378712262824e-06, "loss": 0.1495911, "memory(GiB)": 13.7, "step": 9645, "train_speed(iter/s)": 1.558423 }, { "acc": 0.91876926, "epoch": 4.523084134052027, "grad_norm": 6.966236114501953, "learning_rate": 9.955275325270156e-06, "loss": 0.54746714, "memory(GiB)": 13.7, "step": 9650, "train_speed(iter/s)": 1.558436 }, { "acc": 0.94507866, "epoch": 4.525427700960862, "grad_norm": 10.127158164978027, "learning_rate": 9.955171819180967e-06, "loss": 0.34188657, "memory(GiB)": 13.7, "step": 9655, "train_speed(iter/s)": 1.558417 }, { "acc": 0.92908554, "epoch": 4.5277712678696975, "grad_norm": 14.51186466217041, "learning_rate": 9.955068193997748e-06, "loss": 0.35194545, "memory(GiB)": 13.7, "step": 9660, "train_speed(iter/s)": 1.558425 }, { "acc": 0.95294456, "epoch": 4.530114834778533, "grad_norm": 23.639636993408203, "learning_rate": 9.954964449722984e-06, "loss": 0.24158177, "memory(GiB)": 13.7, "step": 9665, "train_speed(iter/s)": 1.558446 }, { "acc": 0.96567345, "epoch": 4.532458401687368, "grad_norm": 9.504633903503418, "learning_rate": 9.95486058635917e-06, "loss": 0.21535859, "memory(GiB)": 13.7, "step": 9670, "train_speed(iter/s)": 1.558453 }, { "acc": 0.92187176, "epoch": 4.534801968596203, "grad_norm": 2.0344793796539307, "learning_rate": 9.954756603908807e-06, "loss": 0.4329854, "memory(GiB)": 13.7, "step": 9675, "train_speed(iter/s)": 1.558485 }, { "acc": 0.96291666, "epoch": 4.537145535505038, "grad_norm": 6.785062313079834, "learning_rate": 9.954652502374389e-06, "loss": 0.16058713, "memory(GiB)": 13.7, "step": 9680, "train_speed(iter/s)": 1.558479 }, { "acc": 0.96125622, "epoch": 4.539489102413874, "grad_norm": 11.933781623840332, "learning_rate": 9.954548281758421e-06, "loss": 0.22878449, "memory(GiB)": 13.7, "step": 9685, "train_speed(iter/s)": 1.558511 }, { "acc": 0.94436016, "epoch": 4.541832669322709, "grad_norm": 33.48133087158203, "learning_rate": 9.954443942063408e-06, "loss": 0.33588734, "memory(GiB)": 13.7, "step": 9690, "train_speed(iter/s)": 1.558535 }, { "acc": 0.95651722, "epoch": 4.544176236231545, "grad_norm": 13.64638614654541, "learning_rate": 9.954339483291857e-06, "loss": 0.28719449, "memory(GiB)": 13.7, "step": 9695, "train_speed(iter/s)": 1.558546 }, { "acc": 0.95613422, "epoch": 4.54651980314038, "grad_norm": 6.9420671463012695, "learning_rate": 9.95423490544628e-06, "loss": 0.22283087, "memory(GiB)": 13.7, "step": 9700, "train_speed(iter/s)": 1.558524 }, { "acc": 0.95720844, "epoch": 4.548863370049215, "grad_norm": 6.908289432525635, "learning_rate": 9.95413020852919e-06, "loss": 0.25656996, "memory(GiB)": 13.7, "step": 9705, "train_speed(iter/s)": 1.558524 }, { "acc": 0.96758013, "epoch": 4.55120693695805, "grad_norm": 15.650965690612793, "learning_rate": 9.954025392543105e-06, "loss": 0.23555038, "memory(GiB)": 13.7, "step": 9710, "train_speed(iter/s)": 1.558525 }, { "acc": 0.95904732, "epoch": 4.5535505038668855, "grad_norm": 4.889605522155762, "learning_rate": 9.953920457490541e-06, "loss": 0.23758492, "memory(GiB)": 13.7, "step": 9715, "train_speed(iter/s)": 1.558532 }, { "acc": 0.95080338, "epoch": 4.555894070775721, "grad_norm": 20.921245574951172, "learning_rate": 9.953815403374024e-06, "loss": 0.26754687, "memory(GiB)": 13.7, "step": 9720, "train_speed(iter/s)": 1.558537 }, { "acc": 0.97013979, "epoch": 4.558237637684556, "grad_norm": 9.050995826721191, "learning_rate": 9.953710230196076e-06, "loss": 0.24011059, "memory(GiB)": 13.7, "step": 9725, "train_speed(iter/s)": 1.558564 }, { "acc": 0.97610149, "epoch": 4.560581204593391, "grad_norm": 6.735082626342773, "learning_rate": 9.953604937959229e-06, "loss": 0.12024446, "memory(GiB)": 13.7, "step": 9730, "train_speed(iter/s)": 1.558552 }, { "acc": 0.95233593, "epoch": 4.562924771502226, "grad_norm": 4.927314758300781, "learning_rate": 9.953499526666009e-06, "loss": 0.25547707, "memory(GiB)": 13.7, "step": 9735, "train_speed(iter/s)": 1.558538 }, { "acc": 0.96104164, "epoch": 4.5652683384110615, "grad_norm": 8.838593482971191, "learning_rate": 9.953393996318955e-06, "loss": 0.22106719, "memory(GiB)": 13.7, "step": 9740, "train_speed(iter/s)": 1.558548 }, { "acc": 0.9420207, "epoch": 4.567611905319897, "grad_norm": 18.816923141479492, "learning_rate": 9.953288346920596e-06, "loss": 0.41461949, "memory(GiB)": 13.7, "step": 9745, "train_speed(iter/s)": 1.558571 }, { "acc": 0.95855656, "epoch": 4.569955472228732, "grad_norm": 12.931962966918945, "learning_rate": 9.95318257847348e-06, "loss": 0.20999632, "memory(GiB)": 13.7, "step": 9750, "train_speed(iter/s)": 1.558553 }, { "acc": 0.97315483, "epoch": 4.572299039137567, "grad_norm": 2.881270408630371, "learning_rate": 9.953076690980146e-06, "loss": 0.19557861, "memory(GiB)": 13.7, "step": 9755, "train_speed(iter/s)": 1.558537 }, { "acc": 0.93795538, "epoch": 4.574642606046402, "grad_norm": 26.993465423583984, "learning_rate": 9.952970684443139e-06, "loss": 0.43496733, "memory(GiB)": 13.7, "step": 9760, "train_speed(iter/s)": 1.558537 }, { "acc": 0.95342674, "epoch": 4.5769861729552375, "grad_norm": 14.64423656463623, "learning_rate": 9.952864558865004e-06, "loss": 0.28947105, "memory(GiB)": 13.7, "step": 9765, "train_speed(iter/s)": 1.558513 }, { "acc": 0.9530735, "epoch": 4.5793297398640735, "grad_norm": 12.144556045532227, "learning_rate": 9.952758314248298e-06, "loss": 0.2338347, "memory(GiB)": 13.7, "step": 9770, "train_speed(iter/s)": 1.558501 }, { "acc": 0.97353077, "epoch": 4.581673306772909, "grad_norm": 15.680739402770996, "learning_rate": 9.95265195059557e-06, "loss": 0.12562807, "memory(GiB)": 13.7, "step": 9775, "train_speed(iter/s)": 1.558523 }, { "acc": 0.93682003, "epoch": 4.584016873681744, "grad_norm": 15.852502822875977, "learning_rate": 9.952545467909379e-06, "loss": 0.41272993, "memory(GiB)": 13.7, "step": 9780, "train_speed(iter/s)": 1.55857 }, { "acc": 0.96019268, "epoch": 4.586360440590579, "grad_norm": 11.797040939331055, "learning_rate": 9.95243886619228e-06, "loss": 0.26555572, "memory(GiB)": 13.7, "step": 9785, "train_speed(iter/s)": 1.558575 }, { "acc": 0.93804951, "epoch": 4.588704007499414, "grad_norm": 20.318470001220703, "learning_rate": 9.952332145446845e-06, "loss": 0.46820965, "memory(GiB)": 13.7, "step": 9790, "train_speed(iter/s)": 1.558551 }, { "acc": 0.95680561, "epoch": 4.5910475744082495, "grad_norm": 13.760849952697754, "learning_rate": 9.95222530567563e-06, "loss": 0.3038877, "memory(GiB)": 13.7, "step": 9795, "train_speed(iter/s)": 1.558538 }, { "acc": 0.98183174, "epoch": 4.593391141317085, "grad_norm": 0.2276112288236618, "learning_rate": 9.952118346881207e-06, "loss": 0.14173095, "memory(GiB)": 13.7, "step": 9800, "train_speed(iter/s)": 1.55854 }, { "acc": 0.96516247, "epoch": 4.59573470822592, "grad_norm": 8.955907821655273, "learning_rate": 9.952011269066144e-06, "loss": 0.27660184, "memory(GiB)": 13.7, "step": 9805, "train_speed(iter/s)": 1.558516 }, { "acc": 0.94224358, "epoch": 4.598078275134755, "grad_norm": 18.601011276245117, "learning_rate": 9.951904072233021e-06, "loss": 0.30524719, "memory(GiB)": 13.7, "step": 9810, "train_speed(iter/s)": 1.558504 }, { "acc": 0.96936693, "epoch": 4.60042184204359, "grad_norm": 15.923697471618652, "learning_rate": 9.951796756384407e-06, "loss": 0.21740122, "memory(GiB)": 13.7, "step": 9815, "train_speed(iter/s)": 1.558561 }, { "acc": 0.95644817, "epoch": 4.6027654089524255, "grad_norm": 8.561488151550293, "learning_rate": 9.951689321522887e-06, "loss": 0.22684152, "memory(GiB)": 13.7, "step": 9820, "train_speed(iter/s)": 1.558599 }, { "acc": 0.97963285, "epoch": 4.605108975861261, "grad_norm": 4.807947635650635, "learning_rate": 9.951581767651043e-06, "loss": 0.13620239, "memory(GiB)": 13.7, "step": 9825, "train_speed(iter/s)": 1.558622 }, { "acc": 0.94691868, "epoch": 4.607452542770096, "grad_norm": 10.045392990112305, "learning_rate": 9.951474094771455e-06, "loss": 0.26951964, "memory(GiB)": 13.7, "step": 9830, "train_speed(iter/s)": 1.558609 }, { "acc": 0.94437847, "epoch": 4.609796109678931, "grad_norm": 9.752561569213867, "learning_rate": 9.95136630288672e-06, "loss": 0.39437366, "memory(GiB)": 13.7, "step": 9835, "train_speed(iter/s)": 1.558603 }, { "acc": 0.94723215, "epoch": 4.612139676587766, "grad_norm": 28.778343200683594, "learning_rate": 9.95125839199942e-06, "loss": 0.32668004, "memory(GiB)": 13.7, "step": 9840, "train_speed(iter/s)": 1.558606 }, { "acc": 0.95608597, "epoch": 4.614483243496602, "grad_norm": 5.930637359619141, "learning_rate": 9.951150362112154e-06, "loss": 0.20221553, "memory(GiB)": 13.7, "step": 9845, "train_speed(iter/s)": 1.558576 }, { "acc": 0.96036701, "epoch": 4.616826810405437, "grad_norm": 15.726446151733398, "learning_rate": 9.951042213227519e-06, "loss": 0.24680402, "memory(GiB)": 13.7, "step": 9850, "train_speed(iter/s)": 1.558568 }, { "acc": 0.97069435, "epoch": 4.619170377314273, "grad_norm": 5.667649269104004, "learning_rate": 9.95093394534811e-06, "loss": 0.18179482, "memory(GiB)": 13.7, "step": 9855, "train_speed(iter/s)": 1.558576 }, { "acc": 0.95176182, "epoch": 4.621513944223108, "grad_norm": 1.48115074634552, "learning_rate": 9.950825558476535e-06, "loss": 0.23711672, "memory(GiB)": 13.7, "step": 9860, "train_speed(iter/s)": 1.558614 }, { "acc": 0.95395832, "epoch": 4.623857511131943, "grad_norm": 4.983591079711914, "learning_rate": 9.950717052615396e-06, "loss": 0.2511498, "memory(GiB)": 13.7, "step": 9865, "train_speed(iter/s)": 1.558643 }, { "acc": 0.96925697, "epoch": 4.626201078040778, "grad_norm": 12.155131340026855, "learning_rate": 9.950608427767301e-06, "loss": 0.21607552, "memory(GiB)": 13.7, "step": 9870, "train_speed(iter/s)": 1.558664 }, { "acc": 0.97890968, "epoch": 4.6285446449496135, "grad_norm": 16.55528450012207, "learning_rate": 9.950499683934864e-06, "loss": 0.15013249, "memory(GiB)": 13.7, "step": 9875, "train_speed(iter/s)": 1.558664 }, { "acc": 0.97250957, "epoch": 4.630888211858449, "grad_norm": 12.10170841217041, "learning_rate": 9.950390821120693e-06, "loss": 0.17184055, "memory(GiB)": 13.7, "step": 9880, "train_speed(iter/s)": 1.558689 }, { "acc": 0.96962299, "epoch": 4.633231778767284, "grad_norm": 19.169189453125, "learning_rate": 9.950281839327412e-06, "loss": 0.19448528, "memory(GiB)": 13.7, "step": 9885, "train_speed(iter/s)": 1.558682 }, { "acc": 0.96579418, "epoch": 4.635575345676119, "grad_norm": 0.3606971204280853, "learning_rate": 9.950172738557632e-06, "loss": 0.15075873, "memory(GiB)": 13.7, "step": 9890, "train_speed(iter/s)": 1.558646 }, { "acc": 0.95675602, "epoch": 4.637918912584954, "grad_norm": 3.1818017959594727, "learning_rate": 9.950063518813984e-06, "loss": 0.26090703, "memory(GiB)": 13.7, "step": 9895, "train_speed(iter/s)": 1.558643 }, { "acc": 0.94396038, "epoch": 4.6402624794937894, "grad_norm": 9.540782928466797, "learning_rate": 9.94995418009909e-06, "loss": 0.32913508, "memory(GiB)": 13.7, "step": 9900, "train_speed(iter/s)": 1.558723 }, { "acc": 0.97717266, "epoch": 4.642606046402625, "grad_norm": 19.12485122680664, "learning_rate": 9.949844722415577e-06, "loss": 0.10329603, "memory(GiB)": 13.7, "step": 9905, "train_speed(iter/s)": 1.558747 }, { "acc": 0.95720434, "epoch": 4.64494961331146, "grad_norm": 15.553117752075195, "learning_rate": 9.949735145766076e-06, "loss": 0.21671264, "memory(GiB)": 13.7, "step": 9910, "train_speed(iter/s)": 1.558759 }, { "acc": 0.9507143, "epoch": 4.647293180220295, "grad_norm": 16.072864532470703, "learning_rate": 9.94962545015322e-06, "loss": 0.2254168, "memory(GiB)": 13.7, "step": 9915, "train_speed(iter/s)": 1.558779 }, { "acc": 0.94567299, "epoch": 4.64963674712913, "grad_norm": 10.115642547607422, "learning_rate": 9.94951563557965e-06, "loss": 0.31962981, "memory(GiB)": 13.7, "step": 9920, "train_speed(iter/s)": 1.558766 }, { "acc": 0.95697689, "epoch": 4.651980314037965, "grad_norm": 14.484819412231445, "learning_rate": 9.949405702048004e-06, "loss": 0.30274229, "memory(GiB)": 13.7, "step": 9925, "train_speed(iter/s)": 1.558756 }, { "acc": 0.95928574, "epoch": 4.6543238809468015, "grad_norm": 12.2899751663208, "learning_rate": 9.94929564956092e-06, "loss": 0.29361141, "memory(GiB)": 13.7, "step": 9930, "train_speed(iter/s)": 1.558782 }, { "acc": 0.96742325, "epoch": 4.656667447855636, "grad_norm": 11.471457481384277, "learning_rate": 9.949185478121049e-06, "loss": 0.27772865, "memory(GiB)": 13.7, "step": 9935, "train_speed(iter/s)": 1.558811 }, { "acc": 0.96087799, "epoch": 4.659011014764472, "grad_norm": 9.903404235839844, "learning_rate": 9.949075187731035e-06, "loss": 0.25809536, "memory(GiB)": 13.7, "step": 9940, "train_speed(iter/s)": 1.558833 }, { "acc": 0.95741816, "epoch": 4.661354581673307, "grad_norm": 8.559409141540527, "learning_rate": 9.948964778393531e-06, "loss": 0.24688559, "memory(GiB)": 13.7, "step": 9945, "train_speed(iter/s)": 1.558887 }, { "acc": 0.95413685, "epoch": 4.663698148582142, "grad_norm": 6.309630870819092, "learning_rate": 9.948854250111192e-06, "loss": 0.25863578, "memory(GiB)": 13.7, "step": 9950, "train_speed(iter/s)": 1.558923 }, { "acc": 0.96465778, "epoch": 4.6660417154909775, "grad_norm": 8.810577392578125, "learning_rate": 9.948743602886674e-06, "loss": 0.15450543, "memory(GiB)": 13.7, "step": 9955, "train_speed(iter/s)": 1.558947 }, { "acc": 0.97085667, "epoch": 4.668385282399813, "grad_norm": 12.396832466125488, "learning_rate": 9.948632836722634e-06, "loss": 0.1280552, "memory(GiB)": 13.7, "step": 9960, "train_speed(iter/s)": 1.558968 }, { "acc": 0.95357151, "epoch": 4.670728849308648, "grad_norm": 16.138782501220703, "learning_rate": 9.948521951621739e-06, "loss": 0.21676421, "memory(GiB)": 13.7, "step": 9965, "train_speed(iter/s)": 1.558958 }, { "acc": 0.95880442, "epoch": 4.673072416217483, "grad_norm": 6.977736949920654, "learning_rate": 9.94841094758665e-06, "loss": 0.26915407, "memory(GiB)": 13.7, "step": 9970, "train_speed(iter/s)": 1.55896 }, { "acc": 0.96330357, "epoch": 4.675415983126318, "grad_norm": 18.87510108947754, "learning_rate": 9.948299824620035e-06, "loss": 0.22778201, "memory(GiB)": 13.7, "step": 9975, "train_speed(iter/s)": 1.559016 }, { "acc": 0.95945749, "epoch": 4.677759550035153, "grad_norm": 32.804298400878906, "learning_rate": 9.94818858272457e-06, "loss": 0.24092641, "memory(GiB)": 13.7, "step": 9980, "train_speed(iter/s)": 1.559029 }, { "acc": 0.95128708, "epoch": 4.680103116943989, "grad_norm": 7.472121715545654, "learning_rate": 9.948077221902924e-06, "loss": 0.32183928, "memory(GiB)": 13.7, "step": 9985, "train_speed(iter/s)": 1.559018 }, { "acc": 0.97209148, "epoch": 4.682446683852824, "grad_norm": 13.623747825622559, "learning_rate": 9.947965742157778e-06, "loss": 0.18680456, "memory(GiB)": 13.7, "step": 9990, "train_speed(iter/s)": 1.559043 }, { "acc": 0.95620422, "epoch": 4.684790250761659, "grad_norm": 2.292027235031128, "learning_rate": 9.947854143491807e-06, "loss": 0.29383235, "memory(GiB)": 13.7, "step": 9995, "train_speed(iter/s)": 1.559017 }, { "acc": 0.95192537, "epoch": 4.687133817670494, "grad_norm": 7.530936241149902, "learning_rate": 9.947742425907698e-06, "loss": 0.27821646, "memory(GiB)": 13.7, "step": 10000, "train_speed(iter/s)": 1.559014 }, { "epoch": 4.687133817670494, "eval_acc": 0.7657651581574155, "eval_loss": 1.0537868738174438, "eval_runtime": 143.0119, "eval_samples_per_second": 56.415, "eval_steps_per_second": 7.055, "step": 10000 }, { "acc": 0.95699272, "epoch": 4.689477384579329, "grad_norm": 7.553100109100342, "learning_rate": 9.947630589408132e-06, "loss": 0.26442056, "memory(GiB)": 13.7, "step": 10005, "train_speed(iter/s)": 1.519114 }, { "acc": 0.9598361, "epoch": 4.691820951488165, "grad_norm": 9.468721389770508, "learning_rate": 9.9475186339958e-06, "loss": 0.2221879, "memory(GiB)": 13.7, "step": 10010, "train_speed(iter/s)": 1.519119 }, { "acc": 0.95256939, "epoch": 4.694164518397001, "grad_norm": 2.673274040222168, "learning_rate": 9.947406559673392e-06, "loss": 0.29170363, "memory(GiB)": 13.7, "step": 10015, "train_speed(iter/s)": 1.519131 }, { "acc": 0.95301342, "epoch": 4.696508085305836, "grad_norm": 31.620975494384766, "learning_rate": 9.947294366443601e-06, "loss": 0.27818227, "memory(GiB)": 13.7, "step": 10020, "train_speed(iter/s)": 1.519142 }, { "acc": 0.96190977, "epoch": 4.698851652214671, "grad_norm": 4.535205364227295, "learning_rate": 9.947182054309128e-06, "loss": 0.25347276, "memory(GiB)": 13.7, "step": 10025, "train_speed(iter/s)": 1.519175 }, { "acc": 0.93360348, "epoch": 4.701195219123506, "grad_norm": 12.941229820251465, "learning_rate": 9.947069623272667e-06, "loss": 0.25125117, "memory(GiB)": 13.7, "step": 10030, "train_speed(iter/s)": 1.519188 }, { "acc": 0.97388315, "epoch": 4.703538786032341, "grad_norm": 15.078330039978027, "learning_rate": 9.946957073336923e-06, "loss": 0.18459772, "memory(GiB)": 13.7, "step": 10035, "train_speed(iter/s)": 1.519194 }, { "acc": 0.96114235, "epoch": 4.705882352941177, "grad_norm": 5.227476119995117, "learning_rate": 9.946844404504604e-06, "loss": 0.27127872, "memory(GiB)": 13.7, "step": 10040, "train_speed(iter/s)": 1.519253 }, { "acc": 0.95539131, "epoch": 4.708225919850012, "grad_norm": 10.633732795715332, "learning_rate": 9.946731616778412e-06, "loss": 0.2285392, "memory(GiB)": 13.7, "step": 10045, "train_speed(iter/s)": 1.519269 }, { "acc": 0.96812496, "epoch": 4.710569486758847, "grad_norm": 12.249913215637207, "learning_rate": 9.946618710161063e-06, "loss": 0.12650114, "memory(GiB)": 13.7, "step": 10050, "train_speed(iter/s)": 1.51931 }, { "acc": 0.96806784, "epoch": 4.712913053667682, "grad_norm": 8.95293140411377, "learning_rate": 9.946505684655268e-06, "loss": 0.19176772, "memory(GiB)": 13.7, "step": 10055, "train_speed(iter/s)": 1.519335 }, { "acc": 0.97446423, "epoch": 4.715256620576517, "grad_norm": 8.710929870605469, "learning_rate": 9.946392540263748e-06, "loss": 0.14181134, "memory(GiB)": 13.7, "step": 10060, "train_speed(iter/s)": 1.51935 }, { "acc": 0.97230577, "epoch": 4.717600187485353, "grad_norm": 7.655498027801514, "learning_rate": 9.946279276989219e-06, "loss": 0.12223266, "memory(GiB)": 13.7, "step": 10065, "train_speed(iter/s)": 1.519401 }, { "acc": 0.95802088, "epoch": 4.719943754394188, "grad_norm": 23.202342987060547, "learning_rate": 9.946165894834404e-06, "loss": 0.23817596, "memory(GiB)": 13.7, "step": 10070, "train_speed(iter/s)": 1.519493 }, { "acc": 0.97016602, "epoch": 4.722287321303023, "grad_norm": 6.452765941619873, "learning_rate": 9.94605239380203e-06, "loss": 0.21886773, "memory(GiB)": 13.7, "step": 10075, "train_speed(iter/s)": 1.519488 }, { "acc": 0.94866152, "epoch": 4.724630888211858, "grad_norm": 6.465718746185303, "learning_rate": 9.945938773894822e-06, "loss": 0.27905891, "memory(GiB)": 13.7, "step": 10080, "train_speed(iter/s)": 1.519521 }, { "acc": 0.96303158, "epoch": 4.726974455120693, "grad_norm": 7.463196754455566, "learning_rate": 9.945825035115513e-06, "loss": 0.27063618, "memory(GiB)": 13.7, "step": 10085, "train_speed(iter/s)": 1.519566 }, { "acc": 0.97562504, "epoch": 4.729318022029529, "grad_norm": 0.9373210668563843, "learning_rate": 9.945711177466837e-06, "loss": 0.14063728, "memory(GiB)": 13.7, "step": 10090, "train_speed(iter/s)": 1.519562 }, { "acc": 0.94042931, "epoch": 4.731661588938364, "grad_norm": 20.15410804748535, "learning_rate": 9.945597200951532e-06, "loss": 0.34497509, "memory(GiB)": 13.7, "step": 10095, "train_speed(iter/s)": 1.519587 }, { "acc": 0.93744383, "epoch": 4.7340051558472, "grad_norm": 18.009963989257812, "learning_rate": 9.945483105572333e-06, "loss": 0.40106902, "memory(GiB)": 13.7, "step": 10100, "train_speed(iter/s)": 1.519583 }, { "acc": 0.96883316, "epoch": 4.736348722756035, "grad_norm": 5.405035018920898, "learning_rate": 9.945368891331988e-06, "loss": 0.18983991, "memory(GiB)": 13.7, "step": 10105, "train_speed(iter/s)": 1.519618 }, { "acc": 0.95794144, "epoch": 4.73869228966487, "grad_norm": 8.023499488830566, "learning_rate": 9.94525455823324e-06, "loss": 0.18238976, "memory(GiB)": 13.7, "step": 10110, "train_speed(iter/s)": 1.51964 }, { "acc": 0.95903721, "epoch": 4.741035856573705, "grad_norm": 3.739062786102295, "learning_rate": 9.945140106278837e-06, "loss": 0.30709076, "memory(GiB)": 13.7, "step": 10115, "train_speed(iter/s)": 1.519673 }, { "acc": 0.96533232, "epoch": 4.743379423482541, "grad_norm": 12.65305233001709, "learning_rate": 9.94502553547153e-06, "loss": 0.22876899, "memory(GiB)": 13.7, "step": 10120, "train_speed(iter/s)": 1.519717 }, { "acc": 0.97524614, "epoch": 4.745722990391376, "grad_norm": 2.8508052825927734, "learning_rate": 9.944910845814073e-06, "loss": 0.20196517, "memory(GiB)": 13.7, "step": 10125, "train_speed(iter/s)": 1.519692 }, { "acc": 0.94771595, "epoch": 4.748066557300211, "grad_norm": 260.85455322265625, "learning_rate": 9.944796037309224e-06, "loss": 0.24243214, "memory(GiB)": 13.7, "step": 10130, "train_speed(iter/s)": 1.519744 }, { "acc": 0.93833332, "epoch": 4.750410124209046, "grad_norm": 12.233736038208008, "learning_rate": 9.944681109959742e-06, "loss": 0.36111097, "memory(GiB)": 13.7, "step": 10135, "train_speed(iter/s)": 1.519771 }, { "acc": 0.96474552, "epoch": 4.752753691117881, "grad_norm": 5.409464359283447, "learning_rate": 9.944566063768386e-06, "loss": 0.13937908, "memory(GiB)": 13.7, "step": 10140, "train_speed(iter/s)": 1.519792 }, { "acc": 0.96825886, "epoch": 4.755097258026717, "grad_norm": 4.623258590698242, "learning_rate": 9.944450898737926e-06, "loss": 0.20536568, "memory(GiB)": 13.7, "step": 10145, "train_speed(iter/s)": 1.519818 }, { "acc": 0.94286861, "epoch": 4.757440824935552, "grad_norm": 4.079713344573975, "learning_rate": 9.94433561487113e-06, "loss": 0.31652632, "memory(GiB)": 13.7, "step": 10150, "train_speed(iter/s)": 1.519842 }, { "acc": 0.95866566, "epoch": 4.759784391844387, "grad_norm": 9.62069034576416, "learning_rate": 9.944220212170766e-06, "loss": 0.24905152, "memory(GiB)": 13.7, "step": 10155, "train_speed(iter/s)": 1.519882 }, { "acc": 0.95927372, "epoch": 4.762127958753222, "grad_norm": 10.565681457519531, "learning_rate": 9.944104690639609e-06, "loss": 0.22985086, "memory(GiB)": 13.7, "step": 10160, "train_speed(iter/s)": 1.519943 }, { "acc": 0.96603622, "epoch": 4.764471525662057, "grad_norm": 5.363990306854248, "learning_rate": 9.943989050280436e-06, "loss": 0.1816206, "memory(GiB)": 13.7, "step": 10165, "train_speed(iter/s)": 1.519942 }, { "acc": 0.94714279, "epoch": 4.7668150925708925, "grad_norm": 12.150505065917969, "learning_rate": 9.943873291096028e-06, "loss": 0.23837523, "memory(GiB)": 13.7, "step": 10170, "train_speed(iter/s)": 1.519945 }, { "acc": 0.95117064, "epoch": 4.769158659479729, "grad_norm": 11.280157089233398, "learning_rate": 9.943757413089166e-06, "loss": 0.37555671, "memory(GiB)": 13.7, "step": 10175, "train_speed(iter/s)": 1.519997 }, { "acc": 0.96525307, "epoch": 4.771502226388563, "grad_norm": 2.2707109451293945, "learning_rate": 9.943641416262636e-06, "loss": 0.20721312, "memory(GiB)": 13.7, "step": 10180, "train_speed(iter/s)": 1.519999 }, { "acc": 0.95556488, "epoch": 4.773845793297399, "grad_norm": 8.663941383361816, "learning_rate": 9.943525300619224e-06, "loss": 0.36478906, "memory(GiB)": 13.7, "step": 10185, "train_speed(iter/s)": 1.519998 }, { "acc": 0.95282192, "epoch": 4.776189360206234, "grad_norm": 18.650436401367188, "learning_rate": 9.943409066161727e-06, "loss": 0.2842556, "memory(GiB)": 13.7, "step": 10190, "train_speed(iter/s)": 1.519998 }, { "acc": 0.96033421, "epoch": 4.778532927115069, "grad_norm": 26.305177688598633, "learning_rate": 9.943292712892932e-06, "loss": 0.20614889, "memory(GiB)": 13.7, "step": 10195, "train_speed(iter/s)": 1.519984 }, { "acc": 0.959587, "epoch": 4.780876494023905, "grad_norm": 42.360809326171875, "learning_rate": 9.943176240815637e-06, "loss": 0.24215333, "memory(GiB)": 13.7, "step": 10200, "train_speed(iter/s)": 1.520003 }, { "acc": 0.96003971, "epoch": 4.78322006093274, "grad_norm": 7.7586517333984375, "learning_rate": 9.943059649932645e-06, "loss": 0.27817163, "memory(GiB)": 13.7, "step": 10205, "train_speed(iter/s)": 1.520002 }, { "acc": 0.96340275, "epoch": 4.785563627841575, "grad_norm": 9.72508430480957, "learning_rate": 9.942942940246757e-06, "loss": 0.23783131, "memory(GiB)": 13.7, "step": 10210, "train_speed(iter/s)": 1.520046 }, { "acc": 0.96766367, "epoch": 4.78790719475041, "grad_norm": 5.872747898101807, "learning_rate": 9.942826111760776e-06, "loss": 0.16367652, "memory(GiB)": 13.7, "step": 10215, "train_speed(iter/s)": 1.52005 }, { "acc": 0.97209663, "epoch": 4.790250761659245, "grad_norm": 5.107381343841553, "learning_rate": 9.942709164477515e-06, "loss": 0.1811671, "memory(GiB)": 13.7, "step": 10220, "train_speed(iter/s)": 1.520046 }, { "acc": 0.95324669, "epoch": 4.7925943285680805, "grad_norm": 39.723060607910156, "learning_rate": 9.94259209839978e-06, "loss": 0.26055644, "memory(GiB)": 13.7, "step": 10225, "train_speed(iter/s)": 1.52008 }, { "acc": 0.9625845, "epoch": 4.794937895476916, "grad_norm": 13.589054107666016, "learning_rate": 9.942474913530388e-06, "loss": 0.18259829, "memory(GiB)": 13.7, "step": 10230, "train_speed(iter/s)": 1.520081 }, { "acc": 0.95873747, "epoch": 4.797281462385751, "grad_norm": 6.417457580566406, "learning_rate": 9.942357609872152e-06, "loss": 0.24355645, "memory(GiB)": 13.7, "step": 10235, "train_speed(iter/s)": 1.520079 }, { "acc": 0.9619585, "epoch": 4.799625029294586, "grad_norm": 16.328744888305664, "learning_rate": 9.942240187427899e-06, "loss": 0.22517562, "memory(GiB)": 13.7, "step": 10240, "train_speed(iter/s)": 1.520131 }, { "acc": 0.96216545, "epoch": 4.801968596203421, "grad_norm": 6.95881462097168, "learning_rate": 9.942122646200444e-06, "loss": 0.21612866, "memory(GiB)": 13.7, "step": 10245, "train_speed(iter/s)": 1.520139 }, { "acc": 0.97616949, "epoch": 4.8043121631122565, "grad_norm": 0.7219721078872681, "learning_rate": 9.942004986192615e-06, "loss": 0.12907989, "memory(GiB)": 13.7, "step": 10250, "train_speed(iter/s)": 1.520171 }, { "acc": 0.96487007, "epoch": 4.806655730021092, "grad_norm": 10.957448959350586, "learning_rate": 9.94188720740724e-06, "loss": 0.27936187, "memory(GiB)": 13.7, "step": 10255, "train_speed(iter/s)": 1.520152 }, { "acc": 0.95211172, "epoch": 4.808999296929928, "grad_norm": 11.234257698059082, "learning_rate": 9.94176930984715e-06, "loss": 0.29811749, "memory(GiB)": 13.7, "step": 10260, "train_speed(iter/s)": 1.520148 }, { "acc": 0.9652009, "epoch": 4.811342863838763, "grad_norm": 26.60923957824707, "learning_rate": 9.941651293515181e-06, "loss": 0.16065131, "memory(GiB)": 13.7, "step": 10265, "train_speed(iter/s)": 1.520175 }, { "acc": 0.96909332, "epoch": 4.813686430747598, "grad_norm": 35.11579895019531, "learning_rate": 9.941533158414167e-06, "loss": 0.18377621, "memory(GiB)": 13.7, "step": 10270, "train_speed(iter/s)": 1.52022 }, { "acc": 0.98488092, "epoch": 4.816029997656433, "grad_norm": 5.933981895446777, "learning_rate": 9.941414904546948e-06, "loss": 0.15273581, "memory(GiB)": 13.7, "step": 10275, "train_speed(iter/s)": 1.520216 }, { "acc": 0.94284449, "epoch": 4.8183735645652686, "grad_norm": 44.82288360595703, "learning_rate": 9.941296531916367e-06, "loss": 0.2850915, "memory(GiB)": 13.7, "step": 10280, "train_speed(iter/s)": 1.520226 }, { "acc": 0.96988249, "epoch": 4.820717131474104, "grad_norm": 20.61853790283203, "learning_rate": 9.94117804052527e-06, "loss": 0.16449783, "memory(GiB)": 13.7, "step": 10285, "train_speed(iter/s)": 1.520263 }, { "acc": 0.96546497, "epoch": 4.823060698382939, "grad_norm": 7.720476150512695, "learning_rate": 9.941059430376504e-06, "loss": 0.22295625, "memory(GiB)": 13.7, "step": 10290, "train_speed(iter/s)": 1.520291 }, { "acc": 0.95815296, "epoch": 4.825404265291774, "grad_norm": 16.643468856811523, "learning_rate": 9.94094070147292e-06, "loss": 0.18390659, "memory(GiB)": 13.7, "step": 10295, "train_speed(iter/s)": 1.520351 }, { "acc": 0.95137558, "epoch": 4.827747832200609, "grad_norm": 5.178791522979736, "learning_rate": 9.940821853817372e-06, "loss": 0.23101025, "memory(GiB)": 13.7, "step": 10300, "train_speed(iter/s)": 1.520404 }, { "acc": 0.95325403, "epoch": 4.8300913991094445, "grad_norm": 3.299147129058838, "learning_rate": 9.940702887412715e-06, "loss": 0.34258223, "memory(GiB)": 13.7, "step": 10305, "train_speed(iter/s)": 1.520443 }, { "acc": 0.92728043, "epoch": 4.83243496601828, "grad_norm": 15.360204696655273, "learning_rate": 9.940583802261812e-06, "loss": 0.29012721, "memory(GiB)": 13.7, "step": 10310, "train_speed(iter/s)": 1.52047 }, { "acc": 0.95561943, "epoch": 4.834778532927115, "grad_norm": 6.0179057121276855, "learning_rate": 9.940464598367526e-06, "loss": 0.2696306, "memory(GiB)": 13.7, "step": 10315, "train_speed(iter/s)": 1.52048 }, { "acc": 0.97666664, "epoch": 4.83712209983595, "grad_norm": 8.336181640625, "learning_rate": 9.940345275732716e-06, "loss": 0.15231769, "memory(GiB)": 13.7, "step": 10320, "train_speed(iter/s)": 1.5205 }, { "acc": 0.94953108, "epoch": 4.839465666744785, "grad_norm": 75.62168884277344, "learning_rate": 9.940225834360257e-06, "loss": 0.29893749, "memory(GiB)": 13.7, "step": 10325, "train_speed(iter/s)": 1.52051 }, { "acc": 0.97657051, "epoch": 4.8418092336536205, "grad_norm": 2.5097250938415527, "learning_rate": 9.940106274253017e-06, "loss": 0.15472674, "memory(GiB)": 13.7, "step": 10330, "train_speed(iter/s)": 1.520547 }, { "acc": 0.93426666, "epoch": 4.844152800562456, "grad_norm": 107.94403076171875, "learning_rate": 9.93998659541387e-06, "loss": 0.39281483, "memory(GiB)": 13.7, "step": 10335, "train_speed(iter/s)": 1.520606 }, { "acc": 0.98273401, "epoch": 4.846496367471291, "grad_norm": 8.189942359924316, "learning_rate": 9.939866797845691e-06, "loss": 0.13067224, "memory(GiB)": 13.7, "step": 10340, "train_speed(iter/s)": 1.52063 }, { "acc": 0.97085819, "epoch": 4.848839934380127, "grad_norm": 5.362456321716309, "learning_rate": 9.939746881551364e-06, "loss": 0.19091407, "memory(GiB)": 13.7, "step": 10345, "train_speed(iter/s)": 1.520644 }, { "acc": 0.95005951, "epoch": 4.851183501288962, "grad_norm": 3.5825355052948, "learning_rate": 9.939626846533767e-06, "loss": 0.37107067, "memory(GiB)": 13.7, "step": 10350, "train_speed(iter/s)": 1.52067 }, { "acc": 0.96201115, "epoch": 4.853527068197797, "grad_norm": 30.28289794921875, "learning_rate": 9.939506692795788e-06, "loss": 0.22334952, "memory(GiB)": 13.7, "step": 10355, "train_speed(iter/s)": 1.520671 }, { "acc": 0.96248417, "epoch": 4.8558706351066325, "grad_norm": 6.7966203689575195, "learning_rate": 9.939386420340312e-06, "loss": 0.26735814, "memory(GiB)": 13.7, "step": 10360, "train_speed(iter/s)": 1.520663 }, { "acc": 0.97807274, "epoch": 4.858214202015468, "grad_norm": 2.7222132682800293, "learning_rate": 9.939266029170234e-06, "loss": 0.15511127, "memory(GiB)": 13.7, "step": 10365, "train_speed(iter/s)": 1.520636 }, { "acc": 0.93876982, "epoch": 4.860557768924303, "grad_norm": 17.551118850708008, "learning_rate": 9.939145519288445e-06, "loss": 0.28881757, "memory(GiB)": 13.7, "step": 10370, "train_speed(iter/s)": 1.520661 }, { "acc": 0.94537201, "epoch": 4.862901335833138, "grad_norm": 7.395896911621094, "learning_rate": 9.939024890697843e-06, "loss": 0.26302376, "memory(GiB)": 13.7, "step": 10375, "train_speed(iter/s)": 1.52066 }, { "acc": 0.94535103, "epoch": 4.865244902741973, "grad_norm": 13.560236930847168, "learning_rate": 9.938904143401324e-06, "loss": 0.33908155, "memory(GiB)": 13.7, "step": 10380, "train_speed(iter/s)": 1.520643 }, { "acc": 0.95411701, "epoch": 4.8675884696508085, "grad_norm": 9.912261009216309, "learning_rate": 9.938783277401795e-06, "loss": 0.2257596, "memory(GiB)": 13.7, "step": 10385, "train_speed(iter/s)": 1.520704 }, { "acc": 0.96752605, "epoch": 4.869932036559644, "grad_norm": 5.371792793273926, "learning_rate": 9.938662292702162e-06, "loss": 0.19367054, "memory(GiB)": 13.7, "step": 10390, "train_speed(iter/s)": 1.520698 }, { "acc": 0.95187492, "epoch": 4.872275603468479, "grad_norm": 13.382866859436035, "learning_rate": 9.938541189305328e-06, "loss": 0.27789204, "memory(GiB)": 13.7, "step": 10395, "train_speed(iter/s)": 1.520743 }, { "acc": 0.966889, "epoch": 4.874619170377314, "grad_norm": 6.90041446685791, "learning_rate": 9.938419967214207e-06, "loss": 0.21309025, "memory(GiB)": 13.7, "step": 10400, "train_speed(iter/s)": 1.5208 }, { "acc": 0.96795597, "epoch": 4.876962737286149, "grad_norm": 2.8437139987945557, "learning_rate": 9.938298626431712e-06, "loss": 0.15020161, "memory(GiB)": 13.7, "step": 10405, "train_speed(iter/s)": 1.520834 }, { "acc": 0.95615444, "epoch": 4.8793063041949845, "grad_norm": 7.3561859130859375, "learning_rate": 9.938177166960761e-06, "loss": 0.2945919, "memory(GiB)": 13.7, "step": 10410, "train_speed(iter/s)": 1.520864 }, { "acc": 0.94901791, "epoch": 4.88164987110382, "grad_norm": 29.359161376953125, "learning_rate": 9.938055588804271e-06, "loss": 0.32583876, "memory(GiB)": 13.7, "step": 10415, "train_speed(iter/s)": 1.520888 }, { "acc": 0.94324265, "epoch": 4.883993438012656, "grad_norm": 11.769933700561523, "learning_rate": 9.937933891965167e-06, "loss": 0.24717913, "memory(GiB)": 13.7, "step": 10420, "train_speed(iter/s)": 1.520913 }, { "acc": 0.96585159, "epoch": 4.88633700492149, "grad_norm": 2.3443527221679688, "learning_rate": 9.937812076446373e-06, "loss": 0.18707535, "memory(GiB)": 13.7, "step": 10425, "train_speed(iter/s)": 1.520957 }, { "acc": 0.95412693, "epoch": 4.888680571830326, "grad_norm": 6.658681392669678, "learning_rate": 9.937690142250817e-06, "loss": 0.28031578, "memory(GiB)": 13.7, "step": 10430, "train_speed(iter/s)": 1.520965 }, { "acc": 0.96151981, "epoch": 4.891024138739161, "grad_norm": 7.6887383460998535, "learning_rate": 9.93756808938143e-06, "loss": 0.25679679, "memory(GiB)": 13.7, "step": 10435, "train_speed(iter/s)": 1.521018 }, { "acc": 0.94769573, "epoch": 4.8933677056479965, "grad_norm": 6.610279560089111, "learning_rate": 9.937445917841145e-06, "loss": 0.29004021, "memory(GiB)": 13.7, "step": 10440, "train_speed(iter/s)": 1.521023 }, { "acc": 0.94488964, "epoch": 4.895711272556832, "grad_norm": 12.691153526306152, "learning_rate": 9.9373236276329e-06, "loss": 0.19208344, "memory(GiB)": 13.7, "step": 10445, "train_speed(iter/s)": 1.521085 }, { "acc": 0.94822016, "epoch": 4.898054839465667, "grad_norm": 6.991100311279297, "learning_rate": 9.937201218759633e-06, "loss": 0.34533277, "memory(GiB)": 13.7, "step": 10450, "train_speed(iter/s)": 1.521098 }, { "acc": 0.96751471, "epoch": 4.900398406374502, "grad_norm": 7.949896812438965, "learning_rate": 9.937078691224288e-06, "loss": 0.137617, "memory(GiB)": 13.7, "step": 10455, "train_speed(iter/s)": 1.521119 }, { "acc": 0.94783182, "epoch": 4.902741973283337, "grad_norm": 8.144082069396973, "learning_rate": 9.936956045029806e-06, "loss": 0.21985679, "memory(GiB)": 13.7, "step": 10460, "train_speed(iter/s)": 1.521131 }, { "acc": 0.95979195, "epoch": 4.9050855401921725, "grad_norm": 9.659801483154297, "learning_rate": 9.93683328017914e-06, "loss": 0.26318202, "memory(GiB)": 13.7, "step": 10465, "train_speed(iter/s)": 1.521143 }, { "acc": 0.96776104, "epoch": 4.907429107101008, "grad_norm": 6.357937335968018, "learning_rate": 9.936710396675241e-06, "loss": 0.20118115, "memory(GiB)": 13.7, "step": 10470, "train_speed(iter/s)": 1.521154 }, { "acc": 0.94731197, "epoch": 4.909772674009843, "grad_norm": 12.614897727966309, "learning_rate": 9.936587394521059e-06, "loss": 0.48522525, "memory(GiB)": 13.7, "step": 10475, "train_speed(iter/s)": 1.521168 }, { "acc": 0.93714561, "epoch": 4.912116240918678, "grad_norm": 8.947885513305664, "learning_rate": 9.936464273719553e-06, "loss": 0.26036792, "memory(GiB)": 13.7, "step": 10480, "train_speed(iter/s)": 1.521212 }, { "acc": 0.97365856, "epoch": 4.914459807827513, "grad_norm": 153.59329223632812, "learning_rate": 9.936341034273683e-06, "loss": 0.20318577, "memory(GiB)": 13.7, "step": 10485, "train_speed(iter/s)": 1.521264 }, { "acc": 0.96909838, "epoch": 4.916803374736348, "grad_norm": 9.208352088928223, "learning_rate": 9.936217676186407e-06, "loss": 0.20463939, "memory(GiB)": 13.7, "step": 10490, "train_speed(iter/s)": 1.521265 }, { "acc": 0.95459785, "epoch": 4.919146941645184, "grad_norm": 10.54217529296875, "learning_rate": 9.936094199460696e-06, "loss": 0.32175562, "memory(GiB)": 13.7, "step": 10495, "train_speed(iter/s)": 1.521267 }, { "acc": 0.95841722, "epoch": 4.921490508554019, "grad_norm": 6.92899751663208, "learning_rate": 9.935970604099514e-06, "loss": 0.13276818, "memory(GiB)": 13.7, "step": 10500, "train_speed(iter/s)": 1.521307 }, { "acc": 0.95486612, "epoch": 4.923834075462855, "grad_norm": 9.657068252563477, "learning_rate": 9.935846890105831e-06, "loss": 0.25265627, "memory(GiB)": 13.7, "step": 10505, "train_speed(iter/s)": 1.521338 }, { "acc": 0.96261425, "epoch": 4.92617764237169, "grad_norm": 15.006643295288086, "learning_rate": 9.935723057482626e-06, "loss": 0.216799, "memory(GiB)": 13.7, "step": 10510, "train_speed(iter/s)": 1.521347 }, { "acc": 0.95251751, "epoch": 4.928521209280525, "grad_norm": 9.672739028930664, "learning_rate": 9.935599106232869e-06, "loss": 0.27432523, "memory(GiB)": 13.7, "step": 10515, "train_speed(iter/s)": 1.521351 }, { "acc": 0.94934845, "epoch": 4.9308647761893605, "grad_norm": 17.822078704833984, "learning_rate": 9.935475036359545e-06, "loss": 0.29211924, "memory(GiB)": 13.7, "step": 10520, "train_speed(iter/s)": 1.521381 }, { "acc": 0.96550322, "epoch": 4.933208343098196, "grad_norm": 4.763551712036133, "learning_rate": 9.935350847865633e-06, "loss": 0.19501624, "memory(GiB)": 13.7, "step": 10525, "train_speed(iter/s)": 1.521381 }, { "acc": 0.96896191, "epoch": 4.935551910007031, "grad_norm": 3.3748018741607666, "learning_rate": 9.935226540754118e-06, "loss": 0.19110537, "memory(GiB)": 13.7, "step": 10530, "train_speed(iter/s)": 1.521383 }, { "acc": 0.96490231, "epoch": 4.937895476915866, "grad_norm": 4.314663887023926, "learning_rate": 9.93510211502799e-06, "loss": 0.14836895, "memory(GiB)": 13.7, "step": 10535, "train_speed(iter/s)": 1.5214 }, { "acc": 0.94757223, "epoch": 4.940239043824701, "grad_norm": 6.603183746337891, "learning_rate": 9.934977570690238e-06, "loss": 0.3618257, "memory(GiB)": 13.7, "step": 10540, "train_speed(iter/s)": 1.521447 }, { "acc": 0.95516357, "epoch": 4.942582610733536, "grad_norm": 8.525165557861328, "learning_rate": 9.934852907743857e-06, "loss": 0.21701376, "memory(GiB)": 13.7, "step": 10545, "train_speed(iter/s)": 1.521482 }, { "acc": 0.94517508, "epoch": 4.944926177642372, "grad_norm": 9.60504150390625, "learning_rate": 9.934728126191842e-06, "loss": 0.34180727, "memory(GiB)": 13.7, "step": 10550, "train_speed(iter/s)": 1.521491 }, { "acc": 0.96291351, "epoch": 4.947269744551207, "grad_norm": 8.35656452178955, "learning_rate": 9.934603226037194e-06, "loss": 0.23732057, "memory(GiB)": 13.7, "step": 10555, "train_speed(iter/s)": 1.521539 }, { "acc": 0.95738907, "epoch": 4.949613311460042, "grad_norm": 117.82901763916016, "learning_rate": 9.934478207282912e-06, "loss": 0.26699767, "memory(GiB)": 13.7, "step": 10560, "train_speed(iter/s)": 1.521523 }, { "acc": 0.9607481, "epoch": 4.951956878368877, "grad_norm": 4.5104146003723145, "learning_rate": 9.934353069932005e-06, "loss": 0.2413188, "memory(GiB)": 13.7, "step": 10565, "train_speed(iter/s)": 1.521588 }, { "acc": 0.9642251, "epoch": 4.954300445277712, "grad_norm": 6.122373104095459, "learning_rate": 9.93422781398748e-06, "loss": 0.22736909, "memory(GiB)": 13.7, "step": 10570, "train_speed(iter/s)": 1.521587 }, { "acc": 0.95579615, "epoch": 4.956644012186548, "grad_norm": 9.735255241394043, "learning_rate": 9.934102439452344e-06, "loss": 0.34367065, "memory(GiB)": 13.7, "step": 10575, "train_speed(iter/s)": 1.521605 }, { "acc": 0.93376226, "epoch": 4.958987579095383, "grad_norm": 181.5194091796875, "learning_rate": 9.933976946329616e-06, "loss": 0.34822631, "memory(GiB)": 13.7, "step": 10580, "train_speed(iter/s)": 1.521603 }, { "acc": 0.94226761, "epoch": 4.961331146004218, "grad_norm": 8.663437843322754, "learning_rate": 9.933851334622308e-06, "loss": 0.34204097, "memory(GiB)": 13.7, "step": 10585, "train_speed(iter/s)": 1.521604 }, { "acc": 0.9671114, "epoch": 4.963674712913054, "grad_norm": 3.111675262451172, "learning_rate": 9.933725604333443e-06, "loss": 0.17673516, "memory(GiB)": 13.7, "step": 10590, "train_speed(iter/s)": 1.521656 }, { "acc": 0.96276293, "epoch": 4.966018279821889, "grad_norm": 4.8360676765441895, "learning_rate": 9.93359975546604e-06, "loss": 0.23367453, "memory(GiB)": 13.7, "step": 10595, "train_speed(iter/s)": 1.521675 }, { "acc": 0.94600878, "epoch": 4.9683618467307245, "grad_norm": 14.741164207458496, "learning_rate": 9.933473788023127e-06, "loss": 0.25501871, "memory(GiB)": 13.7, "step": 10600, "train_speed(iter/s)": 1.521695 }, { "acc": 0.95024719, "epoch": 4.97070541363956, "grad_norm": 6.82921838760376, "learning_rate": 9.933347702007729e-06, "loss": 0.30393114, "memory(GiB)": 13.7, "step": 10605, "train_speed(iter/s)": 1.521741 }, { "acc": 0.97344847, "epoch": 4.973048980548395, "grad_norm": 12.7041654586792, "learning_rate": 9.933221497422877e-06, "loss": 0.17454935, "memory(GiB)": 13.7, "step": 10610, "train_speed(iter/s)": 1.521738 }, { "acc": 0.96082649, "epoch": 4.97539254745723, "grad_norm": 5.071805477142334, "learning_rate": 9.933095174271607e-06, "loss": 0.24387147, "memory(GiB)": 13.7, "step": 10615, "train_speed(iter/s)": 1.52173 }, { "acc": 0.95916538, "epoch": 4.977736114366065, "grad_norm": 5.08005428314209, "learning_rate": 9.932968732556955e-06, "loss": 0.26156702, "memory(GiB)": 13.7, "step": 10620, "train_speed(iter/s)": 1.521781 }, { "acc": 0.95439873, "epoch": 4.9800796812749, "grad_norm": 7.849494934082031, "learning_rate": 9.932842172281957e-06, "loss": 0.20912831, "memory(GiB)": 13.7, "step": 10625, "train_speed(iter/s)": 1.521848 }, { "acc": 0.95996113, "epoch": 4.982423248183736, "grad_norm": 22.865882873535156, "learning_rate": 9.932715493449657e-06, "loss": 0.22730455, "memory(GiB)": 13.7, "step": 10630, "train_speed(iter/s)": 1.52184 }, { "acc": 0.95648422, "epoch": 4.984766815092571, "grad_norm": 6.552141189575195, "learning_rate": 9.932588696063101e-06, "loss": 0.28450611, "memory(GiB)": 13.7, "step": 10635, "train_speed(iter/s)": 1.521835 }, { "acc": 0.94584827, "epoch": 4.987110382001406, "grad_norm": 5.674893856048584, "learning_rate": 9.932461780125336e-06, "loss": 0.2285346, "memory(GiB)": 13.7, "step": 10640, "train_speed(iter/s)": 1.521874 }, { "acc": 0.96467152, "epoch": 4.989453948910241, "grad_norm": 2.31002140045166, "learning_rate": 9.932334745639413e-06, "loss": 0.22551029, "memory(GiB)": 13.7, "step": 10645, "train_speed(iter/s)": 1.521903 }, { "acc": 0.94826298, "epoch": 4.991797515819076, "grad_norm": 45.23572540283203, "learning_rate": 9.932207592608384e-06, "loss": 0.3754509, "memory(GiB)": 13.7, "step": 10650, "train_speed(iter/s)": 1.52191 }, { "acc": 0.96039143, "epoch": 4.994141082727912, "grad_norm": 3.8442766666412354, "learning_rate": 9.932080321035307e-06, "loss": 0.19424571, "memory(GiB)": 13.7, "step": 10655, "train_speed(iter/s)": 1.521926 }, { "acc": 0.96468706, "epoch": 4.996484649636747, "grad_norm": 10.977386474609375, "learning_rate": 9.93195293092324e-06, "loss": 0.20668564, "memory(GiB)": 13.7, "step": 10660, "train_speed(iter/s)": 1.521945 }, { "acc": 0.96756935, "epoch": 4.998828216545583, "grad_norm": 12.150527000427246, "learning_rate": 9.931825422275246e-06, "loss": 0.21306448, "memory(GiB)": 13.7, "step": 10665, "train_speed(iter/s)": 1.52197 }, { "acc": 0.97498913, "epoch": 5.001171783454418, "grad_norm": 2.997936725616455, "learning_rate": 9.93169779509439e-06, "loss": 0.13114593, "memory(GiB)": 13.7, "step": 10670, "train_speed(iter/s)": 1.521862 }, { "acc": 0.94869051, "epoch": 5.003515350363253, "grad_norm": 12.91653823852539, "learning_rate": 9.931570049383738e-06, "loss": 0.29069166, "memory(GiB)": 13.7, "step": 10675, "train_speed(iter/s)": 1.521895 }, { "acc": 0.96752796, "epoch": 5.005858917272088, "grad_norm": 17.795469284057617, "learning_rate": 9.931442185146363e-06, "loss": 0.19573917, "memory(GiB)": 13.7, "step": 10680, "train_speed(iter/s)": 1.521922 }, { "acc": 0.97440739, "epoch": 5.008202484180924, "grad_norm": 15.441311836242676, "learning_rate": 9.931314202385337e-06, "loss": 0.12502108, "memory(GiB)": 13.7, "step": 10685, "train_speed(iter/s)": 1.521922 }, { "acc": 0.95473213, "epoch": 5.010546051089759, "grad_norm": 16.383691787719727, "learning_rate": 9.931186101103737e-06, "loss": 0.25220881, "memory(GiB)": 13.7, "step": 10690, "train_speed(iter/s)": 1.521917 }, { "acc": 0.95959644, "epoch": 5.012889617998594, "grad_norm": 11.193205833435059, "learning_rate": 9.93105788130464e-06, "loss": 0.27266331, "memory(GiB)": 13.7, "step": 10695, "train_speed(iter/s)": 1.521937 }, { "acc": 0.94696836, "epoch": 5.015233184907429, "grad_norm": 7.245415210723877, "learning_rate": 9.930929542991131e-06, "loss": 0.22748871, "memory(GiB)": 13.7, "step": 10700, "train_speed(iter/s)": 1.521942 }, { "acc": 0.97174358, "epoch": 5.017576751816264, "grad_norm": 14.820323944091797, "learning_rate": 9.930801086166293e-06, "loss": 0.17145023, "memory(GiB)": 13.7, "step": 10705, "train_speed(iter/s)": 1.521974 }, { "acc": 0.95791168, "epoch": 5.0199203187251, "grad_norm": 5.642789363861084, "learning_rate": 9.930672510833215e-06, "loss": 0.24438758, "memory(GiB)": 13.7, "step": 10710, "train_speed(iter/s)": 1.521959 }, { "acc": 0.95444689, "epoch": 5.022263885633935, "grad_norm": 6.945587635040283, "learning_rate": 9.930543816994986e-06, "loss": 0.38627076, "memory(GiB)": 13.7, "step": 10715, "train_speed(iter/s)": 1.521974 }, { "acc": 0.9542758, "epoch": 5.02460745254277, "grad_norm": 3.996715545654297, "learning_rate": 9.9304150046547e-06, "loss": 0.30173235, "memory(GiB)": 13.7, "step": 10720, "train_speed(iter/s)": 1.521967 }, { "acc": 0.96008148, "epoch": 5.026951019451605, "grad_norm": 12.254836082458496, "learning_rate": 9.930286073815454e-06, "loss": 0.22028759, "memory(GiB)": 13.7, "step": 10725, "train_speed(iter/s)": 1.521946 }, { "acc": 0.94417362, "epoch": 5.02929458636044, "grad_norm": 18.47847557067871, "learning_rate": 9.930157024480346e-06, "loss": 0.35598629, "memory(GiB)": 13.7, "step": 10730, "train_speed(iter/s)": 1.52199 }, { "acc": 0.96684017, "epoch": 5.0316381532692755, "grad_norm": 7.309003829956055, "learning_rate": 9.930027856652477e-06, "loss": 0.23229787, "memory(GiB)": 13.7, "step": 10735, "train_speed(iter/s)": 1.522036 }, { "acc": 0.95417156, "epoch": 5.033981720178111, "grad_norm": 30.073589324951172, "learning_rate": 9.929898570334957e-06, "loss": 0.22519672, "memory(GiB)": 13.7, "step": 10740, "train_speed(iter/s)": 1.522037 }, { "acc": 0.97208338, "epoch": 5.036325287086946, "grad_norm": 5.28061056137085, "learning_rate": 9.929769165530886e-06, "loss": 0.11832175, "memory(GiB)": 13.7, "step": 10745, "train_speed(iter/s)": 1.522074 }, { "acc": 0.95584774, "epoch": 5.038668853995782, "grad_norm": 5.663973808288574, "learning_rate": 9.92963964224338e-06, "loss": 0.27033405, "memory(GiB)": 13.7, "step": 10750, "train_speed(iter/s)": 1.522116 }, { "acc": 0.96502972, "epoch": 5.041012420904617, "grad_norm": 13.972862243652344, "learning_rate": 9.929510000475549e-06, "loss": 0.19008319, "memory(GiB)": 13.7, "step": 10755, "train_speed(iter/s)": 1.522146 }, { "acc": 0.96114588, "epoch": 5.043355987813452, "grad_norm": 38.21994400024414, "learning_rate": 9.929380240230511e-06, "loss": 0.27098925, "memory(GiB)": 13.7, "step": 10760, "train_speed(iter/s)": 1.522194 }, { "acc": 0.97105217, "epoch": 5.045699554722288, "grad_norm": 15.056005477905273, "learning_rate": 9.929250361511385e-06, "loss": 0.13847005, "memory(GiB)": 13.7, "step": 10765, "train_speed(iter/s)": 1.522182 }, { "acc": 0.95290508, "epoch": 5.048043121631123, "grad_norm": 7.635725975036621, "learning_rate": 9.929120364321292e-06, "loss": 0.27300167, "memory(GiB)": 13.7, "step": 10770, "train_speed(iter/s)": 1.522213 }, { "acc": 0.96524086, "epoch": 5.050386688539958, "grad_norm": 5.6781229972839355, "learning_rate": 9.928990248663358e-06, "loss": 0.27738531, "memory(GiB)": 13.7, "step": 10775, "train_speed(iter/s)": 1.522238 }, { "acc": 0.95503912, "epoch": 5.052730255448793, "grad_norm": 8.022419929504395, "learning_rate": 9.92886001454071e-06, "loss": 0.27290926, "memory(GiB)": 13.7, "step": 10780, "train_speed(iter/s)": 1.522259 }, { "acc": 0.95433922, "epoch": 5.055073822357628, "grad_norm": 3.284322738647461, "learning_rate": 9.928729661956477e-06, "loss": 0.19688327, "memory(GiB)": 13.7, "step": 10785, "train_speed(iter/s)": 1.522268 }, { "acc": 0.96144257, "epoch": 5.057417389266464, "grad_norm": 18.00544548034668, "learning_rate": 9.928599190913795e-06, "loss": 0.20362651, "memory(GiB)": 13.7, "step": 10790, "train_speed(iter/s)": 1.522296 }, { "acc": 0.94006252, "epoch": 5.059760956175299, "grad_norm": 6.022841930389404, "learning_rate": 9.928468601415797e-06, "loss": 0.36579783, "memory(GiB)": 13.7, "step": 10795, "train_speed(iter/s)": 1.522315 }, { "acc": 0.95691223, "epoch": 5.062104523084134, "grad_norm": 12.007566452026367, "learning_rate": 9.928337893465624e-06, "loss": 0.26580863, "memory(GiB)": 13.7, "step": 10800, "train_speed(iter/s)": 1.522332 }, { "acc": 0.96064987, "epoch": 5.064448089992969, "grad_norm": 8.844141960144043, "learning_rate": 9.928207067066417e-06, "loss": 0.24315155, "memory(GiB)": 13.7, "step": 10805, "train_speed(iter/s)": 1.522349 }, { "acc": 0.95694599, "epoch": 5.066791656901804, "grad_norm": 7.32185173034668, "learning_rate": 9.928076122221322e-06, "loss": 0.19087343, "memory(GiB)": 13.7, "step": 10810, "train_speed(iter/s)": 1.522414 }, { "acc": 0.95562143, "epoch": 5.0691352238106395, "grad_norm": 4.650967121124268, "learning_rate": 9.927945058933485e-06, "loss": 0.19171863, "memory(GiB)": 13.7, "step": 10815, "train_speed(iter/s)": 1.522492 }, { "acc": 0.96900349, "epoch": 5.071478790719475, "grad_norm": 7.990041255950928, "learning_rate": 9.927813877206055e-06, "loss": 0.09094938, "memory(GiB)": 13.7, "step": 10820, "train_speed(iter/s)": 1.522504 }, { "acc": 0.95306053, "epoch": 5.07382235762831, "grad_norm": 0.038741108030080795, "learning_rate": 9.927682577042189e-06, "loss": 0.18293898, "memory(GiB)": 13.7, "step": 10825, "train_speed(iter/s)": 1.522523 }, { "acc": 0.96099043, "epoch": 5.076165924537145, "grad_norm": 7.061225891113281, "learning_rate": 9.92755115844504e-06, "loss": 0.20200858, "memory(GiB)": 13.7, "step": 10830, "train_speed(iter/s)": 1.522536 }, { "acc": 0.94257441, "epoch": 5.078509491445981, "grad_norm": 2.9739229679107666, "learning_rate": 9.927419621417766e-06, "loss": 0.26442823, "memory(GiB)": 13.7, "step": 10835, "train_speed(iter/s)": 1.522573 }, { "acc": 0.9639616, "epoch": 5.080853058354816, "grad_norm": 8.420726776123047, "learning_rate": 9.927287965963534e-06, "loss": 0.19512104, "memory(GiB)": 13.7, "step": 10840, "train_speed(iter/s)": 1.522633 }, { "acc": 0.94748154, "epoch": 5.083196625263652, "grad_norm": 14.213179588317871, "learning_rate": 9.927156192085504e-06, "loss": 0.37594688, "memory(GiB)": 13.7, "step": 10845, "train_speed(iter/s)": 1.522652 }, { "acc": 0.95895834, "epoch": 5.085540192172487, "grad_norm": 7.296352386474609, "learning_rate": 9.927024299786843e-06, "loss": 0.19832478, "memory(GiB)": 13.7, "step": 10850, "train_speed(iter/s)": 1.522676 }, { "acc": 0.96895523, "epoch": 5.087883759081322, "grad_norm": 5.946958541870117, "learning_rate": 9.926892289070725e-06, "loss": 0.15067477, "memory(GiB)": 13.7, "step": 10855, "train_speed(iter/s)": 1.522709 }, { "acc": 0.96435013, "epoch": 5.090227325990157, "grad_norm": 6.432384014129639, "learning_rate": 9.926760159940317e-06, "loss": 0.26656556, "memory(GiB)": 13.7, "step": 10860, "train_speed(iter/s)": 1.522706 }, { "acc": 0.97244949, "epoch": 5.092570892898992, "grad_norm": 18.64497947692871, "learning_rate": 9.926627912398801e-06, "loss": 0.15196537, "memory(GiB)": 13.7, "step": 10865, "train_speed(iter/s)": 1.522739 }, { "acc": 0.96362553, "epoch": 5.0949144598078275, "grad_norm": 3.420107364654541, "learning_rate": 9.926495546449356e-06, "loss": 0.19390314, "memory(GiB)": 13.7, "step": 10870, "train_speed(iter/s)": 1.522775 }, { "acc": 0.9519866, "epoch": 5.097258026716663, "grad_norm": 8.760668754577637, "learning_rate": 9.926363062095156e-06, "loss": 0.29151192, "memory(GiB)": 13.7, "step": 10875, "train_speed(iter/s)": 1.522839 }, { "acc": 0.97187157, "epoch": 5.099601593625498, "grad_norm": 21.679540634155273, "learning_rate": 9.926230459339395e-06, "loss": 0.15840409, "memory(GiB)": 13.7, "step": 10880, "train_speed(iter/s)": 1.52287 }, { "acc": 0.96716423, "epoch": 5.101945160534333, "grad_norm": 8.24923324584961, "learning_rate": 9.926097738185254e-06, "loss": 0.17444984, "memory(GiB)": 13.7, "step": 10885, "train_speed(iter/s)": 1.522899 }, { "acc": 0.94277163, "epoch": 5.104288727443168, "grad_norm": 10.997153282165527, "learning_rate": 9.925964898635927e-06, "loss": 0.30041037, "memory(GiB)": 13.7, "step": 10890, "train_speed(iter/s)": 1.522967 }, { "acc": 0.94672623, "epoch": 5.1066322943520035, "grad_norm": 17.982276916503906, "learning_rate": 9.925831940694604e-06, "loss": 0.26525164, "memory(GiB)": 13.7, "step": 10895, "train_speed(iter/s)": 1.523026 }, { "acc": 0.94451437, "epoch": 5.108975861260839, "grad_norm": 7.158702373504639, "learning_rate": 9.92569886436448e-06, "loss": 0.26665511, "memory(GiB)": 13.7, "step": 10900, "train_speed(iter/s)": 1.523043 }, { "acc": 0.95022278, "epoch": 5.111319428169674, "grad_norm": 8.5896635055542, "learning_rate": 9.92556566964876e-06, "loss": 0.31088738, "memory(GiB)": 13.7, "step": 10905, "train_speed(iter/s)": 1.523092 }, { "acc": 0.96244154, "epoch": 5.113662995078509, "grad_norm": 10.148988723754883, "learning_rate": 9.925432356550639e-06, "loss": 0.26793811, "memory(GiB)": 13.7, "step": 10910, "train_speed(iter/s)": 1.52314 }, { "acc": 0.94543591, "epoch": 5.116006561987345, "grad_norm": 11.831356048583984, "learning_rate": 9.925298925073324e-06, "loss": 0.33136122, "memory(GiB)": 13.7, "step": 10915, "train_speed(iter/s)": 1.523181 }, { "acc": 0.95780716, "epoch": 5.11835012889618, "grad_norm": 13.155537605285645, "learning_rate": 9.925165375220022e-06, "loss": 0.2683141, "memory(GiB)": 13.7, "step": 10920, "train_speed(iter/s)": 1.523231 }, { "acc": 0.9550024, "epoch": 5.1206936958050155, "grad_norm": 10.973641395568848, "learning_rate": 9.925031706993944e-06, "loss": 0.26653023, "memory(GiB)": 13.7, "step": 10925, "train_speed(iter/s)": 1.523266 }, { "acc": 0.98156195, "epoch": 5.123037262713851, "grad_norm": 3.272355079650879, "learning_rate": 9.924897920398302e-06, "loss": 0.0901908, "memory(GiB)": 13.7, "step": 10930, "train_speed(iter/s)": 1.523256 }, { "acc": 0.98282738, "epoch": 5.125380829622686, "grad_norm": 3.2776734828948975, "learning_rate": 9.92476401543631e-06, "loss": 0.15234057, "memory(GiB)": 13.7, "step": 10935, "train_speed(iter/s)": 1.523276 }, { "acc": 0.98486109, "epoch": 5.127724396531521, "grad_norm": 3.9260153770446777, "learning_rate": 9.924629992111191e-06, "loss": 0.12860104, "memory(GiB)": 13.7, "step": 10940, "train_speed(iter/s)": 1.523331 }, { "acc": 0.97331848, "epoch": 5.130067963440356, "grad_norm": 2.5592024326324463, "learning_rate": 9.924495850426164e-06, "loss": 0.18356748, "memory(GiB)": 13.7, "step": 10945, "train_speed(iter/s)": 1.523377 }, { "acc": 0.96019592, "epoch": 5.1324115303491915, "grad_norm": 10.304177284240723, "learning_rate": 9.924361590384452e-06, "loss": 0.17276148, "memory(GiB)": 13.7, "step": 10950, "train_speed(iter/s)": 1.523389 }, { "acc": 0.95426865, "epoch": 5.134755097258027, "grad_norm": 6.92853307723999, "learning_rate": 9.924227211989283e-06, "loss": 0.27991312, "memory(GiB)": 13.7, "step": 10955, "train_speed(iter/s)": 1.523451 }, { "acc": 0.96201878, "epoch": 5.137098664166862, "grad_norm": 1.8537043333053589, "learning_rate": 9.924092715243887e-06, "loss": 0.20232141, "memory(GiB)": 13.7, "step": 10960, "train_speed(iter/s)": 1.523485 }, { "acc": 0.96474361, "epoch": 5.139442231075697, "grad_norm": 23.41748809814453, "learning_rate": 9.923958100151499e-06, "loss": 0.2196523, "memory(GiB)": 13.7, "step": 10965, "train_speed(iter/s)": 1.523489 }, { "acc": 0.96708336, "epoch": 5.141785797984532, "grad_norm": 6.755587100982666, "learning_rate": 9.923823366715353e-06, "loss": 0.1819028, "memory(GiB)": 13.7, "step": 10970, "train_speed(iter/s)": 1.523495 }, { "acc": 0.95254469, "epoch": 5.1441293648933675, "grad_norm": 12.45675277709961, "learning_rate": 9.923688514938688e-06, "loss": 0.26024084, "memory(GiB)": 13.7, "step": 10975, "train_speed(iter/s)": 1.523536 }, { "acc": 0.96567364, "epoch": 5.146472931802203, "grad_norm": 6.176791191101074, "learning_rate": 9.923553544824745e-06, "loss": 0.18956406, "memory(GiB)": 13.7, "step": 10980, "train_speed(iter/s)": 1.523594 }, { "acc": 0.95305462, "epoch": 5.148816498711038, "grad_norm": 13.355681419372559, "learning_rate": 9.92341845637677e-06, "loss": 0.25164964, "memory(GiB)": 13.7, "step": 10985, "train_speed(iter/s)": 1.523626 }, { "acc": 0.96240082, "epoch": 5.151160065619873, "grad_norm": 14.023442268371582, "learning_rate": 9.923283249598004e-06, "loss": 0.20535588, "memory(GiB)": 13.7, "step": 10990, "train_speed(iter/s)": 1.523647 }, { "acc": 0.97235117, "epoch": 5.153503632528709, "grad_norm": 6.831467151641846, "learning_rate": 9.923147924491703e-06, "loss": 0.16204789, "memory(GiB)": 13.7, "step": 10995, "train_speed(iter/s)": 1.523697 }, { "acc": 0.9558527, "epoch": 5.155847199437544, "grad_norm": 6.699839115142822, "learning_rate": 9.923012481061118e-06, "loss": 0.21081407, "memory(GiB)": 13.7, "step": 11000, "train_speed(iter/s)": 1.523709 }, { "acc": 0.96281252, "epoch": 5.1581907663463795, "grad_norm": 7.261240005493164, "learning_rate": 9.922876919309507e-06, "loss": 0.21982696, "memory(GiB)": 13.7, "step": 11005, "train_speed(iter/s)": 1.52371 }, { "acc": 0.95517511, "epoch": 5.160534333255215, "grad_norm": 8.46755599975586, "learning_rate": 9.922741239240124e-06, "loss": 0.17490265, "memory(GiB)": 13.7, "step": 11010, "train_speed(iter/s)": 1.523735 }, { "acc": 0.94617968, "epoch": 5.16287790016405, "grad_norm": 14.484397888183594, "learning_rate": 9.922605440856232e-06, "loss": 0.35811462, "memory(GiB)": 13.7, "step": 11015, "train_speed(iter/s)": 1.523765 }, { "acc": 0.96652775, "epoch": 5.165221467072885, "grad_norm": 0.4102935194969177, "learning_rate": 9.922469524161095e-06, "loss": 0.20886667, "memory(GiB)": 13.7, "step": 11020, "train_speed(iter/s)": 1.523806 }, { "acc": 0.94012556, "epoch": 5.16756503398172, "grad_norm": 12.199095726013184, "learning_rate": 9.922333489157983e-06, "loss": 0.24834785, "memory(GiB)": 13.7, "step": 11025, "train_speed(iter/s)": 1.523815 }, { "acc": 0.96000004, "epoch": 5.1699086008905555, "grad_norm": 4.846989631652832, "learning_rate": 9.922197335850163e-06, "loss": 0.23180666, "memory(GiB)": 13.7, "step": 11030, "train_speed(iter/s)": 1.523835 }, { "acc": 0.95508442, "epoch": 5.172252167799391, "grad_norm": 20.21689796447754, "learning_rate": 9.922061064240906e-06, "loss": 0.19716364, "memory(GiB)": 13.7, "step": 11035, "train_speed(iter/s)": 1.523885 }, { "acc": 0.97322187, "epoch": 5.174595734708226, "grad_norm": 32.00511169433594, "learning_rate": 9.921924674333491e-06, "loss": 0.17558656, "memory(GiB)": 13.7, "step": 11040, "train_speed(iter/s)": 1.523898 }, { "acc": 0.94085054, "epoch": 5.176939301617061, "grad_norm": 32.68980026245117, "learning_rate": 9.921788166131194e-06, "loss": 0.36000957, "memory(GiB)": 13.7, "step": 11045, "train_speed(iter/s)": 1.52394 }, { "acc": 0.95949898, "epoch": 5.179282868525896, "grad_norm": 6.320481300354004, "learning_rate": 9.921651539637299e-06, "loss": 0.1790019, "memory(GiB)": 13.7, "step": 11050, "train_speed(iter/s)": 1.524005 }, { "acc": 0.96212997, "epoch": 5.1816264354347314, "grad_norm": 12.61821460723877, "learning_rate": 9.921514794855086e-06, "loss": 0.1697757, "memory(GiB)": 13.7, "step": 11055, "train_speed(iter/s)": 1.52407 }, { "acc": 0.95633888, "epoch": 5.183970002343567, "grad_norm": 2.986048460006714, "learning_rate": 9.921377931787848e-06, "loss": 0.25511873, "memory(GiB)": 13.7, "step": 11060, "train_speed(iter/s)": 1.52407 }, { "acc": 0.97057543, "epoch": 5.186313569252402, "grad_norm": 5.204738616943359, "learning_rate": 9.921240950438867e-06, "loss": 0.236672, "memory(GiB)": 13.7, "step": 11065, "train_speed(iter/s)": 1.524104 }, { "acc": 0.9669363, "epoch": 5.188657136161237, "grad_norm": 7.803380966186523, "learning_rate": 9.92110385081144e-06, "loss": 0.21743689, "memory(GiB)": 13.7, "step": 11070, "train_speed(iter/s)": 1.524109 }, { "acc": 0.97936096, "epoch": 5.191000703070072, "grad_norm": 1.8954297304153442, "learning_rate": 9.920966632908863e-06, "loss": 0.16520627, "memory(GiB)": 13.7, "step": 11075, "train_speed(iter/s)": 1.52412 }, { "acc": 0.9729435, "epoch": 5.193344269978908, "grad_norm": 12.326729774475098, "learning_rate": 9.920829296734433e-06, "loss": 0.25344853, "memory(GiB)": 13.7, "step": 11080, "train_speed(iter/s)": 1.524094 }, { "acc": 0.97050591, "epoch": 5.1956878368877435, "grad_norm": 5.703145503997803, "learning_rate": 9.92069184229145e-06, "loss": 0.12810147, "memory(GiB)": 13.7, "step": 11085, "train_speed(iter/s)": 1.52411 }, { "acc": 0.95145836, "epoch": 5.198031403796579, "grad_norm": 20.499691009521484, "learning_rate": 9.92055426958322e-06, "loss": 0.26177759, "memory(GiB)": 13.7, "step": 11090, "train_speed(iter/s)": 1.524094 }, { "acc": 0.98000002, "epoch": 5.200374970705414, "grad_norm": 5.231180667877197, "learning_rate": 9.920416578613049e-06, "loss": 0.12166827, "memory(GiB)": 13.7, "step": 11095, "train_speed(iter/s)": 1.524137 }, { "acc": 0.95771484, "epoch": 5.202718537614249, "grad_norm": 7.867936134338379, "learning_rate": 9.920278769384248e-06, "loss": 0.29305673, "memory(GiB)": 13.7, "step": 11100, "train_speed(iter/s)": 1.524141 }, { "acc": 0.93946657, "epoch": 5.205062104523084, "grad_norm": 13.090367317199707, "learning_rate": 9.920140841900127e-06, "loss": 0.31389651, "memory(GiB)": 13.7, "step": 11105, "train_speed(iter/s)": 1.524155 }, { "acc": 0.96434526, "epoch": 5.2074056714319195, "grad_norm": 4.491591930389404, "learning_rate": 9.920002796164003e-06, "loss": 0.19728544, "memory(GiB)": 13.7, "step": 11110, "train_speed(iter/s)": 1.524151 }, { "acc": 0.94231148, "epoch": 5.209749238340755, "grad_norm": 66.64044189453125, "learning_rate": 9.91986463217919e-06, "loss": 0.37645874, "memory(GiB)": 13.7, "step": 11115, "train_speed(iter/s)": 1.524167 }, { "acc": 0.94800596, "epoch": 5.21209280524959, "grad_norm": 8.115920066833496, "learning_rate": 9.919726349949016e-06, "loss": 0.27788305, "memory(GiB)": 13.7, "step": 11120, "train_speed(iter/s)": 1.524197 }, { "acc": 0.94173756, "epoch": 5.214436372158425, "grad_norm": 8.567523002624512, "learning_rate": 9.919587949476802e-06, "loss": 0.31187468, "memory(GiB)": 13.7, "step": 11125, "train_speed(iter/s)": 1.524216 }, { "acc": 0.95707197, "epoch": 5.21677993906726, "grad_norm": 12.635699272155762, "learning_rate": 9.919449430765872e-06, "loss": 0.34482722, "memory(GiB)": 13.7, "step": 11130, "train_speed(iter/s)": 1.524277 }, { "acc": 0.95409718, "epoch": 5.219123505976095, "grad_norm": 6.963092803955078, "learning_rate": 9.919310793819558e-06, "loss": 0.31157875, "memory(GiB)": 13.7, "step": 11135, "train_speed(iter/s)": 1.524301 }, { "acc": 0.95726004, "epoch": 5.221467072884931, "grad_norm": 13.598981857299805, "learning_rate": 9.919172038641194e-06, "loss": 0.31286931, "memory(GiB)": 13.7, "step": 11140, "train_speed(iter/s)": 1.524299 }, { "acc": 0.93837566, "epoch": 5.223810639793766, "grad_norm": 7.928924560546875, "learning_rate": 9.919033165234112e-06, "loss": 0.26690216, "memory(GiB)": 13.7, "step": 11145, "train_speed(iter/s)": 1.524332 }, { "acc": 0.95999069, "epoch": 5.226154206702601, "grad_norm": 7.560621738433838, "learning_rate": 9.918894173601652e-06, "loss": 0.23333826, "memory(GiB)": 13.7, "step": 11150, "train_speed(iter/s)": 1.524379 }, { "acc": 0.96235304, "epoch": 5.228497773611436, "grad_norm": 6.224531650543213, "learning_rate": 9.918755063747153e-06, "loss": 0.1921185, "memory(GiB)": 13.7, "step": 11155, "train_speed(iter/s)": 1.524407 }, { "acc": 0.9618187, "epoch": 5.230841340520272, "grad_norm": 31.005521774291992, "learning_rate": 9.91861583567396e-06, "loss": 0.15229611, "memory(GiB)": 13.7, "step": 11160, "train_speed(iter/s)": 1.524413 }, { "acc": 0.97192535, "epoch": 5.2331849074291075, "grad_norm": 11.535491943359375, "learning_rate": 9.918476489385421e-06, "loss": 0.14393018, "memory(GiB)": 13.7, "step": 11165, "train_speed(iter/s)": 1.524444 }, { "acc": 0.95735111, "epoch": 5.235528474337943, "grad_norm": 16.057207107543945, "learning_rate": 9.918337024884883e-06, "loss": 0.15555271, "memory(GiB)": 13.7, "step": 11170, "train_speed(iter/s)": 1.524457 }, { "acc": 0.94821453, "epoch": 5.237872041246778, "grad_norm": 15.115041732788086, "learning_rate": 9.9181974421757e-06, "loss": 0.26441147, "memory(GiB)": 13.7, "step": 11175, "train_speed(iter/s)": 1.524475 }, { "acc": 0.97086601, "epoch": 5.240215608155613, "grad_norm": 13.056283950805664, "learning_rate": 9.918057741261227e-06, "loss": 0.15162723, "memory(GiB)": 13.7, "step": 11180, "train_speed(iter/s)": 1.524523 }, { "acc": 0.96360121, "epoch": 5.242559175064448, "grad_norm": 1.6050602197647095, "learning_rate": 9.917917922144819e-06, "loss": 0.23360624, "memory(GiB)": 13.7, "step": 11185, "train_speed(iter/s)": 1.524537 }, { "acc": 0.96527338, "epoch": 5.244902741973283, "grad_norm": 4.911757469177246, "learning_rate": 9.91777798482984e-06, "loss": 0.29815817, "memory(GiB)": 13.7, "step": 11190, "train_speed(iter/s)": 1.52456 }, { "acc": 0.96218147, "epoch": 5.247246308882119, "grad_norm": 4.9572248458862305, "learning_rate": 9.917637929319653e-06, "loss": 0.21377935, "memory(GiB)": 13.7, "step": 11195, "train_speed(iter/s)": 1.524585 }, { "acc": 0.95423861, "epoch": 5.249589875790954, "grad_norm": 12.686808586120605, "learning_rate": 9.917497755617626e-06, "loss": 0.23515139, "memory(GiB)": 13.7, "step": 11200, "train_speed(iter/s)": 1.52462 }, { "acc": 0.9427742, "epoch": 5.251933442699789, "grad_norm": 70.87950897216797, "learning_rate": 9.917357463727125e-06, "loss": 0.41213984, "memory(GiB)": 13.7, "step": 11205, "train_speed(iter/s)": 1.524665 }, { "acc": 0.96877975, "epoch": 5.254277009608624, "grad_norm": 8.336264610290527, "learning_rate": 9.917217053651523e-06, "loss": 0.20780559, "memory(GiB)": 13.7, "step": 11210, "train_speed(iter/s)": 1.524692 }, { "acc": 0.95725651, "epoch": 5.256620576517459, "grad_norm": 8.060219764709473, "learning_rate": 9.917076525394196e-06, "loss": 0.24222584, "memory(GiB)": 13.7, "step": 11215, "train_speed(iter/s)": 1.524694 }, { "acc": 0.96231232, "epoch": 5.258964143426295, "grad_norm": 0.35005030035972595, "learning_rate": 9.916935878958521e-06, "loss": 0.26678491, "memory(GiB)": 13.7, "step": 11220, "train_speed(iter/s)": 1.524717 }, { "acc": 0.97437496, "epoch": 5.26130771033513, "grad_norm": 1.8386510610580444, "learning_rate": 9.91679511434788e-06, "loss": 0.13262677, "memory(GiB)": 13.7, "step": 11225, "train_speed(iter/s)": 1.524717 }, { "acc": 0.9701786, "epoch": 5.263651277243965, "grad_norm": 4.65306282043457, "learning_rate": 9.916654231565657e-06, "loss": 0.16627181, "memory(GiB)": 13.7, "step": 11230, "train_speed(iter/s)": 1.524761 }, { "acc": 0.93763094, "epoch": 5.2659948441528, "grad_norm": 3.401254415512085, "learning_rate": 9.916513230615236e-06, "loss": 0.29292278, "memory(GiB)": 13.7, "step": 11235, "train_speed(iter/s)": 1.52477 }, { "acc": 0.9696104, "epoch": 5.268338411061636, "grad_norm": 6.386344909667969, "learning_rate": 9.916372111500006e-06, "loss": 0.19057511, "memory(GiB)": 13.7, "step": 11240, "train_speed(iter/s)": 1.524775 }, { "acc": 0.95702648, "epoch": 5.270681977970471, "grad_norm": 6.817617416381836, "learning_rate": 9.916230874223361e-06, "loss": 0.20110269, "memory(GiB)": 13.7, "step": 11245, "train_speed(iter/s)": 1.524852 }, { "acc": 0.98249998, "epoch": 5.273025544879307, "grad_norm": 4.154213905334473, "learning_rate": 9.916089518788696e-06, "loss": 0.10337141, "memory(GiB)": 13.7, "step": 11250, "train_speed(iter/s)": 1.524878 }, { "acc": 0.9726099, "epoch": 5.275369111788142, "grad_norm": 9.834837913513184, "learning_rate": 9.915948045199408e-06, "loss": 0.15962274, "memory(GiB)": 13.7, "step": 11255, "train_speed(iter/s)": 1.524894 }, { "acc": 0.9622407, "epoch": 5.277712678696977, "grad_norm": 8.366576194763184, "learning_rate": 9.915806453458896e-06, "loss": 0.21626782, "memory(GiB)": 13.7, "step": 11260, "train_speed(iter/s)": 1.524949 }, { "acc": 0.96573486, "epoch": 5.280056245605812, "grad_norm": 4.339930534362793, "learning_rate": 9.915664743570567e-06, "loss": 0.19612949, "memory(GiB)": 13.7, "step": 11265, "train_speed(iter/s)": 1.524965 }, { "acc": 0.97947922, "epoch": 5.282399812514647, "grad_norm": 1.4800037145614624, "learning_rate": 9.915522915537825e-06, "loss": 0.16926121, "memory(GiB)": 13.7, "step": 11270, "train_speed(iter/s)": 1.524962 }, { "acc": 0.97848282, "epoch": 5.284743379423483, "grad_norm": 59.96312713623047, "learning_rate": 9.915380969364078e-06, "loss": 0.10033842, "memory(GiB)": 13.7, "step": 11275, "train_speed(iter/s)": 1.524988 }, { "acc": 0.9605051, "epoch": 5.287086946332318, "grad_norm": 17.473052978515625, "learning_rate": 9.91523890505274e-06, "loss": 0.20927615, "memory(GiB)": 13.7, "step": 11280, "train_speed(iter/s)": 1.524996 }, { "acc": 0.957094, "epoch": 5.289430513241153, "grad_norm": 6.385454177856445, "learning_rate": 9.915096722607224e-06, "loss": 0.23320966, "memory(GiB)": 13.7, "step": 11285, "train_speed(iter/s)": 1.525034 }, { "acc": 0.96079998, "epoch": 5.291774080149988, "grad_norm": 9.550808906555176, "learning_rate": 9.914954422030949e-06, "loss": 0.28895078, "memory(GiB)": 13.7, "step": 11290, "train_speed(iter/s)": 1.525097 }, { "acc": 0.94718437, "epoch": 5.294117647058823, "grad_norm": 15.225836753845215, "learning_rate": 9.914812003327333e-06, "loss": 0.30980303, "memory(GiB)": 13.7, "step": 11295, "train_speed(iter/s)": 1.52513 }, { "acc": 0.97230358, "epoch": 5.296461213967659, "grad_norm": 10.41920280456543, "learning_rate": 9.914669466499803e-06, "loss": 0.22977738, "memory(GiB)": 13.7, "step": 11300, "train_speed(iter/s)": 1.525181 }, { "acc": 0.95570488, "epoch": 5.298804780876494, "grad_norm": 7.33677339553833, "learning_rate": 9.914526811551785e-06, "loss": 0.26933479, "memory(GiB)": 13.7, "step": 11305, "train_speed(iter/s)": 1.525259 }, { "acc": 0.95541401, "epoch": 5.301148347785329, "grad_norm": 9.214138984680176, "learning_rate": 9.914384038486705e-06, "loss": 0.17640063, "memory(GiB)": 13.7, "step": 11310, "train_speed(iter/s)": 1.525243 }, { "acc": 0.9760417, "epoch": 5.303491914694164, "grad_norm": 4.675262451171875, "learning_rate": 9.914241147307996e-06, "loss": 0.1039676, "memory(GiB)": 13.7, "step": 11315, "train_speed(iter/s)": 1.525278 }, { "acc": 0.95783329, "epoch": 5.305835481602999, "grad_norm": 4.983160018920898, "learning_rate": 9.91409813801909e-06, "loss": 0.20191846, "memory(GiB)": 13.7, "step": 11320, "train_speed(iter/s)": 1.525295 }, { "acc": 0.97875004, "epoch": 5.308179048511835, "grad_norm": 5.655220031738281, "learning_rate": 9.91395501062343e-06, "loss": 0.11478366, "memory(GiB)": 13.7, "step": 11325, "train_speed(iter/s)": 1.525299 }, { "acc": 0.95497026, "epoch": 5.310522615420671, "grad_norm": 11.873039245605469, "learning_rate": 9.913811765124455e-06, "loss": 0.21864462, "memory(GiB)": 13.7, "step": 11330, "train_speed(iter/s)": 1.525279 }, { "acc": 0.95812397, "epoch": 5.312866182329506, "grad_norm": 11.072734832763672, "learning_rate": 9.913668401525603e-06, "loss": 0.2338341, "memory(GiB)": 13.7, "step": 11335, "train_speed(iter/s)": 1.525333 }, { "acc": 0.97621813, "epoch": 5.315209749238341, "grad_norm": 4.716916561126709, "learning_rate": 9.913524919830326e-06, "loss": 0.12630938, "memory(GiB)": 13.7, "step": 11340, "train_speed(iter/s)": 1.525355 }, { "acc": 0.94879751, "epoch": 5.317553316147176, "grad_norm": 11.938192367553711, "learning_rate": 9.913381320042069e-06, "loss": 0.21873877, "memory(GiB)": 13.7, "step": 11345, "train_speed(iter/s)": 1.525377 }, { "acc": 0.96881943, "epoch": 5.319896883056011, "grad_norm": 6.391254901885986, "learning_rate": 9.913237602164287e-06, "loss": 0.15510137, "memory(GiB)": 13.7, "step": 11350, "train_speed(iter/s)": 1.525362 }, { "acc": 0.98412209, "epoch": 5.322240449964847, "grad_norm": 8.314950942993164, "learning_rate": 9.913093766200431e-06, "loss": 0.14887705, "memory(GiB)": 13.7, "step": 11355, "train_speed(iter/s)": 1.52538 }, { "acc": 0.97278843, "epoch": 5.324584016873682, "grad_norm": 8.548991203308105, "learning_rate": 9.912949812153962e-06, "loss": 0.18388034, "memory(GiB)": 13.7, "step": 11360, "train_speed(iter/s)": 1.525405 }, { "acc": 0.96113377, "epoch": 5.326927583782517, "grad_norm": 9.435351371765137, "learning_rate": 9.912805740028337e-06, "loss": 0.17071077, "memory(GiB)": 13.7, "step": 11365, "train_speed(iter/s)": 1.525468 }, { "acc": 0.9620985, "epoch": 5.329271150691352, "grad_norm": 5.123012065887451, "learning_rate": 9.91266154982702e-06, "loss": 0.27781024, "memory(GiB)": 13.7, "step": 11370, "train_speed(iter/s)": 1.525466 }, { "acc": 0.97849207, "epoch": 5.331614717600187, "grad_norm": 8.317217826843262, "learning_rate": 9.912517241553475e-06, "loss": 0.11174473, "memory(GiB)": 13.7, "step": 11375, "train_speed(iter/s)": 1.525471 }, { "acc": 0.95351877, "epoch": 5.3339582845090225, "grad_norm": 4.0843987464904785, "learning_rate": 9.912372815211175e-06, "loss": 0.33273895, "memory(GiB)": 13.7, "step": 11380, "train_speed(iter/s)": 1.525481 }, { "acc": 0.95397329, "epoch": 5.336301851417858, "grad_norm": 17.83935546875, "learning_rate": 9.91222827080359e-06, "loss": 0.31433721, "memory(GiB)": 13.7, "step": 11385, "train_speed(iter/s)": 1.525503 }, { "acc": 0.96576395, "epoch": 5.338645418326693, "grad_norm": 18.993391036987305, "learning_rate": 9.912083608334192e-06, "loss": 0.24549446, "memory(GiB)": 13.7, "step": 11390, "train_speed(iter/s)": 1.525501 }, { "acc": 0.95884914, "epoch": 5.340988985235528, "grad_norm": 12.033791542053223, "learning_rate": 9.911938827806461e-06, "loss": 0.30419786, "memory(GiB)": 13.7, "step": 11395, "train_speed(iter/s)": 1.52551 }, { "acc": 0.95948391, "epoch": 5.343332552144363, "grad_norm": 2.3292524814605713, "learning_rate": 9.911793929223876e-06, "loss": 0.28527489, "memory(GiB)": 13.7, "step": 11400, "train_speed(iter/s)": 1.525541 }, { "acc": 0.96162004, "epoch": 5.3456761190531985, "grad_norm": 43.98373794555664, "learning_rate": 9.911648912589917e-06, "loss": 0.21944413, "memory(GiB)": 13.7, "step": 11405, "train_speed(iter/s)": 1.525583 }, { "acc": 0.96208334, "epoch": 5.348019685962035, "grad_norm": 7.709427833557129, "learning_rate": 9.911503777908076e-06, "loss": 0.27083673, "memory(GiB)": 13.7, "step": 11410, "train_speed(iter/s)": 1.525632 }, { "acc": 0.95876198, "epoch": 5.35036325287087, "grad_norm": 10.03668212890625, "learning_rate": 9.911358525181835e-06, "loss": 0.26951866, "memory(GiB)": 13.7, "step": 11415, "train_speed(iter/s)": 1.525639 }, { "acc": 0.94214268, "epoch": 5.352706819779705, "grad_norm": 11.077550888061523, "learning_rate": 9.911213154414692e-06, "loss": 0.30638669, "memory(GiB)": 13.7, "step": 11420, "train_speed(iter/s)": 1.525652 }, { "acc": 0.96650658, "epoch": 5.35505038668854, "grad_norm": 8.628308296203613, "learning_rate": 9.911067665610137e-06, "loss": 0.21196175, "memory(GiB)": 13.7, "step": 11425, "train_speed(iter/s)": 1.525719 }, { "acc": 0.96469898, "epoch": 5.357393953597375, "grad_norm": 9.012823104858398, "learning_rate": 9.910922058771664e-06, "loss": 0.30123847, "memory(GiB)": 13.7, "step": 11430, "train_speed(iter/s)": 1.525736 }, { "acc": 0.97489586, "epoch": 5.3597375205062106, "grad_norm": 3.227891445159912, "learning_rate": 9.910776333902781e-06, "loss": 0.14474683, "memory(GiB)": 13.7, "step": 11435, "train_speed(iter/s)": 1.525742 }, { "acc": 0.95552082, "epoch": 5.362081087415046, "grad_norm": 14.03114128112793, "learning_rate": 9.910630491006985e-06, "loss": 0.20897002, "memory(GiB)": 13.7, "step": 11440, "train_speed(iter/s)": 1.525742 }, { "acc": 0.94666281, "epoch": 5.364424654323881, "grad_norm": 42.946903228759766, "learning_rate": 9.910484530087785e-06, "loss": 0.36183102, "memory(GiB)": 13.7, "step": 11445, "train_speed(iter/s)": 1.525802 }, { "acc": 0.98193111, "epoch": 5.366768221232716, "grad_norm": 1.615657091140747, "learning_rate": 9.910338451148684e-06, "loss": 0.16419828, "memory(GiB)": 13.7, "step": 11450, "train_speed(iter/s)": 1.525855 }, { "acc": 0.97361116, "epoch": 5.369111788141551, "grad_norm": 4.658864498138428, "learning_rate": 9.910192254193199e-06, "loss": 0.19219955, "memory(GiB)": 13.7, "step": 11455, "train_speed(iter/s)": 1.525865 }, { "acc": 0.95385818, "epoch": 5.3714553550503865, "grad_norm": 7.7015252113342285, "learning_rate": 9.91004593922484e-06, "loss": 0.27985077, "memory(GiB)": 13.7, "step": 11460, "train_speed(iter/s)": 1.525898 }, { "acc": 0.97242517, "epoch": 5.373798921959222, "grad_norm": 3.8369364738464355, "learning_rate": 9.909899506247127e-06, "loss": 0.14030001, "memory(GiB)": 13.7, "step": 11465, "train_speed(iter/s)": 1.525921 }, { "acc": 0.95869312, "epoch": 5.376142488868057, "grad_norm": 5.553395748138428, "learning_rate": 9.909752955263578e-06, "loss": 0.24064364, "memory(GiB)": 13.7, "step": 11470, "train_speed(iter/s)": 1.525922 }, { "acc": 0.96991472, "epoch": 5.378486055776892, "grad_norm": 7.187021732330322, "learning_rate": 9.909606286277716e-06, "loss": 0.16976967, "memory(GiB)": 13.7, "step": 11475, "train_speed(iter/s)": 1.525928 }, { "acc": 0.97032986, "epoch": 5.380829622685727, "grad_norm": 33.516998291015625, "learning_rate": 9.909459499293068e-06, "loss": 0.1914603, "memory(GiB)": 13.7, "step": 11480, "train_speed(iter/s)": 1.525931 }, { "acc": 0.96788282, "epoch": 5.383173189594563, "grad_norm": 2.895658254623413, "learning_rate": 9.909312594313157e-06, "loss": 0.1752713, "memory(GiB)": 13.7, "step": 11485, "train_speed(iter/s)": 1.525966 }, { "acc": 0.959863, "epoch": 5.385516756503399, "grad_norm": 9.458077430725098, "learning_rate": 9.909165571341522e-06, "loss": 0.28741906, "memory(GiB)": 13.7, "step": 11490, "train_speed(iter/s)": 1.525994 }, { "acc": 0.96035929, "epoch": 5.387860323412234, "grad_norm": 5.15767765045166, "learning_rate": 9.909018430381691e-06, "loss": 0.26062763, "memory(GiB)": 13.7, "step": 11495, "train_speed(iter/s)": 1.526007 }, { "acc": 0.94456501, "epoch": 5.390203890321069, "grad_norm": 6.3522491455078125, "learning_rate": 9.908871171437204e-06, "loss": 0.32798305, "memory(GiB)": 13.7, "step": 11500, "train_speed(iter/s)": 1.526023 }, { "acc": 0.96900253, "epoch": 5.392547457229904, "grad_norm": 27.116121292114258, "learning_rate": 9.908723794511597e-06, "loss": 0.23557525, "memory(GiB)": 13.7, "step": 11505, "train_speed(iter/s)": 1.526059 }, { "acc": 0.95151787, "epoch": 5.394891024138739, "grad_norm": 12.41899585723877, "learning_rate": 9.908576299608416e-06, "loss": 0.2450068, "memory(GiB)": 13.7, "step": 11510, "train_speed(iter/s)": 1.526104 }, { "acc": 0.96895838, "epoch": 5.3972345910475745, "grad_norm": 4.838184356689453, "learning_rate": 9.908428686731206e-06, "loss": 0.17157354, "memory(GiB)": 13.7, "step": 11515, "train_speed(iter/s)": 1.526132 }, { "acc": 0.95662766, "epoch": 5.39957815795641, "grad_norm": 13.201484680175781, "learning_rate": 9.908280955883513e-06, "loss": 0.28814809, "memory(GiB)": 13.7, "step": 11520, "train_speed(iter/s)": 1.526144 }, { "acc": 0.97671623, "epoch": 5.401921724865245, "grad_norm": 5.468162536621094, "learning_rate": 9.90813310706889e-06, "loss": 0.11980264, "memory(GiB)": 13.7, "step": 11525, "train_speed(iter/s)": 1.526151 }, { "acc": 0.9712532, "epoch": 5.40426529177408, "grad_norm": 8.243318557739258, "learning_rate": 9.907985140290889e-06, "loss": 0.18602784, "memory(GiB)": 13.7, "step": 11530, "train_speed(iter/s)": 1.526153 }, { "acc": 0.95207253, "epoch": 5.406608858682915, "grad_norm": 18.156944274902344, "learning_rate": 9.907837055553067e-06, "loss": 0.35862646, "memory(GiB)": 13.7, "step": 11535, "train_speed(iter/s)": 1.526159 }, { "acc": 0.95091877, "epoch": 5.4089524255917505, "grad_norm": 19.82554054260254, "learning_rate": 9.907688852858986e-06, "loss": 0.20172081, "memory(GiB)": 13.7, "step": 11540, "train_speed(iter/s)": 1.52614 }, { "acc": 0.9528429, "epoch": 5.411295992500586, "grad_norm": 11.604402542114258, "learning_rate": 9.907540532212206e-06, "loss": 0.28056793, "memory(GiB)": 13.7, "step": 11545, "train_speed(iter/s)": 1.526134 }, { "acc": 0.96445351, "epoch": 5.413639559409421, "grad_norm": 14.320487976074219, "learning_rate": 9.907392093616291e-06, "loss": 0.26639442, "memory(GiB)": 13.7, "step": 11550, "train_speed(iter/s)": 1.526136 }, { "acc": 0.96178484, "epoch": 5.415983126318256, "grad_norm": 9.959125518798828, "learning_rate": 9.907243537074811e-06, "loss": 0.22776103, "memory(GiB)": 13.7, "step": 11555, "train_speed(iter/s)": 1.52618 }, { "acc": 0.95825367, "epoch": 5.418326693227091, "grad_norm": 29.352188110351562, "learning_rate": 9.907094862591337e-06, "loss": 0.30950823, "memory(GiB)": 13.7, "step": 11560, "train_speed(iter/s)": 1.526201 }, { "acc": 0.95712795, "epoch": 5.4206702601359265, "grad_norm": 5.933341026306152, "learning_rate": 9.90694607016944e-06, "loss": 0.27603714, "memory(GiB)": 13.7, "step": 11565, "train_speed(iter/s)": 1.526253 }, { "acc": 0.95975275, "epoch": 5.4230138270447625, "grad_norm": 66.31928253173828, "learning_rate": 9.906797159812701e-06, "loss": 0.33174317, "memory(GiB)": 13.7, "step": 11570, "train_speed(iter/s)": 1.526293 }, { "acc": 0.97251816, "epoch": 5.425357393953598, "grad_norm": 6.434128761291504, "learning_rate": 9.906648131524695e-06, "loss": 0.17832856, "memory(GiB)": 13.7, "step": 11575, "train_speed(iter/s)": 1.526307 }, { "acc": 0.96461306, "epoch": 5.427700960862433, "grad_norm": 8.819028854370117, "learning_rate": 9.906498985309006e-06, "loss": 0.22075915, "memory(GiB)": 13.7, "step": 11580, "train_speed(iter/s)": 1.526317 }, { "acc": 0.94487743, "epoch": 5.430044527771268, "grad_norm": 18.449356079101562, "learning_rate": 9.90634972116922e-06, "loss": 0.25241921, "memory(GiB)": 13.7, "step": 11585, "train_speed(iter/s)": 1.526323 }, { "acc": 0.9667491, "epoch": 5.432388094680103, "grad_norm": 2.5382790565490723, "learning_rate": 9.906200339108924e-06, "loss": 0.16019917, "memory(GiB)": 13.7, "step": 11590, "train_speed(iter/s)": 1.526339 }, { "acc": 0.95705357, "epoch": 5.4347316615889385, "grad_norm": 12.582572937011719, "learning_rate": 9.906050839131705e-06, "loss": 0.23289096, "memory(GiB)": 13.7, "step": 11595, "train_speed(iter/s)": 1.526335 }, { "acc": 0.94707212, "epoch": 5.437075228497774, "grad_norm": 10.383101463317871, "learning_rate": 9.905901221241163e-06, "loss": 0.27646351, "memory(GiB)": 13.7, "step": 11600, "train_speed(iter/s)": 1.526357 }, { "acc": 0.96757097, "epoch": 5.439418795406609, "grad_norm": 5.417575359344482, "learning_rate": 9.905751485440891e-06, "loss": 0.21992874, "memory(GiB)": 13.7, "step": 11605, "train_speed(iter/s)": 1.526377 }, { "acc": 0.95610924, "epoch": 5.441762362315444, "grad_norm": 7.3426690101623535, "learning_rate": 9.905601631734487e-06, "loss": 0.24769771, "memory(GiB)": 13.7, "step": 11610, "train_speed(iter/s)": 1.526431 }, { "acc": 0.9631259, "epoch": 5.444105929224279, "grad_norm": 15.699623107910156, "learning_rate": 9.905451660125554e-06, "loss": 0.2124227, "memory(GiB)": 13.7, "step": 11615, "train_speed(iter/s)": 1.526443 }, { "acc": 0.96734581, "epoch": 5.4464494961331145, "grad_norm": 0.3532622158527374, "learning_rate": 9.9053015706177e-06, "loss": 0.13985645, "memory(GiB)": 13.7, "step": 11620, "train_speed(iter/s)": 1.526466 }, { "acc": 0.96384802, "epoch": 5.44879306304195, "grad_norm": 4.785731315612793, "learning_rate": 9.905151363214527e-06, "loss": 0.16707169, "memory(GiB)": 13.7, "step": 11625, "train_speed(iter/s)": 1.526487 }, { "acc": 0.9556757, "epoch": 5.451136629950785, "grad_norm": 4.72387170791626, "learning_rate": 9.90500103791965e-06, "loss": 0.26836894, "memory(GiB)": 13.7, "step": 11630, "train_speed(iter/s)": 1.526486 }, { "acc": 0.93726082, "epoch": 5.45348019685962, "grad_norm": 8.97244930267334, "learning_rate": 9.904850594736678e-06, "loss": 0.3750922, "memory(GiB)": 13.7, "step": 11635, "train_speed(iter/s)": 1.526508 }, { "acc": 0.96136904, "epoch": 5.455823763768455, "grad_norm": 12.850744247436523, "learning_rate": 9.904700033669231e-06, "loss": 0.19206867, "memory(GiB)": 13.7, "step": 11640, "train_speed(iter/s)": 1.52651 }, { "acc": 0.95059528, "epoch": 5.45816733067729, "grad_norm": 8.496431350708008, "learning_rate": 9.904549354720927e-06, "loss": 0.28052039, "memory(GiB)": 13.7, "step": 11645, "train_speed(iter/s)": 1.526574 }, { "acc": 0.95978813, "epoch": 5.460510897586126, "grad_norm": 9.17376708984375, "learning_rate": 9.904398557895387e-06, "loss": 0.2112905, "memory(GiB)": 13.7, "step": 11650, "train_speed(iter/s)": 1.526637 }, { "acc": 0.94941463, "epoch": 5.462854464494962, "grad_norm": 5.10399866104126, "learning_rate": 9.904247643196238e-06, "loss": 0.32232554, "memory(GiB)": 13.7, "step": 11655, "train_speed(iter/s)": 1.526645 }, { "acc": 0.97421131, "epoch": 5.465198031403797, "grad_norm": 3.1257171630859375, "learning_rate": 9.904096610627103e-06, "loss": 0.16551082, "memory(GiB)": 13.7, "step": 11660, "train_speed(iter/s)": 1.526671 }, { "acc": 0.96370192, "epoch": 5.467541598312632, "grad_norm": 4.413194179534912, "learning_rate": 9.903945460191618e-06, "loss": 0.12944696, "memory(GiB)": 13.7, "step": 11665, "train_speed(iter/s)": 1.526701 }, { "acc": 0.96133089, "epoch": 5.469885165221467, "grad_norm": 12.586956024169922, "learning_rate": 9.90379419189341e-06, "loss": 0.25224557, "memory(GiB)": 13.7, "step": 11670, "train_speed(iter/s)": 1.526727 }, { "acc": 0.981427, "epoch": 5.4722287321303025, "grad_norm": 2.12961483001709, "learning_rate": 9.903642805736121e-06, "loss": 0.14121833, "memory(GiB)": 13.7, "step": 11675, "train_speed(iter/s)": 1.526745 }, { "acc": 0.94423695, "epoch": 5.474572299039138, "grad_norm": 44.25144958496094, "learning_rate": 9.903491301723386e-06, "loss": 0.36067517, "memory(GiB)": 13.7, "step": 11680, "train_speed(iter/s)": 1.526784 }, { "acc": 0.96596384, "epoch": 5.476915865947973, "grad_norm": 11.239850044250488, "learning_rate": 9.903339679858848e-06, "loss": 0.18002553, "memory(GiB)": 13.7, "step": 11685, "train_speed(iter/s)": 1.526821 }, { "acc": 0.97112579, "epoch": 5.479259432856808, "grad_norm": 105.9154281616211, "learning_rate": 9.90318794014615e-06, "loss": 0.10894158, "memory(GiB)": 13.7, "step": 11690, "train_speed(iter/s)": 1.526842 }, { "acc": 0.97413216, "epoch": 5.481602999765643, "grad_norm": 5.9398603439331055, "learning_rate": 9.903036082588943e-06, "loss": 0.15779686, "memory(GiB)": 13.7, "step": 11695, "train_speed(iter/s)": 1.526866 }, { "acc": 0.96990566, "epoch": 5.483946566674478, "grad_norm": 8.911221504211426, "learning_rate": 9.902884107190872e-06, "loss": 0.18310914, "memory(GiB)": 13.7, "step": 11700, "train_speed(iter/s)": 1.526865 }, { "acc": 0.94795637, "epoch": 5.486290133583314, "grad_norm": 23.629295349121094, "learning_rate": 9.902732013955593e-06, "loss": 0.30616963, "memory(GiB)": 13.7, "step": 11705, "train_speed(iter/s)": 1.526882 }, { "acc": 0.97006941, "epoch": 5.488633700492149, "grad_norm": 0.3670051693916321, "learning_rate": 9.902579802886762e-06, "loss": 0.14974945, "memory(GiB)": 13.7, "step": 11710, "train_speed(iter/s)": 1.526884 }, { "acc": 0.97006226, "epoch": 5.490977267400984, "grad_norm": 10.200608253479004, "learning_rate": 9.902427473988037e-06, "loss": 0.17424629, "memory(GiB)": 13.7, "step": 11715, "train_speed(iter/s)": 1.526889 }, { "acc": 0.96129417, "epoch": 5.493320834309819, "grad_norm": 8.797354698181152, "learning_rate": 9.90227502726308e-06, "loss": 0.2115787, "memory(GiB)": 13.7, "step": 11720, "train_speed(iter/s)": 1.526913 }, { "acc": 0.95952587, "epoch": 5.495664401218654, "grad_norm": 10.460016250610352, "learning_rate": 9.902122462715554e-06, "loss": 0.23980436, "memory(GiB)": 13.7, "step": 11725, "train_speed(iter/s)": 1.526939 }, { "acc": 0.9637496, "epoch": 5.4980079681274905, "grad_norm": 5.701318740844727, "learning_rate": 9.90196978034913e-06, "loss": 0.15668938, "memory(GiB)": 13.7, "step": 11730, "train_speed(iter/s)": 1.526957 }, { "acc": 0.97025871, "epoch": 5.500351535036325, "grad_norm": 16.547739028930664, "learning_rate": 9.901816980167473e-06, "loss": 0.18929523, "memory(GiB)": 13.7, "step": 11735, "train_speed(iter/s)": 1.52695 }, { "acc": 0.95902767, "epoch": 5.502695101945161, "grad_norm": 10.48530101776123, "learning_rate": 9.901664062174258e-06, "loss": 0.27824152, "memory(GiB)": 13.7, "step": 11740, "train_speed(iter/s)": 1.526995 }, { "acc": 0.94879789, "epoch": 5.505038668853996, "grad_norm": 26.40266990661621, "learning_rate": 9.90151102637316e-06, "loss": 0.28588281, "memory(GiB)": 13.7, "step": 11745, "train_speed(iter/s)": 1.527028 }, { "acc": 0.966255, "epoch": 5.507382235762831, "grad_norm": 8.685157775878906, "learning_rate": 9.901357872767857e-06, "loss": 0.23074298, "memory(GiB)": 13.7, "step": 11750, "train_speed(iter/s)": 1.527031 }, { "acc": 0.97796345, "epoch": 5.5097258026716665, "grad_norm": 5.870660781860352, "learning_rate": 9.901204601362034e-06, "loss": 0.11376626, "memory(GiB)": 13.7, "step": 11755, "train_speed(iter/s)": 1.527014 }, { "acc": 0.96188002, "epoch": 5.512069369580502, "grad_norm": 12.06784725189209, "learning_rate": 9.90105121215937e-06, "loss": 0.25104756, "memory(GiB)": 13.7, "step": 11760, "train_speed(iter/s)": 1.527032 }, { "acc": 0.9229701, "epoch": 5.514412936489337, "grad_norm": 8.140565872192383, "learning_rate": 9.900897705163555e-06, "loss": 0.33049264, "memory(GiB)": 13.7, "step": 11765, "train_speed(iter/s)": 1.527024 }, { "acc": 0.97042236, "epoch": 5.516756503398172, "grad_norm": 23.711023330688477, "learning_rate": 9.900744080378279e-06, "loss": 0.19195753, "memory(GiB)": 13.7, "step": 11770, "train_speed(iter/s)": 1.527045 }, { "acc": 0.96898727, "epoch": 5.519100070307007, "grad_norm": 11.629850387573242, "learning_rate": 9.900590337807233e-06, "loss": 0.11685245, "memory(GiB)": 13.7, "step": 11775, "train_speed(iter/s)": 1.527056 }, { "acc": 0.9511713, "epoch": 5.521443637215842, "grad_norm": 10.168508529663086, "learning_rate": 9.900436477454115e-06, "loss": 0.2509665, "memory(GiB)": 13.7, "step": 11780, "train_speed(iter/s)": 1.527107 }, { "acc": 0.97194805, "epoch": 5.523787204124678, "grad_norm": 7.770915508270264, "learning_rate": 9.90028249932262e-06, "loss": 0.24874823, "memory(GiB)": 13.7, "step": 11785, "train_speed(iter/s)": 1.527099 }, { "acc": 0.97368202, "epoch": 5.526130771033513, "grad_norm": 8.511063575744629, "learning_rate": 9.900128403416453e-06, "loss": 0.12947117, "memory(GiB)": 13.7, "step": 11790, "train_speed(iter/s)": 1.527108 }, { "acc": 0.96658421, "epoch": 5.528474337942348, "grad_norm": 11.681046485900879, "learning_rate": 9.899974189739313e-06, "loss": 0.23238902, "memory(GiB)": 13.7, "step": 11795, "train_speed(iter/s)": 1.527121 }, { "acc": 0.95177488, "epoch": 5.530817904851183, "grad_norm": 4.434096813201904, "learning_rate": 9.899819858294911e-06, "loss": 0.15933597, "memory(GiB)": 13.7, "step": 11800, "train_speed(iter/s)": 1.527131 }, { "acc": 0.95843725, "epoch": 5.533161471760018, "grad_norm": 9.144671440124512, "learning_rate": 9.899665409086953e-06, "loss": 0.22432439, "memory(GiB)": 13.7, "step": 11805, "train_speed(iter/s)": 1.527158 }, { "acc": 0.95809031, "epoch": 5.535505038668854, "grad_norm": 11.530328750610352, "learning_rate": 9.899510842119155e-06, "loss": 0.20319889, "memory(GiB)": 13.7, "step": 11810, "train_speed(iter/s)": 1.527197 }, { "acc": 0.98675594, "epoch": 5.53784860557769, "grad_norm": 2.350175380706787, "learning_rate": 9.899356157395232e-06, "loss": 0.12152058, "memory(GiB)": 13.7, "step": 11815, "train_speed(iter/s)": 1.527186 }, { "acc": 0.96275768, "epoch": 5.540192172486525, "grad_norm": 7.274529457092285, "learning_rate": 9.8992013549189e-06, "loss": 0.21623855, "memory(GiB)": 13.7, "step": 11820, "train_speed(iter/s)": 1.527216 }, { "acc": 0.97666664, "epoch": 5.54253573939536, "grad_norm": 18.11286163330078, "learning_rate": 9.89904643469388e-06, "loss": 0.15618351, "memory(GiB)": 13.7, "step": 11825, "train_speed(iter/s)": 1.527262 }, { "acc": 0.95540218, "epoch": 5.544879306304195, "grad_norm": 6.495236396789551, "learning_rate": 9.898891396723898e-06, "loss": 0.2114115, "memory(GiB)": 13.7, "step": 11830, "train_speed(iter/s)": 1.52731 }, { "acc": 0.97290173, "epoch": 5.54722287321303, "grad_norm": 3.099154233932495, "learning_rate": 9.898736241012679e-06, "loss": 0.16085877, "memory(GiB)": 13.7, "step": 11835, "train_speed(iter/s)": 1.527313 }, { "acc": 0.96113491, "epoch": 5.549566440121866, "grad_norm": 7.3074421882629395, "learning_rate": 9.898580967563952e-06, "loss": 0.25813789, "memory(GiB)": 13.7, "step": 11840, "train_speed(iter/s)": 1.527359 }, { "acc": 0.96425571, "epoch": 5.551910007030701, "grad_norm": 2.6117875576019287, "learning_rate": 9.898425576381452e-06, "loss": 0.22007623, "memory(GiB)": 13.7, "step": 11845, "train_speed(iter/s)": 1.527388 }, { "acc": 0.95759125, "epoch": 5.554253573939536, "grad_norm": 19.970186233520508, "learning_rate": 9.89827006746891e-06, "loss": 0.34011989, "memory(GiB)": 13.7, "step": 11850, "train_speed(iter/s)": 1.527435 }, { "acc": 0.96404381, "epoch": 5.556597140848371, "grad_norm": 130.7742156982422, "learning_rate": 9.898114440830067e-06, "loss": 0.19514402, "memory(GiB)": 13.7, "step": 11855, "train_speed(iter/s)": 1.52746 }, { "acc": 0.96587458, "epoch": 5.558940707757206, "grad_norm": 6.256916522979736, "learning_rate": 9.89795869646866e-06, "loss": 0.20128975, "memory(GiB)": 13.7, "step": 11860, "train_speed(iter/s)": 1.52746 }, { "acc": 0.96096764, "epoch": 5.561284274666042, "grad_norm": 8.161704063415527, "learning_rate": 9.897802834388439e-06, "loss": 0.2354713, "memory(GiB)": 13.7, "step": 11865, "train_speed(iter/s)": 1.527455 }, { "acc": 0.95943909, "epoch": 5.563627841574877, "grad_norm": 2.8407254219055176, "learning_rate": 9.897646854593146e-06, "loss": 0.16054068, "memory(GiB)": 13.7, "step": 11870, "train_speed(iter/s)": 1.527469 }, { "acc": 0.94174109, "epoch": 5.565971408483712, "grad_norm": 12.95099925994873, "learning_rate": 9.89749075708653e-06, "loss": 0.32326782, "memory(GiB)": 13.7, "step": 11875, "train_speed(iter/s)": 1.527462 }, { "acc": 0.93661366, "epoch": 5.568314975392547, "grad_norm": 14.89148998260498, "learning_rate": 9.897334541872344e-06, "loss": 0.27653587, "memory(GiB)": 13.7, "step": 11880, "train_speed(iter/s)": 1.527474 }, { "acc": 0.96434526, "epoch": 5.570658542301382, "grad_norm": 21.590335845947266, "learning_rate": 9.897178208954343e-06, "loss": 0.22698851, "memory(GiB)": 13.7, "step": 11885, "train_speed(iter/s)": 1.527481 }, { "acc": 0.94279051, "epoch": 5.573002109210218, "grad_norm": 12.341134071350098, "learning_rate": 9.897021758336285e-06, "loss": 0.31685493, "memory(GiB)": 13.7, "step": 11890, "train_speed(iter/s)": 1.52751 }, { "acc": 0.9678175, "epoch": 5.575345676119053, "grad_norm": 7.109302043914795, "learning_rate": 9.89686519002193e-06, "loss": 0.19739743, "memory(GiB)": 13.7, "step": 11895, "train_speed(iter/s)": 1.527549 }, { "acc": 0.95453358, "epoch": 5.577689243027889, "grad_norm": 21.811330795288086, "learning_rate": 9.896708504015044e-06, "loss": 0.21690655, "memory(GiB)": 13.7, "step": 11900, "train_speed(iter/s)": 1.527574 }, { "acc": 0.94822111, "epoch": 5.580032809936724, "grad_norm": 12.641429901123047, "learning_rate": 9.896551700319389e-06, "loss": 0.33099899, "memory(GiB)": 13.7, "step": 11905, "train_speed(iter/s)": 1.527629 }, { "acc": 0.96297512, "epoch": 5.582376376845559, "grad_norm": 11.289336204528809, "learning_rate": 9.896394778938735e-06, "loss": 0.23572412, "memory(GiB)": 13.7, "step": 11910, "train_speed(iter/s)": 1.527644 }, { "acc": 0.96663685, "epoch": 5.584719943754394, "grad_norm": 11.009598731994629, "learning_rate": 9.896237739876855e-06, "loss": 0.21103106, "memory(GiB)": 13.7, "step": 11915, "train_speed(iter/s)": 1.527644 }, { "acc": 0.94399986, "epoch": 5.58706351066323, "grad_norm": 16.984182357788086, "learning_rate": 9.896080583137524e-06, "loss": 0.36863008, "memory(GiB)": 13.7, "step": 11920, "train_speed(iter/s)": 1.527702 }, { "acc": 0.96004152, "epoch": 5.589407077572065, "grad_norm": 2.9916789531707764, "learning_rate": 9.89592330872452e-06, "loss": 0.20217271, "memory(GiB)": 13.7, "step": 11925, "train_speed(iter/s)": 1.527728 }, { "acc": 0.97709284, "epoch": 5.5917506444809, "grad_norm": 6.767732620239258, "learning_rate": 9.89576591664162e-06, "loss": 0.0913503, "memory(GiB)": 13.7, "step": 11930, "train_speed(iter/s)": 1.527774 }, { "acc": 0.977841, "epoch": 5.594094211389735, "grad_norm": 3.474350690841675, "learning_rate": 9.895608406892611e-06, "loss": 0.10793836, "memory(GiB)": 13.7, "step": 11935, "train_speed(iter/s)": 1.527801 }, { "acc": 0.96473217, "epoch": 5.59643777829857, "grad_norm": 5.338733196258545, "learning_rate": 9.895450779481279e-06, "loss": 0.18725288, "memory(GiB)": 13.7, "step": 11940, "train_speed(iter/s)": 1.527783 }, { "acc": 0.9720623, "epoch": 5.598781345207406, "grad_norm": 17.61247444152832, "learning_rate": 9.895293034411409e-06, "loss": 0.25279529, "memory(GiB)": 13.7, "step": 11945, "train_speed(iter/s)": 1.527755 }, { "acc": 0.9837595, "epoch": 5.601124912116241, "grad_norm": 7.569144248962402, "learning_rate": 9.895135171686796e-06, "loss": 0.14538748, "memory(GiB)": 13.7, "step": 11950, "train_speed(iter/s)": 1.527791 }, { "acc": 0.94163685, "epoch": 5.603468479025076, "grad_norm": 18.002443313598633, "learning_rate": 9.894977191311234e-06, "loss": 0.50547047, "memory(GiB)": 13.7, "step": 11955, "train_speed(iter/s)": 1.527836 }, { "acc": 0.96048603, "epoch": 5.605812045933911, "grad_norm": 5.97263765335083, "learning_rate": 9.89481909328852e-06, "loss": 0.25103421, "memory(GiB)": 13.7, "step": 11960, "train_speed(iter/s)": 1.527884 }, { "acc": 0.96971855, "epoch": 5.608155612842746, "grad_norm": 8.152420043945312, "learning_rate": 9.894660877622452e-06, "loss": 0.17988909, "memory(GiB)": 13.7, "step": 11965, "train_speed(iter/s)": 1.527885 }, { "acc": 0.962743, "epoch": 5.6104991797515815, "grad_norm": 23.49811363220215, "learning_rate": 9.894502544316838e-06, "loss": 0.34418228, "memory(GiB)": 13.7, "step": 11970, "train_speed(iter/s)": 1.527855 }, { "acc": 0.95976181, "epoch": 5.612842746660418, "grad_norm": 127.77679443359375, "learning_rate": 9.89434409337548e-06, "loss": 0.24288857, "memory(GiB)": 13.7, "step": 11975, "train_speed(iter/s)": 1.527924 }, { "acc": 0.95626373, "epoch": 5.615186313569252, "grad_norm": 8.565113067626953, "learning_rate": 9.894185524802186e-06, "loss": 0.20517197, "memory(GiB)": 13.7, "step": 11980, "train_speed(iter/s)": 1.527933 }, { "acc": 0.97787704, "epoch": 5.617529880478088, "grad_norm": 7.9321088790893555, "learning_rate": 9.894026838600771e-06, "loss": 0.14836667, "memory(GiB)": 13.7, "step": 11985, "train_speed(iter/s)": 1.52798 }, { "acc": 0.96454859, "epoch": 5.619873447386923, "grad_norm": 1.7534195184707642, "learning_rate": 9.893868034775045e-06, "loss": 0.17581587, "memory(GiB)": 13.7, "step": 11990, "train_speed(iter/s)": 1.527989 }, { "acc": 0.95192986, "epoch": 5.622217014295758, "grad_norm": 11.133932113647461, "learning_rate": 9.89370911332883e-06, "loss": 0.30094624, "memory(GiB)": 13.7, "step": 11995, "train_speed(iter/s)": 1.528019 }, { "acc": 0.96214628, "epoch": 5.624560581204594, "grad_norm": 20.502761840820312, "learning_rate": 9.893550074265942e-06, "loss": 0.25891538, "memory(GiB)": 13.7, "step": 12000, "train_speed(iter/s)": 1.528034 }, { "acc": 0.95697918, "epoch": 5.626904148113429, "grad_norm": 10.800262451171875, "learning_rate": 9.893390917590206e-06, "loss": 0.19197209, "memory(GiB)": 13.7, "step": 12005, "train_speed(iter/s)": 1.528037 }, { "acc": 0.93946609, "epoch": 5.629247715022264, "grad_norm": 14.539320945739746, "learning_rate": 9.893231643305447e-06, "loss": 0.29489367, "memory(GiB)": 13.7, "step": 12010, "train_speed(iter/s)": 1.528042 }, { "acc": 0.94320669, "epoch": 5.631591281931099, "grad_norm": 8.065817832946777, "learning_rate": 9.893072251415491e-06, "loss": 0.26728516, "memory(GiB)": 13.7, "step": 12015, "train_speed(iter/s)": 1.528077 }, { "acc": 0.95206499, "epoch": 5.633934848839934, "grad_norm": 9.98893928527832, "learning_rate": 9.892912741924172e-06, "loss": 0.17640994, "memory(GiB)": 13.7, "step": 12020, "train_speed(iter/s)": 1.528089 }, { "acc": 0.96439323, "epoch": 5.6362784157487695, "grad_norm": 7.786943435668945, "learning_rate": 9.892753114835326e-06, "loss": 0.16140242, "memory(GiB)": 13.7, "step": 12025, "train_speed(iter/s)": 1.528099 }, { "acc": 0.96511183, "epoch": 5.638621982657605, "grad_norm": 13.712553024291992, "learning_rate": 9.892593370152785e-06, "loss": 0.22745337, "memory(GiB)": 13.7, "step": 12030, "train_speed(iter/s)": 1.528103 }, { "acc": 0.97341223, "epoch": 5.64096554956644, "grad_norm": 7.224246501922607, "learning_rate": 9.89243350788039e-06, "loss": 0.14979296, "memory(GiB)": 13.7, "step": 12035, "train_speed(iter/s)": 1.528109 }, { "acc": 0.96004448, "epoch": 5.643309116475275, "grad_norm": 8.78169059753418, "learning_rate": 9.892273528021987e-06, "loss": 0.19363455, "memory(GiB)": 13.7, "step": 12040, "train_speed(iter/s)": 1.528137 }, { "acc": 0.95432816, "epoch": 5.64565268338411, "grad_norm": 17.775850296020508, "learning_rate": 9.89211343058142e-06, "loss": 0.2479857, "memory(GiB)": 13.7, "step": 12045, "train_speed(iter/s)": 1.528167 }, { "acc": 0.9739727, "epoch": 5.6479962502929455, "grad_norm": 9.948746681213379, "learning_rate": 9.891953215562534e-06, "loss": 0.13470081, "memory(GiB)": 13.7, "step": 12050, "train_speed(iter/s)": 1.528178 }, { "acc": 0.95778551, "epoch": 5.650339817201781, "grad_norm": 4.7687602043151855, "learning_rate": 9.891792882969183e-06, "loss": 0.30683556, "memory(GiB)": 13.7, "step": 12055, "train_speed(iter/s)": 1.528222 }, { "acc": 0.96227398, "epoch": 5.652683384110617, "grad_norm": 8.3701171875, "learning_rate": 9.891632432805221e-06, "loss": 0.19011437, "memory(GiB)": 13.7, "step": 12060, "train_speed(iter/s)": 1.528259 }, { "acc": 0.95851107, "epoch": 5.655026951019452, "grad_norm": 8.545978546142578, "learning_rate": 9.891471865074502e-06, "loss": 0.30071263, "memory(GiB)": 13.7, "step": 12065, "train_speed(iter/s)": 1.52826 }, { "acc": 0.95732889, "epoch": 5.657370517928287, "grad_norm": 4.503600597381592, "learning_rate": 9.89131117978089e-06, "loss": 0.15497109, "memory(GiB)": 13.7, "step": 12070, "train_speed(iter/s)": 1.528307 }, { "acc": 0.97889957, "epoch": 5.659714084837122, "grad_norm": 5.003383636474609, "learning_rate": 9.891150376928243e-06, "loss": 0.07307823, "memory(GiB)": 13.7, "step": 12075, "train_speed(iter/s)": 1.528317 }, { "acc": 0.95206852, "epoch": 5.6620576517459575, "grad_norm": 6.964266777038574, "learning_rate": 9.89098945652043e-06, "loss": 0.26550469, "memory(GiB)": 13.7, "step": 12080, "train_speed(iter/s)": 1.528348 }, { "acc": 0.96677084, "epoch": 5.664401218654793, "grad_norm": 7.864246845245361, "learning_rate": 9.890828418561314e-06, "loss": 0.17611184, "memory(GiB)": 13.7, "step": 12085, "train_speed(iter/s)": 1.528387 }, { "acc": 0.96597223, "epoch": 5.666744785563628, "grad_norm": 6.887933731079102, "learning_rate": 9.890667263054772e-06, "loss": 0.18341722, "memory(GiB)": 13.7, "step": 12090, "train_speed(iter/s)": 1.528353 }, { "acc": 0.95105648, "epoch": 5.669088352472463, "grad_norm": 13.474786758422852, "learning_rate": 9.890505990004672e-06, "loss": 0.30729523, "memory(GiB)": 13.7, "step": 12095, "train_speed(iter/s)": 1.52838 }, { "acc": 0.96547413, "epoch": 5.671431919381298, "grad_norm": 31.647085189819336, "learning_rate": 9.890344599414893e-06, "loss": 0.16916714, "memory(GiB)": 13.7, "step": 12100, "train_speed(iter/s)": 1.528377 }, { "acc": 0.9535553, "epoch": 5.6737754862901335, "grad_norm": 8.239559173583984, "learning_rate": 9.890183091289315e-06, "loss": 0.25747437, "memory(GiB)": 13.7, "step": 12105, "train_speed(iter/s)": 1.528404 }, { "acc": 0.96757441, "epoch": 5.676119053198969, "grad_norm": 7.758466720581055, "learning_rate": 9.89002146563182e-06, "loss": 0.13344204, "memory(GiB)": 13.7, "step": 12110, "train_speed(iter/s)": 1.528428 }, { "acc": 0.95130043, "epoch": 5.678462620107804, "grad_norm": 10.325124740600586, "learning_rate": 9.889859722446293e-06, "loss": 0.3350395, "memory(GiB)": 13.7, "step": 12115, "train_speed(iter/s)": 1.528424 }, { "acc": 0.948843, "epoch": 5.680806187016639, "grad_norm": 18.742576599121094, "learning_rate": 9.889697861736617e-06, "loss": 0.34287224, "memory(GiB)": 13.7, "step": 12120, "train_speed(iter/s)": 1.528417 }, { "acc": 0.96660709, "epoch": 5.683149753925474, "grad_norm": 7.589636325836182, "learning_rate": 9.88953588350669e-06, "loss": 0.15816898, "memory(GiB)": 13.7, "step": 12125, "train_speed(iter/s)": 1.528433 }, { "acc": 0.96651325, "epoch": 5.6854933208343095, "grad_norm": 11.347208976745605, "learning_rate": 9.8893737877604e-06, "loss": 0.21656368, "memory(GiB)": 13.7, "step": 12130, "train_speed(iter/s)": 1.528419 }, { "acc": 0.96974354, "epoch": 5.6878368877431456, "grad_norm": 8.528154373168945, "learning_rate": 9.889211574501648e-06, "loss": 0.20201404, "memory(GiB)": 13.7, "step": 12135, "train_speed(iter/s)": 1.528423 }, { "acc": 0.96609488, "epoch": 5.69018045465198, "grad_norm": 3.7369701862335205, "learning_rate": 9.88904924373433e-06, "loss": 0.241224, "memory(GiB)": 13.7, "step": 12140, "train_speed(iter/s)": 1.528456 }, { "acc": 0.95774803, "epoch": 5.692524021560816, "grad_norm": 9.283541679382324, "learning_rate": 9.888886795462347e-06, "loss": 0.35803757, "memory(GiB)": 13.7, "step": 12145, "train_speed(iter/s)": 1.528495 }, { "acc": 0.97755775, "epoch": 5.694867588469651, "grad_norm": 0.5444247722625732, "learning_rate": 9.888724229689606e-06, "loss": 0.08823168, "memory(GiB)": 13.7, "step": 12150, "train_speed(iter/s)": 1.528522 }, { "acc": 0.97050591, "epoch": 5.697211155378486, "grad_norm": 2.5660483837127686, "learning_rate": 9.888561546420013e-06, "loss": 0.09928245, "memory(GiB)": 13.7, "step": 12155, "train_speed(iter/s)": 1.528527 }, { "acc": 0.97071428, "epoch": 5.6995547222873215, "grad_norm": 11.348294258117676, "learning_rate": 9.888398745657479e-06, "loss": 0.20878778, "memory(GiB)": 13.7, "step": 12160, "train_speed(iter/s)": 1.528564 }, { "acc": 0.96986742, "epoch": 5.701898289196157, "grad_norm": 3.620347499847412, "learning_rate": 9.888235827405917e-06, "loss": 0.19579463, "memory(GiB)": 13.7, "step": 12165, "train_speed(iter/s)": 1.528575 }, { "acc": 0.96561966, "epoch": 5.704241856104992, "grad_norm": 13.966462135314941, "learning_rate": 9.888072791669244e-06, "loss": 0.26033921, "memory(GiB)": 13.7, "step": 12170, "train_speed(iter/s)": 1.5286 }, { "acc": 0.97000008, "epoch": 5.706585423013827, "grad_norm": 4.13515567779541, "learning_rate": 9.887909638451377e-06, "loss": 0.19142965, "memory(GiB)": 13.7, "step": 12175, "train_speed(iter/s)": 1.528595 }, { "acc": 0.95654154, "epoch": 5.708928989922662, "grad_norm": 5.504190921783447, "learning_rate": 9.88774636775624e-06, "loss": 0.21957383, "memory(GiB)": 13.7, "step": 12180, "train_speed(iter/s)": 1.528627 }, { "acc": 0.97434521, "epoch": 5.7112725568314975, "grad_norm": 2.338681221008301, "learning_rate": 9.887582979587755e-06, "loss": 0.16427518, "memory(GiB)": 13.7, "step": 12185, "train_speed(iter/s)": 1.528651 }, { "acc": 0.95562735, "epoch": 5.713616123740333, "grad_norm": 4.958739280700684, "learning_rate": 9.88741947394985e-06, "loss": 0.22659738, "memory(GiB)": 13.7, "step": 12190, "train_speed(iter/s)": 1.528681 }, { "acc": 0.96337757, "epoch": 5.715959690649168, "grad_norm": 13.933256149291992, "learning_rate": 9.887255850846457e-06, "loss": 0.14212685, "memory(GiB)": 13.7, "step": 12195, "train_speed(iter/s)": 1.528743 }, { "acc": 0.96056767, "epoch": 5.718303257558003, "grad_norm": 21.280765533447266, "learning_rate": 9.887092110281506e-06, "loss": 0.13738228, "memory(GiB)": 13.7, "step": 12200, "train_speed(iter/s)": 1.528749 }, { "acc": 0.96561489, "epoch": 5.720646824466838, "grad_norm": 50.585853576660156, "learning_rate": 9.886928252258934e-06, "loss": 0.15602033, "memory(GiB)": 13.7, "step": 12205, "train_speed(iter/s)": 1.52876 }, { "acc": 0.97474251, "epoch": 5.7229903913756734, "grad_norm": 6.434066295623779, "learning_rate": 9.886764276782683e-06, "loss": 0.11059477, "memory(GiB)": 13.7, "step": 12210, "train_speed(iter/s)": 1.528779 }, { "acc": 0.93560743, "epoch": 5.725333958284509, "grad_norm": 19.9229793548584, "learning_rate": 9.886600183856688e-06, "loss": 0.27950501, "memory(GiB)": 13.7, "step": 12215, "train_speed(iter/s)": 1.528792 }, { "acc": 0.96222477, "epoch": 5.727677525193345, "grad_norm": 5.051419258117676, "learning_rate": 9.8864359734849e-06, "loss": 0.25517173, "memory(GiB)": 13.7, "step": 12220, "train_speed(iter/s)": 1.528807 }, { "acc": 0.96012316, "epoch": 5.730021092102179, "grad_norm": 6.452895164489746, "learning_rate": 9.886271645671262e-06, "loss": 0.24137166, "memory(GiB)": 13.7, "step": 12225, "train_speed(iter/s)": 1.528824 }, { "acc": 0.96373377, "epoch": 5.732364659011015, "grad_norm": 10.92800235748291, "learning_rate": 9.886107200419723e-06, "loss": 0.21919267, "memory(GiB)": 13.7, "step": 12230, "train_speed(iter/s)": 1.528843 }, { "acc": 0.95151653, "epoch": 5.73470822591985, "grad_norm": 11.327291488647461, "learning_rate": 9.885942637734238e-06, "loss": 0.28913591, "memory(GiB)": 13.7, "step": 12235, "train_speed(iter/s)": 1.528885 }, { "acc": 0.96215363, "epoch": 5.7370517928286855, "grad_norm": 5.761331558227539, "learning_rate": 9.885777957618762e-06, "loss": 0.19873818, "memory(GiB)": 13.7, "step": 12240, "train_speed(iter/s)": 1.528878 }, { "acc": 0.97019892, "epoch": 5.739395359737521, "grad_norm": 5.052816867828369, "learning_rate": 9.885613160077253e-06, "loss": 0.14599912, "memory(GiB)": 13.7, "step": 12245, "train_speed(iter/s)": 1.52887 }, { "acc": 0.96890459, "epoch": 5.741738926646356, "grad_norm": 6.541325092315674, "learning_rate": 9.885448245113672e-06, "loss": 0.13318959, "memory(GiB)": 13.7, "step": 12250, "train_speed(iter/s)": 1.528861 }, { "acc": 0.96022167, "epoch": 5.744082493555191, "grad_norm": 4.178924560546875, "learning_rate": 9.885283212731984e-06, "loss": 0.20985866, "memory(GiB)": 13.7, "step": 12255, "train_speed(iter/s)": 1.528891 }, { "acc": 0.96378975, "epoch": 5.746426060464026, "grad_norm": 17.433813095092773, "learning_rate": 9.885118062936156e-06, "loss": 0.26254895, "memory(GiB)": 13.7, "step": 12260, "train_speed(iter/s)": 1.528943 }, { "acc": 0.95668354, "epoch": 5.7487696273728615, "grad_norm": 12.749073028564453, "learning_rate": 9.884952795730155e-06, "loss": 0.19660287, "memory(GiB)": 13.7, "step": 12265, "train_speed(iter/s)": 1.528952 }, { "acc": 0.97251987, "epoch": 5.751113194281697, "grad_norm": 3.3011186122894287, "learning_rate": 9.88478741111796e-06, "loss": 0.18921313, "memory(GiB)": 13.7, "step": 12270, "train_speed(iter/s)": 1.528955 }, { "acc": 0.96412916, "epoch": 5.753456761190532, "grad_norm": 31.206130981445312, "learning_rate": 9.884621909103538e-06, "loss": 0.2456018, "memory(GiB)": 13.7, "step": 12275, "train_speed(iter/s)": 1.52898 }, { "acc": 0.94562836, "epoch": 5.755800328099367, "grad_norm": 3.4952945709228516, "learning_rate": 9.88445628969087e-06, "loss": 0.29050462, "memory(GiB)": 13.7, "step": 12280, "train_speed(iter/s)": 1.529009 }, { "acc": 0.96870937, "epoch": 5.758143895008202, "grad_norm": 4.997375011444092, "learning_rate": 9.88429055288394e-06, "loss": 0.20669942, "memory(GiB)": 13.7, "step": 12285, "train_speed(iter/s)": 1.529004 }, { "acc": 0.97129421, "epoch": 5.760487461917037, "grad_norm": 9.634140968322754, "learning_rate": 9.88412469868673e-06, "loss": 0.20502994, "memory(GiB)": 13.7, "step": 12290, "train_speed(iter/s)": 1.529029 }, { "acc": 0.96142454, "epoch": 5.762831028825873, "grad_norm": 8.259844779968262, "learning_rate": 9.883958727103225e-06, "loss": 0.27981069, "memory(GiB)": 13.7, "step": 12295, "train_speed(iter/s)": 1.529038 }, { "acc": 0.96349707, "epoch": 5.765174595734708, "grad_norm": 9.688328742980957, "learning_rate": 9.883792638137416e-06, "loss": 0.22916005, "memory(GiB)": 13.7, "step": 12300, "train_speed(iter/s)": 1.529055 }, { "acc": 0.9654397, "epoch": 5.767518162643544, "grad_norm": 11.134041786193848, "learning_rate": 9.883626431793296e-06, "loss": 0.23642457, "memory(GiB)": 13.7, "step": 12305, "train_speed(iter/s)": 1.529088 }, { "acc": 0.9585043, "epoch": 5.769861729552379, "grad_norm": 10.974489212036133, "learning_rate": 9.883460108074857e-06, "loss": 0.21387658, "memory(GiB)": 13.7, "step": 12310, "train_speed(iter/s)": 1.529103 }, { "acc": 0.99226189, "epoch": 5.772205296461214, "grad_norm": 8.308124542236328, "learning_rate": 9.8832936669861e-06, "loss": 0.0783257, "memory(GiB)": 13.7, "step": 12315, "train_speed(iter/s)": 1.529142 }, { "acc": 0.94267769, "epoch": 5.7745488633700495, "grad_norm": 10.392217636108398, "learning_rate": 9.883127108531024e-06, "loss": 0.20153203, "memory(GiB)": 13.7, "step": 12320, "train_speed(iter/s)": 1.529164 }, { "acc": 0.94415188, "epoch": 5.776892430278885, "grad_norm": 49.886940002441406, "learning_rate": 9.882960432713633e-06, "loss": 0.2177953, "memory(GiB)": 13.7, "step": 12325, "train_speed(iter/s)": 1.529183 }, { "acc": 0.95912018, "epoch": 5.77923599718772, "grad_norm": 7.763242721557617, "learning_rate": 9.882793639537933e-06, "loss": 0.27033994, "memory(GiB)": 13.7, "step": 12330, "train_speed(iter/s)": 1.529198 }, { "acc": 0.96098499, "epoch": 5.781579564096555, "grad_norm": 13.309375762939453, "learning_rate": 9.88262672900793e-06, "loss": 0.1433502, "memory(GiB)": 13.7, "step": 12335, "train_speed(iter/s)": 1.529223 }, { "acc": 0.98167877, "epoch": 5.78392313100539, "grad_norm": 7.942503452301025, "learning_rate": 9.882459701127646e-06, "loss": 0.11738871, "memory(GiB)": 13.7, "step": 12340, "train_speed(iter/s)": 1.529233 }, { "acc": 0.94973373, "epoch": 5.786266697914225, "grad_norm": 22.948997497558594, "learning_rate": 9.882292555901086e-06, "loss": 0.39029949, "memory(GiB)": 13.7, "step": 12345, "train_speed(iter/s)": 1.529248 }, { "acc": 0.96845732, "epoch": 5.788610264823061, "grad_norm": 3.8884334564208984, "learning_rate": 9.882125293332272e-06, "loss": 0.1471362, "memory(GiB)": 13.7, "step": 12350, "train_speed(iter/s)": 1.529243 }, { "acc": 0.96094933, "epoch": 5.790953831731896, "grad_norm": 7.423099517822266, "learning_rate": 9.881957913425221e-06, "loss": 0.25808697, "memory(GiB)": 13.7, "step": 12355, "train_speed(iter/s)": 1.529217 }, { "acc": 0.96500254, "epoch": 5.793297398640731, "grad_norm": 13.368647575378418, "learning_rate": 9.88179041618396e-06, "loss": 0.2313441, "memory(GiB)": 13.7, "step": 12360, "train_speed(iter/s)": 1.52923 }, { "acc": 0.94869051, "epoch": 5.795640965549566, "grad_norm": 12.275923728942871, "learning_rate": 9.881622801612515e-06, "loss": 0.32728119, "memory(GiB)": 13.7, "step": 12365, "train_speed(iter/s)": 1.529234 }, { "acc": 0.94675598, "epoch": 5.797984532458401, "grad_norm": 6.244762420654297, "learning_rate": 9.881455069714913e-06, "loss": 0.36128178, "memory(GiB)": 13.7, "step": 12370, "train_speed(iter/s)": 1.52924 }, { "acc": 0.93132277, "epoch": 5.800328099367237, "grad_norm": 11.04078483581543, "learning_rate": 9.881287220495184e-06, "loss": 0.39315462, "memory(GiB)": 13.7, "step": 12375, "train_speed(iter/s)": 1.529211 }, { "acc": 0.95668182, "epoch": 5.802671666276073, "grad_norm": 4.278650283813477, "learning_rate": 9.881119253957367e-06, "loss": 0.26443644, "memory(GiB)": 13.7, "step": 12380, "train_speed(iter/s)": 1.52921 }, { "acc": 0.97191391, "epoch": 5.805015233184907, "grad_norm": 10.823104858398438, "learning_rate": 9.880951170105498e-06, "loss": 0.19486768, "memory(GiB)": 13.7, "step": 12385, "train_speed(iter/s)": 1.529232 }, { "acc": 0.98511791, "epoch": 5.807358800093743, "grad_norm": 0.857810378074646, "learning_rate": 9.880782968943614e-06, "loss": 0.07187832, "memory(GiB)": 13.7, "step": 12390, "train_speed(iter/s)": 1.52924 }, { "acc": 0.94913483, "epoch": 5.809702367002578, "grad_norm": 12.938231468200684, "learning_rate": 9.880614650475763e-06, "loss": 0.34132733, "memory(GiB)": 13.7, "step": 12395, "train_speed(iter/s)": 1.529246 }, { "acc": 0.95686569, "epoch": 5.812045933911413, "grad_norm": 6.495233535766602, "learning_rate": 9.880446214705985e-06, "loss": 0.18306375, "memory(GiB)": 13.7, "step": 12400, "train_speed(iter/s)": 1.529245 }, { "acc": 0.94374542, "epoch": 5.814389500820249, "grad_norm": 14.432867050170898, "learning_rate": 9.880277661638334e-06, "loss": 0.35491204, "memory(GiB)": 13.7, "step": 12405, "train_speed(iter/s)": 1.529268 }, { "acc": 0.9760416, "epoch": 5.816733067729084, "grad_norm": 14.75544548034668, "learning_rate": 9.88010899127686e-06, "loss": 0.14885788, "memory(GiB)": 13.7, "step": 12410, "train_speed(iter/s)": 1.529268 }, { "acc": 0.96241102, "epoch": 5.819076634637919, "grad_norm": 3.7014431953430176, "learning_rate": 9.879940203625615e-06, "loss": 0.2383575, "memory(GiB)": 13.7, "step": 12415, "train_speed(iter/s)": 1.529276 }, { "acc": 0.98019352, "epoch": 5.821420201546754, "grad_norm": 9.868828773498535, "learning_rate": 9.87977129868866e-06, "loss": 0.07847481, "memory(GiB)": 13.7, "step": 12420, "train_speed(iter/s)": 1.529297 }, { "acc": 0.95223217, "epoch": 5.823763768455589, "grad_norm": 27.685110092163086, "learning_rate": 9.87960227647005e-06, "loss": 0.27564888, "memory(GiB)": 13.7, "step": 12425, "train_speed(iter/s)": 1.529322 }, { "acc": 0.94008427, "epoch": 5.826107335364425, "grad_norm": 8.204833030700684, "learning_rate": 9.879433136973851e-06, "loss": 0.26772113, "memory(GiB)": 13.7, "step": 12430, "train_speed(iter/s)": 1.529346 }, { "acc": 0.96693363, "epoch": 5.82845090227326, "grad_norm": 8.526514053344727, "learning_rate": 9.879263880204129e-06, "loss": 0.26023555, "memory(GiB)": 13.7, "step": 12435, "train_speed(iter/s)": 1.529361 }, { "acc": 0.97636547, "epoch": 5.830794469182095, "grad_norm": 7.120580196380615, "learning_rate": 9.87909450616495e-06, "loss": 0.12227116, "memory(GiB)": 13.7, "step": 12440, "train_speed(iter/s)": 1.529413 }, { "acc": 0.94880657, "epoch": 5.83313803609093, "grad_norm": 12.271523475646973, "learning_rate": 9.878925014860388e-06, "loss": 0.20129488, "memory(GiB)": 13.7, "step": 12445, "train_speed(iter/s)": 1.529432 }, { "acc": 0.96445513, "epoch": 5.835481602999765, "grad_norm": 7.897796630859375, "learning_rate": 9.878755406294513e-06, "loss": 0.21590376, "memory(GiB)": 13.7, "step": 12450, "train_speed(iter/s)": 1.529423 }, { "acc": 0.96455355, "epoch": 5.837825169908601, "grad_norm": 3.0177764892578125, "learning_rate": 9.878585680471407e-06, "loss": 0.22336962, "memory(GiB)": 13.7, "step": 12455, "train_speed(iter/s)": 1.529436 }, { "acc": 0.95165634, "epoch": 5.840168736817436, "grad_norm": 3.439945697784424, "learning_rate": 9.878415837395145e-06, "loss": 0.26907611, "memory(GiB)": 13.7, "step": 12460, "train_speed(iter/s)": 1.529447 }, { "acc": 0.95410919, "epoch": 5.842512303726272, "grad_norm": 4.556804656982422, "learning_rate": 9.878245877069812e-06, "loss": 0.26309333, "memory(GiB)": 13.7, "step": 12465, "train_speed(iter/s)": 1.52947 }, { "acc": 0.96306314, "epoch": 5.844855870635106, "grad_norm": 12.478252410888672, "learning_rate": 9.878075799499494e-06, "loss": 0.14393601, "memory(GiB)": 13.7, "step": 12470, "train_speed(iter/s)": 1.529468 }, { "acc": 0.96709948, "epoch": 5.847199437543942, "grad_norm": 12.139594078063965, "learning_rate": 9.877905604688277e-06, "loss": 0.21337047, "memory(GiB)": 13.7, "step": 12475, "train_speed(iter/s)": 1.529444 }, { "acc": 0.97458324, "epoch": 5.849543004452777, "grad_norm": 5.179226398468018, "learning_rate": 9.877735292640252e-06, "loss": 0.23319747, "memory(GiB)": 13.7, "step": 12480, "train_speed(iter/s)": 1.529481 }, { "acc": 0.97086973, "epoch": 5.851886571361613, "grad_norm": 17.169498443603516, "learning_rate": 9.877564863359516e-06, "loss": 0.19902916, "memory(GiB)": 13.7, "step": 12485, "train_speed(iter/s)": 1.529504 }, { "acc": 0.94958334, "epoch": 5.854230138270448, "grad_norm": 16.795534133911133, "learning_rate": 9.87739431685016e-06, "loss": 0.30631104, "memory(GiB)": 13.7, "step": 12490, "train_speed(iter/s)": 1.529532 }, { "acc": 0.95045395, "epoch": 5.856573705179283, "grad_norm": 10.845806121826172, "learning_rate": 9.87722365311629e-06, "loss": 0.32402935, "memory(GiB)": 13.7, "step": 12495, "train_speed(iter/s)": 1.529535 }, { "acc": 0.966572, "epoch": 5.858917272088118, "grad_norm": 5.400877475738525, "learning_rate": 9.877052872162e-06, "loss": 0.11382394, "memory(GiB)": 13.7, "step": 12500, "train_speed(iter/s)": 1.529571 }, { "acc": 0.97186956, "epoch": 5.861260838996953, "grad_norm": 3.5517051219940186, "learning_rate": 9.876881973991404e-06, "loss": 0.12753901, "memory(GiB)": 13.7, "step": 12505, "train_speed(iter/s)": 1.529632 }, { "acc": 0.9800189, "epoch": 5.863604405905789, "grad_norm": 9.39240550994873, "learning_rate": 9.876710958608603e-06, "loss": 0.20915303, "memory(GiB)": 13.7, "step": 12510, "train_speed(iter/s)": 1.529664 }, { "acc": 0.97147121, "epoch": 5.865947972814624, "grad_norm": 7.685603141784668, "learning_rate": 9.87653982601771e-06, "loss": 0.26866941, "memory(GiB)": 13.7, "step": 12515, "train_speed(iter/s)": 1.529687 }, { "acc": 0.96858139, "epoch": 5.868291539723459, "grad_norm": 22.241310119628906, "learning_rate": 9.87636857622284e-06, "loss": 0.13749014, "memory(GiB)": 13.7, "step": 12520, "train_speed(iter/s)": 1.529668 }, { "acc": 0.94925232, "epoch": 5.870635106632294, "grad_norm": 5.047861099243164, "learning_rate": 9.87619720922811e-06, "loss": 0.28763793, "memory(GiB)": 13.7, "step": 12525, "train_speed(iter/s)": 1.529718 }, { "acc": 0.97388802, "epoch": 5.872978673541129, "grad_norm": 4.3064446449279785, "learning_rate": 9.876025725037631e-06, "loss": 0.19287242, "memory(GiB)": 13.7, "step": 12530, "train_speed(iter/s)": 1.529715 }, { "acc": 0.97242222, "epoch": 5.8753222404499645, "grad_norm": 5.757880687713623, "learning_rate": 9.875854123655535e-06, "loss": 0.17499988, "memory(GiB)": 13.7, "step": 12535, "train_speed(iter/s)": 1.529758 }, { "acc": 0.93731651, "epoch": 5.8776658073588, "grad_norm": 12.076184272766113, "learning_rate": 9.875682405085943e-06, "loss": 0.37822726, "memory(GiB)": 13.7, "step": 12540, "train_speed(iter/s)": 1.529787 }, { "acc": 0.94851437, "epoch": 5.880009374267635, "grad_norm": 8.185190200805664, "learning_rate": 9.875510569332982e-06, "loss": 0.29603062, "memory(GiB)": 13.7, "step": 12545, "train_speed(iter/s)": 1.529802 }, { "acc": 0.96963825, "epoch": 5.882352941176471, "grad_norm": 5.783454895019531, "learning_rate": 9.875338616400781e-06, "loss": 0.1598681, "memory(GiB)": 13.7, "step": 12550, "train_speed(iter/s)": 1.529811 }, { "acc": 0.96232281, "epoch": 5.884696508085306, "grad_norm": 5.85209846496582, "learning_rate": 9.875166546293476e-06, "loss": 0.20771837, "memory(GiB)": 13.7, "step": 12555, "train_speed(iter/s)": 1.529841 }, { "acc": 0.96498871, "epoch": 5.887040074994141, "grad_norm": 8.979547500610352, "learning_rate": 9.874994359015199e-06, "loss": 0.21899738, "memory(GiB)": 13.7, "step": 12560, "train_speed(iter/s)": 1.52989 }, { "acc": 0.95636234, "epoch": 5.889383641902977, "grad_norm": 21.203493118286133, "learning_rate": 9.874822054570095e-06, "loss": 0.29673884, "memory(GiB)": 13.7, "step": 12565, "train_speed(iter/s)": 1.529906 }, { "acc": 0.94979172, "epoch": 5.891727208811812, "grad_norm": 11.549376487731934, "learning_rate": 9.874649632962301e-06, "loss": 0.24947534, "memory(GiB)": 13.7, "step": 12570, "train_speed(iter/s)": 1.529919 }, { "acc": 0.96375008, "epoch": 5.894070775720647, "grad_norm": 2.7707622051239014, "learning_rate": 9.874477094195963e-06, "loss": 0.1590973, "memory(GiB)": 13.7, "step": 12575, "train_speed(iter/s)": 1.529921 }, { "acc": 0.95369053, "epoch": 5.896414342629482, "grad_norm": 32.86848831176758, "learning_rate": 9.874304438275227e-06, "loss": 0.22300305, "memory(GiB)": 13.7, "step": 12580, "train_speed(iter/s)": 1.529946 }, { "acc": 0.95651789, "epoch": 5.898757909538317, "grad_norm": 5.492867946624756, "learning_rate": 9.874131665204244e-06, "loss": 0.24354813, "memory(GiB)": 13.7, "step": 12585, "train_speed(iter/s)": 1.529945 }, { "acc": 0.97082348, "epoch": 5.9011014764471525, "grad_norm": 12.524073600769043, "learning_rate": 9.873958774987168e-06, "loss": 0.19734913, "memory(GiB)": 13.7, "step": 12590, "train_speed(iter/s)": 1.529966 }, { "acc": 0.97258015, "epoch": 5.903445043355988, "grad_norm": 3.4308178424835205, "learning_rate": 9.873785767628153e-06, "loss": 0.12506528, "memory(GiB)": 13.7, "step": 12595, "train_speed(iter/s)": 1.529979 }, { "acc": 0.96967859, "epoch": 5.905788610264823, "grad_norm": 4.7416582107543945, "learning_rate": 9.873612643131359e-06, "loss": 0.18219714, "memory(GiB)": 13.7, "step": 12600, "train_speed(iter/s)": 1.530001 }, { "acc": 0.96662207, "epoch": 5.908132177173658, "grad_norm": 8.277970314025879, "learning_rate": 9.873439401500943e-06, "loss": 0.20051939, "memory(GiB)": 13.7, "step": 12605, "train_speed(iter/s)": 1.530019 }, { "acc": 0.9622776, "epoch": 5.910475744082493, "grad_norm": 8.589510917663574, "learning_rate": 9.873266042741077e-06, "loss": 0.16543498, "memory(GiB)": 13.7, "step": 12610, "train_speed(iter/s)": 1.529992 }, { "acc": 0.96900301, "epoch": 5.9128193109913285, "grad_norm": 48.74722671508789, "learning_rate": 9.87309256685592e-06, "loss": 0.22908239, "memory(GiB)": 13.7, "step": 12615, "train_speed(iter/s)": 1.53 }, { "acc": 0.93649578, "epoch": 5.915162877900164, "grad_norm": 12.87273120880127, "learning_rate": 9.872918973849649e-06, "loss": 0.37620444, "memory(GiB)": 13.7, "step": 12620, "train_speed(iter/s)": 1.530008 }, { "acc": 0.95345306, "epoch": 5.917506444809, "grad_norm": 6.835785388946533, "learning_rate": 9.872745263726431e-06, "loss": 0.25292628, "memory(GiB)": 13.7, "step": 12625, "train_speed(iter/s)": 1.530051 }, { "acc": 0.97930641, "epoch": 5.919850011717834, "grad_norm": 6.432653427124023, "learning_rate": 9.872571436490443e-06, "loss": 0.12867811, "memory(GiB)": 13.7, "step": 12630, "train_speed(iter/s)": 1.530082 }, { "acc": 0.94043808, "epoch": 5.92219357862667, "grad_norm": 9.25662899017334, "learning_rate": 9.872397492145865e-06, "loss": 0.33725724, "memory(GiB)": 13.7, "step": 12635, "train_speed(iter/s)": 1.530087 }, { "acc": 0.97398806, "epoch": 5.924537145535505, "grad_norm": 1.1958414316177368, "learning_rate": 9.872223430696877e-06, "loss": 0.15069546, "memory(GiB)": 13.7, "step": 12640, "train_speed(iter/s)": 1.530103 }, { "acc": 0.94343529, "epoch": 5.926880712444341, "grad_norm": 13.287317276000977, "learning_rate": 9.87204925214766e-06, "loss": 0.3335901, "memory(GiB)": 13.7, "step": 12645, "train_speed(iter/s)": 1.530146 }, { "acc": 0.97005005, "epoch": 5.929224279353176, "grad_norm": 6.2672247886657715, "learning_rate": 9.871874956502405e-06, "loss": 0.2183135, "memory(GiB)": 13.7, "step": 12650, "train_speed(iter/s)": 1.530175 }, { "acc": 0.95571432, "epoch": 5.931567846262011, "grad_norm": 18.563350677490234, "learning_rate": 9.8717005437653e-06, "loss": 0.34719737, "memory(GiB)": 13.7, "step": 12655, "train_speed(iter/s)": 1.530208 }, { "acc": 0.96413193, "epoch": 5.933911413170846, "grad_norm": 1.7182011604309082, "learning_rate": 9.871526013940537e-06, "loss": 0.20395589, "memory(GiB)": 13.7, "step": 12660, "train_speed(iter/s)": 1.530225 }, { "acc": 0.97350693, "epoch": 5.936254980079681, "grad_norm": 7.4092607498168945, "learning_rate": 9.871351367032311e-06, "loss": 0.20818729, "memory(GiB)": 13.7, "step": 12665, "train_speed(iter/s)": 1.530243 }, { "acc": 0.95353498, "epoch": 5.9385985469885165, "grad_norm": 14.70850944519043, "learning_rate": 9.871176603044821e-06, "loss": 0.30069027, "memory(GiB)": 13.7, "step": 12670, "train_speed(iter/s)": 1.530263 }, { "acc": 0.95443907, "epoch": 5.940942113897352, "grad_norm": 84.9615707397461, "learning_rate": 9.871001721982265e-06, "loss": 0.27712717, "memory(GiB)": 13.7, "step": 12675, "train_speed(iter/s)": 1.530258 }, { "acc": 0.97416134, "epoch": 5.943285680806187, "grad_norm": 5.466806411743164, "learning_rate": 9.870826723848849e-06, "loss": 0.08347043, "memory(GiB)": 13.7, "step": 12680, "train_speed(iter/s)": 1.530275 }, { "acc": 0.96712799, "epoch": 5.945629247715022, "grad_norm": 9.984044075012207, "learning_rate": 9.87065160864878e-06, "loss": 0.18216488, "memory(GiB)": 13.7, "step": 12685, "train_speed(iter/s)": 1.530296 }, { "acc": 0.95987549, "epoch": 5.947972814623857, "grad_norm": 7.3507513999938965, "learning_rate": 9.870476376386265e-06, "loss": 0.15607576, "memory(GiB)": 13.7, "step": 12690, "train_speed(iter/s)": 1.530348 }, { "acc": 0.95395832, "epoch": 5.9503163815326925, "grad_norm": 0.4318634569644928, "learning_rate": 9.870301027065517e-06, "loss": 0.2330694, "memory(GiB)": 13.7, "step": 12695, "train_speed(iter/s)": 1.5304 }, { "acc": 0.97196426, "epoch": 5.952659948441528, "grad_norm": 2.0923521518707275, "learning_rate": 9.870125560690751e-06, "loss": 0.13935258, "memory(GiB)": 13.7, "step": 12700, "train_speed(iter/s)": 1.53039 }, { "acc": 0.97092323, "epoch": 5.955003515350363, "grad_norm": 10.988973617553711, "learning_rate": 9.869949977266185e-06, "loss": 0.18568319, "memory(GiB)": 13.7, "step": 12705, "train_speed(iter/s)": 1.530406 }, { "acc": 0.95743151, "epoch": 5.957347082259199, "grad_norm": 21.958356857299805, "learning_rate": 9.869774276796038e-06, "loss": 0.24288969, "memory(GiB)": 13.7, "step": 12710, "train_speed(iter/s)": 1.5304 }, { "acc": 0.9610877, "epoch": 5.959690649168033, "grad_norm": 6.240504741668701, "learning_rate": 9.869598459284534e-06, "loss": 0.24603748, "memory(GiB)": 13.7, "step": 12715, "train_speed(iter/s)": 1.530415 }, { "acc": 0.94607954, "epoch": 5.962034216076869, "grad_norm": 7.943431377410889, "learning_rate": 9.8694225247359e-06, "loss": 0.30696907, "memory(GiB)": 13.7, "step": 12720, "train_speed(iter/s)": 1.530453 }, { "acc": 0.96752081, "epoch": 5.9643777829857045, "grad_norm": 7.362837791442871, "learning_rate": 9.869246473154364e-06, "loss": 0.12862319, "memory(GiB)": 13.7, "step": 12725, "train_speed(iter/s)": 1.530508 }, { "acc": 0.95877628, "epoch": 5.96672134989454, "grad_norm": 10.396546363830566, "learning_rate": 9.869070304544158e-06, "loss": 0.17924393, "memory(GiB)": 13.7, "step": 12730, "train_speed(iter/s)": 1.530532 }, { "acc": 0.96880226, "epoch": 5.969064916803375, "grad_norm": 24.76632308959961, "learning_rate": 9.868894018909517e-06, "loss": 0.15138266, "memory(GiB)": 13.7, "step": 12735, "train_speed(iter/s)": 1.530547 }, { "acc": 0.94511909, "epoch": 5.97140848371221, "grad_norm": 3.726562023162842, "learning_rate": 9.868717616254678e-06, "loss": 0.28930411, "memory(GiB)": 13.7, "step": 12740, "train_speed(iter/s)": 1.53054 }, { "acc": 0.96814394, "epoch": 5.973752050621045, "grad_norm": 13.097119331359863, "learning_rate": 9.86854109658388e-06, "loss": 0.18564069, "memory(GiB)": 13.7, "step": 12745, "train_speed(iter/s)": 1.530605 }, { "acc": 0.96831665, "epoch": 5.9760956175298805, "grad_norm": 3.4038374423980713, "learning_rate": 9.868364459901367e-06, "loss": 0.17773978, "memory(GiB)": 13.7, "step": 12750, "train_speed(iter/s)": 1.530628 }, { "acc": 0.95815973, "epoch": 5.978439184438716, "grad_norm": 121.34517669677734, "learning_rate": 9.868187706211383e-06, "loss": 0.2068548, "memory(GiB)": 13.7, "step": 12755, "train_speed(iter/s)": 1.530644 }, { "acc": 0.95047617, "epoch": 5.980782751347551, "grad_norm": 10.6516695022583, "learning_rate": 9.86801083551818e-06, "loss": 0.2739898, "memory(GiB)": 13.7, "step": 12760, "train_speed(iter/s)": 1.530638 }, { "acc": 0.97238102, "epoch": 5.983126318256386, "grad_norm": 1.0119136571884155, "learning_rate": 9.867833847826006e-06, "loss": 0.19207406, "memory(GiB)": 13.7, "step": 12765, "train_speed(iter/s)": 1.530599 }, { "acc": 0.95597382, "epoch": 5.985469885165221, "grad_norm": 2.4003560543060303, "learning_rate": 9.867656743139118e-06, "loss": 0.22031918, "memory(GiB)": 13.7, "step": 12770, "train_speed(iter/s)": 1.530583 }, { "acc": 0.96783237, "epoch": 5.9878134520740565, "grad_norm": 3.8681507110595703, "learning_rate": 9.86747952146177e-06, "loss": 0.17795386, "memory(GiB)": 13.7, "step": 12775, "train_speed(iter/s)": 1.5306 }, { "acc": 0.94091873, "epoch": 5.990157018982892, "grad_norm": 6.067126274108887, "learning_rate": 9.867302182798224e-06, "loss": 0.38537467, "memory(GiB)": 13.7, "step": 12780, "train_speed(iter/s)": 1.530612 }, { "acc": 0.98188114, "epoch": 5.992500585891727, "grad_norm": 13.63302993774414, "learning_rate": 9.867124727152745e-06, "loss": 0.09759625, "memory(GiB)": 13.7, "step": 12785, "train_speed(iter/s)": 1.530641 }, { "acc": 0.9809226, "epoch": 5.994844152800562, "grad_norm": 16.103137969970703, "learning_rate": 9.866947154529595e-06, "loss": 0.16451437, "memory(GiB)": 13.7, "step": 12790, "train_speed(iter/s)": 1.530658 }, { "acc": 0.95442715, "epoch": 5.997187719709398, "grad_norm": 7.886356353759766, "learning_rate": 9.866769464933041e-06, "loss": 0.21795743, "memory(GiB)": 13.7, "step": 12795, "train_speed(iter/s)": 1.530664 }, { "acc": 0.96205359, "epoch": 5.999531286618233, "grad_norm": 9.131110191345215, "learning_rate": 9.866591658367356e-06, "loss": 0.10077293, "memory(GiB)": 13.7, "step": 12800, "train_speed(iter/s)": 1.53069 }, { "acc": 0.95946426, "epoch": 6.0018748535270685, "grad_norm": 6.5544304847717285, "learning_rate": 9.866413734836816e-06, "loss": 0.18581245, "memory(GiB)": 13.7, "step": 12805, "train_speed(iter/s)": 1.530632 }, { "acc": 0.98008928, "epoch": 6.004218420435904, "grad_norm": 7.954492092132568, "learning_rate": 9.866235694345693e-06, "loss": 0.1129427, "memory(GiB)": 13.7, "step": 12810, "train_speed(iter/s)": 1.53064 }, { "acc": 0.97074184, "epoch": 6.006561987344739, "grad_norm": 6.099849700927734, "learning_rate": 9.86605753689827e-06, "loss": 0.25719457, "memory(GiB)": 13.7, "step": 12815, "train_speed(iter/s)": 1.53067 }, { "acc": 0.97719698, "epoch": 6.008905554253574, "grad_norm": 29.392120361328125, "learning_rate": 9.86587926249883e-06, "loss": 0.10753462, "memory(GiB)": 13.7, "step": 12820, "train_speed(iter/s)": 1.530707 }, { "acc": 0.94359493, "epoch": 6.011249121162409, "grad_norm": 11.225802421569824, "learning_rate": 9.865700871151654e-06, "loss": 0.31301317, "memory(GiB)": 13.7, "step": 12825, "train_speed(iter/s)": 1.53076 }, { "acc": 0.96027451, "epoch": 6.0135926880712445, "grad_norm": 3.2941811084747314, "learning_rate": 9.865522362861035e-06, "loss": 0.14975206, "memory(GiB)": 13.7, "step": 12830, "train_speed(iter/s)": 1.530787 }, { "acc": 0.96226358, "epoch": 6.01593625498008, "grad_norm": 10.179636001586914, "learning_rate": 9.865343737631259e-06, "loss": 0.25327024, "memory(GiB)": 13.7, "step": 12835, "train_speed(iter/s)": 1.530786 }, { "acc": 0.97296085, "epoch": 6.018279821888915, "grad_norm": 6.490510940551758, "learning_rate": 9.865164995466625e-06, "loss": 0.1470329, "memory(GiB)": 13.7, "step": 12840, "train_speed(iter/s)": 1.530814 }, { "acc": 0.95604916, "epoch": 6.02062338879775, "grad_norm": 9.2033052444458, "learning_rate": 9.864986136371422e-06, "loss": 0.22643828, "memory(GiB)": 13.7, "step": 12845, "train_speed(iter/s)": 1.530826 }, { "acc": 0.96759262, "epoch": 6.022966955706585, "grad_norm": 7.850190162658691, "learning_rate": 9.864807160349956e-06, "loss": 0.17144134, "memory(GiB)": 13.7, "step": 12850, "train_speed(iter/s)": 1.530851 }, { "acc": 0.96395836, "epoch": 6.02531052261542, "grad_norm": 5.862673759460449, "learning_rate": 9.864628067406525e-06, "loss": 0.25384569, "memory(GiB)": 13.7, "step": 12855, "train_speed(iter/s)": 1.530862 }, { "acc": 0.96152534, "epoch": 6.027654089524256, "grad_norm": 12.345462799072266, "learning_rate": 9.864448857545435e-06, "loss": 0.23757033, "memory(GiB)": 13.7, "step": 12860, "train_speed(iter/s)": 1.530853 }, { "acc": 0.98533545, "epoch": 6.029997656433091, "grad_norm": 31.263500213623047, "learning_rate": 9.864269530770994e-06, "loss": 0.15711007, "memory(GiB)": 13.7, "step": 12865, "train_speed(iter/s)": 1.530863 }, { "acc": 0.95426569, "epoch": 6.032341223341926, "grad_norm": 13.57670783996582, "learning_rate": 9.864090087087513e-06, "loss": 0.30748696, "memory(GiB)": 13.7, "step": 12870, "train_speed(iter/s)": 1.53089 }, { "acc": 0.96998777, "epoch": 6.034684790250762, "grad_norm": 4.784103870391846, "learning_rate": 9.863910526499306e-06, "loss": 0.14594885, "memory(GiB)": 13.7, "step": 12875, "train_speed(iter/s)": 1.530887 }, { "acc": 0.93270502, "epoch": 6.037028357159597, "grad_norm": 15.786345481872559, "learning_rate": 9.863730849010685e-06, "loss": 0.25967705, "memory(GiB)": 13.7, "step": 12880, "train_speed(iter/s)": 1.530913 }, { "acc": 0.96048527, "epoch": 6.0393719240684325, "grad_norm": 16.605756759643555, "learning_rate": 9.863551054625972e-06, "loss": 0.21887243, "memory(GiB)": 13.7, "step": 12885, "train_speed(iter/s)": 1.530921 }, { "acc": 0.96513882, "epoch": 6.041715490977268, "grad_norm": 3.535745143890381, "learning_rate": 9.863371143349488e-06, "loss": 0.14019258, "memory(GiB)": 13.7, "step": 12890, "train_speed(iter/s)": 1.530917 }, { "acc": 0.96785126, "epoch": 6.044059057886103, "grad_norm": 1.0656861066818237, "learning_rate": 9.863191115185557e-06, "loss": 0.16293902, "memory(GiB)": 13.7, "step": 12895, "train_speed(iter/s)": 1.530923 }, { "acc": 0.9741827, "epoch": 6.046402624794938, "grad_norm": 7.466006278991699, "learning_rate": 9.863010970138508e-06, "loss": 0.12971568, "memory(GiB)": 13.7, "step": 12900, "train_speed(iter/s)": 1.530945 }, { "acc": 0.97170715, "epoch": 6.048746191703773, "grad_norm": 7.900843620300293, "learning_rate": 9.862830708212669e-06, "loss": 0.21780295, "memory(GiB)": 13.7, "step": 12905, "train_speed(iter/s)": 1.530957 }, { "acc": 0.9447485, "epoch": 6.0510897586126084, "grad_norm": 7.183340072631836, "learning_rate": 9.862650329412375e-06, "loss": 0.3147655, "memory(GiB)": 13.7, "step": 12910, "train_speed(iter/s)": 1.53096 }, { "acc": 0.96447105, "epoch": 6.053433325521444, "grad_norm": 14.209233283996582, "learning_rate": 9.86246983374196e-06, "loss": 0.18765097, "memory(GiB)": 13.7, "step": 12915, "train_speed(iter/s)": 1.531009 }, { "acc": 0.95332794, "epoch": 6.055776892430279, "grad_norm": 10.718505859375, "learning_rate": 9.862289221205761e-06, "loss": 0.28749208, "memory(GiB)": 13.7, "step": 12920, "train_speed(iter/s)": 1.53101 }, { "acc": 0.97303028, "epoch": 6.058120459339114, "grad_norm": 4.868980407714844, "learning_rate": 9.862108491808124e-06, "loss": 0.18285027, "memory(GiB)": 13.7, "step": 12925, "train_speed(iter/s)": 1.53101 }, { "acc": 0.96578255, "epoch": 6.060464026247949, "grad_norm": 10.384878158569336, "learning_rate": 9.861927645553388e-06, "loss": 0.15229642, "memory(GiB)": 13.7, "step": 12930, "train_speed(iter/s)": 1.530974 }, { "acc": 0.96377888, "epoch": 6.062807593156784, "grad_norm": 6.430037975311279, "learning_rate": 9.861746682445902e-06, "loss": 0.28994226, "memory(GiB)": 13.7, "step": 12935, "train_speed(iter/s)": 1.530983 }, { "acc": 0.96819439, "epoch": 6.06515116006562, "grad_norm": 28.82265853881836, "learning_rate": 9.86156560249002e-06, "loss": 0.20669036, "memory(GiB)": 13.7, "step": 12940, "train_speed(iter/s)": 1.530964 }, { "acc": 0.98770828, "epoch": 6.067494726974455, "grad_norm": 4.099615097045898, "learning_rate": 9.861384405690087e-06, "loss": 0.06172355, "memory(GiB)": 13.7, "step": 12945, "train_speed(iter/s)": 1.530989 }, { "acc": 0.95922279, "epoch": 6.06983829388329, "grad_norm": 2.459622383117676, "learning_rate": 9.861203092050465e-06, "loss": 0.16796679, "memory(GiB)": 13.7, "step": 12950, "train_speed(iter/s)": 1.531035 }, { "acc": 0.9479538, "epoch": 6.072181860792125, "grad_norm": 14.1033935546875, "learning_rate": 9.861021661575509e-06, "loss": 0.33490567, "memory(GiB)": 13.7, "step": 12955, "train_speed(iter/s)": 1.531042 }, { "acc": 0.95675592, "epoch": 6.074525427700961, "grad_norm": 32.92470169067383, "learning_rate": 9.86084011426958e-06, "loss": 0.21582057, "memory(GiB)": 13.7, "step": 12960, "train_speed(iter/s)": 1.531042 }, { "acc": 0.9707839, "epoch": 6.0768689946097965, "grad_norm": 11.403645515441895, "learning_rate": 9.860658450137039e-06, "loss": 0.16807793, "memory(GiB)": 13.7, "step": 12965, "train_speed(iter/s)": 1.531047 }, { "acc": 0.95987606, "epoch": 6.079212561518632, "grad_norm": 4.468063831329346, "learning_rate": 9.86047666918226e-06, "loss": 0.22667632, "memory(GiB)": 13.7, "step": 12970, "train_speed(iter/s)": 1.531091 }, { "acc": 0.9675952, "epoch": 6.081556128427467, "grad_norm": 7.524147987365723, "learning_rate": 9.860294771409605e-06, "loss": 0.13134, "memory(GiB)": 13.7, "step": 12975, "train_speed(iter/s)": 1.531112 }, { "acc": 0.96318455, "epoch": 6.083899695336302, "grad_norm": 19.04428482055664, "learning_rate": 9.86011275682345e-06, "loss": 0.18322062, "memory(GiB)": 13.7, "step": 12980, "train_speed(iter/s)": 1.531141 }, { "acc": 0.96916666, "epoch": 6.086243262245137, "grad_norm": 9.04287338256836, "learning_rate": 9.85993062542817e-06, "loss": 0.18144169, "memory(GiB)": 13.7, "step": 12985, "train_speed(iter/s)": 1.531158 }, { "acc": 0.943645, "epoch": 6.088586829153972, "grad_norm": 8.701315879821777, "learning_rate": 9.859748377228144e-06, "loss": 0.44192333, "memory(GiB)": 13.7, "step": 12990, "train_speed(iter/s)": 1.531202 }, { "acc": 0.96198788, "epoch": 6.090930396062808, "grad_norm": 57.521114349365234, "learning_rate": 9.859566012227747e-06, "loss": 0.24397032, "memory(GiB)": 13.7, "step": 12995, "train_speed(iter/s)": 1.531235 }, { "acc": 0.95625801, "epoch": 6.093273962971643, "grad_norm": 6.0254669189453125, "learning_rate": 9.85938353043137e-06, "loss": 0.22490606, "memory(GiB)": 13.7, "step": 13000, "train_speed(iter/s)": 1.531275 }, { "acc": 0.95984745, "epoch": 6.095617529880478, "grad_norm": 4.731033802032471, "learning_rate": 9.859200931843394e-06, "loss": 0.1631907, "memory(GiB)": 13.7, "step": 13005, "train_speed(iter/s)": 1.53126 }, { "acc": 0.95533915, "epoch": 6.097961096789313, "grad_norm": 14.9453706741333, "learning_rate": 9.85901821646821e-06, "loss": 0.2727906, "memory(GiB)": 13.7, "step": 13010, "train_speed(iter/s)": 1.53128 }, { "acc": 0.95970516, "epoch": 6.100304663698148, "grad_norm": 29.931556701660156, "learning_rate": 9.858835384310209e-06, "loss": 0.25515132, "memory(GiB)": 13.7, "step": 13015, "train_speed(iter/s)": 1.531302 }, { "acc": 0.96358538, "epoch": 6.102648230606984, "grad_norm": 11.47960376739502, "learning_rate": 9.858652435373786e-06, "loss": 0.22287471, "memory(GiB)": 13.7, "step": 13020, "train_speed(iter/s)": 1.531303 }, { "acc": 0.97979164, "epoch": 6.104991797515819, "grad_norm": 5.532079696655273, "learning_rate": 9.858469369663341e-06, "loss": 0.09560269, "memory(GiB)": 13.7, "step": 13025, "train_speed(iter/s)": 1.531336 }, { "acc": 0.95533695, "epoch": 6.107335364424654, "grad_norm": 6.716806411743164, "learning_rate": 9.858286187183268e-06, "loss": 0.24152713, "memory(GiB)": 13.7, "step": 13030, "train_speed(iter/s)": 1.531343 }, { "acc": 0.97766781, "epoch": 6.109678931333489, "grad_norm": 2.948732614517212, "learning_rate": 9.858102887937978e-06, "loss": 0.14825343, "memory(GiB)": 13.7, "step": 13035, "train_speed(iter/s)": 1.531365 }, { "acc": 0.96657734, "epoch": 6.112022498242325, "grad_norm": 19.076740264892578, "learning_rate": 9.857919471931872e-06, "loss": 0.12323663, "memory(GiB)": 13.7, "step": 13040, "train_speed(iter/s)": 1.531373 }, { "acc": 0.97269344, "epoch": 6.11436606515116, "grad_norm": 10.431182861328125, "learning_rate": 9.857735939169359e-06, "loss": 0.18373671, "memory(GiB)": 13.7, "step": 13045, "train_speed(iter/s)": 1.531376 }, { "acc": 0.93922033, "epoch": 6.116709632059996, "grad_norm": 9.014657974243164, "learning_rate": 9.857552289654852e-06, "loss": 0.24500668, "memory(GiB)": 13.7, "step": 13050, "train_speed(iter/s)": 1.531391 }, { "acc": 0.97016287, "epoch": 6.119053198968831, "grad_norm": 9.67054271697998, "learning_rate": 9.857368523392763e-06, "loss": 0.19277127, "memory(GiB)": 13.7, "step": 13055, "train_speed(iter/s)": 1.531365 }, { "acc": 0.96762905, "epoch": 6.121396765877666, "grad_norm": 6.392226219177246, "learning_rate": 9.857184640387512e-06, "loss": 0.13768765, "memory(GiB)": 13.7, "step": 13060, "train_speed(iter/s)": 1.531386 }, { "acc": 0.9578661, "epoch": 6.123740332786501, "grad_norm": 7.186777114868164, "learning_rate": 9.857000640643518e-06, "loss": 0.24725494, "memory(GiB)": 13.7, "step": 13065, "train_speed(iter/s)": 1.53142 }, { "acc": 0.96229057, "epoch": 6.126083899695336, "grad_norm": 3.4467856884002686, "learning_rate": 9.856816524165203e-06, "loss": 0.16406972, "memory(GiB)": 13.7, "step": 13070, "train_speed(iter/s)": 1.531455 }, { "acc": 0.97015877, "epoch": 6.128427466604172, "grad_norm": 11.08718204498291, "learning_rate": 9.856632290956992e-06, "loss": 0.15718632, "memory(GiB)": 13.7, "step": 13075, "train_speed(iter/s)": 1.531496 }, { "acc": 0.95582714, "epoch": 6.130771033513007, "grad_norm": 4.291473865509033, "learning_rate": 9.856447941023317e-06, "loss": 0.31962323, "memory(GiB)": 13.7, "step": 13080, "train_speed(iter/s)": 1.53152 }, { "acc": 0.95494547, "epoch": 6.133114600421842, "grad_norm": 93.48973083496094, "learning_rate": 9.856263474368604e-06, "loss": 0.2240099, "memory(GiB)": 13.7, "step": 13085, "train_speed(iter/s)": 1.531537 }, { "acc": 0.96606312, "epoch": 6.135458167330677, "grad_norm": 8.089592933654785, "learning_rate": 9.85607889099729e-06, "loss": 0.2261142, "memory(GiB)": 13.7, "step": 13090, "train_speed(iter/s)": 1.531541 }, { "acc": 0.97533731, "epoch": 6.137801734239512, "grad_norm": 8.768012046813965, "learning_rate": 9.855894190913813e-06, "loss": 0.14001573, "memory(GiB)": 13.7, "step": 13095, "train_speed(iter/s)": 1.531557 }, { "acc": 0.94218445, "epoch": 6.140145301148348, "grad_norm": 28.309358596801758, "learning_rate": 9.855709374122609e-06, "loss": 0.37144456, "memory(GiB)": 13.7, "step": 13100, "train_speed(iter/s)": 1.531542 }, { "acc": 0.95526695, "epoch": 6.142488868057183, "grad_norm": 11.92297649383545, "learning_rate": 9.855524440628123e-06, "loss": 0.21879148, "memory(GiB)": 13.7, "step": 13105, "train_speed(iter/s)": 1.531569 }, { "acc": 0.95684528, "epoch": 6.144832434966018, "grad_norm": 5.533915042877197, "learning_rate": 9.8553393904348e-06, "loss": 0.19733838, "memory(GiB)": 13.7, "step": 13110, "train_speed(iter/s)": 1.531588 }, { "acc": 0.9835453, "epoch": 6.147176001874853, "grad_norm": 5.03230619430542, "learning_rate": 9.855154223547085e-06, "loss": 0.09130771, "memory(GiB)": 13.7, "step": 13115, "train_speed(iter/s)": 1.531604 }, { "acc": 0.9600976, "epoch": 6.149519568783688, "grad_norm": 34.948116302490234, "learning_rate": 9.854968939969433e-06, "loss": 0.21770558, "memory(GiB)": 13.7, "step": 13120, "train_speed(iter/s)": 1.531594 }, { "acc": 0.94038677, "epoch": 6.151863135692524, "grad_norm": 34.011837005615234, "learning_rate": 9.854783539706296e-06, "loss": 0.29364893, "memory(GiB)": 13.7, "step": 13125, "train_speed(iter/s)": 1.531619 }, { "acc": 0.95925121, "epoch": 6.15420670260136, "grad_norm": 54.555870056152344, "learning_rate": 9.85459802276213e-06, "loss": 0.19220511, "memory(GiB)": 13.7, "step": 13130, "train_speed(iter/s)": 1.531647 }, { "acc": 0.96625996, "epoch": 6.156550269510195, "grad_norm": 3.8050243854522705, "learning_rate": 9.854412389141394e-06, "loss": 0.17084433, "memory(GiB)": 13.7, "step": 13135, "train_speed(iter/s)": 1.531672 }, { "acc": 0.97308607, "epoch": 6.15889383641903, "grad_norm": 3.624508857727051, "learning_rate": 9.854226638848552e-06, "loss": 0.16796477, "memory(GiB)": 13.7, "step": 13140, "train_speed(iter/s)": 1.531695 }, { "acc": 0.98318901, "epoch": 6.161237403327865, "grad_norm": 7.803112030029297, "learning_rate": 9.854040771888066e-06, "loss": 0.11925783, "memory(GiB)": 13.7, "step": 13145, "train_speed(iter/s)": 1.531706 }, { "acc": 0.96416664, "epoch": 6.1635809702367, "grad_norm": 5.120355606079102, "learning_rate": 9.853854788264404e-06, "loss": 0.16409833, "memory(GiB)": 13.7, "step": 13150, "train_speed(iter/s)": 1.531731 }, { "acc": 0.9539381, "epoch": 6.165924537145536, "grad_norm": 27.02035903930664, "learning_rate": 9.85366868798204e-06, "loss": 0.26126354, "memory(GiB)": 13.7, "step": 13155, "train_speed(iter/s)": 1.531785 }, { "acc": 0.98102779, "epoch": 6.168268104054371, "grad_norm": 12.777290344238281, "learning_rate": 9.853482471045443e-06, "loss": 0.16818255, "memory(GiB)": 13.7, "step": 13160, "train_speed(iter/s)": 1.531809 }, { "acc": 0.97613087, "epoch": 6.170611670963206, "grad_norm": 90.32550811767578, "learning_rate": 9.85329613745909e-06, "loss": 0.21411083, "memory(GiB)": 13.7, "step": 13165, "train_speed(iter/s)": 1.531822 }, { "acc": 0.94268637, "epoch": 6.172955237872041, "grad_norm": 27.49654197692871, "learning_rate": 9.853109687227463e-06, "loss": 0.19476912, "memory(GiB)": 13.7, "step": 13170, "train_speed(iter/s)": 1.53181 }, { "acc": 0.9767087, "epoch": 6.175298804780876, "grad_norm": 5.523468017578125, "learning_rate": 9.852923120355038e-06, "loss": 0.18091412, "memory(GiB)": 13.7, "step": 13175, "train_speed(iter/s)": 1.531814 }, { "acc": 0.97544641, "epoch": 6.1776423716897115, "grad_norm": 4.646206855773926, "learning_rate": 9.852736436846306e-06, "loss": 0.11530674, "memory(GiB)": 13.7, "step": 13180, "train_speed(iter/s)": 1.531825 }, { "acc": 0.96832094, "epoch": 6.179985938598547, "grad_norm": 1.6178524494171143, "learning_rate": 9.852549636705749e-06, "loss": 0.20992775, "memory(GiB)": 13.7, "step": 13185, "train_speed(iter/s)": 1.53185 }, { "acc": 0.97170639, "epoch": 6.182329505507382, "grad_norm": 13.367942810058594, "learning_rate": 9.85236271993786e-06, "loss": 0.11687102, "memory(GiB)": 13.7, "step": 13190, "train_speed(iter/s)": 1.531874 }, { "acc": 0.95235481, "epoch": 6.184673072416217, "grad_norm": 5.595159530639648, "learning_rate": 9.85217568654713e-06, "loss": 0.26607149, "memory(GiB)": 13.7, "step": 13195, "train_speed(iter/s)": 1.531903 }, { "acc": 0.93984127, "epoch": 6.187016639325052, "grad_norm": 6.244041442871094, "learning_rate": 9.851988536538054e-06, "loss": 0.36915598, "memory(GiB)": 13.7, "step": 13200, "train_speed(iter/s)": 1.531925 }, { "acc": 0.97426262, "epoch": 6.189360206233888, "grad_norm": 12.307024955749512, "learning_rate": 9.851801269915135e-06, "loss": 0.12598884, "memory(GiB)": 13.7, "step": 13205, "train_speed(iter/s)": 1.531917 }, { "acc": 0.97401791, "epoch": 6.191703773142724, "grad_norm": 7.111966609954834, "learning_rate": 9.851613886682869e-06, "loss": 0.13309253, "memory(GiB)": 13.7, "step": 13210, "train_speed(iter/s)": 1.531921 }, { "acc": 0.97088366, "epoch": 6.194047340051559, "grad_norm": 2.0054705142974854, "learning_rate": 9.851426386845762e-06, "loss": 0.16946449, "memory(GiB)": 13.7, "step": 13215, "train_speed(iter/s)": 1.531934 }, { "acc": 0.96980801, "epoch": 6.196390906960394, "grad_norm": 2.0633392333984375, "learning_rate": 9.851238770408323e-06, "loss": 0.1842109, "memory(GiB)": 13.7, "step": 13220, "train_speed(iter/s)": 1.53192 }, { "acc": 0.95106516, "epoch": 6.198734473869229, "grad_norm": 5.261531352996826, "learning_rate": 9.851051037375058e-06, "loss": 0.22217419, "memory(GiB)": 13.7, "step": 13225, "train_speed(iter/s)": 1.531943 }, { "acc": 0.95716553, "epoch": 6.201078040778064, "grad_norm": 5.1083984375, "learning_rate": 9.850863187750483e-06, "loss": 0.23555379, "memory(GiB)": 13.7, "step": 13230, "train_speed(iter/s)": 1.531988 }, { "acc": 0.97482462, "epoch": 6.2034216076868995, "grad_norm": 10.502042770385742, "learning_rate": 9.850675221539111e-06, "loss": 0.1827214, "memory(GiB)": 13.7, "step": 13235, "train_speed(iter/s)": 1.532002 }, { "acc": 0.97443457, "epoch": 6.205765174595735, "grad_norm": 7.539066314697266, "learning_rate": 9.85048713874546e-06, "loss": 0.12383614, "memory(GiB)": 13.7, "step": 13240, "train_speed(iter/s)": 1.531999 }, { "acc": 0.9745265, "epoch": 6.20810874150457, "grad_norm": 21.798032760620117, "learning_rate": 9.850298939374054e-06, "loss": 0.15230358, "memory(GiB)": 13.7, "step": 13245, "train_speed(iter/s)": 1.531983 }, { "acc": 0.94990988, "epoch": 6.210452308413405, "grad_norm": 5.63046407699585, "learning_rate": 9.850110623429412e-06, "loss": 0.25077636, "memory(GiB)": 13.7, "step": 13250, "train_speed(iter/s)": 1.531996 }, { "acc": 0.97622662, "epoch": 6.21279587532224, "grad_norm": 5.552354335784912, "learning_rate": 9.849922190916063e-06, "loss": 0.17222803, "memory(GiB)": 13.7, "step": 13255, "train_speed(iter/s)": 1.531986 }, { "acc": 0.96146784, "epoch": 6.2151394422310755, "grad_norm": 4.777218818664551, "learning_rate": 9.849733641838538e-06, "loss": 0.31563873, "memory(GiB)": 13.7, "step": 13260, "train_speed(iter/s)": 1.531998 }, { "acc": 0.95718613, "epoch": 6.217483009139911, "grad_norm": 14.212126731872559, "learning_rate": 9.849544976201365e-06, "loss": 0.2435611, "memory(GiB)": 13.7, "step": 13265, "train_speed(iter/s)": 1.532001 }, { "acc": 0.97529755, "epoch": 6.219826576048746, "grad_norm": 5.965771198272705, "learning_rate": 9.849356194009082e-06, "loss": 0.09064203, "memory(GiB)": 13.7, "step": 13270, "train_speed(iter/s)": 1.532014 }, { "acc": 0.96948605, "epoch": 6.222170142957581, "grad_norm": 4.088698863983154, "learning_rate": 9.849167295266226e-06, "loss": 0.16470598, "memory(GiB)": 13.7, "step": 13275, "train_speed(iter/s)": 1.532039 }, { "acc": 0.9641964, "epoch": 6.224513709866416, "grad_norm": 4.9486985206604, "learning_rate": 9.848978279977337e-06, "loss": 0.23415878, "memory(GiB)": 13.7, "step": 13280, "train_speed(iter/s)": 1.532032 }, { "acc": 0.9788269, "epoch": 6.226857276775252, "grad_norm": 8.208077430725098, "learning_rate": 9.84878914814696e-06, "loss": 0.15691433, "memory(GiB)": 13.7, "step": 13285, "train_speed(iter/s)": 1.532038 }, { "acc": 0.97813301, "epoch": 6.2292008436840876, "grad_norm": 2.167997121810913, "learning_rate": 9.848599899779637e-06, "loss": 0.11708831, "memory(GiB)": 13.7, "step": 13290, "train_speed(iter/s)": 1.532067 }, { "acc": 0.95858593, "epoch": 6.231544410592923, "grad_norm": 11.311575889587402, "learning_rate": 9.848410534879921e-06, "loss": 0.29122319, "memory(GiB)": 13.7, "step": 13295, "train_speed(iter/s)": 1.532077 }, { "acc": 0.95936546, "epoch": 6.233887977501758, "grad_norm": 9.560959815979004, "learning_rate": 9.848221053452362e-06, "loss": 0.28984306, "memory(GiB)": 13.7, "step": 13300, "train_speed(iter/s)": 1.532074 }, { "acc": 0.95400629, "epoch": 6.236231544410593, "grad_norm": 13.063504219055176, "learning_rate": 9.848031455501516e-06, "loss": 0.23841882, "memory(GiB)": 13.7, "step": 13305, "train_speed(iter/s)": 1.532111 }, { "acc": 0.96541786, "epoch": 6.238575111319428, "grad_norm": 7.287164211273193, "learning_rate": 9.847841741031939e-06, "loss": 0.24461012, "memory(GiB)": 13.7, "step": 13310, "train_speed(iter/s)": 1.532121 }, { "acc": 0.96747656, "epoch": 6.2409186782282635, "grad_norm": 16.67228126525879, "learning_rate": 9.847651910048192e-06, "loss": 0.24613705, "memory(GiB)": 13.7, "step": 13315, "train_speed(iter/s)": 1.532105 }, { "acc": 0.96372023, "epoch": 6.243262245137099, "grad_norm": 27.441892623901367, "learning_rate": 9.847461962554836e-06, "loss": 0.25695472, "memory(GiB)": 13.7, "step": 13320, "train_speed(iter/s)": 1.53214 }, { "acc": 0.95090857, "epoch": 6.245605812045934, "grad_norm": 12.417332649230957, "learning_rate": 9.847271898556437e-06, "loss": 0.29331369, "memory(GiB)": 13.7, "step": 13325, "train_speed(iter/s)": 1.532156 }, { "acc": 0.98245049, "epoch": 6.247949378954769, "grad_norm": 0.34891989827156067, "learning_rate": 9.847081718057567e-06, "loss": 0.09045095, "memory(GiB)": 13.7, "step": 13330, "train_speed(iter/s)": 1.532175 }, { "acc": 0.96667004, "epoch": 6.250292945863604, "grad_norm": 6.501346588134766, "learning_rate": 9.846891421062791e-06, "loss": 0.22969732, "memory(GiB)": 13.7, "step": 13335, "train_speed(iter/s)": 1.532183 }, { "acc": 0.94305286, "epoch": 6.2526365127724395, "grad_norm": 8.166873931884766, "learning_rate": 9.84670100757669e-06, "loss": 0.34157267, "memory(GiB)": 13.7, "step": 13340, "train_speed(iter/s)": 1.532176 }, { "acc": 0.97195969, "epoch": 6.254980079681275, "grad_norm": 3.003394842147827, "learning_rate": 9.846510477603837e-06, "loss": 0.18476737, "memory(GiB)": 13.7, "step": 13345, "train_speed(iter/s)": 1.532186 }, { "acc": 0.9542923, "epoch": 6.25732364659011, "grad_norm": 8.55167007446289, "learning_rate": 9.846319831148812e-06, "loss": 0.24613285, "memory(GiB)": 13.7, "step": 13350, "train_speed(iter/s)": 1.532223 }, { "acc": 0.97664557, "epoch": 6.259667213498945, "grad_norm": 5.6523027420043945, "learning_rate": 9.8461290682162e-06, "loss": 0.11389563, "memory(GiB)": 13.7, "step": 13355, "train_speed(iter/s)": 1.532238 }, { "acc": 0.97442245, "epoch": 6.26201078040778, "grad_norm": 0.774595320224762, "learning_rate": 9.845938188810582e-06, "loss": 0.18155726, "memory(GiB)": 13.7, "step": 13360, "train_speed(iter/s)": 1.532259 }, { "acc": 0.96016245, "epoch": 6.2643543473166154, "grad_norm": 5.330488681793213, "learning_rate": 9.84574719293655e-06, "loss": 0.23581855, "memory(GiB)": 13.7, "step": 13365, "train_speed(iter/s)": 1.532274 }, { "acc": 0.97687187, "epoch": 6.2666979142254515, "grad_norm": 6.9237446784973145, "learning_rate": 9.845556080598692e-06, "loss": 0.1145695, "memory(GiB)": 13.7, "step": 13370, "train_speed(iter/s)": 1.532275 }, { "acc": 0.97703381, "epoch": 6.269041481134287, "grad_norm": 6.820160865783691, "learning_rate": 9.845364851801603e-06, "loss": 0.1353477, "memory(GiB)": 13.7, "step": 13375, "train_speed(iter/s)": 1.532292 }, { "acc": 0.97662792, "epoch": 6.271385048043122, "grad_norm": 5.203238010406494, "learning_rate": 9.84517350654988e-06, "loss": 0.14053074, "memory(GiB)": 13.7, "step": 13380, "train_speed(iter/s)": 1.532311 }, { "acc": 0.95774841, "epoch": 6.273728614951957, "grad_norm": 17.962888717651367, "learning_rate": 9.844982044848122e-06, "loss": 0.20803704, "memory(GiB)": 13.7, "step": 13385, "train_speed(iter/s)": 1.532319 }, { "acc": 0.94167118, "epoch": 6.276072181860792, "grad_norm": 11.384695053100586, "learning_rate": 9.844790466700933e-06, "loss": 0.27008309, "memory(GiB)": 13.7, "step": 13390, "train_speed(iter/s)": 1.532302 }, { "acc": 0.96772022, "epoch": 6.2784157487696275, "grad_norm": 5.2589335441589355, "learning_rate": 9.844598772112912e-06, "loss": 0.15048152, "memory(GiB)": 13.7, "step": 13395, "train_speed(iter/s)": 1.532313 }, { "acc": 0.95154762, "epoch": 6.280759315678463, "grad_norm": 6.221190929412842, "learning_rate": 9.844406961088673e-06, "loss": 0.19016557, "memory(GiB)": 13.7, "step": 13400, "train_speed(iter/s)": 1.532303 }, { "acc": 0.97761364, "epoch": 6.283102882587298, "grad_norm": 4.680269718170166, "learning_rate": 9.844215033632823e-06, "loss": 0.16019263, "memory(GiB)": 13.7, "step": 13405, "train_speed(iter/s)": 1.532341 }, { "acc": 0.97736483, "epoch": 6.285446449496133, "grad_norm": 4.935531139373779, "learning_rate": 9.844022989749977e-06, "loss": 0.19143444, "memory(GiB)": 13.7, "step": 13410, "train_speed(iter/s)": 1.532337 }, { "acc": 0.96873369, "epoch": 6.287790016404968, "grad_norm": 7.964598655700684, "learning_rate": 9.843830829444751e-06, "loss": 0.14176929, "memory(GiB)": 13.7, "step": 13415, "train_speed(iter/s)": 1.532371 }, { "acc": 0.96729164, "epoch": 6.2901335833138035, "grad_norm": 9.851844787597656, "learning_rate": 9.843638552721764e-06, "loss": 0.1603929, "memory(GiB)": 13.7, "step": 13420, "train_speed(iter/s)": 1.532369 }, { "acc": 0.96916218, "epoch": 6.292477150222639, "grad_norm": 13.243913650512695, "learning_rate": 9.843446159585636e-06, "loss": 0.20733428, "memory(GiB)": 13.7, "step": 13425, "train_speed(iter/s)": 1.532388 }, { "acc": 0.95841846, "epoch": 6.294820717131474, "grad_norm": 12.628100395202637, "learning_rate": 9.843253650040993e-06, "loss": 0.24268341, "memory(GiB)": 13.7, "step": 13430, "train_speed(iter/s)": 1.532407 }, { "acc": 0.97780905, "epoch": 6.297164284040309, "grad_norm": 22.112911224365234, "learning_rate": 9.843061024092462e-06, "loss": 0.20964694, "memory(GiB)": 13.7, "step": 13435, "train_speed(iter/s)": 1.532392 }, { "acc": 0.9705143, "epoch": 6.299507850949144, "grad_norm": 9.091950416564941, "learning_rate": 9.842868281744672e-06, "loss": 0.20384698, "memory(GiB)": 13.7, "step": 13440, "train_speed(iter/s)": 1.532401 }, { "acc": 0.96540241, "epoch": 6.30185141785798, "grad_norm": 2.4504902362823486, "learning_rate": 9.842675423002257e-06, "loss": 0.18374537, "memory(GiB)": 13.7, "step": 13445, "train_speed(iter/s)": 1.532436 }, { "acc": 0.97324333, "epoch": 6.3041949847668155, "grad_norm": 5.293207168579102, "learning_rate": 9.842482447869853e-06, "loss": 0.12080742, "memory(GiB)": 13.7, "step": 13450, "train_speed(iter/s)": 1.532446 }, { "acc": 0.96439486, "epoch": 6.306538551675651, "grad_norm": 21.002206802368164, "learning_rate": 9.842289356352101e-06, "loss": 0.17004273, "memory(GiB)": 13.7, "step": 13455, "train_speed(iter/s)": 1.532436 }, { "acc": 0.96791925, "epoch": 6.308882118584486, "grad_norm": 3.905116558074951, "learning_rate": 9.842096148453636e-06, "loss": 0.16448642, "memory(GiB)": 13.7, "step": 13460, "train_speed(iter/s)": 1.532436 }, { "acc": 0.96092911, "epoch": 6.311225685493321, "grad_norm": 2.0757994651794434, "learning_rate": 9.841902824179106e-06, "loss": 0.22942944, "memory(GiB)": 13.7, "step": 13465, "train_speed(iter/s)": 1.532448 }, { "acc": 0.95220165, "epoch": 6.313569252402156, "grad_norm": 14.167348861694336, "learning_rate": 9.841709383533158e-06, "loss": 0.3155231, "memory(GiB)": 13.7, "step": 13470, "train_speed(iter/s)": 1.53245 }, { "acc": 0.96286077, "epoch": 6.3159128193109915, "grad_norm": 9.917974472045898, "learning_rate": 9.84151582652044e-06, "loss": 0.25616152, "memory(GiB)": 13.7, "step": 13475, "train_speed(iter/s)": 1.532449 }, { "acc": 0.9526042, "epoch": 6.318256386219827, "grad_norm": 10.73738956451416, "learning_rate": 9.841322153145605e-06, "loss": 0.22998536, "memory(GiB)": 13.7, "step": 13480, "train_speed(iter/s)": 1.532464 }, { "acc": 0.96516371, "epoch": 6.320599953128662, "grad_norm": 10.088038444519043, "learning_rate": 9.84112836341331e-06, "loss": 0.16154336, "memory(GiB)": 13.7, "step": 13485, "train_speed(iter/s)": 1.532492 }, { "acc": 0.96667576, "epoch": 6.322943520037497, "grad_norm": 6.200496196746826, "learning_rate": 9.840934457328213e-06, "loss": 0.2250844, "memory(GiB)": 13.7, "step": 13490, "train_speed(iter/s)": 1.532497 }, { "acc": 0.96646824, "epoch": 6.325287086946332, "grad_norm": 3.313244104385376, "learning_rate": 9.840740434894972e-06, "loss": 0.20196784, "memory(GiB)": 13.7, "step": 13495, "train_speed(iter/s)": 1.532512 }, { "acc": 0.97296925, "epoch": 6.327630653855167, "grad_norm": 7.657280445098877, "learning_rate": 9.840546296118255e-06, "loss": 0.20757804, "memory(GiB)": 13.7, "step": 13500, "train_speed(iter/s)": 1.532504 }, { "acc": 0.97900352, "epoch": 6.329974220764003, "grad_norm": 17.258665084838867, "learning_rate": 9.840352041002724e-06, "loss": 0.13694451, "memory(GiB)": 13.7, "step": 13505, "train_speed(iter/s)": 1.532483 }, { "acc": 0.97755957, "epoch": 6.332317787672838, "grad_norm": 5.996537685394287, "learning_rate": 9.840157669553048e-06, "loss": 0.12296031, "memory(GiB)": 13.7, "step": 13510, "train_speed(iter/s)": 1.532494 }, { "acc": 0.96897163, "epoch": 6.334661354581673, "grad_norm": 3.973809242248535, "learning_rate": 9.839963181773903e-06, "loss": 0.15447569, "memory(GiB)": 13.7, "step": 13515, "train_speed(iter/s)": 1.532491 }, { "acc": 0.97416039, "epoch": 6.337004921490508, "grad_norm": 7.8850603103637695, "learning_rate": 9.839768577669962e-06, "loss": 0.18713069, "memory(GiB)": 13.7, "step": 13520, "train_speed(iter/s)": 1.532483 }, { "acc": 0.96139832, "epoch": 6.339348488399343, "grad_norm": 3.8255743980407715, "learning_rate": 9.839573857245902e-06, "loss": 0.20355396, "memory(GiB)": 13.7, "step": 13525, "train_speed(iter/s)": 1.532469 }, { "acc": 0.95466747, "epoch": 6.3416920553081795, "grad_norm": 14.331449508666992, "learning_rate": 9.839379020506405e-06, "loss": 0.27608638, "memory(GiB)": 13.7, "step": 13530, "train_speed(iter/s)": 1.532489 }, { "acc": 0.98051243, "epoch": 6.344035622217015, "grad_norm": 4.841390132904053, "learning_rate": 9.839184067456154e-06, "loss": 0.10731206, "memory(GiB)": 13.7, "step": 13535, "train_speed(iter/s)": 1.532511 }, { "acc": 0.95390873, "epoch": 6.34637918912585, "grad_norm": 8.064196586608887, "learning_rate": 9.838988998099832e-06, "loss": 0.23041902, "memory(GiB)": 13.7, "step": 13540, "train_speed(iter/s)": 1.532514 }, { "acc": 0.93758926, "epoch": 6.348722756034685, "grad_norm": 21.11014175415039, "learning_rate": 9.838793812442131e-06, "loss": 0.36297832, "memory(GiB)": 13.7, "step": 13545, "train_speed(iter/s)": 1.53253 }, { "acc": 0.97531252, "epoch": 6.35106632294352, "grad_norm": 4.630555629730225, "learning_rate": 9.838598510487744e-06, "loss": 0.09957451, "memory(GiB)": 13.7, "step": 13550, "train_speed(iter/s)": 1.532553 }, { "acc": 0.95675602, "epoch": 6.353409889852355, "grad_norm": 6.662616729736328, "learning_rate": 9.83840309224136e-06, "loss": 0.26987095, "memory(GiB)": 13.7, "step": 13555, "train_speed(iter/s)": 1.532537 }, { "acc": 0.95818453, "epoch": 6.355753456761191, "grad_norm": 12.123857498168945, "learning_rate": 9.838207557707681e-06, "loss": 0.23671446, "memory(GiB)": 13.7, "step": 13560, "train_speed(iter/s)": 1.532568 }, { "acc": 0.97200222, "epoch": 6.358097023670026, "grad_norm": 5.822350978851318, "learning_rate": 9.838011906891405e-06, "loss": 0.21499887, "memory(GiB)": 13.7, "step": 13565, "train_speed(iter/s)": 1.532565 }, { "acc": 0.95667725, "epoch": 6.360440590578861, "grad_norm": 6.848392009735107, "learning_rate": 9.837816139797236e-06, "loss": 0.26715341, "memory(GiB)": 13.7, "step": 13570, "train_speed(iter/s)": 1.532576 }, { "acc": 0.97223215, "epoch": 6.362784157487696, "grad_norm": 8.39886474609375, "learning_rate": 9.837620256429879e-06, "loss": 0.16968362, "memory(GiB)": 13.7, "step": 13575, "train_speed(iter/s)": 1.532584 }, { "acc": 0.98041668, "epoch": 6.365127724396531, "grad_norm": 2.3394041061401367, "learning_rate": 9.83742425679404e-06, "loss": 0.07909737, "memory(GiB)": 13.7, "step": 13580, "train_speed(iter/s)": 1.532583 }, { "acc": 0.96964283, "epoch": 6.367471291305367, "grad_norm": 3.4657089710235596, "learning_rate": 9.837228140894433e-06, "loss": 0.20236731, "memory(GiB)": 13.7, "step": 13585, "train_speed(iter/s)": 1.532595 }, { "acc": 0.95731297, "epoch": 6.369814858214202, "grad_norm": 9.514628410339355, "learning_rate": 9.837031908735773e-06, "loss": 0.20719428, "memory(GiB)": 13.7, "step": 13590, "train_speed(iter/s)": 1.532585 }, { "acc": 0.95797129, "epoch": 6.372158425123037, "grad_norm": 8.585631370544434, "learning_rate": 9.836835560322775e-06, "loss": 0.27464595, "memory(GiB)": 13.7, "step": 13595, "train_speed(iter/s)": 1.532622 }, { "acc": 0.94979839, "epoch": 6.374501992031872, "grad_norm": 9.956404685974121, "learning_rate": 9.836639095660159e-06, "loss": 0.28414979, "memory(GiB)": 13.7, "step": 13600, "train_speed(iter/s)": 1.532633 }, { "acc": 0.96726007, "epoch": 6.376845558940707, "grad_norm": 10.583171844482422, "learning_rate": 9.836442514752645e-06, "loss": 0.22738383, "memory(GiB)": 13.7, "step": 13605, "train_speed(iter/s)": 1.532658 }, { "acc": 0.95334835, "epoch": 6.379189125849543, "grad_norm": 11.60837459564209, "learning_rate": 9.836245817604962e-06, "loss": 0.29798589, "memory(GiB)": 13.7, "step": 13610, "train_speed(iter/s)": 1.532695 }, { "acc": 0.95903854, "epoch": 6.381532692758379, "grad_norm": 11.651726722717285, "learning_rate": 9.836049004221835e-06, "loss": 0.22886758, "memory(GiB)": 13.7, "step": 13615, "train_speed(iter/s)": 1.532733 }, { "acc": 0.95819454, "epoch": 6.383876259667214, "grad_norm": 6.740206718444824, "learning_rate": 9.835852074607997e-06, "loss": 0.19190658, "memory(GiB)": 13.7, "step": 13620, "train_speed(iter/s)": 1.532738 }, { "acc": 0.97061005, "epoch": 6.386219826576049, "grad_norm": 6.345414161682129, "learning_rate": 9.83565502876818e-06, "loss": 0.11726985, "memory(GiB)": 13.7, "step": 13625, "train_speed(iter/s)": 1.532745 }, { "acc": 0.97306948, "epoch": 6.388563393484884, "grad_norm": 5.718635082244873, "learning_rate": 9.83545786670712e-06, "loss": 0.18471146, "memory(GiB)": 13.7, "step": 13630, "train_speed(iter/s)": 1.532763 }, { "acc": 0.96592522, "epoch": 6.390906960393719, "grad_norm": 12.353097915649414, "learning_rate": 9.835260588429558e-06, "loss": 0.20472832, "memory(GiB)": 13.7, "step": 13635, "train_speed(iter/s)": 1.532771 }, { "acc": 0.95851192, "epoch": 6.393250527302555, "grad_norm": 4.9568281173706055, "learning_rate": 9.835063193940234e-06, "loss": 0.19782495, "memory(GiB)": 13.7, "step": 13640, "train_speed(iter/s)": 1.532795 }, { "acc": 0.96698179, "epoch": 6.39559409421139, "grad_norm": 4.7913713455200195, "learning_rate": 9.834865683243896e-06, "loss": 0.16593696, "memory(GiB)": 13.7, "step": 13645, "train_speed(iter/s)": 1.532828 }, { "acc": 0.975, "epoch": 6.397937661120225, "grad_norm": 0.1283646821975708, "learning_rate": 9.834668056345286e-06, "loss": 0.07925876, "memory(GiB)": 13.7, "step": 13650, "train_speed(iter/s)": 1.532859 }, { "acc": 0.98269749, "epoch": 6.40028122802906, "grad_norm": 4.168737888336182, "learning_rate": 9.834470313249159e-06, "loss": 0.14453745, "memory(GiB)": 13.7, "step": 13655, "train_speed(iter/s)": 1.532861 }, { "acc": 0.95329437, "epoch": 6.402624794937895, "grad_norm": 4.70489501953125, "learning_rate": 9.834272453960268e-06, "loss": 0.27058964, "memory(GiB)": 13.7, "step": 13660, "train_speed(iter/s)": 1.532862 }, { "acc": 0.95135422, "epoch": 6.404968361846731, "grad_norm": 6.37857723236084, "learning_rate": 9.834074478483364e-06, "loss": 0.24729652, "memory(GiB)": 13.7, "step": 13665, "train_speed(iter/s)": 1.532859 }, { "acc": 0.95594225, "epoch": 6.407311928755566, "grad_norm": 2.9925026893615723, "learning_rate": 9.833876386823212e-06, "loss": 0.21993179, "memory(GiB)": 13.7, "step": 13670, "train_speed(iter/s)": 1.532847 }, { "acc": 0.96734791, "epoch": 6.409655495664401, "grad_norm": 6.910001754760742, "learning_rate": 9.833678178984569e-06, "loss": 0.15782574, "memory(GiB)": 13.7, "step": 13675, "train_speed(iter/s)": 1.532844 }, { "acc": 0.96647167, "epoch": 6.411999062573236, "grad_norm": 9.594477653503418, "learning_rate": 9.833479854972201e-06, "loss": 0.16396962, "memory(GiB)": 13.7, "step": 13680, "train_speed(iter/s)": 1.532842 }, { "acc": 0.94675179, "epoch": 6.414342629482071, "grad_norm": 7.423813343048096, "learning_rate": 9.833281414790875e-06, "loss": 0.3970437, "memory(GiB)": 13.7, "step": 13685, "train_speed(iter/s)": 1.53287 }, { "acc": 0.96272182, "epoch": 6.416686196390907, "grad_norm": 5.171608924865723, "learning_rate": 9.833082858445359e-06, "loss": 0.13057331, "memory(GiB)": 13.7, "step": 13690, "train_speed(iter/s)": 1.532888 }, { "acc": 0.96180553, "epoch": 6.419029763299743, "grad_norm": 24.652782440185547, "learning_rate": 9.832884185940432e-06, "loss": 0.18872333, "memory(GiB)": 13.7, "step": 13695, "train_speed(iter/s)": 1.532905 }, { "acc": 0.96558495, "epoch": 6.421373330208578, "grad_norm": 6.219527244567871, "learning_rate": 9.83268539728086e-06, "loss": 0.21086745, "memory(GiB)": 13.7, "step": 13700, "train_speed(iter/s)": 1.532905 }, { "acc": 0.96925507, "epoch": 6.423716897117413, "grad_norm": 5.439691066741943, "learning_rate": 9.832486492471426e-06, "loss": 0.24975691, "memory(GiB)": 13.7, "step": 13705, "train_speed(iter/s)": 1.532923 }, { "acc": 0.96196423, "epoch": 6.426060464026248, "grad_norm": 6.686866760253906, "learning_rate": 9.832287471516914e-06, "loss": 0.25834327, "memory(GiB)": 13.7, "step": 13710, "train_speed(iter/s)": 1.532916 }, { "acc": 0.97364578, "epoch": 6.428404030935083, "grad_norm": 6.2497053146362305, "learning_rate": 9.832088334422105e-06, "loss": 0.16249011, "memory(GiB)": 13.7, "step": 13715, "train_speed(iter/s)": 1.532932 }, { "acc": 0.96950397, "epoch": 6.430747597843919, "grad_norm": 7.925528049468994, "learning_rate": 9.831889081191784e-06, "loss": 0.15602359, "memory(GiB)": 13.7, "step": 13720, "train_speed(iter/s)": 1.532931 }, { "acc": 0.97593746, "epoch": 6.433091164752754, "grad_norm": 6.264805316925049, "learning_rate": 9.83168971183074e-06, "loss": 0.0810527, "memory(GiB)": 13.7, "step": 13725, "train_speed(iter/s)": 1.532916 }, { "acc": 0.96266861, "epoch": 6.435434731661589, "grad_norm": 7.691404342651367, "learning_rate": 9.83149022634377e-06, "loss": 0.17553191, "memory(GiB)": 13.7, "step": 13730, "train_speed(iter/s)": 1.532941 }, { "acc": 0.97552643, "epoch": 6.437778298570424, "grad_norm": 5.995169162750244, "learning_rate": 9.831290624735665e-06, "loss": 0.1338979, "memory(GiB)": 13.7, "step": 13735, "train_speed(iter/s)": 1.532987 }, { "acc": 0.96498508, "epoch": 6.440121865479259, "grad_norm": 9.382139205932617, "learning_rate": 9.831090907011224e-06, "loss": 0.28113532, "memory(GiB)": 13.7, "step": 13740, "train_speed(iter/s)": 1.532976 }, { "acc": 0.95616722, "epoch": 6.4424654323880945, "grad_norm": 5.942612648010254, "learning_rate": 9.830891073175247e-06, "loss": 0.27828178, "memory(GiB)": 13.7, "step": 13745, "train_speed(iter/s)": 1.532963 }, { "acc": 0.95322924, "epoch": 6.44480899929693, "grad_norm": 2.5617387294769287, "learning_rate": 9.830691123232537e-06, "loss": 0.19467516, "memory(GiB)": 13.7, "step": 13750, "train_speed(iter/s)": 1.532978 }, { "acc": 0.97620201, "epoch": 6.447152566205765, "grad_norm": 8.553196907043457, "learning_rate": 9.830491057187903e-06, "loss": 0.17326083, "memory(GiB)": 13.7, "step": 13755, "train_speed(iter/s)": 1.533022 }, { "acc": 0.9735796, "epoch": 6.4494961331146, "grad_norm": 2.5551347732543945, "learning_rate": 9.830290875046149e-06, "loss": 0.12184188, "memory(GiB)": 13.7, "step": 13760, "train_speed(iter/s)": 1.533055 }, { "acc": 0.96155033, "epoch": 6.451839700023435, "grad_norm": 4.48640775680542, "learning_rate": 9.83009057681209e-06, "loss": 0.10848782, "memory(GiB)": 13.7, "step": 13765, "train_speed(iter/s)": 1.533051 }, { "acc": 0.95062342, "epoch": 6.4541832669322705, "grad_norm": 6.360219478607178, "learning_rate": 9.829890162490542e-06, "loss": 0.29856832, "memory(GiB)": 13.7, "step": 13770, "train_speed(iter/s)": 1.533073 }, { "acc": 0.97045507, "epoch": 6.456526833841107, "grad_norm": 3.8381381034851074, "learning_rate": 9.829689632086317e-06, "loss": 0.18111382, "memory(GiB)": 13.7, "step": 13775, "train_speed(iter/s)": 1.533106 }, { "acc": 0.9591197, "epoch": 6.458870400749942, "grad_norm": 5.324275970458984, "learning_rate": 9.829488985604241e-06, "loss": 0.27440393, "memory(GiB)": 13.7, "step": 13780, "train_speed(iter/s)": 1.533121 }, { "acc": 0.96997032, "epoch": 6.461213967658777, "grad_norm": 30.36167335510254, "learning_rate": 9.829288223049133e-06, "loss": 0.2036267, "memory(GiB)": 13.7, "step": 13785, "train_speed(iter/s)": 1.53315 }, { "acc": 0.96420851, "epoch": 6.463557534567612, "grad_norm": 56.07160949707031, "learning_rate": 9.82908734442582e-06, "loss": 0.19907298, "memory(GiB)": 13.7, "step": 13790, "train_speed(iter/s)": 1.533172 }, { "acc": 0.97344656, "epoch": 6.465901101476447, "grad_norm": 3.7047319412231445, "learning_rate": 9.828886349739131e-06, "loss": 0.15363443, "memory(GiB)": 13.7, "step": 13795, "train_speed(iter/s)": 1.533172 }, { "acc": 0.96796131, "epoch": 6.468244668385283, "grad_norm": 15.07724666595459, "learning_rate": 9.828685238993897e-06, "loss": 0.17041533, "memory(GiB)": 13.7, "step": 13800, "train_speed(iter/s)": 1.533205 }, { "acc": 0.97856636, "epoch": 6.470588235294118, "grad_norm": 7.151169300079346, "learning_rate": 9.828484012194952e-06, "loss": 0.13291423, "memory(GiB)": 13.7, "step": 13805, "train_speed(iter/s)": 1.533224 }, { "acc": 0.9693306, "epoch": 6.472931802202953, "grad_norm": 3.3194732666015625, "learning_rate": 9.82828266934713e-06, "loss": 0.17604247, "memory(GiB)": 13.7, "step": 13810, "train_speed(iter/s)": 1.533229 }, { "acc": 0.97805634, "epoch": 6.475275369111788, "grad_norm": 1.5825027227401733, "learning_rate": 9.828081210455276e-06, "loss": 0.15703137, "memory(GiB)": 13.7, "step": 13815, "train_speed(iter/s)": 1.533261 }, { "acc": 0.98052082, "epoch": 6.477618936020623, "grad_norm": 15.364375114440918, "learning_rate": 9.827879635524228e-06, "loss": 0.10168821, "memory(GiB)": 13.7, "step": 13820, "train_speed(iter/s)": 1.533247 }, { "acc": 0.97450781, "epoch": 6.4799625029294585, "grad_norm": 9.060462951660156, "learning_rate": 9.827677944558833e-06, "loss": 0.18206472, "memory(GiB)": 13.7, "step": 13825, "train_speed(iter/s)": 1.533257 }, { "acc": 0.9554018, "epoch": 6.482306069838294, "grad_norm": 7.306530952453613, "learning_rate": 9.827476137563939e-06, "loss": 0.22952065, "memory(GiB)": 13.7, "step": 13830, "train_speed(iter/s)": 1.533277 }, { "acc": 0.95261068, "epoch": 6.484649636747129, "grad_norm": 11.027779579162598, "learning_rate": 9.827274214544396e-06, "loss": 0.20012169, "memory(GiB)": 13.7, "step": 13835, "train_speed(iter/s)": 1.533302 }, { "acc": 0.96217175, "epoch": 6.486993203655964, "grad_norm": 2.08099365234375, "learning_rate": 9.827072175505059e-06, "loss": 0.12929506, "memory(GiB)": 13.7, "step": 13840, "train_speed(iter/s)": 1.533316 }, { "acc": 0.95749226, "epoch": 6.489336770564799, "grad_norm": 12.545753479003906, "learning_rate": 9.826870020450782e-06, "loss": 0.28078609, "memory(GiB)": 13.7, "step": 13845, "train_speed(iter/s)": 1.533328 }, { "acc": 0.97237272, "epoch": 6.4916803374736345, "grad_norm": 6.582254886627197, "learning_rate": 9.826667749386427e-06, "loss": 0.12762909, "memory(GiB)": 13.7, "step": 13850, "train_speed(iter/s)": 1.533355 }, { "acc": 0.95727186, "epoch": 6.49402390438247, "grad_norm": 2.035949230194092, "learning_rate": 9.826465362316853e-06, "loss": 0.23507624, "memory(GiB)": 13.7, "step": 13855, "train_speed(iter/s)": 1.533365 }, { "acc": 0.97248163, "epoch": 6.496367471291306, "grad_norm": 5.856232643127441, "learning_rate": 9.826262859246927e-06, "loss": 0.14013324, "memory(GiB)": 13.7, "step": 13860, "train_speed(iter/s)": 1.53338 }, { "acc": 0.96281252, "epoch": 6.498711038200141, "grad_norm": 7.982785701751709, "learning_rate": 9.826060240181515e-06, "loss": 0.15290072, "memory(GiB)": 13.7, "step": 13865, "train_speed(iter/s)": 1.533425 }, { "acc": 0.97708368, "epoch": 6.501054605108976, "grad_norm": 5.5775556564331055, "learning_rate": 9.82585750512549e-06, "loss": 0.1044971, "memory(GiB)": 13.7, "step": 13870, "train_speed(iter/s)": 1.533451 }, { "acc": 0.96451149, "epoch": 6.503398172017811, "grad_norm": 9.674110412597656, "learning_rate": 9.82565465408372e-06, "loss": 0.20791025, "memory(GiB)": 13.7, "step": 13875, "train_speed(iter/s)": 1.533497 }, { "acc": 0.9538393, "epoch": 6.5057417389266465, "grad_norm": 9.026511192321777, "learning_rate": 9.825451687061087e-06, "loss": 0.19495413, "memory(GiB)": 13.7, "step": 13880, "train_speed(iter/s)": 1.533497 }, { "acc": 0.98226185, "epoch": 6.508085305835482, "grad_norm": 7.95861291885376, "learning_rate": 9.825248604062466e-06, "loss": 0.12706289, "memory(GiB)": 13.7, "step": 13885, "train_speed(iter/s)": 1.533512 }, { "acc": 0.96320801, "epoch": 6.510428872744317, "grad_norm": 7.275056838989258, "learning_rate": 9.825045405092738e-06, "loss": 0.16121664, "memory(GiB)": 13.7, "step": 13890, "train_speed(iter/s)": 1.533543 }, { "acc": 0.95722866, "epoch": 6.512772439653152, "grad_norm": 7.669464111328125, "learning_rate": 9.82484209015679e-06, "loss": 0.25185025, "memory(GiB)": 13.7, "step": 13895, "train_speed(iter/s)": 1.533525 }, { "acc": 0.96694279, "epoch": 6.515116006561987, "grad_norm": 8.973942756652832, "learning_rate": 9.824638659259505e-06, "loss": 0.14422488, "memory(GiB)": 13.7, "step": 13900, "train_speed(iter/s)": 1.533529 }, { "acc": 0.95834332, "epoch": 6.5174595734708225, "grad_norm": 61.523128509521484, "learning_rate": 9.824435112405776e-06, "loss": 0.1587428, "memory(GiB)": 13.7, "step": 13905, "train_speed(iter/s)": 1.533539 }, { "acc": 0.94677086, "epoch": 6.519803140379658, "grad_norm": 10.129650115966797, "learning_rate": 9.824231449600495e-06, "loss": 0.28502173, "memory(GiB)": 13.7, "step": 13910, "train_speed(iter/s)": 1.533574 }, { "acc": 0.9568449, "epoch": 6.522146707288493, "grad_norm": 9.469657897949219, "learning_rate": 9.824027670848558e-06, "loss": 0.14207808, "memory(GiB)": 13.7, "step": 13915, "train_speed(iter/s)": 1.533592 }, { "acc": 0.96982632, "epoch": 6.524490274197328, "grad_norm": 4.608408451080322, "learning_rate": 9.823823776154862e-06, "loss": 0.11694813, "memory(GiB)": 13.7, "step": 13920, "train_speed(iter/s)": 1.533585 }, { "acc": 0.97398672, "epoch": 6.526833841106163, "grad_norm": 5.687460422515869, "learning_rate": 9.823619765524309e-06, "loss": 0.17602706, "memory(GiB)": 13.7, "step": 13925, "train_speed(iter/s)": 1.533612 }, { "acc": 0.97007999, "epoch": 6.5291774080149985, "grad_norm": 8.372528076171875, "learning_rate": 9.823415638961798e-06, "loss": 0.19089688, "memory(GiB)": 13.7, "step": 13930, "train_speed(iter/s)": 1.533612 }, { "acc": 0.94723282, "epoch": 6.5315209749238345, "grad_norm": 15.278951644897461, "learning_rate": 9.823211396472244e-06, "loss": 0.3498873, "memory(GiB)": 13.7, "step": 13935, "train_speed(iter/s)": 1.533618 }, { "acc": 0.97246037, "epoch": 6.533864541832669, "grad_norm": 261.82720947265625, "learning_rate": 9.82300703806055e-06, "loss": 0.15511416, "memory(GiB)": 13.7, "step": 13940, "train_speed(iter/s)": 1.533589 }, { "acc": 0.97147827, "epoch": 6.536208108741505, "grad_norm": 3.93363356590271, "learning_rate": 9.82280256373163e-06, "loss": 0.10340163, "memory(GiB)": 13.7, "step": 13945, "train_speed(iter/s)": 1.533573 }, { "acc": 0.95562496, "epoch": 6.53855167565034, "grad_norm": 6.01283073425293, "learning_rate": 9.822597973490399e-06, "loss": 0.24613245, "memory(GiB)": 13.7, "step": 13950, "train_speed(iter/s)": 1.533588 }, { "acc": 0.97244987, "epoch": 6.540895242559175, "grad_norm": 11.3707857131958, "learning_rate": 9.822393267341776e-06, "loss": 0.21381216, "memory(GiB)": 13.7, "step": 13955, "train_speed(iter/s)": 1.533571 }, { "acc": 0.94964514, "epoch": 6.5432388094680105, "grad_norm": 9.940560340881348, "learning_rate": 9.822188445290678e-06, "loss": 0.28386676, "memory(GiB)": 13.7, "step": 13960, "train_speed(iter/s)": 1.533578 }, { "acc": 0.9606945, "epoch": 6.545582376376846, "grad_norm": 3.201798677444458, "learning_rate": 9.82198350734203e-06, "loss": 0.19734781, "memory(GiB)": 13.7, "step": 13965, "train_speed(iter/s)": 1.533621 }, { "acc": 0.96376667, "epoch": 6.547925943285681, "grad_norm": 6.688273906707764, "learning_rate": 9.82177845350076e-06, "loss": 0.23752511, "memory(GiB)": 13.7, "step": 13970, "train_speed(iter/s)": 1.533645 }, { "acc": 0.97429924, "epoch": 6.550269510194516, "grad_norm": 5.170932769775391, "learning_rate": 9.821573283771794e-06, "loss": 0.17637755, "memory(GiB)": 13.7, "step": 13975, "train_speed(iter/s)": 1.533669 }, { "acc": 0.9726326, "epoch": 6.552613077103351, "grad_norm": 7.331823825836182, "learning_rate": 9.821367998160065e-06, "loss": 0.13191677, "memory(GiB)": 13.7, "step": 13980, "train_speed(iter/s)": 1.533724 }, { "acc": 0.96686954, "epoch": 6.5549566440121865, "grad_norm": 18.283239364624023, "learning_rate": 9.821162596670507e-06, "loss": 0.23500445, "memory(GiB)": 13.7, "step": 13985, "train_speed(iter/s)": 1.533745 }, { "acc": 0.97140598, "epoch": 6.557300210921022, "grad_norm": 10.190309524536133, "learning_rate": 9.820957079308058e-06, "loss": 0.23141088, "memory(GiB)": 13.7, "step": 13990, "train_speed(iter/s)": 1.53375 }, { "acc": 0.95734043, "epoch": 6.559643777829857, "grad_norm": 8.898059844970703, "learning_rate": 9.820751446077657e-06, "loss": 0.23431373, "memory(GiB)": 13.7, "step": 13995, "train_speed(iter/s)": 1.533749 }, { "acc": 0.96762943, "epoch": 6.561987344738692, "grad_norm": 19.988428115844727, "learning_rate": 9.820545696984246e-06, "loss": 0.25622029, "memory(GiB)": 13.7, "step": 14000, "train_speed(iter/s)": 1.533764 }, { "acc": 0.95809975, "epoch": 6.564330911647527, "grad_norm": 4.7768707275390625, "learning_rate": 9.820339832032771e-06, "loss": 0.23208697, "memory(GiB)": 13.7, "step": 14005, "train_speed(iter/s)": 1.533778 }, { "acc": 0.96934032, "epoch": 6.566674478556362, "grad_norm": 1.5711613893508911, "learning_rate": 9.820133851228183e-06, "loss": 0.23761313, "memory(GiB)": 13.7, "step": 14010, "train_speed(iter/s)": 1.533793 }, { "acc": 0.95680561, "epoch": 6.569018045465198, "grad_norm": 7.433568000793457, "learning_rate": 9.81992775457543e-06, "loss": 0.15693283, "memory(GiB)": 13.7, "step": 14015, "train_speed(iter/s)": 1.533799 }, { "acc": 0.95557289, "epoch": 6.571361612374034, "grad_norm": 10.523633003234863, "learning_rate": 9.819721542079466e-06, "loss": 0.13523985, "memory(GiB)": 13.7, "step": 14020, "train_speed(iter/s)": 1.533808 }, { "acc": 0.95149536, "epoch": 6.573705179282869, "grad_norm": 11.636713027954102, "learning_rate": 9.819515213745248e-06, "loss": 0.29406979, "memory(GiB)": 13.7, "step": 14025, "train_speed(iter/s)": 1.53381 }, { "acc": 0.9770834, "epoch": 6.576048746191704, "grad_norm": 5.139320373535156, "learning_rate": 9.819308769577738e-06, "loss": 0.1237294, "memory(GiB)": 13.7, "step": 14030, "train_speed(iter/s)": 1.533815 }, { "acc": 0.97505951, "epoch": 6.578392313100539, "grad_norm": 6.243825912475586, "learning_rate": 9.819102209581897e-06, "loss": 0.14409192, "memory(GiB)": 13.7, "step": 14035, "train_speed(iter/s)": 1.533818 }, { "acc": 0.9652462, "epoch": 6.5807358800093745, "grad_norm": 4.2505950927734375, "learning_rate": 9.818895533762687e-06, "loss": 0.20905013, "memory(GiB)": 13.7, "step": 14040, "train_speed(iter/s)": 1.533833 }, { "acc": 0.95244045, "epoch": 6.58307944691821, "grad_norm": 9.555305480957031, "learning_rate": 9.818688742125078e-06, "loss": 0.15535457, "memory(GiB)": 13.7, "step": 14045, "train_speed(iter/s)": 1.533873 }, { "acc": 0.96527929, "epoch": 6.585423013827045, "grad_norm": 5.23220682144165, "learning_rate": 9.818481834674041e-06, "loss": 0.16543601, "memory(GiB)": 13.7, "step": 14050, "train_speed(iter/s)": 1.533906 }, { "acc": 0.9688488, "epoch": 6.58776658073588, "grad_norm": 20.226045608520508, "learning_rate": 9.81827481141455e-06, "loss": 0.14669429, "memory(GiB)": 13.7, "step": 14055, "train_speed(iter/s)": 1.5339 }, { "acc": 0.94177227, "epoch": 6.590110147644715, "grad_norm": 28.00271224975586, "learning_rate": 9.81806767235158e-06, "loss": 0.37288866, "memory(GiB)": 13.7, "step": 14060, "train_speed(iter/s)": 1.533904 }, { "acc": 0.97024708, "epoch": 6.5924537145535504, "grad_norm": 3.8457272052764893, "learning_rate": 9.817860417490109e-06, "loss": 0.12531289, "memory(GiB)": 13.7, "step": 14065, "train_speed(iter/s)": 1.533923 }, { "acc": 0.93212662, "epoch": 6.594797281462386, "grad_norm": 103.71338653564453, "learning_rate": 9.81765304683512e-06, "loss": 0.37371933, "memory(GiB)": 13.7, "step": 14070, "train_speed(iter/s)": 1.533933 }, { "acc": 0.95696745, "epoch": 6.597140848371221, "grad_norm": 3.277427911758423, "learning_rate": 9.817445560391597e-06, "loss": 0.29267206, "memory(GiB)": 13.7, "step": 14075, "train_speed(iter/s)": 1.533921 }, { "acc": 0.97821426, "epoch": 6.599484415280056, "grad_norm": 1.6671456098556519, "learning_rate": 9.81723795816453e-06, "loss": 0.15145099, "memory(GiB)": 13.7, "step": 14080, "train_speed(iter/s)": 1.533918 }, { "acc": 0.9659359, "epoch": 6.601827982188891, "grad_norm": 5.485801696777344, "learning_rate": 9.817030240158904e-06, "loss": 0.18702121, "memory(GiB)": 13.7, "step": 14085, "train_speed(iter/s)": 1.533932 }, { "acc": 0.96211987, "epoch": 6.604171549097726, "grad_norm": 10.097405433654785, "learning_rate": 9.816822406379715e-06, "loss": 0.20699263, "memory(GiB)": 13.7, "step": 14090, "train_speed(iter/s)": 1.533955 }, { "acc": 0.98604164, "epoch": 6.606515116006562, "grad_norm": 3.701714515686035, "learning_rate": 9.81661445683196e-06, "loss": 0.1044147, "memory(GiB)": 13.7, "step": 14095, "train_speed(iter/s)": 1.53394 }, { "acc": 0.95379467, "epoch": 6.608858682915397, "grad_norm": 9.598749160766602, "learning_rate": 9.816406391520634e-06, "loss": 0.19806414, "memory(GiB)": 13.7, "step": 14100, "train_speed(iter/s)": 1.533962 }, { "acc": 0.9692709, "epoch": 6.611202249824233, "grad_norm": 3.2657699584960938, "learning_rate": 9.816198210450739e-06, "loss": 0.14757915, "memory(GiB)": 13.7, "step": 14105, "train_speed(iter/s)": 1.533974 }, { "acc": 0.96125002, "epoch": 6.613545816733068, "grad_norm": 71.82357788085938, "learning_rate": 9.815989913627281e-06, "loss": 0.27552252, "memory(GiB)": 13.7, "step": 14110, "train_speed(iter/s)": 1.534012 }, { "acc": 0.97080765, "epoch": 6.615889383641903, "grad_norm": 8.57617473602295, "learning_rate": 9.815781501055267e-06, "loss": 0.16457602, "memory(GiB)": 13.7, "step": 14115, "train_speed(iter/s)": 1.534041 }, { "acc": 0.96868057, "epoch": 6.6182329505507385, "grad_norm": 15.756136894226074, "learning_rate": 9.815572972739702e-06, "loss": 0.30678482, "memory(GiB)": 13.7, "step": 14120, "train_speed(iter/s)": 1.534038 }, { "acc": 0.9440671, "epoch": 6.620576517459574, "grad_norm": 7.731116771697998, "learning_rate": 9.815364328685603e-06, "loss": 0.27727356, "memory(GiB)": 13.7, "step": 14125, "train_speed(iter/s)": 1.534057 }, { "acc": 0.97208405, "epoch": 6.622920084368409, "grad_norm": 8.809324264526367, "learning_rate": 9.815155568897986e-06, "loss": 0.14657686, "memory(GiB)": 13.7, "step": 14130, "train_speed(iter/s)": 1.534086 }, { "acc": 0.95496998, "epoch": 6.625263651277244, "grad_norm": 50.034366607666016, "learning_rate": 9.814946693381862e-06, "loss": 0.23403349, "memory(GiB)": 13.7, "step": 14135, "train_speed(iter/s)": 1.5341 }, { "acc": 0.96994877, "epoch": 6.627607218186079, "grad_norm": 12.771401405334473, "learning_rate": 9.81473770214226e-06, "loss": 0.23726482, "memory(GiB)": 13.7, "step": 14140, "train_speed(iter/s)": 1.534091 }, { "acc": 0.96726227, "epoch": 6.629950785094914, "grad_norm": 5.8315205574035645, "learning_rate": 9.814528595184198e-06, "loss": 0.19777524, "memory(GiB)": 13.7, "step": 14145, "train_speed(iter/s)": 1.534101 }, { "acc": 0.9640666, "epoch": 6.63229435200375, "grad_norm": 6.311275005340576, "learning_rate": 9.814319372512704e-06, "loss": 0.23123503, "memory(GiB)": 13.7, "step": 14150, "train_speed(iter/s)": 1.534081 }, { "acc": 0.97049112, "epoch": 6.634637918912585, "grad_norm": 6.186718463897705, "learning_rate": 9.814110034132806e-06, "loss": 0.11846392, "memory(GiB)": 13.7, "step": 14155, "train_speed(iter/s)": 1.534089 }, { "acc": 0.94279013, "epoch": 6.63698148582142, "grad_norm": 3.2263317108154297, "learning_rate": 9.813900580049537e-06, "loss": 0.35182254, "memory(GiB)": 13.7, "step": 14160, "train_speed(iter/s)": 1.534074 }, { "acc": 0.97279119, "epoch": 6.639325052730255, "grad_norm": 4.225727081298828, "learning_rate": 9.813691010267933e-06, "loss": 0.18840425, "memory(GiB)": 13.7, "step": 14165, "train_speed(iter/s)": 1.534085 }, { "acc": 0.95098782, "epoch": 6.64166861963909, "grad_norm": 9.269034385681152, "learning_rate": 9.813481324793028e-06, "loss": 0.26532891, "memory(GiB)": 13.7, "step": 14170, "train_speed(iter/s)": 1.534087 }, { "acc": 0.95135822, "epoch": 6.644012186547926, "grad_norm": 3.9460699558258057, "learning_rate": 9.813271523629865e-06, "loss": 0.19121461, "memory(GiB)": 13.7, "step": 14175, "train_speed(iter/s)": 1.534095 }, { "acc": 0.96122589, "epoch": 6.646355753456762, "grad_norm": 5.924279689788818, "learning_rate": 9.813061606783484e-06, "loss": 0.22858238, "memory(GiB)": 13.7, "step": 14180, "train_speed(iter/s)": 1.534082 }, { "acc": 0.97325764, "epoch": 6.648699320365596, "grad_norm": 4.0116753578186035, "learning_rate": 9.812851574258934e-06, "loss": 0.15890241, "memory(GiB)": 13.7, "step": 14185, "train_speed(iter/s)": 1.534087 }, { "acc": 0.98065205, "epoch": 6.651042887274432, "grad_norm": 3.5374066829681396, "learning_rate": 9.812641426061263e-06, "loss": 0.13357234, "memory(GiB)": 13.7, "step": 14190, "train_speed(iter/s)": 1.534098 }, { "acc": 0.95263386, "epoch": 6.653386454183267, "grad_norm": 76.20429229736328, "learning_rate": 9.81243116219552e-06, "loss": 0.28240602, "memory(GiB)": 13.7, "step": 14195, "train_speed(iter/s)": 1.534115 }, { "acc": 0.9623889, "epoch": 6.655730021092102, "grad_norm": 29.10444450378418, "learning_rate": 9.81222078266676e-06, "loss": 0.29926777, "memory(GiB)": 13.7, "step": 14200, "train_speed(iter/s)": 1.534138 }, { "acc": 0.98121214, "epoch": 6.658073588000938, "grad_norm": 3.3380608558654785, "learning_rate": 9.812010287480039e-06, "loss": 0.14152088, "memory(GiB)": 13.7, "step": 14205, "train_speed(iter/s)": 1.534123 }, { "acc": 0.97300053, "epoch": 6.660417154909773, "grad_norm": 15.318815231323242, "learning_rate": 9.811799676640419e-06, "loss": 0.16175078, "memory(GiB)": 13.7, "step": 14210, "train_speed(iter/s)": 1.534146 }, { "acc": 0.97967262, "epoch": 6.662760721818608, "grad_norm": 6.839264869689941, "learning_rate": 9.81158895015296e-06, "loss": 0.10598388, "memory(GiB)": 13.7, "step": 14215, "train_speed(iter/s)": 1.534186 }, { "acc": 0.94448528, "epoch": 6.665104288727443, "grad_norm": 6.855401039123535, "learning_rate": 9.81137810802273e-06, "loss": 0.31313224, "memory(GiB)": 13.7, "step": 14220, "train_speed(iter/s)": 1.534194 }, { "acc": 0.9628026, "epoch": 6.667447855636278, "grad_norm": 6.713244438171387, "learning_rate": 9.811167150254795e-06, "loss": 0.26238718, "memory(GiB)": 13.7, "step": 14225, "train_speed(iter/s)": 1.534191 }, { "acc": 0.9553175, "epoch": 6.669791422545114, "grad_norm": 4.185632228851318, "learning_rate": 9.810956076854224e-06, "loss": 0.22609186, "memory(GiB)": 13.7, "step": 14230, "train_speed(iter/s)": 1.534205 }, { "acc": 0.9472085, "epoch": 6.672134989453949, "grad_norm": 6.400630474090576, "learning_rate": 9.810744887826096e-06, "loss": 0.34079986, "memory(GiB)": 13.7, "step": 14235, "train_speed(iter/s)": 1.53423 }, { "acc": 0.97226601, "epoch": 6.674478556362784, "grad_norm": 11.196489334106445, "learning_rate": 9.810533583175481e-06, "loss": 0.13437893, "memory(GiB)": 13.7, "step": 14240, "train_speed(iter/s)": 1.534241 }, { "acc": 0.95201263, "epoch": 6.676822123271619, "grad_norm": 8.218504905700684, "learning_rate": 9.810322162907462e-06, "loss": 0.29594774, "memory(GiB)": 13.7, "step": 14245, "train_speed(iter/s)": 1.534249 }, { "acc": 0.93721762, "epoch": 6.679165690180454, "grad_norm": 10.925568580627441, "learning_rate": 9.81011062702712e-06, "loss": 0.43582153, "memory(GiB)": 13.7, "step": 14250, "train_speed(iter/s)": 1.534284 }, { "acc": 0.96391373, "epoch": 6.6815092570892896, "grad_norm": 7.290310859680176, "learning_rate": 9.809898975539538e-06, "loss": 0.1442209, "memory(GiB)": 13.7, "step": 14255, "train_speed(iter/s)": 1.534285 }, { "acc": 0.96497231, "epoch": 6.683852823998125, "grad_norm": 7.629974365234375, "learning_rate": 9.809687208449805e-06, "loss": 0.19803755, "memory(GiB)": 13.7, "step": 14260, "train_speed(iter/s)": 1.534279 }, { "acc": 0.97104168, "epoch": 6.686196390906961, "grad_norm": 10.157025337219238, "learning_rate": 9.809475325763012e-06, "loss": 0.17699394, "memory(GiB)": 13.7, "step": 14265, "train_speed(iter/s)": 1.534284 }, { "acc": 0.95253897, "epoch": 6.688539957815796, "grad_norm": 4.480657577514648, "learning_rate": 9.80926332748425e-06, "loss": 0.2958492, "memory(GiB)": 13.7, "step": 14270, "train_speed(iter/s)": 1.534298 }, { "acc": 0.97324305, "epoch": 6.690883524724631, "grad_norm": 8.107108116149902, "learning_rate": 9.809051213618616e-06, "loss": 0.12115114, "memory(GiB)": 13.7, "step": 14275, "train_speed(iter/s)": 1.534307 }, { "acc": 0.97026443, "epoch": 6.693227091633466, "grad_norm": 8.696179389953613, "learning_rate": 9.808838984171209e-06, "loss": 0.18577721, "memory(GiB)": 13.7, "step": 14280, "train_speed(iter/s)": 1.534306 }, { "acc": 0.97667074, "epoch": 6.695570658542302, "grad_norm": 6.615264415740967, "learning_rate": 9.808626639147127e-06, "loss": 0.13256823, "memory(GiB)": 13.7, "step": 14285, "train_speed(iter/s)": 1.534342 }, { "acc": 0.99044819, "epoch": 6.697914225451137, "grad_norm": 4.7940993309021, "learning_rate": 9.808414178551478e-06, "loss": 0.07822124, "memory(GiB)": 13.7, "step": 14290, "train_speed(iter/s)": 1.534353 }, { "acc": 0.95853834, "epoch": 6.700257792359972, "grad_norm": 9.735587120056152, "learning_rate": 9.808201602389366e-06, "loss": 0.25659809, "memory(GiB)": 13.7, "step": 14295, "train_speed(iter/s)": 1.534362 }, { "acc": 0.97208643, "epoch": 6.702601359268807, "grad_norm": 3.7195374965667725, "learning_rate": 9.807988910665905e-06, "loss": 0.21661277, "memory(GiB)": 13.7, "step": 14300, "train_speed(iter/s)": 1.534387 }, { "acc": 0.9777976, "epoch": 6.704944926177642, "grad_norm": 4.871890068054199, "learning_rate": 9.807776103386201e-06, "loss": 0.10224338, "memory(GiB)": 13.7, "step": 14305, "train_speed(iter/s)": 1.534363 }, { "acc": 0.96288586, "epoch": 6.707288493086478, "grad_norm": 10.04398250579834, "learning_rate": 9.807563180555376e-06, "loss": 0.25505056, "memory(GiB)": 13.7, "step": 14310, "train_speed(iter/s)": 1.53438 }, { "acc": 0.96527634, "epoch": 6.709632059995313, "grad_norm": 7.840579032897949, "learning_rate": 9.807350142178543e-06, "loss": 0.13987639, "memory(GiB)": 13.7, "step": 14315, "train_speed(iter/s)": 1.53436 }, { "acc": 0.97626982, "epoch": 6.711975626904148, "grad_norm": 9.745096206665039, "learning_rate": 9.807136988260822e-06, "loss": 0.17665662, "memory(GiB)": 13.7, "step": 14320, "train_speed(iter/s)": 1.534365 }, { "acc": 0.95740566, "epoch": 6.714319193812983, "grad_norm": 7.606945514678955, "learning_rate": 9.806923718807343e-06, "loss": 0.22513492, "memory(GiB)": 13.7, "step": 14325, "train_speed(iter/s)": 1.53439 }, { "acc": 0.97354164, "epoch": 6.716662760721818, "grad_norm": 17.39491081237793, "learning_rate": 9.806710333823226e-06, "loss": 0.19398928, "memory(GiB)": 13.7, "step": 14330, "train_speed(iter/s)": 1.5344 }, { "acc": 0.95269346, "epoch": 6.7190063276306535, "grad_norm": 9.901904106140137, "learning_rate": 9.806496833313603e-06, "loss": 0.28434644, "memory(GiB)": 13.7, "step": 14335, "train_speed(iter/s)": 1.534405 }, { "acc": 0.95773678, "epoch": 6.721349894539489, "grad_norm": 4.536360740661621, "learning_rate": 9.806283217283604e-06, "loss": 0.25400252, "memory(GiB)": 13.7, "step": 14340, "train_speed(iter/s)": 1.534407 }, { "acc": 0.96909389, "epoch": 6.723693461448324, "grad_norm": 9.920618057250977, "learning_rate": 9.806069485738365e-06, "loss": 0.1486974, "memory(GiB)": 13.7, "step": 14345, "train_speed(iter/s)": 1.534435 }, { "acc": 0.96121111, "epoch": 6.72603702835716, "grad_norm": 1.3105367422103882, "learning_rate": 9.805855638683023e-06, "loss": 0.14028783, "memory(GiB)": 13.7, "step": 14350, "train_speed(iter/s)": 1.534463 }, { "acc": 0.95811024, "epoch": 6.728380595265995, "grad_norm": 5.102045059204102, "learning_rate": 9.805641676122719e-06, "loss": 0.23790159, "memory(GiB)": 13.7, "step": 14355, "train_speed(iter/s)": 1.53446 }, { "acc": 0.97907467, "epoch": 6.73072416217483, "grad_norm": 2.3923094272613525, "learning_rate": 9.805427598062593e-06, "loss": 0.13701265, "memory(GiB)": 13.7, "step": 14360, "train_speed(iter/s)": 1.534488 }, { "acc": 0.97424679, "epoch": 6.733067729083666, "grad_norm": 9.09631061553955, "learning_rate": 9.805213404507795e-06, "loss": 0.09920135, "memory(GiB)": 13.7, "step": 14365, "train_speed(iter/s)": 1.534493 }, { "acc": 0.97461758, "epoch": 6.735411295992501, "grad_norm": 6.487236499786377, "learning_rate": 9.804999095463472e-06, "loss": 0.16301217, "memory(GiB)": 13.7, "step": 14370, "train_speed(iter/s)": 1.534512 }, { "acc": 0.9681448, "epoch": 6.737754862901336, "grad_norm": 9.448216438293457, "learning_rate": 9.804784670934773e-06, "loss": 0.09799227, "memory(GiB)": 13.7, "step": 14375, "train_speed(iter/s)": 1.534531 }, { "acc": 0.95839291, "epoch": 6.740098429810171, "grad_norm": 36.04232406616211, "learning_rate": 9.804570130926856e-06, "loss": 0.2227982, "memory(GiB)": 13.7, "step": 14380, "train_speed(iter/s)": 1.534553 }, { "acc": 0.95962181, "epoch": 6.742441996719006, "grad_norm": 5.254651069641113, "learning_rate": 9.804355475444873e-06, "loss": 0.31303635, "memory(GiB)": 13.7, "step": 14385, "train_speed(iter/s)": 1.534559 }, { "acc": 0.96291666, "epoch": 6.7447855636278415, "grad_norm": 11.151036262512207, "learning_rate": 9.804140704493986e-06, "loss": 0.27432499, "memory(GiB)": 13.7, "step": 14390, "train_speed(iter/s)": 1.534572 }, { "acc": 0.98305035, "epoch": 6.747129130536677, "grad_norm": 9.090883255004883, "learning_rate": 9.803925818079362e-06, "loss": 0.12967997, "memory(GiB)": 13.7, "step": 14395, "train_speed(iter/s)": 1.534593 }, { "acc": 0.96433983, "epoch": 6.749472697445512, "grad_norm": 2.3649978637695312, "learning_rate": 9.803710816206159e-06, "loss": 0.13433139, "memory(GiB)": 13.7, "step": 14400, "train_speed(iter/s)": 1.534611 }, { "acc": 0.96372023, "epoch": 6.751816264354347, "grad_norm": 9.477937698364258, "learning_rate": 9.803495698879546e-06, "loss": 0.12884421, "memory(GiB)": 13.7, "step": 14405, "train_speed(iter/s)": 1.53463 }, { "acc": 0.95944605, "epoch": 6.754159831263182, "grad_norm": 9.26336669921875, "learning_rate": 9.803280466104696e-06, "loss": 0.18757995, "memory(GiB)": 13.7, "step": 14410, "train_speed(iter/s)": 1.534652 }, { "acc": 0.96213093, "epoch": 6.7565033981720175, "grad_norm": 8.239970207214355, "learning_rate": 9.803065117886785e-06, "loss": 0.24189937, "memory(GiB)": 13.7, "step": 14415, "train_speed(iter/s)": 1.534673 }, { "acc": 0.95924683, "epoch": 6.758846965080853, "grad_norm": 8.910660743713379, "learning_rate": 9.802849654230985e-06, "loss": 0.23919554, "memory(GiB)": 13.7, "step": 14420, "train_speed(iter/s)": 1.53472 }, { "acc": 0.97853622, "epoch": 6.761190531989689, "grad_norm": 4.127047538757324, "learning_rate": 9.802634075142476e-06, "loss": 0.13406507, "memory(GiB)": 13.7, "step": 14425, "train_speed(iter/s)": 1.534704 }, { "acc": 0.96293726, "epoch": 6.763534098898523, "grad_norm": 6.484096527099609, "learning_rate": 9.802418380626439e-06, "loss": 0.18131735, "memory(GiB)": 13.7, "step": 14430, "train_speed(iter/s)": 1.534729 }, { "acc": 0.97305355, "epoch": 6.765877665807359, "grad_norm": 6.282772064208984, "learning_rate": 9.80220257068806e-06, "loss": 0.16991935, "memory(GiB)": 13.7, "step": 14435, "train_speed(iter/s)": 1.534727 }, { "acc": 0.96505957, "epoch": 6.768221232716194, "grad_norm": 2.5480151176452637, "learning_rate": 9.801986645332526e-06, "loss": 0.19313244, "memory(GiB)": 13.7, "step": 14440, "train_speed(iter/s)": 1.534716 }, { "acc": 0.94375591, "epoch": 6.7705647996250296, "grad_norm": 15.386945724487305, "learning_rate": 9.801770604565027e-06, "loss": 0.23408852, "memory(GiB)": 13.7, "step": 14445, "train_speed(iter/s)": 1.534704 }, { "acc": 0.97596054, "epoch": 6.772908366533865, "grad_norm": 10.285795211791992, "learning_rate": 9.801554448390757e-06, "loss": 0.16360981, "memory(GiB)": 13.7, "step": 14450, "train_speed(iter/s)": 1.534699 }, { "acc": 0.96001892, "epoch": 6.7752519334427, "grad_norm": 9.597260475158691, "learning_rate": 9.80133817681491e-06, "loss": 0.24697938, "memory(GiB)": 13.7, "step": 14455, "train_speed(iter/s)": 1.534692 }, { "acc": 0.97230549, "epoch": 6.777595500351535, "grad_norm": 9.870570182800293, "learning_rate": 9.801121789842686e-06, "loss": 0.1574487, "memory(GiB)": 13.7, "step": 14460, "train_speed(iter/s)": 1.534712 }, { "acc": 0.95352173, "epoch": 6.77993906726037, "grad_norm": 6.860602855682373, "learning_rate": 9.800905287479284e-06, "loss": 0.24029474, "memory(GiB)": 13.7, "step": 14465, "train_speed(iter/s)": 1.534735 }, { "acc": 0.96306553, "epoch": 6.7822826341692055, "grad_norm": 5.774374485015869, "learning_rate": 9.800688669729911e-06, "loss": 0.19259596, "memory(GiB)": 13.7, "step": 14470, "train_speed(iter/s)": 1.534764 }, { "acc": 0.97149677, "epoch": 6.784626201078041, "grad_norm": 9.98442268371582, "learning_rate": 9.80047193659977e-06, "loss": 0.17299209, "memory(GiB)": 13.7, "step": 14475, "train_speed(iter/s)": 1.5348 }, { "acc": 0.94352283, "epoch": 6.786969767986876, "grad_norm": 26.415327072143555, "learning_rate": 9.800255088094076e-06, "loss": 0.317698, "memory(GiB)": 13.7, "step": 14480, "train_speed(iter/s)": 1.534817 }, { "acc": 0.94860878, "epoch": 6.789313334895711, "grad_norm": 32.6009521484375, "learning_rate": 9.800038124218037e-06, "loss": 0.29133513, "memory(GiB)": 13.7, "step": 14485, "train_speed(iter/s)": 1.534812 }, { "acc": 0.95861263, "epoch": 6.791656901804546, "grad_norm": 7.3534650802612305, "learning_rate": 9.799821044976868e-06, "loss": 0.26640732, "memory(GiB)": 13.7, "step": 14490, "train_speed(iter/s)": 1.534834 }, { "acc": 0.94736919, "epoch": 6.7940004687133815, "grad_norm": 6.784670352935791, "learning_rate": 9.79960385037579e-06, "loss": 0.30296786, "memory(GiB)": 13.7, "step": 14495, "train_speed(iter/s)": 1.534838 }, { "acc": 0.97909584, "epoch": 6.796344035622217, "grad_norm": 21.296669006347656, "learning_rate": 9.799386540420019e-06, "loss": 0.12949555, "memory(GiB)": 13.7, "step": 14500, "train_speed(iter/s)": 1.534857 }, { "acc": 0.96150303, "epoch": 6.798687602531052, "grad_norm": 7.3638763427734375, "learning_rate": 9.799169115114782e-06, "loss": 0.21577969, "memory(GiB)": 13.7, "step": 14505, "train_speed(iter/s)": 1.534846 }, { "acc": 0.97003326, "epoch": 6.801031169439888, "grad_norm": 1.5067332983016968, "learning_rate": 9.798951574465307e-06, "loss": 0.24588001, "memory(GiB)": 13.7, "step": 14510, "train_speed(iter/s)": 1.534857 }, { "acc": 0.95678482, "epoch": 6.803374736348723, "grad_norm": 19.7967472076416, "learning_rate": 9.798733918476817e-06, "loss": 0.18158726, "memory(GiB)": 13.7, "step": 14515, "train_speed(iter/s)": 1.534863 }, { "acc": 0.97545462, "epoch": 6.805718303257558, "grad_norm": 7.204344272613525, "learning_rate": 9.79851614715455e-06, "loss": 0.14824071, "memory(GiB)": 13.7, "step": 14520, "train_speed(iter/s)": 1.534852 }, { "acc": 0.98053036, "epoch": 6.8080618701663935, "grad_norm": 6.981916904449463, "learning_rate": 9.798298260503733e-06, "loss": 0.14998093, "memory(GiB)": 13.7, "step": 14525, "train_speed(iter/s)": 1.534843 }, { "acc": 0.95597754, "epoch": 6.810405437075229, "grad_norm": 8.580238342285156, "learning_rate": 9.798080258529611e-06, "loss": 0.28501306, "memory(GiB)": 13.7, "step": 14530, "train_speed(iter/s)": 1.534877 }, { "acc": 0.98156252, "epoch": 6.812749003984064, "grad_norm": 6.044493198394775, "learning_rate": 9.797862141237422e-06, "loss": 0.05779264, "memory(GiB)": 13.7, "step": 14535, "train_speed(iter/s)": 1.534901 }, { "acc": 0.96845646, "epoch": 6.815092570892899, "grad_norm": 11.062625885009766, "learning_rate": 9.797643908632405e-06, "loss": 0.17332335, "memory(GiB)": 13.7, "step": 14540, "train_speed(iter/s)": 1.534891 }, { "acc": 0.95828438, "epoch": 6.817436137801734, "grad_norm": 13.099539756774902, "learning_rate": 9.79742556071981e-06, "loss": 0.23872337, "memory(GiB)": 13.7, "step": 14545, "train_speed(iter/s)": 1.534904 }, { "acc": 0.96333561, "epoch": 6.8197797047105695, "grad_norm": 4.1692399978637695, "learning_rate": 9.797207097504882e-06, "loss": 0.23974645, "memory(GiB)": 13.7, "step": 14550, "train_speed(iter/s)": 1.534915 }, { "acc": 0.97131176, "epoch": 6.822123271619405, "grad_norm": 2.944835901260376, "learning_rate": 9.796988518992876e-06, "loss": 0.1211117, "memory(GiB)": 13.7, "step": 14555, "train_speed(iter/s)": 1.534952 }, { "acc": 0.96547327, "epoch": 6.82446683852824, "grad_norm": 21.497255325317383, "learning_rate": 9.796769825189043e-06, "loss": 0.26116862, "memory(GiB)": 13.7, "step": 14560, "train_speed(iter/s)": 1.534982 }, { "acc": 0.94380608, "epoch": 6.826810405437075, "grad_norm": 10.080863952636719, "learning_rate": 9.79655101609864e-06, "loss": 0.29831882, "memory(GiB)": 13.7, "step": 14565, "train_speed(iter/s)": 1.53499 }, { "acc": 0.96849899, "epoch": 6.82915397234591, "grad_norm": 1.5920491218566895, "learning_rate": 9.796332091726926e-06, "loss": 0.20860839, "memory(GiB)": 13.7, "step": 14570, "train_speed(iter/s)": 1.535022 }, { "acc": 0.97137833, "epoch": 6.8314975392547455, "grad_norm": 7.008699417114258, "learning_rate": 9.796113052079165e-06, "loss": 0.20925651, "memory(GiB)": 13.7, "step": 14575, "train_speed(iter/s)": 1.535068 }, { "acc": 0.95617561, "epoch": 6.833841106163581, "grad_norm": 1.1060415506362915, "learning_rate": 9.79589389716062e-06, "loss": 0.12490914, "memory(GiB)": 13.7, "step": 14580, "train_speed(iter/s)": 1.535121 }, { "acc": 0.97890625, "epoch": 6.836184673072416, "grad_norm": 8.711516380310059, "learning_rate": 9.795674626976561e-06, "loss": 0.11451617, "memory(GiB)": 13.7, "step": 14585, "train_speed(iter/s)": 1.53509 }, { "acc": 0.96776733, "epoch": 6.838528239981251, "grad_norm": 3.7777903079986572, "learning_rate": 9.795455241532256e-06, "loss": 0.15892977, "memory(GiB)": 13.7, "step": 14590, "train_speed(iter/s)": 1.535124 }, { "acc": 0.95702839, "epoch": 6.840871806890087, "grad_norm": 16.713491439819336, "learning_rate": 9.795235740832981e-06, "loss": 0.19711301, "memory(GiB)": 13.7, "step": 14595, "train_speed(iter/s)": 1.535148 }, { "acc": 0.96273479, "epoch": 6.843215373798922, "grad_norm": 3.2953600883483887, "learning_rate": 9.795016124884011e-06, "loss": 0.19423051, "memory(GiB)": 13.7, "step": 14600, "train_speed(iter/s)": 1.53516 }, { "acc": 0.96123495, "epoch": 6.8455589407077575, "grad_norm": 3.9413177967071533, "learning_rate": 9.794796393690623e-06, "loss": 0.22588255, "memory(GiB)": 13.7, "step": 14605, "train_speed(iter/s)": 1.535184 }, { "acc": 0.95222473, "epoch": 6.847902507616593, "grad_norm": 7.99741268157959, "learning_rate": 9.794576547258101e-06, "loss": 0.35350971, "memory(GiB)": 13.7, "step": 14610, "train_speed(iter/s)": 1.535187 }, { "acc": 0.96255369, "epoch": 6.850246074525428, "grad_norm": 6.3206562995910645, "learning_rate": 9.794356585591729e-06, "loss": 0.29545524, "memory(GiB)": 13.7, "step": 14615, "train_speed(iter/s)": 1.535214 }, { "acc": 0.96636791, "epoch": 6.852589641434263, "grad_norm": 4.026330471038818, "learning_rate": 9.794136508696794e-06, "loss": 0.22387943, "memory(GiB)": 13.7, "step": 14620, "train_speed(iter/s)": 1.53526 }, { "acc": 0.98135414, "epoch": 6.854933208343098, "grad_norm": 10.53857421875, "learning_rate": 9.793916316578584e-06, "loss": 0.12631159, "memory(GiB)": 13.7, "step": 14625, "train_speed(iter/s)": 1.535252 }, { "acc": 0.97638893, "epoch": 6.8572767752519335, "grad_norm": 1.1683779954910278, "learning_rate": 9.793696009242395e-06, "loss": 0.15019897, "memory(GiB)": 13.7, "step": 14630, "train_speed(iter/s)": 1.535282 }, { "acc": 0.97600355, "epoch": 6.859620342160769, "grad_norm": 6.935296058654785, "learning_rate": 9.79347558669352e-06, "loss": 0.17648423, "memory(GiB)": 13.7, "step": 14635, "train_speed(iter/s)": 1.535303 }, { "acc": 0.96708336, "epoch": 6.861963909069604, "grad_norm": 6.721271514892578, "learning_rate": 9.79325504893726e-06, "loss": 0.22986989, "memory(GiB)": 13.7, "step": 14640, "train_speed(iter/s)": 1.535315 }, { "acc": 0.96450901, "epoch": 6.864307475978439, "grad_norm": 3.1278862953186035, "learning_rate": 9.793034395978911e-06, "loss": 0.1264889, "memory(GiB)": 13.7, "step": 14645, "train_speed(iter/s)": 1.535345 }, { "acc": 0.96667614, "epoch": 6.866651042887274, "grad_norm": 7.631982803344727, "learning_rate": 9.792813627823781e-06, "loss": 0.15520577, "memory(GiB)": 13.7, "step": 14650, "train_speed(iter/s)": 1.535374 }, { "acc": 0.96309528, "epoch": 6.868994609796109, "grad_norm": 15.969141960144043, "learning_rate": 9.792592744477174e-06, "loss": 0.2325695, "memory(GiB)": 13.7, "step": 14655, "train_speed(iter/s)": 1.535399 }, { "acc": 0.96323318, "epoch": 6.871338176704945, "grad_norm": 6.670562267303467, "learning_rate": 9.792371745944402e-06, "loss": 0.13550782, "memory(GiB)": 13.7, "step": 14660, "train_speed(iter/s)": 1.535407 }, { "acc": 0.97039337, "epoch": 6.87368174361378, "grad_norm": 7.849125862121582, "learning_rate": 9.792150632230776e-06, "loss": 0.17610018, "memory(GiB)": 13.7, "step": 14665, "train_speed(iter/s)": 1.535422 }, { "acc": 0.98258934, "epoch": 6.876025310522616, "grad_norm": 3.6634035110473633, "learning_rate": 9.791929403341612e-06, "loss": 0.1085654, "memory(GiB)": 13.7, "step": 14670, "train_speed(iter/s)": 1.535419 }, { "acc": 0.96971216, "epoch": 6.87836887743145, "grad_norm": 3.8815810680389404, "learning_rate": 9.791708059282225e-06, "loss": 0.18118777, "memory(GiB)": 13.7, "step": 14675, "train_speed(iter/s)": 1.535432 }, { "acc": 0.97153187, "epoch": 6.880712444340286, "grad_norm": 44.076541900634766, "learning_rate": 9.791486600057934e-06, "loss": 0.16625171, "memory(GiB)": 13.7, "step": 14680, "train_speed(iter/s)": 1.535419 }, { "acc": 0.96326923, "epoch": 6.8830560112491215, "grad_norm": 4.428592681884766, "learning_rate": 9.791265025674067e-06, "loss": 0.13889208, "memory(GiB)": 13.7, "step": 14685, "train_speed(iter/s)": 1.53544 }, { "acc": 0.96804256, "epoch": 6.885399578157957, "grad_norm": 4.425119400024414, "learning_rate": 9.79104333613595e-06, "loss": 0.17451417, "memory(GiB)": 13.7, "step": 14690, "train_speed(iter/s)": 1.535457 }, { "acc": 0.97583332, "epoch": 6.887743145066792, "grad_norm": 9.705787658691406, "learning_rate": 9.790821531448905e-06, "loss": 0.1129636, "memory(GiB)": 13.7, "step": 14695, "train_speed(iter/s)": 1.535452 }, { "acc": 0.97124996, "epoch": 6.890086711975627, "grad_norm": 7.08992862701416, "learning_rate": 9.79059961161827e-06, "loss": 0.16164851, "memory(GiB)": 13.7, "step": 14700, "train_speed(iter/s)": 1.535441 }, { "acc": 0.96839972, "epoch": 6.892430278884462, "grad_norm": 1.4835820198059082, "learning_rate": 9.790377576649377e-06, "loss": 0.16161668, "memory(GiB)": 13.7, "step": 14705, "train_speed(iter/s)": 1.535481 }, { "acc": 0.97372589, "epoch": 6.894773845793297, "grad_norm": 9.63837718963623, "learning_rate": 9.790155426547563e-06, "loss": 0.21226709, "memory(GiB)": 13.7, "step": 14710, "train_speed(iter/s)": 1.535487 }, { "acc": 0.97606068, "epoch": 6.897117412702133, "grad_norm": 4.384520530700684, "learning_rate": 9.789933161318166e-06, "loss": 0.1432816, "memory(GiB)": 13.7, "step": 14715, "train_speed(iter/s)": 1.535477 }, { "acc": 0.95902281, "epoch": 6.899460979610968, "grad_norm": 5.1867756843566895, "learning_rate": 9.789710780966533e-06, "loss": 0.36555204, "memory(GiB)": 13.7, "step": 14720, "train_speed(iter/s)": 1.53549 }, { "acc": 0.94124994, "epoch": 6.901804546519803, "grad_norm": 13.80456829071045, "learning_rate": 9.789488285498005e-06, "loss": 0.29771323, "memory(GiB)": 13.7, "step": 14725, "train_speed(iter/s)": 1.53552 }, { "acc": 0.97168655, "epoch": 6.904148113428638, "grad_norm": 18.542659759521484, "learning_rate": 9.789265674917933e-06, "loss": 0.18030387, "memory(GiB)": 13.7, "step": 14730, "train_speed(iter/s)": 1.53553 }, { "acc": 0.95083332, "epoch": 6.906491680337473, "grad_norm": 12.377114295959473, "learning_rate": 9.789042949231666e-06, "loss": 0.30231998, "memory(GiB)": 13.7, "step": 14735, "train_speed(iter/s)": 1.535556 }, { "acc": 0.95854502, "epoch": 6.908835247246309, "grad_norm": 8.039176940917969, "learning_rate": 9.788820108444557e-06, "loss": 0.17737576, "memory(GiB)": 13.7, "step": 14740, "train_speed(iter/s)": 1.535541 }, { "acc": 0.97059212, "epoch": 6.911178814155144, "grad_norm": 28.839048385620117, "learning_rate": 9.788597152561965e-06, "loss": 0.18614066, "memory(GiB)": 13.7, "step": 14745, "train_speed(iter/s)": 1.535547 }, { "acc": 0.97208328, "epoch": 6.913522381063979, "grad_norm": 8.177898406982422, "learning_rate": 9.788374081589246e-06, "loss": 0.13268967, "memory(GiB)": 13.7, "step": 14750, "train_speed(iter/s)": 1.535575 }, { "acc": 0.97581329, "epoch": 6.915865947972815, "grad_norm": 12.717848777770996, "learning_rate": 9.788150895531766e-06, "loss": 0.15428913, "memory(GiB)": 13.7, "step": 14755, "train_speed(iter/s)": 1.535608 }, { "acc": 0.96518269, "epoch": 6.91820951488165, "grad_norm": 14.100668907165527, "learning_rate": 9.787927594394885e-06, "loss": 0.20364139, "memory(GiB)": 13.7, "step": 14760, "train_speed(iter/s)": 1.535647 }, { "acc": 0.96518354, "epoch": 6.9205530817904855, "grad_norm": 16.878185272216797, "learning_rate": 9.787704178183972e-06, "loss": 0.20496695, "memory(GiB)": 13.7, "step": 14765, "train_speed(iter/s)": 1.535676 }, { "acc": 0.98015137, "epoch": 6.922896648699321, "grad_norm": 4.367379188537598, "learning_rate": 9.7874806469044e-06, "loss": 0.10370951, "memory(GiB)": 13.7, "step": 14770, "train_speed(iter/s)": 1.535703 }, { "acc": 0.95561008, "epoch": 6.925240215608156, "grad_norm": 4.479492664337158, "learning_rate": 9.787257000561536e-06, "loss": 0.22237206, "memory(GiB)": 13.7, "step": 14775, "train_speed(iter/s)": 1.535713 }, { "acc": 0.95077648, "epoch": 6.927583782516991, "grad_norm": 17.438791275024414, "learning_rate": 9.787033239160762e-06, "loss": 0.33749166, "memory(GiB)": 13.7, "step": 14780, "train_speed(iter/s)": 1.535686 }, { "acc": 0.9686676, "epoch": 6.929927349425826, "grad_norm": 6.283036231994629, "learning_rate": 9.786809362707454e-06, "loss": 0.19081278, "memory(GiB)": 13.7, "step": 14785, "train_speed(iter/s)": 1.535712 }, { "acc": 0.94708099, "epoch": 6.932270916334661, "grad_norm": 14.455389976501465, "learning_rate": 9.786585371206993e-06, "loss": 0.37721195, "memory(GiB)": 13.7, "step": 14790, "train_speed(iter/s)": 1.535733 }, { "acc": 0.95812321, "epoch": 6.934614483243497, "grad_norm": 7.593224048614502, "learning_rate": 9.786361264664763e-06, "loss": 0.23259268, "memory(GiB)": 13.7, "step": 14795, "train_speed(iter/s)": 1.535745 }, { "acc": 0.96857891, "epoch": 6.936958050152332, "grad_norm": 11.239110946655273, "learning_rate": 9.786137043086151e-06, "loss": 0.16766362, "memory(GiB)": 13.7, "step": 14800, "train_speed(iter/s)": 1.535733 }, { "acc": 0.96906652, "epoch": 6.939301617061167, "grad_norm": 0.02798702009022236, "learning_rate": 9.785912706476547e-06, "loss": 0.11793919, "memory(GiB)": 13.7, "step": 14805, "train_speed(iter/s)": 1.535709 }, { "acc": 0.96861115, "epoch": 6.941645183970002, "grad_norm": 5.092782020568848, "learning_rate": 9.785688254841342e-06, "loss": 0.07526115, "memory(GiB)": 13.7, "step": 14810, "train_speed(iter/s)": 1.535708 }, { "acc": 0.98504286, "epoch": 6.943988750878837, "grad_norm": 4.741040229797363, "learning_rate": 9.785463688185933e-06, "loss": 0.10898972, "memory(GiB)": 13.7, "step": 14815, "train_speed(iter/s)": 1.535708 }, { "acc": 0.9828373, "epoch": 6.946332317787673, "grad_norm": 0.9566575884819031, "learning_rate": 9.785239006515715e-06, "loss": 0.12772332, "memory(GiB)": 13.7, "step": 14820, "train_speed(iter/s)": 1.535683 }, { "acc": 0.97488098, "epoch": 6.948675884696508, "grad_norm": 18.06761932373047, "learning_rate": 9.785014209836094e-06, "loss": 0.0836545, "memory(GiB)": 13.7, "step": 14825, "train_speed(iter/s)": 1.535705 }, { "acc": 0.97607002, "epoch": 6.951019451605343, "grad_norm": 6.445627212524414, "learning_rate": 9.784789298152467e-06, "loss": 0.12048374, "memory(GiB)": 13.7, "step": 14830, "train_speed(iter/s)": 1.535708 }, { "acc": 0.96843863, "epoch": 6.953363018514178, "grad_norm": 4.0733466148376465, "learning_rate": 9.784564271470242e-06, "loss": 0.20793698, "memory(GiB)": 13.7, "step": 14835, "train_speed(iter/s)": 1.535726 }, { "acc": 0.95698471, "epoch": 6.955706585423014, "grad_norm": 15.961247444152832, "learning_rate": 9.78433912979483e-06, "loss": 0.27496927, "memory(GiB)": 13.7, "step": 14840, "train_speed(iter/s)": 1.535713 }, { "acc": 0.97927494, "epoch": 6.958050152331849, "grad_norm": 6.44999885559082, "learning_rate": 9.78411387313164e-06, "loss": 0.18397489, "memory(GiB)": 13.7, "step": 14845, "train_speed(iter/s)": 1.535733 }, { "acc": 0.97060404, "epoch": 6.960393719240685, "grad_norm": 0.2817046046257019, "learning_rate": 9.78388850148609e-06, "loss": 0.13308043, "memory(GiB)": 13.7, "step": 14850, "train_speed(iter/s)": 1.535758 }, { "acc": 0.95492516, "epoch": 6.96273728614952, "grad_norm": 11.694095611572266, "learning_rate": 9.783663014863593e-06, "loss": 0.32981205, "memory(GiB)": 13.7, "step": 14855, "train_speed(iter/s)": 1.535774 }, { "acc": 0.9558569, "epoch": 6.965080853058355, "grad_norm": 3.712543487548828, "learning_rate": 9.783437413269574e-06, "loss": 0.19557409, "memory(GiB)": 13.7, "step": 14860, "train_speed(iter/s)": 1.535766 }, { "acc": 0.96396828, "epoch": 6.96742441996719, "grad_norm": 12.154010772705078, "learning_rate": 9.783211696709451e-06, "loss": 0.26635232, "memory(GiB)": 13.7, "step": 14865, "train_speed(iter/s)": 1.535776 }, { "acc": 0.95965281, "epoch": 6.969767986876025, "grad_norm": 2.955507755279541, "learning_rate": 9.78298586518865e-06, "loss": 0.21703188, "memory(GiB)": 13.7, "step": 14870, "train_speed(iter/s)": 1.5358 }, { "acc": 0.98509874, "epoch": 6.972111553784861, "grad_norm": 3.6586639881134033, "learning_rate": 9.782759918712602e-06, "loss": 0.07170419, "memory(GiB)": 13.7, "step": 14875, "train_speed(iter/s)": 1.535796 }, { "acc": 0.96621113, "epoch": 6.974455120693696, "grad_norm": 1.0725308656692505, "learning_rate": 9.782533857286739e-06, "loss": 0.14420667, "memory(GiB)": 13.7, "step": 14880, "train_speed(iter/s)": 1.535781 }, { "acc": 0.97723207, "epoch": 6.976798687602531, "grad_norm": 0.18602870404720306, "learning_rate": 9.782307680916488e-06, "loss": 0.14876924, "memory(GiB)": 13.7, "step": 14885, "train_speed(iter/s)": 1.535783 }, { "acc": 0.96092262, "epoch": 6.979142254511366, "grad_norm": 7.15862512588501, "learning_rate": 9.782081389607292e-06, "loss": 0.28847752, "memory(GiB)": 13.7, "step": 14890, "train_speed(iter/s)": 1.535778 }, { "acc": 0.97093239, "epoch": 6.981485821420201, "grad_norm": 7.783437728881836, "learning_rate": 9.781854983364585e-06, "loss": 0.15664482, "memory(GiB)": 13.7, "step": 14895, "train_speed(iter/s)": 1.535805 }, { "acc": 0.96901283, "epoch": 6.9838293883290365, "grad_norm": 4.362534999847412, "learning_rate": 9.781628462193817e-06, "loss": 0.12213852, "memory(GiB)": 13.7, "step": 14900, "train_speed(iter/s)": 1.535805 }, { "acc": 0.96066742, "epoch": 6.986172955237872, "grad_norm": 10.419160842895508, "learning_rate": 9.781401826100426e-06, "loss": 0.29081752, "memory(GiB)": 13.7, "step": 14905, "train_speed(iter/s)": 1.535841 }, { "acc": 0.96693163, "epoch": 6.988516522146707, "grad_norm": 4.418824672698975, "learning_rate": 9.78117507508986e-06, "loss": 0.19220966, "memory(GiB)": 13.7, "step": 14910, "train_speed(iter/s)": 1.535867 }, { "acc": 0.94838428, "epoch": 6.990860089055543, "grad_norm": 42.253841400146484, "learning_rate": 9.780948209167572e-06, "loss": 0.25813396, "memory(GiB)": 13.7, "step": 14915, "train_speed(iter/s)": 1.53586 }, { "acc": 0.96436005, "epoch": 6.993203655964377, "grad_norm": 5.331353187561035, "learning_rate": 9.780721228339014e-06, "loss": 0.12968071, "memory(GiB)": 13.7, "step": 14920, "train_speed(iter/s)": 1.535879 }, { "acc": 0.97048607, "epoch": 6.995547222873213, "grad_norm": 36.535640716552734, "learning_rate": 9.780494132609643e-06, "loss": 0.23409801, "memory(GiB)": 13.7, "step": 14925, "train_speed(iter/s)": 1.535857 }, { "acc": 0.96377296, "epoch": 6.997890789782049, "grad_norm": 6.727839469909668, "learning_rate": 9.780266921984915e-06, "loss": 0.15673976, "memory(GiB)": 13.7, "step": 14930, "train_speed(iter/s)": 1.53589 }, { "acc": 0.96544094, "epoch": 7.000234356690884, "grad_norm": 6.75751256942749, "learning_rate": 9.780039596470293e-06, "loss": 0.2562726, "memory(GiB)": 13.7, "step": 14935, "train_speed(iter/s)": 1.535805 }, { "acc": 0.95272121, "epoch": 7.002577923599719, "grad_norm": 22.052400588989258, "learning_rate": 9.779812156071242e-06, "loss": 0.20676699, "memory(GiB)": 13.7, "step": 14940, "train_speed(iter/s)": 1.535834 }, { "acc": 0.98108835, "epoch": 7.004921490508554, "grad_norm": 7.230310440063477, "learning_rate": 9.779584600793229e-06, "loss": 0.0945228, "memory(GiB)": 13.7, "step": 14945, "train_speed(iter/s)": 1.535854 }, { "acc": 0.95925827, "epoch": 7.007265057417389, "grad_norm": 5.59814453125, "learning_rate": 9.77935693064172e-06, "loss": 0.16716762, "memory(GiB)": 13.7, "step": 14950, "train_speed(iter/s)": 1.535882 }, { "acc": 0.9770052, "epoch": 7.009608624326225, "grad_norm": 9.19429874420166, "learning_rate": 9.779129145622191e-06, "loss": 0.13321352, "memory(GiB)": 13.7, "step": 14955, "train_speed(iter/s)": 1.53589 }, { "acc": 0.967342, "epoch": 7.01195219123506, "grad_norm": 4.748887062072754, "learning_rate": 9.778901245740118e-06, "loss": 0.23272004, "memory(GiB)": 13.7, "step": 14960, "train_speed(iter/s)": 1.535893 }, { "acc": 0.96066465, "epoch": 7.014295758143895, "grad_norm": 13.82585620880127, "learning_rate": 9.778673231000977e-06, "loss": 0.17271781, "memory(GiB)": 13.7, "step": 14965, "train_speed(iter/s)": 1.535929 }, { "acc": 0.95796738, "epoch": 7.01663932505273, "grad_norm": 15.433380126953125, "learning_rate": 9.778445101410246e-06, "loss": 0.26605828, "memory(GiB)": 13.7, "step": 14970, "train_speed(iter/s)": 1.535942 }, { "acc": 0.97758923, "epoch": 7.018982891961565, "grad_norm": 1.4972589015960693, "learning_rate": 9.778216856973415e-06, "loss": 0.08024361, "memory(GiB)": 13.7, "step": 14975, "train_speed(iter/s)": 1.535954 }, { "acc": 0.96729164, "epoch": 7.0213264588704005, "grad_norm": 23.03068733215332, "learning_rate": 9.777988497695966e-06, "loss": 0.13574879, "memory(GiB)": 13.7, "step": 14980, "train_speed(iter/s)": 1.535932 }, { "acc": 0.99526291, "epoch": 7.023670025779236, "grad_norm": 6.078921318054199, "learning_rate": 9.777760023583388e-06, "loss": 0.03451531, "memory(GiB)": 13.7, "step": 14985, "train_speed(iter/s)": 1.535938 }, { "acc": 0.97087631, "epoch": 7.026013592688071, "grad_norm": 3.2276031970977783, "learning_rate": 9.777531434641175e-06, "loss": 0.11441524, "memory(GiB)": 13.7, "step": 14990, "train_speed(iter/s)": 1.535971 }, { "acc": 0.97574253, "epoch": 7.028357159596906, "grad_norm": 7.225069522857666, "learning_rate": 9.777302730874817e-06, "loss": 0.15541644, "memory(GiB)": 13.7, "step": 14995, "train_speed(iter/s)": 1.536001 }, { "acc": 0.98376884, "epoch": 7.030700726505742, "grad_norm": 2.635082244873047, "learning_rate": 9.777073912289817e-06, "loss": 0.11393187, "memory(GiB)": 13.7, "step": 15000, "train_speed(iter/s)": 1.536009 }, { "acc": 0.96315613, "epoch": 7.033044293414577, "grad_norm": 10.693981170654297, "learning_rate": 9.776844978891672e-06, "loss": 0.21617091, "memory(GiB)": 13.7, "step": 15005, "train_speed(iter/s)": 1.535995 }, { "acc": 0.97893429, "epoch": 7.035387860323413, "grad_norm": 5.18265438079834, "learning_rate": 9.776615930685885e-06, "loss": 0.12532042, "memory(GiB)": 13.7, "step": 15010, "train_speed(iter/s)": 1.535978 }, { "acc": 0.96113586, "epoch": 7.037731427232248, "grad_norm": 10.566425323486328, "learning_rate": 9.776386767677963e-06, "loss": 0.29518282, "memory(GiB)": 13.7, "step": 15015, "train_speed(iter/s)": 1.535987 }, { "acc": 0.96243057, "epoch": 7.040074994141083, "grad_norm": 6.555646896362305, "learning_rate": 9.776157489873411e-06, "loss": 0.23935168, "memory(GiB)": 13.7, "step": 15020, "train_speed(iter/s)": 1.535996 }, { "acc": 0.97158031, "epoch": 7.042418561049918, "grad_norm": 6.132505893707275, "learning_rate": 9.775928097277743e-06, "loss": 0.11361482, "memory(GiB)": 13.7, "step": 15025, "train_speed(iter/s)": 1.536008 }, { "acc": 0.98340912, "epoch": 7.044762127958753, "grad_norm": 9.529570579528809, "learning_rate": 9.775698589896474e-06, "loss": 0.10300341, "memory(GiB)": 13.7, "step": 15030, "train_speed(iter/s)": 1.536022 }, { "acc": 0.97197914, "epoch": 7.0471056948675885, "grad_norm": 1.7570475339889526, "learning_rate": 9.775468967735114e-06, "loss": 0.09012468, "memory(GiB)": 13.7, "step": 15035, "train_speed(iter/s)": 1.536068 }, { "acc": 0.97213745, "epoch": 7.049449261776424, "grad_norm": 3.4917683601379395, "learning_rate": 9.775239230799191e-06, "loss": 0.17455339, "memory(GiB)": 13.7, "step": 15040, "train_speed(iter/s)": 1.536089 }, { "acc": 0.96407738, "epoch": 7.051792828685259, "grad_norm": 9.967965126037598, "learning_rate": 9.775009379094222e-06, "loss": 0.2170697, "memory(GiB)": 13.7, "step": 15045, "train_speed(iter/s)": 1.536098 }, { "acc": 0.97114582, "epoch": 7.054136395594094, "grad_norm": 6.268711090087891, "learning_rate": 9.774779412625731e-06, "loss": 0.1854792, "memory(GiB)": 13.7, "step": 15050, "train_speed(iter/s)": 1.536141 }, { "acc": 0.94806547, "epoch": 7.056479962502929, "grad_norm": 77.52974700927734, "learning_rate": 9.77454933139925e-06, "loss": 0.32025361, "memory(GiB)": 13.7, "step": 15055, "train_speed(iter/s)": 1.536134 }, { "acc": 0.9517004, "epoch": 7.0588235294117645, "grad_norm": 119.96160125732422, "learning_rate": 9.774319135420306e-06, "loss": 0.20322714, "memory(GiB)": 13.7, "step": 15060, "train_speed(iter/s)": 1.536141 }, { "acc": 0.95523224, "epoch": 7.0611670963206, "grad_norm": 35.41407775878906, "learning_rate": 9.774088824694436e-06, "loss": 0.2502532, "memory(GiB)": 13.7, "step": 15065, "train_speed(iter/s)": 1.536189 }, { "acc": 0.97334862, "epoch": 7.063510663229435, "grad_norm": 6.219341278076172, "learning_rate": 9.77385839922717e-06, "loss": 0.10381669, "memory(GiB)": 13.7, "step": 15070, "train_speed(iter/s)": 1.536201 }, { "acc": 0.96564636, "epoch": 7.06585423013827, "grad_norm": 5.520659923553467, "learning_rate": 9.77362785902405e-06, "loss": 0.2057512, "memory(GiB)": 13.7, "step": 15075, "train_speed(iter/s)": 1.536209 }, { "acc": 0.96910715, "epoch": 7.068197797047105, "grad_norm": 10.060084342956543, "learning_rate": 9.773397204090619e-06, "loss": 0.15296202, "memory(GiB)": 13.7, "step": 15080, "train_speed(iter/s)": 1.536228 }, { "acc": 0.96446009, "epoch": 7.070541363955941, "grad_norm": 55.853675842285156, "learning_rate": 9.773166434432418e-06, "loss": 0.16701367, "memory(GiB)": 13.7, "step": 15085, "train_speed(iter/s)": 1.536253 }, { "acc": 0.96469154, "epoch": 7.0728849308647765, "grad_norm": 7.630614757537842, "learning_rate": 9.772935550054997e-06, "loss": 0.20115619, "memory(GiB)": 13.7, "step": 15090, "train_speed(iter/s)": 1.536258 }, { "acc": 0.95276794, "epoch": 7.075228497773612, "grad_norm": 19.69412612915039, "learning_rate": 9.772704550963902e-06, "loss": 0.26746759, "memory(GiB)": 13.7, "step": 15095, "train_speed(iter/s)": 1.536288 }, { "acc": 0.95737057, "epoch": 7.077572064682447, "grad_norm": 8.400039672851562, "learning_rate": 9.772473437164686e-06, "loss": 0.19023693, "memory(GiB)": 13.7, "step": 15100, "train_speed(iter/s)": 1.536295 }, { "acc": 0.97125587, "epoch": 7.079915631591282, "grad_norm": 5.808990478515625, "learning_rate": 9.77224220866291e-06, "loss": 0.16487061, "memory(GiB)": 13.7, "step": 15105, "train_speed(iter/s)": 1.536294 }, { "acc": 0.97627802, "epoch": 7.082259198500117, "grad_norm": 3.919905662536621, "learning_rate": 9.772010865464125e-06, "loss": 0.13767798, "memory(GiB)": 13.7, "step": 15110, "train_speed(iter/s)": 1.536314 }, { "acc": 0.96659126, "epoch": 7.0846027654089525, "grad_norm": 9.243152618408203, "learning_rate": 9.771779407573895e-06, "loss": 0.18070669, "memory(GiB)": 13.7, "step": 15115, "train_speed(iter/s)": 1.53632 }, { "acc": 0.9435853, "epoch": 7.086946332317788, "grad_norm": 5.41697883605957, "learning_rate": 9.771547834997783e-06, "loss": 0.23922584, "memory(GiB)": 13.7, "step": 15120, "train_speed(iter/s)": 1.536345 }, { "acc": 0.96942463, "epoch": 7.089289899226623, "grad_norm": 3.4025309085845947, "learning_rate": 9.771316147741355e-06, "loss": 0.13447874, "memory(GiB)": 13.7, "step": 15125, "train_speed(iter/s)": 1.53635 }, { "acc": 0.96142588, "epoch": 7.091633466135458, "grad_norm": 6.49388313293457, "learning_rate": 9.771084345810181e-06, "loss": 0.19750373, "memory(GiB)": 13.7, "step": 15130, "train_speed(iter/s)": 1.536353 }, { "acc": 0.96180716, "epoch": 7.093977033044293, "grad_norm": 5.716974258422852, "learning_rate": 9.770852429209831e-06, "loss": 0.19701691, "memory(GiB)": 13.7, "step": 15135, "train_speed(iter/s)": 1.536367 }, { "acc": 0.9472744, "epoch": 7.0963205999531285, "grad_norm": 7.259920120239258, "learning_rate": 9.77062039794588e-06, "loss": 0.30954931, "memory(GiB)": 13.7, "step": 15140, "train_speed(iter/s)": 1.53636 }, { "acc": 0.96187, "epoch": 7.098664166861964, "grad_norm": 12.270742416381836, "learning_rate": 9.770388252023906e-06, "loss": 0.19604399, "memory(GiB)": 13.7, "step": 15145, "train_speed(iter/s)": 1.536376 }, { "acc": 0.98160715, "epoch": 7.101007733770799, "grad_norm": 2.011206865310669, "learning_rate": 9.770155991449488e-06, "loss": 0.08411903, "memory(GiB)": 13.7, "step": 15150, "train_speed(iter/s)": 1.53639 }, { "acc": 0.97174911, "epoch": 7.103351300679634, "grad_norm": 0.916255533695221, "learning_rate": 9.769923616228211e-06, "loss": 0.15034814, "memory(GiB)": 13.7, "step": 15155, "train_speed(iter/s)": 1.536378 }, { "acc": 0.95992069, "epoch": 7.105694867588469, "grad_norm": 17.343231201171875, "learning_rate": 9.76969112636566e-06, "loss": 0.22430391, "memory(GiB)": 13.7, "step": 15160, "train_speed(iter/s)": 1.536383 }, { "acc": 0.96479168, "epoch": 7.108038434497305, "grad_norm": 6.648384094238281, "learning_rate": 9.76945852186742e-06, "loss": 0.22242193, "memory(GiB)": 13.7, "step": 15165, "train_speed(iter/s)": 1.536435 }, { "acc": 0.98424377, "epoch": 7.1103820014061405, "grad_norm": 4.8152689933776855, "learning_rate": 9.769225802739085e-06, "loss": 0.15359623, "memory(GiB)": 13.7, "step": 15170, "train_speed(iter/s)": 1.53644 }, { "acc": 0.97584686, "epoch": 7.112725568314976, "grad_norm": 13.197333335876465, "learning_rate": 9.768992968986248e-06, "loss": 0.18799343, "memory(GiB)": 13.7, "step": 15175, "train_speed(iter/s)": 1.536435 }, { "acc": 0.96618786, "epoch": 7.115069135223811, "grad_norm": 10.053864479064941, "learning_rate": 9.768760020614508e-06, "loss": 0.19569021, "memory(GiB)": 13.7, "step": 15180, "train_speed(iter/s)": 1.536423 }, { "acc": 0.97919769, "epoch": 7.117412702132646, "grad_norm": 8.027447700500488, "learning_rate": 9.768526957629459e-06, "loss": 0.12292764, "memory(GiB)": 13.7, "step": 15185, "train_speed(iter/s)": 1.536401 }, { "acc": 0.96019344, "epoch": 7.119756269041481, "grad_norm": 14.926433563232422, "learning_rate": 9.768293780036707e-06, "loss": 0.17740216, "memory(GiB)": 13.7, "step": 15190, "train_speed(iter/s)": 1.536421 }, { "acc": 0.96749315, "epoch": 7.1220998359503165, "grad_norm": 4.856602191925049, "learning_rate": 9.768060487841856e-06, "loss": 0.1793648, "memory(GiB)": 13.7, "step": 15195, "train_speed(iter/s)": 1.536437 }, { "acc": 0.95412464, "epoch": 7.124443402859152, "grad_norm": 8.651326179504395, "learning_rate": 9.767827081050513e-06, "loss": 0.24623656, "memory(GiB)": 13.7, "step": 15200, "train_speed(iter/s)": 1.536425 }, { "acc": 0.96654758, "epoch": 7.126786969767987, "grad_norm": 8.36074161529541, "learning_rate": 9.76759355966829e-06, "loss": 0.23278394, "memory(GiB)": 13.7, "step": 15205, "train_speed(iter/s)": 1.536412 }, { "acc": 0.96891823, "epoch": 7.129130536676822, "grad_norm": 7.818451881408691, "learning_rate": 9.7673599237008e-06, "loss": 0.15969727, "memory(GiB)": 13.7, "step": 15210, "train_speed(iter/s)": 1.53642 }, { "acc": 0.97024708, "epoch": 7.131474103585657, "grad_norm": 7.9000935554504395, "learning_rate": 9.767126173153656e-06, "loss": 0.12753106, "memory(GiB)": 13.7, "step": 15215, "train_speed(iter/s)": 1.536444 }, { "acc": 0.97391939, "epoch": 7.1338176704944924, "grad_norm": 3.47613787651062, "learning_rate": 9.76689230803248e-06, "loss": 0.15600116, "memory(GiB)": 13.7, "step": 15220, "train_speed(iter/s)": 1.536446 }, { "acc": 0.98812504, "epoch": 7.136161237403328, "grad_norm": 1.2256324291229248, "learning_rate": 9.76665832834289e-06, "loss": 0.04025817, "memory(GiB)": 13.7, "step": 15225, "train_speed(iter/s)": 1.536481 }, { "acc": 0.9864584, "epoch": 7.138504804312163, "grad_norm": 0.27619317173957825, "learning_rate": 9.766424234090512e-06, "loss": 0.09702423, "memory(GiB)": 13.7, "step": 15230, "train_speed(iter/s)": 1.536492 }, { "acc": 0.9531023, "epoch": 7.140848371220998, "grad_norm": 8.657378196716309, "learning_rate": 9.766190025280973e-06, "loss": 0.20975173, "memory(GiB)": 13.7, "step": 15235, "train_speed(iter/s)": 1.536485 }, { "acc": 0.95975504, "epoch": 7.143191938129833, "grad_norm": 9.600983619689941, "learning_rate": 9.765955701919901e-06, "loss": 0.17591187, "memory(GiB)": 13.7, "step": 15240, "train_speed(iter/s)": 1.536492 }, { "acc": 0.96971054, "epoch": 7.145535505038669, "grad_norm": 4.795388698577881, "learning_rate": 9.765721264012932e-06, "loss": 0.13780572, "memory(GiB)": 13.7, "step": 15245, "train_speed(iter/s)": 1.536502 }, { "acc": 0.96586952, "epoch": 7.1478790719475045, "grad_norm": 7.702426433563232, "learning_rate": 9.765486711565697e-06, "loss": 0.24857469, "memory(GiB)": 13.7, "step": 15250, "train_speed(iter/s)": 1.5365 }, { "acc": 0.96564293, "epoch": 7.15022263885634, "grad_norm": 5.106157302856445, "learning_rate": 9.765252044583837e-06, "loss": 0.24354024, "memory(GiB)": 13.7, "step": 15255, "train_speed(iter/s)": 1.536525 }, { "acc": 0.96655006, "epoch": 7.152566205765175, "grad_norm": 8.922758102416992, "learning_rate": 9.76501726307299e-06, "loss": 0.16767062, "memory(GiB)": 13.7, "step": 15260, "train_speed(iter/s)": 1.536566 }, { "acc": 0.95871525, "epoch": 7.15490977267401, "grad_norm": 26.510852813720703, "learning_rate": 9.764782367038803e-06, "loss": 0.1923331, "memory(GiB)": 13.7, "step": 15265, "train_speed(iter/s)": 1.53656 }, { "acc": 0.96661711, "epoch": 7.157253339582845, "grad_norm": 9.684043884277344, "learning_rate": 9.764547356486917e-06, "loss": 0.15524107, "memory(GiB)": 13.7, "step": 15270, "train_speed(iter/s)": 1.536567 }, { "acc": 0.97485123, "epoch": 7.1595969064916805, "grad_norm": 1.075422763824463, "learning_rate": 9.764312231422986e-06, "loss": 0.21200252, "memory(GiB)": 13.7, "step": 15275, "train_speed(iter/s)": 1.536579 }, { "acc": 0.97379007, "epoch": 7.161940473400516, "grad_norm": 9.992100715637207, "learning_rate": 9.764076991852658e-06, "loss": 0.13132423, "memory(GiB)": 13.7, "step": 15280, "train_speed(iter/s)": 1.536576 }, { "acc": 0.97569027, "epoch": 7.164284040309351, "grad_norm": 4.665257930755615, "learning_rate": 9.763841637781588e-06, "loss": 0.14767671, "memory(GiB)": 13.7, "step": 15285, "train_speed(iter/s)": 1.536591 }, { "acc": 0.96731491, "epoch": 7.166627607218186, "grad_norm": 9.805846214294434, "learning_rate": 9.763606169215433e-06, "loss": 0.14805558, "memory(GiB)": 13.7, "step": 15290, "train_speed(iter/s)": 1.536611 }, { "acc": 0.96434526, "epoch": 7.168971174127021, "grad_norm": 12.514806747436523, "learning_rate": 9.763370586159856e-06, "loss": 0.14740552, "memory(GiB)": 13.7, "step": 15295, "train_speed(iter/s)": 1.53663 }, { "acc": 0.95194721, "epoch": 7.171314741035856, "grad_norm": 44.893516540527344, "learning_rate": 9.763134888620518e-06, "loss": 0.33103158, "memory(GiB)": 13.7, "step": 15300, "train_speed(iter/s)": 1.536652 }, { "acc": 0.9641964, "epoch": 7.173658307944692, "grad_norm": 3.843968629837036, "learning_rate": 9.762899076603081e-06, "loss": 0.21643436, "memory(GiB)": 13.7, "step": 15305, "train_speed(iter/s)": 1.536656 }, { "acc": 0.95645838, "epoch": 7.176001874853527, "grad_norm": 2.545750856399536, "learning_rate": 9.762663150113218e-06, "loss": 0.1978587, "memory(GiB)": 13.7, "step": 15310, "train_speed(iter/s)": 1.536646 }, { "acc": 0.97515755, "epoch": 7.178345441762362, "grad_norm": 7.491647243499756, "learning_rate": 9.762427109156598e-06, "loss": 0.14026904, "memory(GiB)": 13.7, "step": 15315, "train_speed(iter/s)": 1.536656 }, { "acc": 0.95401783, "epoch": 7.180689008671197, "grad_norm": 8.406378746032715, "learning_rate": 9.762190953738892e-06, "loss": 0.25287929, "memory(GiB)": 13.7, "step": 15320, "train_speed(iter/s)": 1.536685 }, { "acc": 0.95366383, "epoch": 7.183032575580032, "grad_norm": 8.17718505859375, "learning_rate": 9.76195468386578e-06, "loss": 0.31789231, "memory(GiB)": 13.7, "step": 15325, "train_speed(iter/s)": 1.536692 }, { "acc": 0.95632191, "epoch": 7.1853761424888685, "grad_norm": 16.619691848754883, "learning_rate": 9.76171829954294e-06, "loss": 0.25587111, "memory(GiB)": 13.7, "step": 15330, "train_speed(iter/s)": 1.536724 }, { "acc": 0.96673775, "epoch": 7.187719709397704, "grad_norm": 17.014570236206055, "learning_rate": 9.761481800776055e-06, "loss": 0.18427887, "memory(GiB)": 13.7, "step": 15335, "train_speed(iter/s)": 1.536713 }, { "acc": 0.96715279, "epoch": 7.190063276306539, "grad_norm": 2.74206280708313, "learning_rate": 9.761245187570808e-06, "loss": 0.17159896, "memory(GiB)": 13.7, "step": 15340, "train_speed(iter/s)": 1.536727 }, { "acc": 0.96719704, "epoch": 7.192406843215374, "grad_norm": 8.93834114074707, "learning_rate": 9.761008459932887e-06, "loss": 0.1778657, "memory(GiB)": 13.7, "step": 15345, "train_speed(iter/s)": 1.536722 }, { "acc": 0.97606144, "epoch": 7.194750410124209, "grad_norm": 1.1772440671920776, "learning_rate": 9.76077161786798e-06, "loss": 0.12932694, "memory(GiB)": 13.7, "step": 15350, "train_speed(iter/s)": 1.536724 }, { "acc": 0.96655636, "epoch": 7.197093977033044, "grad_norm": 5.700015544891357, "learning_rate": 9.760534661381786e-06, "loss": 0.16720289, "memory(GiB)": 13.7, "step": 15355, "train_speed(iter/s)": 1.536714 }, { "acc": 0.97038689, "epoch": 7.19943754394188, "grad_norm": 11.022523880004883, "learning_rate": 9.760297590479994e-06, "loss": 0.138811, "memory(GiB)": 13.7, "step": 15360, "train_speed(iter/s)": 1.536692 }, { "acc": 0.95753651, "epoch": 7.201781110850715, "grad_norm": 8.91913890838623, "learning_rate": 9.760060405168304e-06, "loss": 0.17165661, "memory(GiB)": 13.7, "step": 15365, "train_speed(iter/s)": 1.53668 }, { "acc": 0.99080811, "epoch": 7.20412467775955, "grad_norm": 0.868492841720581, "learning_rate": 9.759823105452421e-06, "loss": 0.03785029, "memory(GiB)": 13.7, "step": 15370, "train_speed(iter/s)": 1.536679 }, { "acc": 0.97458334, "epoch": 7.206468244668385, "grad_norm": 10.814738273620605, "learning_rate": 9.759585691338045e-06, "loss": 0.10030925, "memory(GiB)": 13.7, "step": 15375, "train_speed(iter/s)": 1.536697 }, { "acc": 0.96508932, "epoch": 7.20881181157722, "grad_norm": 8.991223335266113, "learning_rate": 9.759348162830884e-06, "loss": 0.19708619, "memory(GiB)": 13.7, "step": 15380, "train_speed(iter/s)": 1.53669 }, { "acc": 0.96526031, "epoch": 7.211155378486056, "grad_norm": 5.322924613952637, "learning_rate": 9.759110519936648e-06, "loss": 0.17390945, "memory(GiB)": 13.7, "step": 15385, "train_speed(iter/s)": 1.536717 }, { "acc": 0.95376816, "epoch": 7.213498945394891, "grad_norm": 8.571348190307617, "learning_rate": 9.758872762661046e-06, "loss": 0.30233786, "memory(GiB)": 13.7, "step": 15390, "train_speed(iter/s)": 1.53669 }, { "acc": 0.97279758, "epoch": 7.215842512303726, "grad_norm": 5.611294746398926, "learning_rate": 9.758634891009796e-06, "loss": 0.16569972, "memory(GiB)": 13.7, "step": 15395, "train_speed(iter/s)": 1.53672 }, { "acc": 0.95564241, "epoch": 7.218186079212561, "grad_norm": 13.935674667358398, "learning_rate": 9.758396904988616e-06, "loss": 0.24938598, "memory(GiB)": 13.7, "step": 15400, "train_speed(iter/s)": 1.53672 }, { "acc": 0.97280426, "epoch": 7.220529646121396, "grad_norm": 2.775902271270752, "learning_rate": 9.758158804603225e-06, "loss": 0.13734009, "memory(GiB)": 13.7, "step": 15405, "train_speed(iter/s)": 1.536716 }, { "acc": 0.9614996, "epoch": 7.222873213030232, "grad_norm": 24.148767471313477, "learning_rate": 9.757920589859347e-06, "loss": 0.12189802, "memory(GiB)": 13.7, "step": 15410, "train_speed(iter/s)": 1.536681 }, { "acc": 0.96945457, "epoch": 7.225216779939068, "grad_norm": 10.763063430786133, "learning_rate": 9.757682260762705e-06, "loss": 0.14806217, "memory(GiB)": 13.7, "step": 15415, "train_speed(iter/s)": 1.536674 }, { "acc": 0.97740326, "epoch": 7.227560346847903, "grad_norm": 4.9893083572387695, "learning_rate": 9.757443817319032e-06, "loss": 0.11357746, "memory(GiB)": 13.7, "step": 15420, "train_speed(iter/s)": 1.536657 }, { "acc": 0.97434216, "epoch": 7.229903913756738, "grad_norm": 6.932253837585449, "learning_rate": 9.757205259534057e-06, "loss": 0.09701314, "memory(GiB)": 13.7, "step": 15425, "train_speed(iter/s)": 1.536662 }, { "acc": 0.95458336, "epoch": 7.232247480665573, "grad_norm": 8.939666748046875, "learning_rate": 9.756966587413516e-06, "loss": 0.2614162, "memory(GiB)": 13.7, "step": 15430, "train_speed(iter/s)": 1.53669 }, { "acc": 0.96873016, "epoch": 7.234591047574408, "grad_norm": 8.973530769348145, "learning_rate": 9.75672780096314e-06, "loss": 0.16965935, "memory(GiB)": 13.7, "step": 15435, "train_speed(iter/s)": 1.536723 }, { "acc": 0.97040186, "epoch": 7.236934614483244, "grad_norm": 2.231783866882324, "learning_rate": 9.756488900188677e-06, "loss": 0.1862025, "memory(GiB)": 13.7, "step": 15440, "train_speed(iter/s)": 1.536748 }, { "acc": 0.96809025, "epoch": 7.239278181392079, "grad_norm": 0.39196497201919556, "learning_rate": 9.756249885095864e-06, "loss": 0.12782049, "memory(GiB)": 13.7, "step": 15445, "train_speed(iter/s)": 1.536752 }, { "acc": 0.9827178, "epoch": 7.241621748300914, "grad_norm": 4.705694198608398, "learning_rate": 9.75601075569045e-06, "loss": 0.10204504, "memory(GiB)": 13.7, "step": 15450, "train_speed(iter/s)": 1.536776 }, { "acc": 0.9725893, "epoch": 7.243965315209749, "grad_norm": 9.260392189025879, "learning_rate": 9.755771511978179e-06, "loss": 0.14263475, "memory(GiB)": 13.7, "step": 15455, "train_speed(iter/s)": 1.536777 }, { "acc": 0.97123508, "epoch": 7.246308882118584, "grad_norm": 41.944549560546875, "learning_rate": 9.7555321539648e-06, "loss": 0.13527124, "memory(GiB)": 13.7, "step": 15460, "train_speed(iter/s)": 1.53681 }, { "acc": 0.96133919, "epoch": 7.24865244902742, "grad_norm": 3.1227970123291016, "learning_rate": 9.755292681656074e-06, "loss": 0.26170888, "memory(GiB)": 13.7, "step": 15465, "train_speed(iter/s)": 1.536814 }, { "acc": 0.96821432, "epoch": 7.250996015936255, "grad_norm": 4.590845584869385, "learning_rate": 9.75505309505775e-06, "loss": 0.1214653, "memory(GiB)": 13.7, "step": 15470, "train_speed(iter/s)": 1.536802 }, { "acc": 0.97830362, "epoch": 7.25333958284509, "grad_norm": 15.97510814666748, "learning_rate": 9.75481339417559e-06, "loss": 0.11493196, "memory(GiB)": 13.7, "step": 15475, "train_speed(iter/s)": 1.536814 }, { "acc": 0.95069265, "epoch": 7.255683149753925, "grad_norm": 8.833024978637695, "learning_rate": 9.754573579015356e-06, "loss": 0.28857248, "memory(GiB)": 13.7, "step": 15480, "train_speed(iter/s)": 1.536819 }, { "acc": 0.9584816, "epoch": 7.25802671666276, "grad_norm": 6.993808269500732, "learning_rate": 9.75433364958281e-06, "loss": 0.2423444, "memory(GiB)": 13.7, "step": 15485, "train_speed(iter/s)": 1.536803 }, { "acc": 0.9771883, "epoch": 7.260370283571596, "grad_norm": 3.6409125328063965, "learning_rate": 9.754093605883724e-06, "loss": 0.1687168, "memory(GiB)": 13.7, "step": 15490, "train_speed(iter/s)": 1.536814 }, { "acc": 0.96525335, "epoch": 7.262713850480432, "grad_norm": 17.835878372192383, "learning_rate": 9.753853447923861e-06, "loss": 0.22224808, "memory(GiB)": 13.7, "step": 15495, "train_speed(iter/s)": 1.536804 }, { "acc": 0.97461052, "epoch": 7.265057417389267, "grad_norm": 7.831606388092041, "learning_rate": 9.753613175708997e-06, "loss": 0.1696486, "memory(GiB)": 13.7, "step": 15500, "train_speed(iter/s)": 1.536815 }, { "acc": 0.96972218, "epoch": 7.267400984298102, "grad_norm": 5.789906978607178, "learning_rate": 9.753372789244911e-06, "loss": 0.13194187, "memory(GiB)": 13.7, "step": 15505, "train_speed(iter/s)": 1.536811 }, { "acc": 0.98291664, "epoch": 7.269744551206937, "grad_norm": 1.7941569089889526, "learning_rate": 9.753132288537375e-06, "loss": 0.12196625, "memory(GiB)": 13.7, "step": 15510, "train_speed(iter/s)": 1.53681 }, { "acc": 0.96374874, "epoch": 7.272088118115772, "grad_norm": 18.470314025878906, "learning_rate": 9.752891673592172e-06, "loss": 0.15500941, "memory(GiB)": 13.7, "step": 15515, "train_speed(iter/s)": 1.536817 }, { "acc": 0.96754417, "epoch": 7.274431685024608, "grad_norm": 3.58878755569458, "learning_rate": 9.752650944415086e-06, "loss": 0.25193672, "memory(GiB)": 13.7, "step": 15520, "train_speed(iter/s)": 1.536804 }, { "acc": 0.97878284, "epoch": 7.276775251933443, "grad_norm": 7.02347469329834, "learning_rate": 9.752410101011905e-06, "loss": 0.10642003, "memory(GiB)": 13.7, "step": 15525, "train_speed(iter/s)": 1.536831 }, { "acc": 0.97967262, "epoch": 7.279118818842278, "grad_norm": 4.169643402099609, "learning_rate": 9.752169143388415e-06, "loss": 0.07518268, "memory(GiB)": 13.7, "step": 15530, "train_speed(iter/s)": 1.536856 }, { "acc": 0.95178566, "epoch": 7.281462385751113, "grad_norm": 11.397602081298828, "learning_rate": 9.751928071550408e-06, "loss": 0.28700099, "memory(GiB)": 13.7, "step": 15535, "train_speed(iter/s)": 1.536863 }, { "acc": 0.96454258, "epoch": 7.283805952659948, "grad_norm": 6.22026252746582, "learning_rate": 9.751686885503685e-06, "loss": 0.19081109, "memory(GiB)": 13.7, "step": 15540, "train_speed(iter/s)": 1.536883 }, { "acc": 0.97618771, "epoch": 7.2861495195687835, "grad_norm": 18.93072509765625, "learning_rate": 9.751445585254033e-06, "loss": 0.16080042, "memory(GiB)": 13.7, "step": 15545, "train_speed(iter/s)": 1.536891 }, { "acc": 0.97216034, "epoch": 7.288493086477619, "grad_norm": 3.2223596572875977, "learning_rate": 9.751204170807258e-06, "loss": 0.14650737, "memory(GiB)": 13.7, "step": 15550, "train_speed(iter/s)": 1.536907 }, { "acc": 0.96860991, "epoch": 7.290836653386454, "grad_norm": 6.624410629272461, "learning_rate": 9.750962642169164e-06, "loss": 0.17010813, "memory(GiB)": 13.7, "step": 15555, "train_speed(iter/s)": 1.536915 }, { "acc": 0.95049505, "epoch": 7.293180220295289, "grad_norm": 7.74077033996582, "learning_rate": 9.750720999345552e-06, "loss": 0.32085798, "memory(GiB)": 13.7, "step": 15560, "train_speed(iter/s)": 1.536921 }, { "acc": 0.96658192, "epoch": 7.295523787204124, "grad_norm": 4.114230632781982, "learning_rate": 9.750479242342235e-06, "loss": 0.18657992, "memory(GiB)": 13.7, "step": 15565, "train_speed(iter/s)": 1.536934 }, { "acc": 0.97479172, "epoch": 7.2978673541129595, "grad_norm": 4.9967732429504395, "learning_rate": 9.750237371165021e-06, "loss": 0.13211652, "memory(GiB)": 13.7, "step": 15570, "train_speed(iter/s)": 1.536973 }, { "acc": 0.97675591, "epoch": 7.300210921021796, "grad_norm": 1.18740713596344, "learning_rate": 9.749995385819724e-06, "loss": 0.11034455, "memory(GiB)": 13.7, "step": 15575, "train_speed(iter/s)": 1.536992 }, { "acc": 0.96079445, "epoch": 7.302554487930631, "grad_norm": 7.670441150665283, "learning_rate": 9.749753286312162e-06, "loss": 0.24072413, "memory(GiB)": 13.7, "step": 15580, "train_speed(iter/s)": 1.537003 }, { "acc": 0.94261417, "epoch": 7.304898054839466, "grad_norm": 10.371379852294922, "learning_rate": 9.749511072648153e-06, "loss": 0.3333204, "memory(GiB)": 13.7, "step": 15585, "train_speed(iter/s)": 1.537019 }, { "acc": 0.94944878, "epoch": 7.307241621748301, "grad_norm": 8.076799392700195, "learning_rate": 9.74926874483352e-06, "loss": 0.25244889, "memory(GiB)": 13.7, "step": 15590, "train_speed(iter/s)": 1.537044 }, { "acc": 0.97142477, "epoch": 7.309585188657136, "grad_norm": 8.705803871154785, "learning_rate": 9.749026302874086e-06, "loss": 0.16261033, "memory(GiB)": 13.7, "step": 15595, "train_speed(iter/s)": 1.53707 }, { "acc": 0.97034721, "epoch": 7.3119287555659715, "grad_norm": 16.945148468017578, "learning_rate": 9.748783746775682e-06, "loss": 0.16056461, "memory(GiB)": 13.7, "step": 15600, "train_speed(iter/s)": 1.537065 }, { "acc": 0.96978626, "epoch": 7.314272322474807, "grad_norm": 12.501150131225586, "learning_rate": 9.748541076544137e-06, "loss": 0.21789265, "memory(GiB)": 13.7, "step": 15605, "train_speed(iter/s)": 1.537074 }, { "acc": 0.9667551, "epoch": 7.316615889383642, "grad_norm": 5.22429084777832, "learning_rate": 9.74829829218528e-06, "loss": 0.19020672, "memory(GiB)": 13.7, "step": 15610, "train_speed(iter/s)": 1.537107 }, { "acc": 0.96327381, "epoch": 7.318959456292477, "grad_norm": 9.606762886047363, "learning_rate": 9.748055393704952e-06, "loss": 0.18519828, "memory(GiB)": 13.7, "step": 15615, "train_speed(iter/s)": 1.537102 }, { "acc": 0.96463089, "epoch": 7.321303023201312, "grad_norm": 11.492446899414062, "learning_rate": 9.747812381108988e-06, "loss": 0.17218145, "memory(GiB)": 13.7, "step": 15620, "train_speed(iter/s)": 1.537109 }, { "acc": 0.95163689, "epoch": 7.3236465901101475, "grad_norm": 7.23938512802124, "learning_rate": 9.74756925440323e-06, "loss": 0.27606499, "memory(GiB)": 13.7, "step": 15625, "train_speed(iter/s)": 1.537128 }, { "acc": 0.97675056, "epoch": 7.325990157018983, "grad_norm": 5.238812446594238, "learning_rate": 9.747326013593524e-06, "loss": 0.11754338, "memory(GiB)": 13.7, "step": 15630, "train_speed(iter/s)": 1.537101 }, { "acc": 0.97246113, "epoch": 7.328333723927818, "grad_norm": 8.513067245483398, "learning_rate": 9.747082658685714e-06, "loss": 0.23027916, "memory(GiB)": 13.7, "step": 15635, "train_speed(iter/s)": 1.537123 }, { "acc": 0.98147545, "epoch": 7.330677290836653, "grad_norm": 3.3391659259796143, "learning_rate": 9.74683918968565e-06, "loss": 0.08567237, "memory(GiB)": 13.7, "step": 15640, "train_speed(iter/s)": 1.537151 }, { "acc": 0.96734619, "epoch": 7.333020857745488, "grad_norm": 5.800381660461426, "learning_rate": 9.746595606599186e-06, "loss": 0.12141864, "memory(GiB)": 13.7, "step": 15645, "train_speed(iter/s)": 1.537153 }, { "acc": 0.95363102, "epoch": 7.3353644246543235, "grad_norm": 16.408592224121094, "learning_rate": 9.746351909432175e-06, "loss": 0.37437944, "memory(GiB)": 13.7, "step": 15650, "train_speed(iter/s)": 1.537169 }, { "acc": 0.98041668, "epoch": 7.337707991563159, "grad_norm": 3.1314890384674072, "learning_rate": 9.746108098190477e-06, "loss": 0.14381251, "memory(GiB)": 13.7, "step": 15655, "train_speed(iter/s)": 1.537187 }, { "acc": 0.97818451, "epoch": 7.340051558471995, "grad_norm": 10.057592391967773, "learning_rate": 9.745864172879949e-06, "loss": 0.12252982, "memory(GiB)": 13.7, "step": 15660, "train_speed(iter/s)": 1.537207 }, { "acc": 0.98413754, "epoch": 7.34239512538083, "grad_norm": 0.5427709221839905, "learning_rate": 9.745620133506457e-06, "loss": 0.11164463, "memory(GiB)": 13.7, "step": 15665, "train_speed(iter/s)": 1.537214 }, { "acc": 0.95983372, "epoch": 7.344738692289665, "grad_norm": 4.59196138381958, "learning_rate": 9.745375980075867e-06, "loss": 0.2958184, "memory(GiB)": 13.7, "step": 15670, "train_speed(iter/s)": 1.537218 }, { "acc": 0.97433033, "epoch": 7.3470822591985, "grad_norm": 4.623455047607422, "learning_rate": 9.745131712594045e-06, "loss": 0.18669646, "memory(GiB)": 13.7, "step": 15675, "train_speed(iter/s)": 1.537185 }, { "acc": 0.96455803, "epoch": 7.3494258261073355, "grad_norm": 0.8316611647605896, "learning_rate": 9.744887331066863e-06, "loss": 0.20988016, "memory(GiB)": 13.7, "step": 15680, "train_speed(iter/s)": 1.537191 }, { "acc": 0.95941439, "epoch": 7.351769393016171, "grad_norm": 7.018581390380859, "learning_rate": 9.744642835500199e-06, "loss": 0.20179136, "memory(GiB)": 13.7, "step": 15685, "train_speed(iter/s)": 1.537203 }, { "acc": 0.97120991, "epoch": 7.354112959925006, "grad_norm": 6.21019983291626, "learning_rate": 9.744398225899925e-06, "loss": 0.14330848, "memory(GiB)": 13.7, "step": 15690, "train_speed(iter/s)": 1.537203 }, { "acc": 0.98120193, "epoch": 7.356456526833841, "grad_norm": 6.530875205993652, "learning_rate": 9.744153502271922e-06, "loss": 0.12411556, "memory(GiB)": 13.7, "step": 15695, "train_speed(iter/s)": 1.537219 }, { "acc": 0.97333326, "epoch": 7.358800093742676, "grad_norm": 7.786337852478027, "learning_rate": 9.743908664622073e-06, "loss": 0.18788331, "memory(GiB)": 13.7, "step": 15700, "train_speed(iter/s)": 1.537243 }, { "acc": 0.9561161, "epoch": 7.3611436606515115, "grad_norm": 11.746878623962402, "learning_rate": 9.743663712956265e-06, "loss": 0.17168828, "memory(GiB)": 13.7, "step": 15705, "train_speed(iter/s)": 1.53727 }, { "acc": 0.96716347, "epoch": 7.363487227560347, "grad_norm": 6.2125115394592285, "learning_rate": 9.743418647280382e-06, "loss": 0.12990183, "memory(GiB)": 13.7, "step": 15710, "train_speed(iter/s)": 1.537261 }, { "acc": 0.95352135, "epoch": 7.365830794469182, "grad_norm": 8.169988632202148, "learning_rate": 9.743173467600317e-06, "loss": 0.1645069, "memory(GiB)": 13.7, "step": 15715, "train_speed(iter/s)": 1.537266 }, { "acc": 0.96117153, "epoch": 7.368174361378017, "grad_norm": 9.133684158325195, "learning_rate": 9.742928173921962e-06, "loss": 0.26641865, "memory(GiB)": 13.7, "step": 15720, "train_speed(iter/s)": 1.537291 }, { "acc": 0.97131929, "epoch": 7.370517928286852, "grad_norm": 9.368704795837402, "learning_rate": 9.742682766251215e-06, "loss": 0.13549607, "memory(GiB)": 13.7, "step": 15725, "train_speed(iter/s)": 1.537303 }, { "acc": 0.95408812, "epoch": 7.3728614951956875, "grad_norm": 8.38820743560791, "learning_rate": 9.742437244593973e-06, "loss": 0.19703689, "memory(GiB)": 13.7, "step": 15730, "train_speed(iter/s)": 1.537314 }, { "acc": 0.9740612, "epoch": 7.3752050621045235, "grad_norm": 17.36431312561035, "learning_rate": 9.74219160895614e-06, "loss": 0.15315673, "memory(GiB)": 13.7, "step": 15735, "train_speed(iter/s)": 1.537305 }, { "acc": 0.9491518, "epoch": 7.377548629013359, "grad_norm": 5.308844089508057, "learning_rate": 9.741945859343616e-06, "loss": 0.22588058, "memory(GiB)": 13.7, "step": 15740, "train_speed(iter/s)": 1.537335 }, { "acc": 0.970051, "epoch": 7.379892195922194, "grad_norm": 7.405439853668213, "learning_rate": 9.741699995762311e-06, "loss": 0.16275504, "memory(GiB)": 13.7, "step": 15745, "train_speed(iter/s)": 1.537349 }, { "acc": 0.98181553, "epoch": 7.382235762831029, "grad_norm": 8.265480995178223, "learning_rate": 9.741454018218134e-06, "loss": 0.16004683, "memory(GiB)": 13.7, "step": 15750, "train_speed(iter/s)": 1.537348 }, { "acc": 0.96838741, "epoch": 7.384579329739864, "grad_norm": 0.23281905055046082, "learning_rate": 9.741207926716997e-06, "loss": 0.15850921, "memory(GiB)": 13.7, "step": 15755, "train_speed(iter/s)": 1.537358 }, { "acc": 0.96317463, "epoch": 7.3869228966486995, "grad_norm": 6.807745933532715, "learning_rate": 9.740961721264816e-06, "loss": 0.27329261, "memory(GiB)": 13.7, "step": 15760, "train_speed(iter/s)": 1.537384 }, { "acc": 0.96522121, "epoch": 7.389266463557535, "grad_norm": 1.7271913290023804, "learning_rate": 9.740715401867512e-06, "loss": 0.11612649, "memory(GiB)": 13.7, "step": 15765, "train_speed(iter/s)": 1.537383 }, { "acc": 0.9719593, "epoch": 7.39161003046637, "grad_norm": 4.697029113769531, "learning_rate": 9.740468968531e-06, "loss": 0.24984112, "memory(GiB)": 13.7, "step": 15770, "train_speed(iter/s)": 1.537398 }, { "acc": 0.97008934, "epoch": 7.393953597375205, "grad_norm": 32.74968719482422, "learning_rate": 9.740222421261207e-06, "loss": 0.12180552, "memory(GiB)": 13.7, "step": 15775, "train_speed(iter/s)": 1.537398 }, { "acc": 0.96826735, "epoch": 7.39629716428404, "grad_norm": 9.203800201416016, "learning_rate": 9.739975760064056e-06, "loss": 0.21479673, "memory(GiB)": 13.7, "step": 15780, "train_speed(iter/s)": 1.537395 }, { "acc": 0.9842803, "epoch": 7.3986407311928755, "grad_norm": 1.8037675619125366, "learning_rate": 9.73972898494548e-06, "loss": 0.10557387, "memory(GiB)": 13.7, "step": 15785, "train_speed(iter/s)": 1.53742 }, { "acc": 0.97267857, "epoch": 7.400984298101711, "grad_norm": 4.721466064453125, "learning_rate": 9.739482095911407e-06, "loss": 0.1600938, "memory(GiB)": 13.7, "step": 15790, "train_speed(iter/s)": 1.537434 }, { "acc": 0.97139759, "epoch": 7.403327865010546, "grad_norm": 10.268031120300293, "learning_rate": 9.739235092967775e-06, "loss": 0.13927728, "memory(GiB)": 13.7, "step": 15795, "train_speed(iter/s)": 1.537481 }, { "acc": 0.98469162, "epoch": 7.405671431919381, "grad_norm": 9.487192153930664, "learning_rate": 9.738987976120518e-06, "loss": 0.12709186, "memory(GiB)": 13.7, "step": 15800, "train_speed(iter/s)": 1.537472 }, { "acc": 0.94949074, "epoch": 7.408014998828216, "grad_norm": 13.8272066116333, "learning_rate": 9.738740745375578e-06, "loss": 0.30983925, "memory(GiB)": 13.7, "step": 15805, "train_speed(iter/s)": 1.537501 }, { "acc": 0.9642868, "epoch": 7.410358565737051, "grad_norm": 12.798200607299805, "learning_rate": 9.738493400738896e-06, "loss": 0.16791961, "memory(GiB)": 13.7, "step": 15810, "train_speed(iter/s)": 1.537527 }, { "acc": 0.97825546, "epoch": 7.412702132645887, "grad_norm": 10.598252296447754, "learning_rate": 9.73824594221642e-06, "loss": 0.11959666, "memory(GiB)": 13.7, "step": 15815, "train_speed(iter/s)": 1.537554 }, { "acc": 0.97172079, "epoch": 7.415045699554723, "grad_norm": 4.218219757080078, "learning_rate": 9.737998369814095e-06, "loss": 0.18111413, "memory(GiB)": 13.7, "step": 15820, "train_speed(iter/s)": 1.53757 }, { "acc": 0.97892857, "epoch": 7.417389266463558, "grad_norm": 8.108136177062988, "learning_rate": 9.737750683537872e-06, "loss": 0.10944977, "memory(GiB)": 13.7, "step": 15825, "train_speed(iter/s)": 1.537599 }, { "acc": 0.98061018, "epoch": 7.419732833372393, "grad_norm": 0.6543990969657898, "learning_rate": 9.737502883393708e-06, "loss": 0.11356281, "memory(GiB)": 13.7, "step": 15830, "train_speed(iter/s)": 1.537603 }, { "acc": 0.96688452, "epoch": 7.422076400281228, "grad_norm": 5.420001983642578, "learning_rate": 9.737254969387556e-06, "loss": 0.12832837, "memory(GiB)": 13.7, "step": 15835, "train_speed(iter/s)": 1.537623 }, { "acc": 0.96996107, "epoch": 7.4244199671900635, "grad_norm": 6.973052024841309, "learning_rate": 9.737006941525376e-06, "loss": 0.18225325, "memory(GiB)": 13.7, "step": 15840, "train_speed(iter/s)": 1.537626 }, { "acc": 0.98342266, "epoch": 7.426763534098899, "grad_norm": 12.184588432312012, "learning_rate": 9.736758799813129e-06, "loss": 0.10720696, "memory(GiB)": 13.7, "step": 15845, "train_speed(iter/s)": 1.537615 }, { "acc": 0.97882891, "epoch": 7.429107101007734, "grad_norm": 6.46637487411499, "learning_rate": 9.73651054425678e-06, "loss": 0.10553446, "memory(GiB)": 13.7, "step": 15850, "train_speed(iter/s)": 1.537634 }, { "acc": 0.96771936, "epoch": 7.431450667916569, "grad_norm": 1.4933041334152222, "learning_rate": 9.736262174862298e-06, "loss": 0.15070283, "memory(GiB)": 13.7, "step": 15855, "train_speed(iter/s)": 1.537665 }, { "acc": 0.97312498, "epoch": 7.433794234825404, "grad_norm": 8.049840927124023, "learning_rate": 9.736013691635651e-06, "loss": 0.12356555, "memory(GiB)": 13.7, "step": 15860, "train_speed(iter/s)": 1.537669 }, { "acc": 0.97763777, "epoch": 7.436137801734239, "grad_norm": 12.414462089538574, "learning_rate": 9.735765094582813e-06, "loss": 0.13915665, "memory(GiB)": 13.7, "step": 15865, "train_speed(iter/s)": 1.53767 }, { "acc": 0.94892864, "epoch": 7.438481368643075, "grad_norm": 7.729330539703369, "learning_rate": 9.735516383709756e-06, "loss": 0.26871254, "memory(GiB)": 13.7, "step": 15870, "train_speed(iter/s)": 1.53768 }, { "acc": 0.97080135, "epoch": 7.44082493555191, "grad_norm": 17.28614616394043, "learning_rate": 9.735267559022463e-06, "loss": 0.20333133, "memory(GiB)": 13.7, "step": 15875, "train_speed(iter/s)": 1.537689 }, { "acc": 0.96314163, "epoch": 7.443168502460745, "grad_norm": 12.172101020812988, "learning_rate": 9.735018620526913e-06, "loss": 0.22603879, "memory(GiB)": 13.7, "step": 15880, "train_speed(iter/s)": 1.537692 }, { "acc": 0.97173929, "epoch": 7.44551206936958, "grad_norm": 4.4943623542785645, "learning_rate": 9.734769568229087e-06, "loss": 0.13934298, "memory(GiB)": 13.7, "step": 15885, "train_speed(iter/s)": 1.537697 }, { "acc": 0.96520844, "epoch": 7.447855636278415, "grad_norm": 6.462552547454834, "learning_rate": 9.734520402134976e-06, "loss": 0.07350677, "memory(GiB)": 13.7, "step": 15890, "train_speed(iter/s)": 1.537692 }, { "acc": 0.96071711, "epoch": 7.450199203187251, "grad_norm": 12.632925987243652, "learning_rate": 9.734271122250568e-06, "loss": 0.23367054, "memory(GiB)": 13.7, "step": 15895, "train_speed(iter/s)": 1.537676 }, { "acc": 0.95441628, "epoch": 7.452542770096086, "grad_norm": 9.848185539245605, "learning_rate": 9.734021728581854e-06, "loss": 0.25400746, "memory(GiB)": 13.7, "step": 15900, "train_speed(iter/s)": 1.537704 }, { "acc": 0.9598115, "epoch": 7.454886337004922, "grad_norm": 10.016992568969727, "learning_rate": 9.733772221134825e-06, "loss": 0.23221068, "memory(GiB)": 13.7, "step": 15905, "train_speed(iter/s)": 1.537717 }, { "acc": 0.95188847, "epoch": 7.457229903913757, "grad_norm": 8.386284828186035, "learning_rate": 9.733522599915484e-06, "loss": 0.15613334, "memory(GiB)": 13.7, "step": 15910, "train_speed(iter/s)": 1.537705 }, { "acc": 0.96549339, "epoch": 7.459573470822592, "grad_norm": 11.557576179504395, "learning_rate": 9.733272864929829e-06, "loss": 0.19785571, "memory(GiB)": 13.7, "step": 15915, "train_speed(iter/s)": 1.537698 }, { "acc": 0.97972755, "epoch": 7.4619170377314274, "grad_norm": 6.352644920349121, "learning_rate": 9.733023016183863e-06, "loss": 0.1570714, "memory(GiB)": 13.7, "step": 15920, "train_speed(iter/s)": 1.537708 }, { "acc": 0.96058683, "epoch": 7.464260604640263, "grad_norm": 8.944265365600586, "learning_rate": 9.73277305368359e-06, "loss": 0.24545999, "memory(GiB)": 13.7, "step": 15925, "train_speed(iter/s)": 1.537719 }, { "acc": 0.96942959, "epoch": 7.466604171549098, "grad_norm": 8.609665870666504, "learning_rate": 9.73252297743502e-06, "loss": 0.18070238, "memory(GiB)": 13.7, "step": 15930, "train_speed(iter/s)": 1.537723 }, { "acc": 0.9532299, "epoch": 7.468947738457933, "grad_norm": 8.797883033752441, "learning_rate": 9.732272787444164e-06, "loss": 0.30780106, "memory(GiB)": 13.7, "step": 15935, "train_speed(iter/s)": 1.537733 }, { "acc": 0.95321217, "epoch": 7.471291305366768, "grad_norm": 17.356740951538086, "learning_rate": 9.732022483717034e-06, "loss": 0.24078896, "memory(GiB)": 13.7, "step": 15940, "train_speed(iter/s)": 1.537748 }, { "acc": 0.98494053, "epoch": 7.473634872275603, "grad_norm": 3.722872257232666, "learning_rate": 9.73177206625965e-06, "loss": 0.11442716, "memory(GiB)": 13.7, "step": 15945, "train_speed(iter/s)": 1.537754 }, { "acc": 0.97183714, "epoch": 7.475978439184439, "grad_norm": 10.679364204406738, "learning_rate": 9.731521535078025e-06, "loss": 0.14254156, "memory(GiB)": 13.7, "step": 15950, "train_speed(iter/s)": 1.537774 }, { "acc": 0.96320896, "epoch": 7.478322006093274, "grad_norm": 7.8755621910095215, "learning_rate": 9.731270890178189e-06, "loss": 0.18872617, "memory(GiB)": 13.7, "step": 15955, "train_speed(iter/s)": 1.537777 }, { "acc": 0.96712055, "epoch": 7.480665573002109, "grad_norm": 16.445249557495117, "learning_rate": 9.731020131566162e-06, "loss": 0.24803858, "memory(GiB)": 13.7, "step": 15960, "train_speed(iter/s)": 1.537793 }, { "acc": 0.97202377, "epoch": 7.483009139910944, "grad_norm": 0.62481689453125, "learning_rate": 9.730769259247971e-06, "loss": 0.140303, "memory(GiB)": 13.7, "step": 15965, "train_speed(iter/s)": 1.537813 }, { "acc": 0.96981401, "epoch": 7.485352706819779, "grad_norm": 30.892169952392578, "learning_rate": 9.730518273229648e-06, "loss": 0.09665771, "memory(GiB)": 13.7, "step": 15970, "train_speed(iter/s)": 1.537854 }, { "acc": 0.97580423, "epoch": 7.487696273728615, "grad_norm": 7.71800422668457, "learning_rate": 9.730267173517224e-06, "loss": 0.11049092, "memory(GiB)": 13.7, "step": 15975, "train_speed(iter/s)": 1.537867 }, { "acc": 0.94687405, "epoch": 7.490039840637451, "grad_norm": 6.5461554527282715, "learning_rate": 9.730015960116736e-06, "loss": 0.35198059, "memory(GiB)": 13.7, "step": 15980, "train_speed(iter/s)": 1.537904 }, { "acc": 0.9667532, "epoch": 7.492383407546286, "grad_norm": 4.9579997062683105, "learning_rate": 9.729764633034221e-06, "loss": 0.217838, "memory(GiB)": 13.7, "step": 15985, "train_speed(iter/s)": 1.537909 }, { "acc": 0.9760417, "epoch": 7.494726974455121, "grad_norm": 4.976069927215576, "learning_rate": 9.729513192275723e-06, "loss": 0.08860056, "memory(GiB)": 13.7, "step": 15990, "train_speed(iter/s)": 1.537911 }, { "acc": 0.96255455, "epoch": 7.497070541363956, "grad_norm": 5.334620952606201, "learning_rate": 9.729261637847282e-06, "loss": 0.24774013, "memory(GiB)": 13.7, "step": 15995, "train_speed(iter/s)": 1.537926 }, { "acc": 0.98946438, "epoch": 7.499414108272791, "grad_norm": 4.4328999519348145, "learning_rate": 9.729009969754947e-06, "loss": 0.06060748, "memory(GiB)": 13.7, "step": 16000, "train_speed(iter/s)": 1.537924 }, { "acc": 0.95255413, "epoch": 7.501757675181627, "grad_norm": 5.148627758026123, "learning_rate": 9.728758188004767e-06, "loss": 0.21992579, "memory(GiB)": 13.7, "step": 16005, "train_speed(iter/s)": 1.537924 }, { "acc": 0.97406769, "epoch": 7.504101242090462, "grad_norm": 11.337841987609863, "learning_rate": 9.728506292602793e-06, "loss": 0.16307118, "memory(GiB)": 13.7, "step": 16010, "train_speed(iter/s)": 1.537918 }, { "acc": 0.96285095, "epoch": 7.506444808999297, "grad_norm": 6.702181816101074, "learning_rate": 9.728254283555083e-06, "loss": 0.18454151, "memory(GiB)": 13.7, "step": 16015, "train_speed(iter/s)": 1.537918 }, { "acc": 0.95571585, "epoch": 7.508788375908132, "grad_norm": 8.809274673461914, "learning_rate": 9.728002160867689e-06, "loss": 0.26655445, "memory(GiB)": 13.7, "step": 16020, "train_speed(iter/s)": 1.537907 }, { "acc": 0.96487141, "epoch": 7.511131942816967, "grad_norm": 6.894974231719971, "learning_rate": 9.727749924546676e-06, "loss": 0.27820072, "memory(GiB)": 13.7, "step": 16025, "train_speed(iter/s)": 1.537932 }, { "acc": 0.98307009, "epoch": 7.513475509725803, "grad_norm": 21.999855041503906, "learning_rate": 9.727497574598103e-06, "loss": 0.12632623, "memory(GiB)": 13.7, "step": 16030, "train_speed(iter/s)": 1.537947 }, { "acc": 0.97361107, "epoch": 7.515819076634638, "grad_norm": 8.892799377441406, "learning_rate": 9.727245111028039e-06, "loss": 0.11378397, "memory(GiB)": 13.7, "step": 16035, "train_speed(iter/s)": 1.537954 }, { "acc": 0.98743057, "epoch": 7.518162643543473, "grad_norm": 2.2335612773895264, "learning_rate": 9.726992533842552e-06, "loss": 0.08662752, "memory(GiB)": 13.7, "step": 16040, "train_speed(iter/s)": 1.537955 }, { "acc": 0.96408138, "epoch": 7.520506210452308, "grad_norm": 4.039156436920166, "learning_rate": 9.72673984304771e-06, "loss": 0.18854046, "memory(GiB)": 13.7, "step": 16045, "train_speed(iter/s)": 1.537979 }, { "acc": 0.97862854, "epoch": 7.522849777361143, "grad_norm": 21.988000869750977, "learning_rate": 9.72648703864959e-06, "loss": 0.16840928, "memory(GiB)": 13.7, "step": 16050, "train_speed(iter/s)": 1.538 }, { "acc": 0.9599781, "epoch": 7.5251933442699785, "grad_norm": 3.7009966373443604, "learning_rate": 9.726234120654268e-06, "loss": 0.15688651, "memory(GiB)": 13.7, "step": 16055, "train_speed(iter/s)": 1.538003 }, { "acc": 0.98056126, "epoch": 7.527536911178814, "grad_norm": 4.190093517303467, "learning_rate": 9.725981089067823e-06, "loss": 0.08845953, "memory(GiB)": 13.7, "step": 16060, "train_speed(iter/s)": 1.538044 }, { "acc": 0.97690468, "epoch": 7.52988047808765, "grad_norm": 3.8136188983917236, "learning_rate": 9.725727943896336e-06, "loss": 0.13604019, "memory(GiB)": 13.7, "step": 16065, "train_speed(iter/s)": 1.53805 }, { "acc": 0.97393303, "epoch": 7.532224044996485, "grad_norm": 3.3743104934692383, "learning_rate": 9.725474685145895e-06, "loss": 0.20745034, "memory(GiB)": 13.7, "step": 16070, "train_speed(iter/s)": 1.538064 }, { "acc": 0.97092152, "epoch": 7.53456761190532, "grad_norm": 2.0647025108337402, "learning_rate": 9.725221312822583e-06, "loss": 0.18071282, "memory(GiB)": 13.7, "step": 16075, "train_speed(iter/s)": 1.538064 }, { "acc": 0.9625206, "epoch": 7.536911178814155, "grad_norm": 9.07995319366455, "learning_rate": 9.724967826932494e-06, "loss": 0.20540767, "memory(GiB)": 13.7, "step": 16080, "train_speed(iter/s)": 1.538062 }, { "acc": 0.95588121, "epoch": 7.539254745722991, "grad_norm": 25.461589813232422, "learning_rate": 9.72471422748172e-06, "loss": 0.29079275, "memory(GiB)": 13.7, "step": 16085, "train_speed(iter/s)": 1.538084 }, { "acc": 0.96615534, "epoch": 7.541598312631826, "grad_norm": 9.882258415222168, "learning_rate": 9.724460514476356e-06, "loss": 0.15723071, "memory(GiB)": 13.7, "step": 16090, "train_speed(iter/s)": 1.538097 }, { "acc": 0.97589779, "epoch": 7.543941879540661, "grad_norm": 5.442098140716553, "learning_rate": 9.7242066879225e-06, "loss": 0.11182119, "memory(GiB)": 13.7, "step": 16095, "train_speed(iter/s)": 1.538112 }, { "acc": 0.96592264, "epoch": 7.546285446449496, "grad_norm": 0.7825483083724976, "learning_rate": 9.723952747826254e-06, "loss": 0.17735375, "memory(GiB)": 13.7, "step": 16100, "train_speed(iter/s)": 1.538147 }, { "acc": 0.98187504, "epoch": 7.548629013358331, "grad_norm": 3.266105890274048, "learning_rate": 9.72369869419372e-06, "loss": 0.091218, "memory(GiB)": 13.7, "step": 16105, "train_speed(iter/s)": 1.538132 }, { "acc": 0.96934528, "epoch": 7.550972580267167, "grad_norm": 10.749006271362305, "learning_rate": 9.72344452703101e-06, "loss": 0.15572649, "memory(GiB)": 13.7, "step": 16110, "train_speed(iter/s)": 1.538171 }, { "acc": 0.97023945, "epoch": 7.553316147176002, "grad_norm": 6.437938213348389, "learning_rate": 9.723190246344225e-06, "loss": 0.134899, "memory(GiB)": 13.7, "step": 16115, "train_speed(iter/s)": 1.538168 }, { "acc": 0.97445297, "epoch": 7.555659714084837, "grad_norm": 24.806795120239258, "learning_rate": 9.722935852139484e-06, "loss": 0.14048004, "memory(GiB)": 13.7, "step": 16120, "train_speed(iter/s)": 1.538188 }, { "acc": 0.96642857, "epoch": 7.558003280993672, "grad_norm": 7.8707404136657715, "learning_rate": 9.722681344422901e-06, "loss": 0.16448884, "memory(GiB)": 13.7, "step": 16125, "train_speed(iter/s)": 1.538187 }, { "acc": 0.95673065, "epoch": 7.560346847902507, "grad_norm": 6.269660472869873, "learning_rate": 9.72242672320059e-06, "loss": 0.2648396, "memory(GiB)": 13.7, "step": 16130, "train_speed(iter/s)": 1.538208 }, { "acc": 0.96898203, "epoch": 7.5626904148113425, "grad_norm": 20.194562911987305, "learning_rate": 9.722171988478675e-06, "loss": 0.22410769, "memory(GiB)": 13.7, "step": 16135, "train_speed(iter/s)": 1.538207 }, { "acc": 0.95583334, "epoch": 7.565033981720179, "grad_norm": 81.07290649414062, "learning_rate": 9.721917140263275e-06, "loss": 0.25192428, "memory(GiB)": 13.7, "step": 16140, "train_speed(iter/s)": 1.538231 }, { "acc": 0.95637169, "epoch": 7.567377548629013, "grad_norm": 9.491938591003418, "learning_rate": 9.721662178560518e-06, "loss": 0.2760987, "memory(GiB)": 13.7, "step": 16145, "train_speed(iter/s)": 1.538231 }, { "acc": 0.98842258, "epoch": 7.569721115537849, "grad_norm": 5.3273539543151855, "learning_rate": 9.721407103376535e-06, "loss": 0.10241823, "memory(GiB)": 13.7, "step": 16150, "train_speed(iter/s)": 1.538247 }, { "acc": 0.96644745, "epoch": 7.572064682446684, "grad_norm": 5.192386150360107, "learning_rate": 9.721151914717453e-06, "loss": 0.16254731, "memory(GiB)": 13.7, "step": 16155, "train_speed(iter/s)": 1.538262 }, { "acc": 0.94457836, "epoch": 7.574408249355519, "grad_norm": 8.581863403320312, "learning_rate": 9.720896612589407e-06, "loss": 0.27954063, "memory(GiB)": 13.7, "step": 16160, "train_speed(iter/s)": 1.538257 }, { "acc": 0.97086315, "epoch": 7.576751816264355, "grad_norm": 9.570929527282715, "learning_rate": 9.720641196998537e-06, "loss": 0.14430797, "memory(GiB)": 13.7, "step": 16165, "train_speed(iter/s)": 1.538278 }, { "acc": 0.96540413, "epoch": 7.57909538317319, "grad_norm": 5.768642425537109, "learning_rate": 9.720385667950975e-06, "loss": 0.15795293, "memory(GiB)": 13.7, "step": 16170, "train_speed(iter/s)": 1.538287 }, { "acc": 0.96579227, "epoch": 7.581438950082025, "grad_norm": 6.517798900604248, "learning_rate": 9.720130025452872e-06, "loss": 0.19615556, "memory(GiB)": 13.7, "step": 16175, "train_speed(iter/s)": 1.538248 }, { "acc": 0.96422119, "epoch": 7.58378251699086, "grad_norm": 8.310637474060059, "learning_rate": 9.719874269510367e-06, "loss": 0.14195044, "memory(GiB)": 13.7, "step": 16180, "train_speed(iter/s)": 1.538242 }, { "acc": 0.96965275, "epoch": 7.586126083899695, "grad_norm": 12.47861099243164, "learning_rate": 9.719618400129608e-06, "loss": 0.16922748, "memory(GiB)": 13.7, "step": 16185, "train_speed(iter/s)": 1.538257 }, { "acc": 0.95446138, "epoch": 7.5884696508085305, "grad_norm": 13.390738487243652, "learning_rate": 9.719362417316746e-06, "loss": 0.237956, "memory(GiB)": 13.7, "step": 16190, "train_speed(iter/s)": 1.538277 }, { "acc": 0.95932541, "epoch": 7.590813217717366, "grad_norm": 8.481101036071777, "learning_rate": 9.719106321077935e-06, "loss": 0.22289119, "memory(GiB)": 13.7, "step": 16195, "train_speed(iter/s)": 1.538295 }, { "acc": 0.97416668, "epoch": 7.593156784626201, "grad_norm": 4.691662788391113, "learning_rate": 9.71885011141933e-06, "loss": 0.1489575, "memory(GiB)": 13.7, "step": 16200, "train_speed(iter/s)": 1.538308 }, { "acc": 0.9766964, "epoch": 7.595500351535036, "grad_norm": 5.649619102478027, "learning_rate": 9.718593788347089e-06, "loss": 0.10084264, "memory(GiB)": 13.7, "step": 16205, "train_speed(iter/s)": 1.538324 }, { "acc": 0.95136147, "epoch": 7.597843918443871, "grad_norm": 10.158698081970215, "learning_rate": 9.718337351867372e-06, "loss": 0.29995527, "memory(GiB)": 13.7, "step": 16210, "train_speed(iter/s)": 1.538324 }, { "acc": 0.95078125, "epoch": 7.6001874853527065, "grad_norm": 9.185775756835938, "learning_rate": 9.718080801986346e-06, "loss": 0.33263848, "memory(GiB)": 13.7, "step": 16215, "train_speed(iter/s)": 1.538342 }, { "acc": 0.97446346, "epoch": 7.602531052261542, "grad_norm": 8.476746559143066, "learning_rate": 9.717824138710177e-06, "loss": 0.11337154, "memory(GiB)": 13.7, "step": 16220, "train_speed(iter/s)": 1.538347 }, { "acc": 0.96434526, "epoch": 7.604874619170378, "grad_norm": 13.556075096130371, "learning_rate": 9.71756736204503e-06, "loss": 0.18306055, "memory(GiB)": 13.7, "step": 16225, "train_speed(iter/s)": 1.538356 }, { "acc": 0.97821426, "epoch": 7.607218186079212, "grad_norm": 6.927188873291016, "learning_rate": 9.717310471997084e-06, "loss": 0.09569846, "memory(GiB)": 13.7, "step": 16230, "train_speed(iter/s)": 1.538391 }, { "acc": 0.97008934, "epoch": 7.609561752988048, "grad_norm": 16.903841018676758, "learning_rate": 9.717053468572508e-06, "loss": 0.14537435, "memory(GiB)": 13.7, "step": 16235, "train_speed(iter/s)": 1.538411 }, { "acc": 0.98159971, "epoch": 7.611905319896883, "grad_norm": 2.7717390060424805, "learning_rate": 9.716796351777483e-06, "loss": 0.08985475, "memory(GiB)": 13.7, "step": 16240, "train_speed(iter/s)": 1.538397 }, { "acc": 0.97487354, "epoch": 7.6142488868057185, "grad_norm": 7.466623783111572, "learning_rate": 9.716539121618186e-06, "loss": 0.14649279, "memory(GiB)": 13.7, "step": 16245, "train_speed(iter/s)": 1.538404 }, { "acc": 0.96106062, "epoch": 7.616592453714554, "grad_norm": 4.341220855712891, "learning_rate": 9.716281778100803e-06, "loss": 0.21563048, "memory(GiB)": 13.7, "step": 16250, "train_speed(iter/s)": 1.538394 }, { "acc": 0.97745953, "epoch": 7.618936020623389, "grad_norm": 2.3727059364318848, "learning_rate": 9.716024321231518e-06, "loss": 0.13226659, "memory(GiB)": 13.7, "step": 16255, "train_speed(iter/s)": 1.538424 }, { "acc": 0.95624218, "epoch": 7.621279587532224, "grad_norm": 25.045530319213867, "learning_rate": 9.715766751016523e-06, "loss": 0.22425528, "memory(GiB)": 13.7, "step": 16260, "train_speed(iter/s)": 1.538455 }, { "acc": 0.96430225, "epoch": 7.623623154441059, "grad_norm": 2.5951857566833496, "learning_rate": 9.715509067462002e-06, "loss": 0.23100405, "memory(GiB)": 13.7, "step": 16265, "train_speed(iter/s)": 1.538472 }, { "acc": 0.97439556, "epoch": 7.6259667213498945, "grad_norm": 6.576389312744141, "learning_rate": 9.715251270574156e-06, "loss": 0.1252038, "memory(GiB)": 13.7, "step": 16270, "train_speed(iter/s)": 1.538472 }, { "acc": 0.96625414, "epoch": 7.62831028825873, "grad_norm": 11.436553955078125, "learning_rate": 9.714993360359176e-06, "loss": 0.20395055, "memory(GiB)": 13.7, "step": 16275, "train_speed(iter/s)": 1.538473 }, { "acc": 0.95217266, "epoch": 7.630653855167565, "grad_norm": 7.0525126457214355, "learning_rate": 9.714735336823266e-06, "loss": 0.19474628, "memory(GiB)": 13.7, "step": 16280, "train_speed(iter/s)": 1.538464 }, { "acc": 0.98554316, "epoch": 7.6329974220764, "grad_norm": 3.547325849533081, "learning_rate": 9.714477199972626e-06, "loss": 0.05101472, "memory(GiB)": 13.7, "step": 16285, "train_speed(iter/s)": 1.538464 }, { "acc": 0.97550716, "epoch": 7.635340988985235, "grad_norm": 1.597693681716919, "learning_rate": 9.71421894981346e-06, "loss": 0.12671491, "memory(GiB)": 13.7, "step": 16290, "train_speed(iter/s)": 1.538465 }, { "acc": 0.97488613, "epoch": 7.6376845558940705, "grad_norm": 6.896924018859863, "learning_rate": 9.713960586351976e-06, "loss": 0.11335125, "memory(GiB)": 13.7, "step": 16295, "train_speed(iter/s)": 1.538466 }, { "acc": 0.95986862, "epoch": 7.640028122802906, "grad_norm": 6.740309715270996, "learning_rate": 9.713702109594385e-06, "loss": 0.20755291, "memory(GiB)": 13.7, "step": 16300, "train_speed(iter/s)": 1.53848 }, { "acc": 0.95768833, "epoch": 7.642371689711741, "grad_norm": 5.570488452911377, "learning_rate": 9.713443519546898e-06, "loss": 0.16737183, "memory(GiB)": 13.7, "step": 16305, "train_speed(iter/s)": 1.53848 }, { "acc": 0.98197918, "epoch": 7.644715256620577, "grad_norm": 7.42177152633667, "learning_rate": 9.713184816215734e-06, "loss": 0.09125165, "memory(GiB)": 13.7, "step": 16310, "train_speed(iter/s)": 1.538514 }, { "acc": 0.94731531, "epoch": 7.647058823529412, "grad_norm": 6.295188903808594, "learning_rate": 9.71292599960711e-06, "loss": 0.25218334, "memory(GiB)": 13.7, "step": 16315, "train_speed(iter/s)": 1.538535 }, { "acc": 0.97201643, "epoch": 7.649402390438247, "grad_norm": 11.032844543457031, "learning_rate": 9.712667069727245e-06, "loss": 0.18780699, "memory(GiB)": 13.7, "step": 16320, "train_speed(iter/s)": 1.538523 }, { "acc": 0.97536907, "epoch": 7.6517459573470825, "grad_norm": 3.937418222427368, "learning_rate": 9.712408026582363e-06, "loss": 0.23205142, "memory(GiB)": 13.7, "step": 16325, "train_speed(iter/s)": 1.538549 }, { "acc": 0.97427673, "epoch": 7.654089524255918, "grad_norm": 3.218677520751953, "learning_rate": 9.712148870178693e-06, "loss": 0.16801139, "memory(GiB)": 13.7, "step": 16330, "train_speed(iter/s)": 1.538551 }, { "acc": 0.96904755, "epoch": 7.656433091164753, "grad_norm": 8.630892753601074, "learning_rate": 9.711889600522464e-06, "loss": 0.17863891, "memory(GiB)": 13.7, "step": 16335, "train_speed(iter/s)": 1.538536 }, { "acc": 0.9637352, "epoch": 7.658776658073588, "grad_norm": 4.877862930297852, "learning_rate": 9.711630217619905e-06, "loss": 0.17206172, "memory(GiB)": 13.7, "step": 16340, "train_speed(iter/s)": 1.53853 }, { "acc": 0.95316658, "epoch": 7.661120224982423, "grad_norm": 7.95220422744751, "learning_rate": 9.711370721477255e-06, "loss": 0.33322258, "memory(GiB)": 13.7, "step": 16345, "train_speed(iter/s)": 1.538552 }, { "acc": 0.97091722, "epoch": 7.6634637918912585, "grad_norm": 1.0863553285598755, "learning_rate": 9.71111111210075e-06, "loss": 0.1421586, "memory(GiB)": 13.7, "step": 16350, "train_speed(iter/s)": 1.538552 }, { "acc": 0.98033733, "epoch": 7.665807358800094, "grad_norm": 2.605480909347534, "learning_rate": 9.710851389496627e-06, "loss": 0.09559621, "memory(GiB)": 13.7, "step": 16355, "train_speed(iter/s)": 1.538571 }, { "acc": 0.95992069, "epoch": 7.668150925708929, "grad_norm": 13.254982948303223, "learning_rate": 9.710591553671131e-06, "loss": 0.27187338, "memory(GiB)": 13.7, "step": 16360, "train_speed(iter/s)": 1.538552 }, { "acc": 0.95869789, "epoch": 7.670494492617764, "grad_norm": 1.9625169038772583, "learning_rate": 9.710331604630511e-06, "loss": 0.19545801, "memory(GiB)": 13.7, "step": 16365, "train_speed(iter/s)": 1.538566 }, { "acc": 0.97536278, "epoch": 7.672838059526599, "grad_norm": 6.230367660522461, "learning_rate": 9.710071542381008e-06, "loss": 0.1040215, "memory(GiB)": 13.7, "step": 16370, "train_speed(iter/s)": 1.538575 }, { "acc": 0.96917152, "epoch": 7.6751816264354344, "grad_norm": 6.389875888824463, "learning_rate": 9.70981136692888e-06, "loss": 0.17469306, "memory(GiB)": 13.7, "step": 16375, "train_speed(iter/s)": 1.538582 }, { "acc": 0.96429195, "epoch": 7.67752519334427, "grad_norm": 11.242867469787598, "learning_rate": 9.709551078280378e-06, "loss": 0.24809442, "memory(GiB)": 13.7, "step": 16380, "train_speed(iter/s)": 1.538594 }, { "acc": 0.97919722, "epoch": 7.679868760253106, "grad_norm": 4.060973644256592, "learning_rate": 9.709290676441758e-06, "loss": 0.10672066, "memory(GiB)": 13.7, "step": 16385, "train_speed(iter/s)": 1.538577 }, { "acc": 0.97085352, "epoch": 7.68221232716194, "grad_norm": 17.016733169555664, "learning_rate": 9.70903016141928e-06, "loss": 0.15976975, "memory(GiB)": 13.7, "step": 16390, "train_speed(iter/s)": 1.538565 }, { "acc": 0.97316551, "epoch": 7.684555894070776, "grad_norm": 9.075369834899902, "learning_rate": 9.708769533219205e-06, "loss": 0.17894499, "memory(GiB)": 13.7, "step": 16395, "train_speed(iter/s)": 1.538575 }, { "acc": 0.9854167, "epoch": 7.686899460979611, "grad_norm": 7.043218612670898, "learning_rate": 9.7085087918478e-06, "loss": 0.05457642, "memory(GiB)": 13.7, "step": 16400, "train_speed(iter/s)": 1.538563 }, { "acc": 0.97952461, "epoch": 7.6892430278884465, "grad_norm": 3.2408430576324463, "learning_rate": 9.70824793731133e-06, "loss": 0.1427338, "memory(GiB)": 13.7, "step": 16405, "train_speed(iter/s)": 1.538565 }, { "acc": 0.96781006, "epoch": 7.691586594797282, "grad_norm": 10.445384979248047, "learning_rate": 9.707986969616065e-06, "loss": 0.25906146, "memory(GiB)": 13.7, "step": 16410, "train_speed(iter/s)": 1.538568 }, { "acc": 0.98356647, "epoch": 7.693930161706117, "grad_norm": 4.4095563888549805, "learning_rate": 9.707725888768278e-06, "loss": 0.11394643, "memory(GiB)": 13.7, "step": 16415, "train_speed(iter/s)": 1.538585 }, { "acc": 0.96439486, "epoch": 7.696273728614952, "grad_norm": 8.572405815124512, "learning_rate": 9.707464694774246e-06, "loss": 0.27569504, "memory(GiB)": 13.7, "step": 16420, "train_speed(iter/s)": 1.538619 }, { "acc": 0.9720932, "epoch": 7.698617295523787, "grad_norm": 7.815298080444336, "learning_rate": 9.707203387640246e-06, "loss": 0.15970922, "memory(GiB)": 13.7, "step": 16425, "train_speed(iter/s)": 1.53864 }, { "acc": 0.96415825, "epoch": 7.7009608624326225, "grad_norm": 17.595256805419922, "learning_rate": 9.70694196737256e-06, "loss": 0.23725181, "memory(GiB)": 13.7, "step": 16430, "train_speed(iter/s)": 1.538639 }, { "acc": 0.95656252, "epoch": 7.703304429341458, "grad_norm": 15.359963417053223, "learning_rate": 9.70668043397747e-06, "loss": 0.16060064, "memory(GiB)": 13.7, "step": 16435, "train_speed(iter/s)": 1.53864 }, { "acc": 0.97397022, "epoch": 7.705647996250293, "grad_norm": 5.679917335510254, "learning_rate": 9.706418787461264e-06, "loss": 0.14053038, "memory(GiB)": 13.7, "step": 16440, "train_speed(iter/s)": 1.538637 }, { "acc": 0.95549984, "epoch": 7.707991563159128, "grad_norm": 5.714469909667969, "learning_rate": 9.706157027830229e-06, "loss": 0.19252653, "memory(GiB)": 13.7, "step": 16445, "train_speed(iter/s)": 1.538677 }, { "acc": 0.96659355, "epoch": 7.710335130067963, "grad_norm": 4.8031463623046875, "learning_rate": 9.70589515509066e-06, "loss": 0.23568096, "memory(GiB)": 13.7, "step": 16450, "train_speed(iter/s)": 1.538689 }, { "acc": 0.96385117, "epoch": 7.712678696976798, "grad_norm": 15.235310554504395, "learning_rate": 9.70563316924885e-06, "loss": 0.24022498, "memory(GiB)": 13.7, "step": 16455, "train_speed(iter/s)": 1.538671 }, { "acc": 0.96992426, "epoch": 7.715022263885634, "grad_norm": 8.579622268676758, "learning_rate": 9.705371070311096e-06, "loss": 0.13899616, "memory(GiB)": 13.7, "step": 16460, "train_speed(iter/s)": 1.538669 }, { "acc": 0.98331242, "epoch": 7.717365830794469, "grad_norm": 6.368840217590332, "learning_rate": 9.705108858283698e-06, "loss": 0.06258991, "memory(GiB)": 13.7, "step": 16465, "train_speed(iter/s)": 1.538673 }, { "acc": 0.95888805, "epoch": 7.719709397703305, "grad_norm": 8.10970687866211, "learning_rate": 9.704846533172957e-06, "loss": 0.2052911, "memory(GiB)": 13.7, "step": 16470, "train_speed(iter/s)": 1.538708 }, { "acc": 0.96791668, "epoch": 7.722052964612139, "grad_norm": 6.289029121398926, "learning_rate": 9.704584094985184e-06, "loss": 0.16121101, "memory(GiB)": 13.7, "step": 16475, "train_speed(iter/s)": 1.538714 }, { "acc": 0.97469254, "epoch": 7.724396531520975, "grad_norm": 7.5900702476501465, "learning_rate": 9.70432154372668e-06, "loss": 0.10752765, "memory(GiB)": 13.7, "step": 16480, "train_speed(iter/s)": 1.53872 }, { "acc": 0.96527195, "epoch": 7.7267400984298105, "grad_norm": 9.948662757873535, "learning_rate": 9.704058879403762e-06, "loss": 0.22226686, "memory(GiB)": 13.7, "step": 16485, "train_speed(iter/s)": 1.538721 }, { "acc": 0.98501444, "epoch": 7.729083665338646, "grad_norm": 5.750886917114258, "learning_rate": 9.70379610202274e-06, "loss": 0.1328933, "memory(GiB)": 13.7, "step": 16490, "train_speed(iter/s)": 1.538754 }, { "acc": 0.96799145, "epoch": 7.731427232247481, "grad_norm": 11.232162475585938, "learning_rate": 9.703533211589931e-06, "loss": 0.14530295, "memory(GiB)": 13.7, "step": 16495, "train_speed(iter/s)": 1.538765 }, { "acc": 0.98335247, "epoch": 7.733770799156316, "grad_norm": 5.591550827026367, "learning_rate": 9.703270208111654e-06, "loss": 0.16053374, "memory(GiB)": 13.7, "step": 16500, "train_speed(iter/s)": 1.53877 }, { "acc": 0.96888351, "epoch": 7.736114366065151, "grad_norm": 4.973517894744873, "learning_rate": 9.70300709159423e-06, "loss": 0.13427999, "memory(GiB)": 13.7, "step": 16505, "train_speed(iter/s)": 1.538791 }, { "acc": 0.95698471, "epoch": 7.738457932973986, "grad_norm": 11.131686210632324, "learning_rate": 9.702743862043986e-06, "loss": 0.2562732, "memory(GiB)": 13.7, "step": 16510, "train_speed(iter/s)": 1.538821 }, { "acc": 0.95749454, "epoch": 7.740801499882822, "grad_norm": 0.4719336926937103, "learning_rate": 9.702480519467247e-06, "loss": 0.27414107, "memory(GiB)": 13.7, "step": 16515, "train_speed(iter/s)": 1.538806 }, { "acc": 0.95513973, "epoch": 7.743145066791657, "grad_norm": 1.7792203426361084, "learning_rate": 9.702217063870343e-06, "loss": 0.1893132, "memory(GiB)": 13.7, "step": 16520, "train_speed(iter/s)": 1.538821 }, { "acc": 0.96833248, "epoch": 7.745488633700492, "grad_norm": 3.0676679611206055, "learning_rate": 9.701953495259608e-06, "loss": 0.16336834, "memory(GiB)": 13.7, "step": 16525, "train_speed(iter/s)": 1.538832 }, { "acc": 0.97041893, "epoch": 7.747832200609327, "grad_norm": 4.956025123596191, "learning_rate": 9.701689813641378e-06, "loss": 0.11724985, "memory(GiB)": 13.7, "step": 16530, "train_speed(iter/s)": 1.538841 }, { "acc": 0.96251755, "epoch": 7.750175767518162, "grad_norm": 3.3289713859558105, "learning_rate": 9.701426019021988e-06, "loss": 0.18571686, "memory(GiB)": 13.7, "step": 16535, "train_speed(iter/s)": 1.538841 }, { "acc": 0.97715778, "epoch": 7.752519334426998, "grad_norm": 2.3233656883239746, "learning_rate": 9.701162111407781e-06, "loss": 0.08519834, "memory(GiB)": 13.7, "step": 16540, "train_speed(iter/s)": 1.538855 }, { "acc": 0.97717266, "epoch": 7.754862901335833, "grad_norm": 2.2947020530700684, "learning_rate": 9.700898090805098e-06, "loss": 0.12832955, "memory(GiB)": 13.7, "step": 16545, "train_speed(iter/s)": 1.538861 }, { "acc": 0.95702038, "epoch": 7.757206468244668, "grad_norm": 6.200275421142578, "learning_rate": 9.700633957220287e-06, "loss": 0.23726971, "memory(GiB)": 13.7, "step": 16550, "train_speed(iter/s)": 1.538852 }, { "acc": 0.96920137, "epoch": 7.759550035153504, "grad_norm": 1.11859130859375, "learning_rate": 9.700369710659699e-06, "loss": 0.19007475, "memory(GiB)": 13.7, "step": 16555, "train_speed(iter/s)": 1.538894 }, { "acc": 0.96038685, "epoch": 7.761893602062339, "grad_norm": 27.43971061706543, "learning_rate": 9.700105351129681e-06, "loss": 0.16477872, "memory(GiB)": 13.7, "step": 16560, "train_speed(iter/s)": 1.538914 }, { "acc": 0.97740536, "epoch": 7.764237168971174, "grad_norm": 6.911987781524658, "learning_rate": 9.699840878636591e-06, "loss": 0.13175414, "memory(GiB)": 13.7, "step": 16565, "train_speed(iter/s)": 1.538898 }, { "acc": 0.96740532, "epoch": 7.76658073588001, "grad_norm": 19.878664016723633, "learning_rate": 9.699576293186784e-06, "loss": 0.19291666, "memory(GiB)": 13.7, "step": 16570, "train_speed(iter/s)": 1.538907 }, { "acc": 0.97145214, "epoch": 7.768924302788845, "grad_norm": 1.3559160232543945, "learning_rate": 9.69931159478662e-06, "loss": 0.12039998, "memory(GiB)": 13.7, "step": 16575, "train_speed(iter/s)": 1.538911 }, { "acc": 0.95129328, "epoch": 7.77126786969768, "grad_norm": 6.855823040008545, "learning_rate": 9.699046783442464e-06, "loss": 0.25061719, "memory(GiB)": 13.7, "step": 16580, "train_speed(iter/s)": 1.538936 }, { "acc": 0.97092266, "epoch": 7.773611436606515, "grad_norm": 8.139016151428223, "learning_rate": 9.69878185916068e-06, "loss": 0.1447381, "memory(GiB)": 13.7, "step": 16585, "train_speed(iter/s)": 1.538927 }, { "acc": 0.97066469, "epoch": 7.77595500351535, "grad_norm": 36.4846076965332, "learning_rate": 9.698516821947632e-06, "loss": 0.11684909, "memory(GiB)": 13.7, "step": 16590, "train_speed(iter/s)": 1.538941 }, { "acc": 0.97324409, "epoch": 7.778298570424186, "grad_norm": 3.1775848865509033, "learning_rate": 9.698251671809697e-06, "loss": 0.1538897, "memory(GiB)": 13.7, "step": 16595, "train_speed(iter/s)": 1.538961 }, { "acc": 0.96437511, "epoch": 7.780642137333021, "grad_norm": 5.116672515869141, "learning_rate": 9.697986408753243e-06, "loss": 0.20255756, "memory(GiB)": 13.7, "step": 16600, "train_speed(iter/s)": 1.538978 }, { "acc": 0.96915178, "epoch": 7.782985704241856, "grad_norm": 2.7785561084747314, "learning_rate": 9.697721032784647e-06, "loss": 0.18342153, "memory(GiB)": 13.7, "step": 16605, "train_speed(iter/s)": 1.538973 }, { "acc": 0.9791502, "epoch": 7.785329271150691, "grad_norm": 5.232326507568359, "learning_rate": 9.697455543910291e-06, "loss": 0.13093082, "memory(GiB)": 13.7, "step": 16610, "train_speed(iter/s)": 1.538994 }, { "acc": 0.97018175, "epoch": 7.787672838059526, "grad_norm": 4.2496747970581055, "learning_rate": 9.697189942136554e-06, "loss": 0.20510049, "memory(GiB)": 13.7, "step": 16615, "train_speed(iter/s)": 1.538996 }, { "acc": 0.96414547, "epoch": 7.790016404968362, "grad_norm": 4.56882905960083, "learning_rate": 9.696924227469821e-06, "loss": 0.23951373, "memory(GiB)": 13.7, "step": 16620, "train_speed(iter/s)": 1.539014 }, { "acc": 0.96052074, "epoch": 7.792359971877197, "grad_norm": 27.430557250976562, "learning_rate": 9.696658399916479e-06, "loss": 0.18401709, "memory(GiB)": 13.7, "step": 16625, "train_speed(iter/s)": 1.539019 }, { "acc": 0.96733131, "epoch": 7.794703538786033, "grad_norm": 8.303386688232422, "learning_rate": 9.696392459482915e-06, "loss": 0.20365224, "memory(GiB)": 13.7, "step": 16630, "train_speed(iter/s)": 1.539055 }, { "acc": 0.96355839, "epoch": 7.797047105694867, "grad_norm": 5.737936973571777, "learning_rate": 9.696126406175525e-06, "loss": 0.18956627, "memory(GiB)": 13.7, "step": 16635, "train_speed(iter/s)": 1.539051 }, { "acc": 0.98143311, "epoch": 7.799390672603703, "grad_norm": 0.9759249091148376, "learning_rate": 9.695860240000703e-06, "loss": 0.09951886, "memory(GiB)": 13.7, "step": 16640, "train_speed(iter/s)": 1.539068 }, { "acc": 0.96848211, "epoch": 7.801734239512538, "grad_norm": 4.108701705932617, "learning_rate": 9.695593960964845e-06, "loss": 0.15340648, "memory(GiB)": 13.7, "step": 16645, "train_speed(iter/s)": 1.539073 }, { "acc": 0.96799669, "epoch": 7.804077806421374, "grad_norm": 7.200947284698486, "learning_rate": 9.695327569074353e-06, "loss": 0.15035151, "memory(GiB)": 13.7, "step": 16650, "train_speed(iter/s)": 1.539076 }, { "acc": 0.98660717, "epoch": 7.806421373330209, "grad_norm": 7.118844032287598, "learning_rate": 9.69506106433563e-06, "loss": 0.15503759, "memory(GiB)": 13.7, "step": 16655, "train_speed(iter/s)": 1.539067 }, { "acc": 0.97453384, "epoch": 7.808764940239044, "grad_norm": 6.807535648345947, "learning_rate": 9.694794446755083e-06, "loss": 0.10436887, "memory(GiB)": 13.7, "step": 16660, "train_speed(iter/s)": 1.5391 }, { "acc": 0.96931238, "epoch": 7.811108507147879, "grad_norm": 2.6273372173309326, "learning_rate": 9.694527716339118e-06, "loss": 0.15525569, "memory(GiB)": 13.7, "step": 16665, "train_speed(iter/s)": 1.539113 }, { "acc": 0.96244431, "epoch": 7.813452074056714, "grad_norm": 5.584632873535156, "learning_rate": 9.694260873094149e-06, "loss": 0.17256596, "memory(GiB)": 13.7, "step": 16670, "train_speed(iter/s)": 1.539147 }, { "acc": 0.96875, "epoch": 7.81579564096555, "grad_norm": 5.881162166595459, "learning_rate": 9.693993917026588e-06, "loss": 0.13609121, "memory(GiB)": 13.7, "step": 16675, "train_speed(iter/s)": 1.539153 }, { "acc": 0.96569939, "epoch": 7.818139207874385, "grad_norm": 7.061618328094482, "learning_rate": 9.693726848142853e-06, "loss": 0.23750677, "memory(GiB)": 13.7, "step": 16680, "train_speed(iter/s)": 1.539181 }, { "acc": 0.98677788, "epoch": 7.82048277478322, "grad_norm": 1.56093168258667, "learning_rate": 9.693459666449362e-06, "loss": 0.05780388, "memory(GiB)": 13.7, "step": 16685, "train_speed(iter/s)": 1.539175 }, { "acc": 0.97635078, "epoch": 7.822826341692055, "grad_norm": 7.793604850769043, "learning_rate": 9.69319237195254e-06, "loss": 0.17643313, "memory(GiB)": 13.7, "step": 16690, "train_speed(iter/s)": 1.539183 }, { "acc": 0.98067713, "epoch": 7.82516990860089, "grad_norm": 3.098848819732666, "learning_rate": 9.692924964658809e-06, "loss": 0.09324331, "memory(GiB)": 13.7, "step": 16695, "train_speed(iter/s)": 1.539171 }, { "acc": 0.94841337, "epoch": 7.8275134755097255, "grad_norm": 6.742364406585693, "learning_rate": 9.692657444574597e-06, "loss": 0.26319485, "memory(GiB)": 13.7, "step": 16700, "train_speed(iter/s)": 1.539192 }, { "acc": 0.95943451, "epoch": 7.829857042418561, "grad_norm": 0.7636353373527527, "learning_rate": 9.692389811706336e-06, "loss": 0.18010831, "memory(GiB)": 13.7, "step": 16705, "train_speed(iter/s)": 1.539227 }, { "acc": 0.96954031, "epoch": 7.832200609327396, "grad_norm": 5.020031452178955, "learning_rate": 9.692122066060457e-06, "loss": 0.18131666, "memory(GiB)": 13.7, "step": 16710, "train_speed(iter/s)": 1.539254 }, { "acc": 0.99097223, "epoch": 7.834544176236232, "grad_norm": 1.9538663625717163, "learning_rate": 9.691854207643397e-06, "loss": 0.057872, "memory(GiB)": 13.7, "step": 16715, "train_speed(iter/s)": 1.539267 }, { "acc": 0.96840277, "epoch": 7.836887743145066, "grad_norm": 172.27786254882812, "learning_rate": 9.691586236461596e-06, "loss": 0.15914512, "memory(GiB)": 13.7, "step": 16720, "train_speed(iter/s)": 1.539273 }, { "acc": 0.95398121, "epoch": 7.839231310053902, "grad_norm": 13.04891300201416, "learning_rate": 9.691318152521492e-06, "loss": 0.24146709, "memory(GiB)": 13.7, "step": 16725, "train_speed(iter/s)": 1.539271 }, { "acc": 0.95013256, "epoch": 7.841574876962738, "grad_norm": 10.63722038269043, "learning_rate": 9.691049955829528e-06, "loss": 0.30983071, "memory(GiB)": 13.7, "step": 16730, "train_speed(iter/s)": 1.539291 }, { "acc": 0.9655014, "epoch": 7.843918443871573, "grad_norm": 6.6308488845825195, "learning_rate": 9.690781646392155e-06, "loss": 0.15776045, "memory(GiB)": 13.7, "step": 16735, "train_speed(iter/s)": 1.539305 }, { "acc": 0.96407804, "epoch": 7.846262010780408, "grad_norm": 13.206669807434082, "learning_rate": 9.69051322421582e-06, "loss": 0.26291146, "memory(GiB)": 13.7, "step": 16740, "train_speed(iter/s)": 1.539311 }, { "acc": 0.95541668, "epoch": 7.848605577689243, "grad_norm": 33.92976379394531, "learning_rate": 9.690244689306974e-06, "loss": 0.26190901, "memory(GiB)": 13.7, "step": 16745, "train_speed(iter/s)": 1.53935 }, { "acc": 0.97720737, "epoch": 7.850949144598078, "grad_norm": 7.8360090255737305, "learning_rate": 9.689976041672074e-06, "loss": 0.12905762, "memory(GiB)": 13.7, "step": 16750, "train_speed(iter/s)": 1.539362 }, { "acc": 0.96328373, "epoch": 7.8532927115069135, "grad_norm": 8.864005088806152, "learning_rate": 9.689707281317576e-06, "loss": 0.2076551, "memory(GiB)": 13.7, "step": 16755, "train_speed(iter/s)": 1.539377 }, { "acc": 0.98743057, "epoch": 7.855636278415749, "grad_norm": 3.367183208465576, "learning_rate": 9.68943840824994e-06, "loss": 0.08041697, "memory(GiB)": 13.7, "step": 16760, "train_speed(iter/s)": 1.539388 }, { "acc": 0.96460819, "epoch": 7.857979845324584, "grad_norm": 0.490291565656662, "learning_rate": 9.689169422475627e-06, "loss": 0.17542822, "memory(GiB)": 13.7, "step": 16765, "train_speed(iter/s)": 1.539395 }, { "acc": 0.98015623, "epoch": 7.860323412233419, "grad_norm": 7.309151649475098, "learning_rate": 9.688900324001107e-06, "loss": 0.13131603, "memory(GiB)": 13.7, "step": 16770, "train_speed(iter/s)": 1.539401 }, { "acc": 0.98696766, "epoch": 7.862666979142254, "grad_norm": 5.276813983917236, "learning_rate": 9.688631112832844e-06, "loss": 0.07261025, "memory(GiB)": 13.7, "step": 16775, "train_speed(iter/s)": 1.539398 }, { "acc": 0.95472527, "epoch": 7.8650105460510895, "grad_norm": 3.3664567470550537, "learning_rate": 9.688361788977313e-06, "loss": 0.2243402, "memory(GiB)": 13.7, "step": 16780, "train_speed(iter/s)": 1.539404 }, { "acc": 0.96347218, "epoch": 7.867354112959925, "grad_norm": 4.681986331939697, "learning_rate": 9.688092352440983e-06, "loss": 0.14686513, "memory(GiB)": 13.7, "step": 16785, "train_speed(iter/s)": 1.539405 }, { "acc": 0.95396824, "epoch": 7.86969767986876, "grad_norm": 7.652491092681885, "learning_rate": 9.687822803230333e-06, "loss": 0.18219925, "memory(GiB)": 13.7, "step": 16790, "train_speed(iter/s)": 1.539396 }, { "acc": 0.97070351, "epoch": 7.872041246777595, "grad_norm": 8.394479751586914, "learning_rate": 9.687553141351843e-06, "loss": 0.16527114, "memory(GiB)": 13.7, "step": 16795, "train_speed(iter/s)": 1.539405 }, { "acc": 0.97331848, "epoch": 7.874384813686431, "grad_norm": 2.1096110343933105, "learning_rate": 9.687283366811993e-06, "loss": 0.13289354, "memory(GiB)": 13.7, "step": 16800, "train_speed(iter/s)": 1.539405 }, { "acc": 0.96595669, "epoch": 7.876728380595266, "grad_norm": 6.765102863311768, "learning_rate": 9.68701347961727e-06, "loss": 0.1204011, "memory(GiB)": 13.7, "step": 16805, "train_speed(iter/s)": 1.539406 }, { "acc": 0.96977005, "epoch": 7.879071947504102, "grad_norm": 5.73842716217041, "learning_rate": 9.686743479774158e-06, "loss": 0.16035793, "memory(GiB)": 13.7, "step": 16810, "train_speed(iter/s)": 1.539405 }, { "acc": 0.97099209, "epoch": 7.881415514412937, "grad_norm": 7.060873031616211, "learning_rate": 9.686473367289148e-06, "loss": 0.10760304, "memory(GiB)": 13.7, "step": 16815, "train_speed(iter/s)": 1.53943 }, { "acc": 0.9665699, "epoch": 7.883759081321772, "grad_norm": 3.3277881145477295, "learning_rate": 9.686203142168733e-06, "loss": 0.15517026, "memory(GiB)": 13.7, "step": 16820, "train_speed(iter/s)": 1.539445 }, { "acc": 0.96319971, "epoch": 7.886102648230607, "grad_norm": 10.81781005859375, "learning_rate": 9.685932804419407e-06, "loss": 0.1411932, "memory(GiB)": 13.7, "step": 16825, "train_speed(iter/s)": 1.539453 }, { "acc": 0.95180016, "epoch": 7.888446215139442, "grad_norm": 8.357211112976074, "learning_rate": 9.685662354047672e-06, "loss": 0.27543757, "memory(GiB)": 13.7, "step": 16830, "train_speed(iter/s)": 1.539451 }, { "acc": 0.98258934, "epoch": 7.8907897820482775, "grad_norm": 0.741596519947052, "learning_rate": 9.685391791060023e-06, "loss": 0.14670119, "memory(GiB)": 13.7, "step": 16835, "train_speed(iter/s)": 1.539475 }, { "acc": 0.9632658, "epoch": 7.893133348957113, "grad_norm": 4.697906970977783, "learning_rate": 9.685121115462968e-06, "loss": 0.123543, "memory(GiB)": 13.7, "step": 16840, "train_speed(iter/s)": 1.53949 }, { "acc": 0.97555065, "epoch": 7.895476915865948, "grad_norm": 30.96173667907715, "learning_rate": 9.684850327263011e-06, "loss": 0.21684265, "memory(GiB)": 13.7, "step": 16845, "train_speed(iter/s)": 1.539508 }, { "acc": 0.97282696, "epoch": 7.897820482774783, "grad_norm": 7.881575107574463, "learning_rate": 9.684579426466662e-06, "loss": 0.18915818, "memory(GiB)": 13.7, "step": 16850, "train_speed(iter/s)": 1.53951 }, { "acc": 0.981534, "epoch": 7.900164049683618, "grad_norm": 2.10672664642334, "learning_rate": 9.684308413080431e-06, "loss": 0.09996287, "memory(GiB)": 13.7, "step": 16855, "train_speed(iter/s)": 1.539512 }, { "acc": 0.96166668, "epoch": 7.9025076165924535, "grad_norm": 14.064400672912598, "learning_rate": 9.684037287110835e-06, "loss": 0.20464983, "memory(GiB)": 13.7, "step": 16860, "train_speed(iter/s)": 1.539525 }, { "acc": 0.97050591, "epoch": 7.904851183501289, "grad_norm": 4.264359951019287, "learning_rate": 9.683766048564387e-06, "loss": 0.20582376, "memory(GiB)": 13.7, "step": 16865, "train_speed(iter/s)": 1.539538 }, { "acc": 0.97066469, "epoch": 7.907194750410124, "grad_norm": 13.247594833374023, "learning_rate": 9.683494697447611e-06, "loss": 0.20539503, "memory(GiB)": 13.7, "step": 16870, "train_speed(iter/s)": 1.539556 }, { "acc": 0.95946922, "epoch": 7.90953831731896, "grad_norm": 8.181381225585938, "learning_rate": 9.683223233767026e-06, "loss": 0.18170403, "memory(GiB)": 13.7, "step": 16875, "train_speed(iter/s)": 1.539551 }, { "acc": 0.96344147, "epoch": 7.911881884227794, "grad_norm": 9.905997276306152, "learning_rate": 9.682951657529159e-06, "loss": 0.19922702, "memory(GiB)": 13.7, "step": 16880, "train_speed(iter/s)": 1.539535 }, { "acc": 0.96530342, "epoch": 7.91422545113663, "grad_norm": 14.544515609741211, "learning_rate": 9.682679968740537e-06, "loss": 0.20593991, "memory(GiB)": 13.7, "step": 16885, "train_speed(iter/s)": 1.53953 }, { "acc": 0.94324093, "epoch": 7.9165690180454655, "grad_norm": 26.40766143798828, "learning_rate": 9.68240816740769e-06, "loss": 0.27082813, "memory(GiB)": 13.7, "step": 16890, "train_speed(iter/s)": 1.539562 }, { "acc": 0.96590872, "epoch": 7.918912584954301, "grad_norm": 3.4588406085968018, "learning_rate": 9.682136253537155e-06, "loss": 0.19075344, "memory(GiB)": 13.7, "step": 16895, "train_speed(iter/s)": 1.539568 }, { "acc": 0.96346102, "epoch": 7.921256151863136, "grad_norm": 10.13397216796875, "learning_rate": 9.681864227135462e-06, "loss": 0.14828546, "memory(GiB)": 13.7, "step": 16900, "train_speed(iter/s)": 1.539603 }, { "acc": 0.96880922, "epoch": 7.923599718771971, "grad_norm": 33.21638107299805, "learning_rate": 9.681592088209153e-06, "loss": 0.11360495, "memory(GiB)": 13.7, "step": 16905, "train_speed(iter/s)": 1.539613 }, { "acc": 0.97768307, "epoch": 7.925943285680806, "grad_norm": 3.8595831394195557, "learning_rate": 9.681319836764767e-06, "loss": 0.07359494, "memory(GiB)": 13.7, "step": 16910, "train_speed(iter/s)": 1.539637 }, { "acc": 0.970644, "epoch": 7.9282868525896415, "grad_norm": 18.383113861083984, "learning_rate": 9.68104747280885e-06, "loss": 0.16284393, "memory(GiB)": 13.7, "step": 16915, "train_speed(iter/s)": 1.539655 }, { "acc": 0.96775303, "epoch": 7.930630419498477, "grad_norm": 21.285289764404297, "learning_rate": 9.680774996347954e-06, "loss": 0.1503533, "memory(GiB)": 13.7, "step": 16920, "train_speed(iter/s)": 1.539659 }, { "acc": 0.98224468, "epoch": 7.932973986407312, "grad_norm": 4.957615852355957, "learning_rate": 9.680502407388618e-06, "loss": 0.11820645, "memory(GiB)": 13.7, "step": 16925, "train_speed(iter/s)": 1.539675 }, { "acc": 0.98777885, "epoch": 7.935317553316147, "grad_norm": 5.179643154144287, "learning_rate": 9.6802297059374e-06, "loss": 0.09737915, "memory(GiB)": 13.7, "step": 16930, "train_speed(iter/s)": 1.539709 }, { "acc": 0.97353477, "epoch": 7.937661120224982, "grad_norm": 10.099196434020996, "learning_rate": 9.679956892000854e-06, "loss": 0.18259581, "memory(GiB)": 13.7, "step": 16935, "train_speed(iter/s)": 1.539741 }, { "acc": 0.95658264, "epoch": 7.9400046871338175, "grad_norm": 10.001808166503906, "learning_rate": 9.679683965585537e-06, "loss": 0.25529134, "memory(GiB)": 13.7, "step": 16940, "train_speed(iter/s)": 1.539741 }, { "acc": 0.96750984, "epoch": 7.942348254042653, "grad_norm": 6.8196024894714355, "learning_rate": 9.679410926698012e-06, "loss": 0.18919718, "memory(GiB)": 13.7, "step": 16945, "train_speed(iter/s)": 1.539741 }, { "acc": 0.96958332, "epoch": 7.944691820951488, "grad_norm": 8.05240535736084, "learning_rate": 9.679137775344837e-06, "loss": 0.15459888, "memory(GiB)": 13.7, "step": 16950, "train_speed(iter/s)": 1.539756 }, { "acc": 0.96683798, "epoch": 7.947035387860323, "grad_norm": 10.189347267150879, "learning_rate": 9.678864511532582e-06, "loss": 0.22802324, "memory(GiB)": 13.7, "step": 16955, "train_speed(iter/s)": 1.53977 }, { "acc": 0.97904758, "epoch": 7.949378954769159, "grad_norm": 8.200529098510742, "learning_rate": 9.678591135267814e-06, "loss": 0.14572297, "memory(GiB)": 13.7, "step": 16960, "train_speed(iter/s)": 1.539769 }, { "acc": 0.97275095, "epoch": 7.951722521677993, "grad_norm": 10.239434242248535, "learning_rate": 9.678317646557101e-06, "loss": 0.15160427, "memory(GiB)": 13.7, "step": 16965, "train_speed(iter/s)": 1.53979 }, { "acc": 0.96506176, "epoch": 7.9540660885868295, "grad_norm": 6.231630802154541, "learning_rate": 9.678044045407022e-06, "loss": 0.30554702, "memory(GiB)": 13.7, "step": 16970, "train_speed(iter/s)": 1.539806 }, { "acc": 0.96747971, "epoch": 7.956409655495665, "grad_norm": 3.292426824569702, "learning_rate": 9.677770331824153e-06, "loss": 0.17639349, "memory(GiB)": 13.7, "step": 16975, "train_speed(iter/s)": 1.539798 }, { "acc": 0.97979164, "epoch": 7.9587532224045, "grad_norm": 4.867947101593018, "learning_rate": 9.677496505815066e-06, "loss": 0.11558785, "memory(GiB)": 13.7, "step": 16980, "train_speed(iter/s)": 1.539791 }, { "acc": 0.96812153, "epoch": 7.961096789313335, "grad_norm": 7.299894332885742, "learning_rate": 9.677222567386353e-06, "loss": 0.19805651, "memory(GiB)": 13.7, "step": 16985, "train_speed(iter/s)": 1.539797 }, { "acc": 0.96397591, "epoch": 7.96344035622217, "grad_norm": 3.9733362197875977, "learning_rate": 9.676948516544591e-06, "loss": 0.24324913, "memory(GiB)": 13.7, "step": 16990, "train_speed(iter/s)": 1.53983 }, { "acc": 0.97436008, "epoch": 7.9657839231310055, "grad_norm": 5.767712593078613, "learning_rate": 9.676674353296372e-06, "loss": 0.15648313, "memory(GiB)": 13.7, "step": 16995, "train_speed(iter/s)": 1.539871 }, { "acc": 0.97458334, "epoch": 7.968127490039841, "grad_norm": 4.735682487487793, "learning_rate": 9.676400077648284e-06, "loss": 0.11612264, "memory(GiB)": 13.7, "step": 17000, "train_speed(iter/s)": 1.539878 }, { "acc": 0.98632441, "epoch": 7.970471056948676, "grad_norm": 4.988519668579102, "learning_rate": 9.676125689606916e-06, "loss": 0.11122421, "memory(GiB)": 13.7, "step": 17005, "train_speed(iter/s)": 1.539864 }, { "acc": 0.96988773, "epoch": 7.972814623857511, "grad_norm": 0.5036294460296631, "learning_rate": 9.67585118917887e-06, "loss": 0.13601654, "memory(GiB)": 13.7, "step": 17010, "train_speed(iter/s)": 1.539882 }, { "acc": 0.95864086, "epoch": 7.975158190766346, "grad_norm": 10.228964805603027, "learning_rate": 9.675576576370742e-06, "loss": 0.29298625, "memory(GiB)": 13.7, "step": 17015, "train_speed(iter/s)": 1.539902 }, { "acc": 0.96845169, "epoch": 7.977501757675181, "grad_norm": 13.594338417053223, "learning_rate": 9.675301851189131e-06, "loss": 0.17321115, "memory(GiB)": 13.7, "step": 17020, "train_speed(iter/s)": 1.539932 }, { "acc": 0.95704575, "epoch": 7.979845324584017, "grad_norm": 12.793474197387695, "learning_rate": 9.675027013640639e-06, "loss": 0.22008886, "memory(GiB)": 13.7, "step": 17025, "train_speed(iter/s)": 1.539933 }, { "acc": 0.96703091, "epoch": 7.982188891492852, "grad_norm": 4.648350238800049, "learning_rate": 9.674752063731878e-06, "loss": 0.15925176, "memory(GiB)": 13.7, "step": 17030, "train_speed(iter/s)": 1.539916 }, { "acc": 0.97332344, "epoch": 7.984532458401687, "grad_norm": 12.940080642700195, "learning_rate": 9.674477001469452e-06, "loss": 0.18342557, "memory(GiB)": 13.7, "step": 17035, "train_speed(iter/s)": 1.539948 }, { "acc": 0.9651062, "epoch": 7.986876025310522, "grad_norm": 6.88134241104126, "learning_rate": 9.674201826859974e-06, "loss": 0.16408255, "memory(GiB)": 13.7, "step": 17040, "train_speed(iter/s)": 1.539925 }, { "acc": 0.97537775, "epoch": 7.989219592219358, "grad_norm": 6.201226234436035, "learning_rate": 9.673926539910058e-06, "loss": 0.15188835, "memory(GiB)": 13.7, "step": 17045, "train_speed(iter/s)": 1.539927 }, { "acc": 0.95934525, "epoch": 7.9915631591281935, "grad_norm": 8.836700439453125, "learning_rate": 9.673651140626324e-06, "loss": 0.22370887, "memory(GiB)": 13.7, "step": 17050, "train_speed(iter/s)": 1.539952 }, { "acc": 0.98495045, "epoch": 7.993906726037029, "grad_norm": 7.195446491241455, "learning_rate": 9.673375629015386e-06, "loss": 0.1136113, "memory(GiB)": 13.7, "step": 17055, "train_speed(iter/s)": 1.539959 }, { "acc": 0.97096729, "epoch": 7.996250292945864, "grad_norm": 13.82974624633789, "learning_rate": 9.67310000508387e-06, "loss": 0.1868873, "memory(GiB)": 13.7, "step": 17060, "train_speed(iter/s)": 1.539958 }, { "acc": 0.95909967, "epoch": 7.998593859854699, "grad_norm": 6.764557361602783, "learning_rate": 9.672824268838402e-06, "loss": 0.23086402, "memory(GiB)": 13.7, "step": 17065, "train_speed(iter/s)": 1.539991 }, { "acc": 0.96599798, "epoch": 8.000937426763533, "grad_norm": 7.2408366203308105, "learning_rate": 9.672548420285606e-06, "loss": 0.2805532, "memory(GiB)": 13.7, "step": 17070, "train_speed(iter/s)": 1.539915 }, { "acc": 0.96622019, "epoch": 8.00328099367237, "grad_norm": 3.887537956237793, "learning_rate": 9.672272459432118e-06, "loss": 0.16814088, "memory(GiB)": 13.7, "step": 17075, "train_speed(iter/s)": 1.539947 }, { "acc": 0.98210392, "epoch": 8.005624560581204, "grad_norm": 2.236785888671875, "learning_rate": 9.671996386284565e-06, "loss": 0.07799101, "memory(GiB)": 13.7, "step": 17080, "train_speed(iter/s)": 1.539924 }, { "acc": 0.97534723, "epoch": 8.00796812749004, "grad_norm": 7.095798492431641, "learning_rate": 9.671720200849588e-06, "loss": 0.15679798, "memory(GiB)": 13.7, "step": 17085, "train_speed(iter/s)": 1.539909 }, { "acc": 0.98809528, "epoch": 8.010311694398876, "grad_norm": 10.664420127868652, "learning_rate": 9.671443903133823e-06, "loss": 0.07794357, "memory(GiB)": 13.7, "step": 17090, "train_speed(iter/s)": 1.539945 }, { "acc": 0.95986662, "epoch": 8.01265526130771, "grad_norm": 6.81536865234375, "learning_rate": 9.671167493143911e-06, "loss": 0.18308231, "memory(GiB)": 13.7, "step": 17095, "train_speed(iter/s)": 1.539946 }, { "acc": 0.97443457, "epoch": 8.014998828216546, "grad_norm": 19.66705894470215, "learning_rate": 9.670890970886498e-06, "loss": 0.11849971, "memory(GiB)": 13.7, "step": 17100, "train_speed(iter/s)": 1.539939 }, { "acc": 0.96679783, "epoch": 8.01734239512538, "grad_norm": 4.104161739349365, "learning_rate": 9.670614336368229e-06, "loss": 0.13535564, "memory(GiB)": 13.7, "step": 17105, "train_speed(iter/s)": 1.539948 }, { "acc": 0.97147961, "epoch": 8.019685962034217, "grad_norm": 7.496381759643555, "learning_rate": 9.670337589595752e-06, "loss": 0.13761827, "memory(GiB)": 13.7, "step": 17110, "train_speed(iter/s)": 1.53999 }, { "acc": 0.96977682, "epoch": 8.022029528943051, "grad_norm": 7.216104030609131, "learning_rate": 9.670060730575724e-06, "loss": 0.09794255, "memory(GiB)": 13.7, "step": 17115, "train_speed(iter/s)": 1.539988 }, { "acc": 0.98226013, "epoch": 8.024373095851887, "grad_norm": 305.85272216796875, "learning_rate": 9.669783759314797e-06, "loss": 0.14301488, "memory(GiB)": 13.7, "step": 17120, "train_speed(iter/s)": 1.540015 }, { "acc": 0.9776042, "epoch": 8.026716662760721, "grad_norm": 9.42199420928955, "learning_rate": 9.669506675819624e-06, "loss": 0.10201776, "memory(GiB)": 13.7, "step": 17125, "train_speed(iter/s)": 1.539998 }, { "acc": 0.98309288, "epoch": 8.029060229669557, "grad_norm": 7.672924518585205, "learning_rate": 9.669229480096873e-06, "loss": 0.1033008, "memory(GiB)": 13.7, "step": 17130, "train_speed(iter/s)": 1.539989 }, { "acc": 0.96958141, "epoch": 8.031403796578392, "grad_norm": 12.047307014465332, "learning_rate": 9.668952172153203e-06, "loss": 0.18330348, "memory(GiB)": 13.7, "step": 17135, "train_speed(iter/s)": 1.540025 }, { "acc": 0.97166672, "epoch": 8.033747363487228, "grad_norm": 6.894463539123535, "learning_rate": 9.668674751995278e-06, "loss": 0.17484035, "memory(GiB)": 13.7, "step": 17140, "train_speed(iter/s)": 1.540012 }, { "acc": 0.97371521, "epoch": 8.036090930396062, "grad_norm": 5.621033191680908, "learning_rate": 9.668397219629769e-06, "loss": 0.16382895, "memory(GiB)": 13.7, "step": 17145, "train_speed(iter/s)": 1.540023 }, { "acc": 0.94876986, "epoch": 8.038434497304898, "grad_norm": 14.656013488769531, "learning_rate": 9.668119575063348e-06, "loss": 0.22509151, "memory(GiB)": 13.7, "step": 17150, "train_speed(iter/s)": 1.540029 }, { "acc": 0.97621107, "epoch": 8.040778064213733, "grad_norm": 12.886636734008789, "learning_rate": 9.667841818302684e-06, "loss": 0.1061103, "memory(GiB)": 13.7, "step": 17155, "train_speed(iter/s)": 1.540051 }, { "acc": 0.96734114, "epoch": 8.043121631122569, "grad_norm": 7.99383544921875, "learning_rate": 9.667563949354457e-06, "loss": 0.17323012, "memory(GiB)": 13.7, "step": 17160, "train_speed(iter/s)": 1.540065 }, { "acc": 0.96961222, "epoch": 8.045465198031403, "grad_norm": 1.5533382892608643, "learning_rate": 9.667285968225346e-06, "loss": 0.14870778, "memory(GiB)": 13.7, "step": 17165, "train_speed(iter/s)": 1.54007 }, { "acc": 0.9708333, "epoch": 8.047808764940239, "grad_norm": 2.4915149211883545, "learning_rate": 9.66700787492203e-06, "loss": 0.17002084, "memory(GiB)": 13.7, "step": 17170, "train_speed(iter/s)": 1.540085 }, { "acc": 0.95157642, "epoch": 8.050152331849075, "grad_norm": 13.923967361450195, "learning_rate": 9.666729669451198e-06, "loss": 0.27506237, "memory(GiB)": 13.7, "step": 17175, "train_speed(iter/s)": 1.540056 }, { "acc": 0.97710476, "epoch": 8.05249589875791, "grad_norm": 6.571476459503174, "learning_rate": 9.666451351819533e-06, "loss": 0.12759457, "memory(GiB)": 13.7, "step": 17180, "train_speed(iter/s)": 1.540069 }, { "acc": 0.96039829, "epoch": 8.054839465666745, "grad_norm": 5.834791660308838, "learning_rate": 9.666172922033726e-06, "loss": 0.23237834, "memory(GiB)": 13.7, "step": 17185, "train_speed(iter/s)": 1.540081 }, { "acc": 0.95675592, "epoch": 8.05718303257558, "grad_norm": 4.991513729095459, "learning_rate": 9.665894380100469e-06, "loss": 0.22886462, "memory(GiB)": 13.7, "step": 17190, "train_speed(iter/s)": 1.540094 }, { "acc": 0.97225275, "epoch": 8.059526599484416, "grad_norm": 8.579452514648438, "learning_rate": 9.665615726026461e-06, "loss": 0.1810374, "memory(GiB)": 13.7, "step": 17195, "train_speed(iter/s)": 1.540091 }, { "acc": 0.9648942, "epoch": 8.06187016639325, "grad_norm": 10.442150115966797, "learning_rate": 9.665336959818395e-06, "loss": 0.21727455, "memory(GiB)": 13.7, "step": 17200, "train_speed(iter/s)": 1.540094 }, { "acc": 0.95656662, "epoch": 8.064213733302086, "grad_norm": 10.373702049255371, "learning_rate": 9.665058081482974e-06, "loss": 0.19131982, "memory(GiB)": 13.7, "step": 17205, "train_speed(iter/s)": 1.540112 }, { "acc": 0.97986107, "epoch": 8.06655730021092, "grad_norm": 5.397919654846191, "learning_rate": 9.6647790910269e-06, "loss": 0.06480929, "memory(GiB)": 13.7, "step": 17210, "train_speed(iter/s)": 1.540113 }, { "acc": 0.95976191, "epoch": 8.068900867119757, "grad_norm": 8.04063606262207, "learning_rate": 9.664499988456883e-06, "loss": 0.24598298, "memory(GiB)": 13.7, "step": 17215, "train_speed(iter/s)": 1.540128 }, { "acc": 0.97714291, "epoch": 8.071244434028591, "grad_norm": 9.405165672302246, "learning_rate": 9.664220773779625e-06, "loss": 0.12246089, "memory(GiB)": 13.7, "step": 17220, "train_speed(iter/s)": 1.540133 }, { "acc": 0.9726347, "epoch": 8.073588000937427, "grad_norm": 7.7821149826049805, "learning_rate": 9.663941447001844e-06, "loss": 0.12026441, "memory(GiB)": 13.7, "step": 17225, "train_speed(iter/s)": 1.54015 }, { "acc": 0.97515011, "epoch": 8.075931567846261, "grad_norm": 8.499667167663574, "learning_rate": 9.663662008130249e-06, "loss": 0.09680017, "memory(GiB)": 13.7, "step": 17230, "train_speed(iter/s)": 1.540147 }, { "acc": 0.97644253, "epoch": 8.078275134755097, "grad_norm": 5.229824066162109, "learning_rate": 9.663382457171561e-06, "loss": 0.17532475, "memory(GiB)": 13.7, "step": 17235, "train_speed(iter/s)": 1.540166 }, { "acc": 0.98345194, "epoch": 8.080618701663932, "grad_norm": 3.8696420192718506, "learning_rate": 9.663102794132497e-06, "loss": 0.10299985, "memory(GiB)": 13.7, "step": 17240, "train_speed(iter/s)": 1.540164 }, { "acc": 0.96945753, "epoch": 8.082962268572768, "grad_norm": 4.1929030418396, "learning_rate": 9.662823019019779e-06, "loss": 0.21133223, "memory(GiB)": 13.7, "step": 17245, "train_speed(iter/s)": 1.540174 }, { "acc": 0.95272827, "epoch": 8.085305835481604, "grad_norm": 13.798128128051758, "learning_rate": 9.662543131840133e-06, "loss": 0.26281233, "memory(GiB)": 13.7, "step": 17250, "train_speed(iter/s)": 1.54019 }, { "acc": 0.9672677, "epoch": 8.087649402390438, "grad_norm": 3.9508814811706543, "learning_rate": 9.662263132600287e-06, "loss": 0.10903172, "memory(GiB)": 13.7, "step": 17255, "train_speed(iter/s)": 1.54018 }, { "acc": 0.95733585, "epoch": 8.089992969299274, "grad_norm": 6.628971576690674, "learning_rate": 9.661983021306968e-06, "loss": 0.25294974, "memory(GiB)": 13.7, "step": 17260, "train_speed(iter/s)": 1.540179 }, { "acc": 0.98481064, "epoch": 8.092336536208109, "grad_norm": 4.826632022857666, "learning_rate": 9.661702797966912e-06, "loss": 0.08842258, "memory(GiB)": 13.7, "step": 17265, "train_speed(iter/s)": 1.540208 }, { "acc": 0.9819478, "epoch": 8.094680103116945, "grad_norm": 3.949100971221924, "learning_rate": 9.661422462586855e-06, "loss": 0.10690786, "memory(GiB)": 13.7, "step": 17270, "train_speed(iter/s)": 1.540216 }, { "acc": 0.96483421, "epoch": 8.097023670025779, "grad_norm": 4.522622108459473, "learning_rate": 9.661142015173534e-06, "loss": 0.19558122, "memory(GiB)": 13.7, "step": 17275, "train_speed(iter/s)": 1.540243 }, { "acc": 0.96631289, "epoch": 8.099367236934615, "grad_norm": 9.055547714233398, "learning_rate": 9.66086145573369e-06, "loss": 0.16855893, "memory(GiB)": 13.7, "step": 17280, "train_speed(iter/s)": 1.540284 }, { "acc": 0.96841354, "epoch": 8.10171080384345, "grad_norm": 5.4042181968688965, "learning_rate": 9.660580784274068e-06, "loss": 0.1962445, "memory(GiB)": 13.7, "step": 17285, "train_speed(iter/s)": 1.540279 }, { "acc": 0.98130035, "epoch": 8.104054370752285, "grad_norm": 15.266656875610352, "learning_rate": 9.660300000801413e-06, "loss": 0.11627542, "memory(GiB)": 13.7, "step": 17290, "train_speed(iter/s)": 1.540296 }, { "acc": 0.9659276, "epoch": 8.10639793766112, "grad_norm": 4.577246189117432, "learning_rate": 9.660019105322473e-06, "loss": 0.17233982, "memory(GiB)": 13.7, "step": 17295, "train_speed(iter/s)": 1.540301 }, { "acc": 0.98270836, "epoch": 8.108741504569956, "grad_norm": 3.843198776245117, "learning_rate": 9.659738097844002e-06, "loss": 0.11104105, "memory(GiB)": 13.7, "step": 17300, "train_speed(iter/s)": 1.540306 }, { "acc": 0.9552846, "epoch": 8.11108507147879, "grad_norm": 8.514615058898926, "learning_rate": 9.659456978372757e-06, "loss": 0.20218081, "memory(GiB)": 13.7, "step": 17305, "train_speed(iter/s)": 1.540338 }, { "acc": 0.96246777, "epoch": 8.113428638387626, "grad_norm": 7.71929407119751, "learning_rate": 9.65917574691549e-06, "loss": 0.26255093, "memory(GiB)": 13.7, "step": 17310, "train_speed(iter/s)": 1.540348 }, { "acc": 0.97260418, "epoch": 8.11577220529646, "grad_norm": 7.631932258605957, "learning_rate": 9.65889440347896e-06, "loss": 0.20651133, "memory(GiB)": 13.7, "step": 17315, "train_speed(iter/s)": 1.540374 }, { "acc": 0.96679831, "epoch": 8.118115772205297, "grad_norm": 3.579719066619873, "learning_rate": 9.658612948069938e-06, "loss": 0.17188026, "memory(GiB)": 13.7, "step": 17320, "train_speed(iter/s)": 1.54037 }, { "acc": 0.96191216, "epoch": 8.12045933911413, "grad_norm": 7.886780261993408, "learning_rate": 9.65833138069518e-06, "loss": 0.19298577, "memory(GiB)": 13.7, "step": 17325, "train_speed(iter/s)": 1.540403 }, { "acc": 0.97588673, "epoch": 8.122802906022967, "grad_norm": 5.394309043884277, "learning_rate": 9.65804970136146e-06, "loss": 0.11329825, "memory(GiB)": 13.7, "step": 17330, "train_speed(iter/s)": 1.540412 }, { "acc": 0.96645832, "epoch": 8.125146472931803, "grad_norm": 6.779317855834961, "learning_rate": 9.657767910075545e-06, "loss": 0.1596832, "memory(GiB)": 13.7, "step": 17335, "train_speed(iter/s)": 1.540424 }, { "acc": 0.97165518, "epoch": 8.127490039840637, "grad_norm": 8.465618133544922, "learning_rate": 9.65748600684421e-06, "loss": 0.20861707, "memory(GiB)": 13.7, "step": 17340, "train_speed(iter/s)": 1.54042 }, { "acc": 0.95870914, "epoch": 8.129833606749473, "grad_norm": 7.874786853790283, "learning_rate": 9.65720399167423e-06, "loss": 0.16342595, "memory(GiB)": 13.7, "step": 17345, "train_speed(iter/s)": 1.540434 }, { "acc": 0.95747776, "epoch": 8.132177173658308, "grad_norm": 4.227302074432373, "learning_rate": 9.656921864572387e-06, "loss": 0.17908939, "memory(GiB)": 13.7, "step": 17350, "train_speed(iter/s)": 1.540449 }, { "acc": 0.98197498, "epoch": 8.134520740567144, "grad_norm": 6.605560779571533, "learning_rate": 9.656639625545458e-06, "loss": 0.08534822, "memory(GiB)": 13.7, "step": 17355, "train_speed(iter/s)": 1.540459 }, { "acc": 0.94954529, "epoch": 8.136864307475978, "grad_norm": 11.091843605041504, "learning_rate": 9.656357274600228e-06, "loss": 0.25479536, "memory(GiB)": 13.7, "step": 17360, "train_speed(iter/s)": 1.540484 }, { "acc": 0.97075472, "epoch": 8.139207874384814, "grad_norm": 6.991400241851807, "learning_rate": 9.656074811743487e-06, "loss": 0.17528162, "memory(GiB)": 13.7, "step": 17365, "train_speed(iter/s)": 1.540474 }, { "acc": 0.95535851, "epoch": 8.141551441293648, "grad_norm": 4.27592134475708, "learning_rate": 9.65579223698202e-06, "loss": 0.29420948, "memory(GiB)": 13.7, "step": 17370, "train_speed(iter/s)": 1.54048 }, { "acc": 0.97782402, "epoch": 8.143895008202485, "grad_norm": 18.509336471557617, "learning_rate": 9.655509550322624e-06, "loss": 0.20017145, "memory(GiB)": 13.7, "step": 17375, "train_speed(iter/s)": 1.54049 }, { "acc": 0.96937828, "epoch": 8.146238575111319, "grad_norm": 10.98658275604248, "learning_rate": 9.655226751772091e-06, "loss": 0.19464666, "memory(GiB)": 13.7, "step": 17380, "train_speed(iter/s)": 1.540517 }, { "acc": 0.98500004, "epoch": 8.148582142020155, "grad_norm": 1.7001104354858398, "learning_rate": 9.654943841337217e-06, "loss": 0.09161693, "memory(GiB)": 13.7, "step": 17385, "train_speed(iter/s)": 1.54057 }, { "acc": 0.97948322, "epoch": 8.15092570892899, "grad_norm": 2.967573642730713, "learning_rate": 9.654660819024807e-06, "loss": 0.12280411, "memory(GiB)": 13.7, "step": 17390, "train_speed(iter/s)": 1.54059 }, { "acc": 0.95233631, "epoch": 8.153269275837825, "grad_norm": 11.537970542907715, "learning_rate": 9.654377684841658e-06, "loss": 0.15273826, "memory(GiB)": 13.7, "step": 17395, "train_speed(iter/s)": 1.540603 }, { "acc": 0.98035793, "epoch": 8.15561284274666, "grad_norm": 5.710811614990234, "learning_rate": 9.65409443879458e-06, "loss": 0.08725511, "memory(GiB)": 13.7, "step": 17400, "train_speed(iter/s)": 1.540619 }, { "acc": 0.97842264, "epoch": 8.157956409655496, "grad_norm": 8.303184509277344, "learning_rate": 9.653811080890378e-06, "loss": 0.08807341, "memory(GiB)": 13.7, "step": 17405, "train_speed(iter/s)": 1.540621 }, { "acc": 0.98001986, "epoch": 8.16029997656433, "grad_norm": 3.835294485092163, "learning_rate": 9.65352761113587e-06, "loss": 0.10178044, "memory(GiB)": 13.7, "step": 17410, "train_speed(iter/s)": 1.540627 }, { "acc": 0.98274002, "epoch": 8.162643543473166, "grad_norm": 5.630791187286377, "learning_rate": 9.65324402953786e-06, "loss": 0.08527869, "memory(GiB)": 13.7, "step": 17415, "train_speed(iter/s)": 1.540636 }, { "acc": 0.95219765, "epoch": 8.164987110382002, "grad_norm": 12.706259727478027, "learning_rate": 9.65296033610317e-06, "loss": 0.2441885, "memory(GiB)": 13.7, "step": 17420, "train_speed(iter/s)": 1.540641 }, { "acc": 0.97951393, "epoch": 8.167330677290837, "grad_norm": 4.923243522644043, "learning_rate": 9.652676530838617e-06, "loss": 0.12093961, "memory(GiB)": 13.7, "step": 17425, "train_speed(iter/s)": 1.540638 }, { "acc": 0.98423605, "epoch": 8.169674244199673, "grad_norm": 7.226844310760498, "learning_rate": 9.652392613751026e-06, "loss": 0.0886951, "memory(GiB)": 13.7, "step": 17430, "train_speed(iter/s)": 1.540637 }, { "acc": 0.95215282, "epoch": 8.172017811108507, "grad_norm": 12.498889923095703, "learning_rate": 9.652108584847218e-06, "loss": 0.29217324, "memory(GiB)": 13.7, "step": 17435, "train_speed(iter/s)": 1.540652 }, { "acc": 0.98197308, "epoch": 8.174361378017343, "grad_norm": 6.78230094909668, "learning_rate": 9.651824444134022e-06, "loss": 0.08008893, "memory(GiB)": 13.7, "step": 17440, "train_speed(iter/s)": 1.540653 }, { "acc": 0.96052494, "epoch": 8.176704944926177, "grad_norm": 113.07514190673828, "learning_rate": 9.651540191618267e-06, "loss": 0.2378232, "memory(GiB)": 13.7, "step": 17445, "train_speed(iter/s)": 1.54067 }, { "acc": 0.97953873, "epoch": 8.179048511835013, "grad_norm": 4.365645885467529, "learning_rate": 9.651255827306785e-06, "loss": 0.16393675, "memory(GiB)": 13.7, "step": 17450, "train_speed(iter/s)": 1.540671 }, { "acc": 0.94721088, "epoch": 8.181392078743848, "grad_norm": 9.802062034606934, "learning_rate": 9.650971351206411e-06, "loss": 0.28400822, "memory(GiB)": 13.7, "step": 17455, "train_speed(iter/s)": 1.540678 }, { "acc": 0.9905304, "epoch": 8.183735645652684, "grad_norm": 4.213434219360352, "learning_rate": 9.650686763323985e-06, "loss": 0.07941315, "memory(GiB)": 13.7, "step": 17460, "train_speed(iter/s)": 1.540693 }, { "acc": 0.96868372, "epoch": 8.186079212561518, "grad_norm": 0.10190506279468536, "learning_rate": 9.650402063666348e-06, "loss": 0.19110081, "memory(GiB)": 13.7, "step": 17465, "train_speed(iter/s)": 1.540705 }, { "acc": 0.97907696, "epoch": 8.188422779470354, "grad_norm": 6.574291706085205, "learning_rate": 9.650117252240338e-06, "loss": 0.12970169, "memory(GiB)": 13.7, "step": 17470, "train_speed(iter/s)": 1.540701 }, { "acc": 0.97790585, "epoch": 8.190766346379188, "grad_norm": 7.349337577819824, "learning_rate": 9.649832329052806e-06, "loss": 0.12714187, "memory(GiB)": 13.7, "step": 17475, "train_speed(iter/s)": 1.540697 }, { "acc": 0.98115578, "epoch": 8.193109913288025, "grad_norm": 9.428752899169922, "learning_rate": 9.6495472941106e-06, "loss": 0.1393425, "memory(GiB)": 13.7, "step": 17480, "train_speed(iter/s)": 1.5407 }, { "acc": 0.97274799, "epoch": 8.195453480196859, "grad_norm": 6.641345977783203, "learning_rate": 9.649262147420569e-06, "loss": 0.13701591, "memory(GiB)": 13.7, "step": 17485, "train_speed(iter/s)": 1.54073 }, { "acc": 0.97568455, "epoch": 8.197797047105695, "grad_norm": 3.7693979740142822, "learning_rate": 9.64897688898957e-06, "loss": 0.13756146, "memory(GiB)": 13.7, "step": 17490, "train_speed(iter/s)": 1.540717 }, { "acc": 0.98255625, "epoch": 8.200140614014531, "grad_norm": 4.974018573760986, "learning_rate": 9.648691518824455e-06, "loss": 0.16728786, "memory(GiB)": 13.7, "step": 17495, "train_speed(iter/s)": 1.540742 }, { "acc": 0.96346102, "epoch": 8.202484180923365, "grad_norm": 6.095403671264648, "learning_rate": 9.64840603693209e-06, "loss": 0.18965149, "memory(GiB)": 13.7, "step": 17500, "train_speed(iter/s)": 1.540753 }, { "acc": 0.96460075, "epoch": 8.204827747832201, "grad_norm": 5.605024814605713, "learning_rate": 9.648120443319331e-06, "loss": 0.18146927, "memory(GiB)": 13.7, "step": 17505, "train_speed(iter/s)": 1.540772 }, { "acc": 0.95925102, "epoch": 8.207171314741036, "grad_norm": 8.64977741241455, "learning_rate": 9.647834737993046e-06, "loss": 0.28060131, "memory(GiB)": 13.7, "step": 17510, "train_speed(iter/s)": 1.540789 }, { "acc": 0.96661701, "epoch": 8.209514881649872, "grad_norm": 9.294489860534668, "learning_rate": 9.6475489209601e-06, "loss": 0.1806199, "memory(GiB)": 13.7, "step": 17515, "train_speed(iter/s)": 1.540807 }, { "acc": 0.98130455, "epoch": 8.211858448558706, "grad_norm": 6.095012664794922, "learning_rate": 9.647262992227369e-06, "loss": 0.09707512, "memory(GiB)": 13.7, "step": 17520, "train_speed(iter/s)": 1.540815 }, { "acc": 0.98166656, "epoch": 8.214202015467542, "grad_norm": 4.3438239097595215, "learning_rate": 9.646976951801716e-06, "loss": 0.0795023, "memory(GiB)": 13.7, "step": 17525, "train_speed(iter/s)": 1.540823 }, { "acc": 0.98055553, "epoch": 8.216545582376376, "grad_norm": 0.9470287561416626, "learning_rate": 9.646690799690026e-06, "loss": 0.08312007, "memory(GiB)": 13.7, "step": 17530, "train_speed(iter/s)": 1.540824 }, { "acc": 0.97137356, "epoch": 8.218889149285213, "grad_norm": 6.636977195739746, "learning_rate": 9.646404535899171e-06, "loss": 0.13683205, "memory(GiB)": 13.7, "step": 17535, "train_speed(iter/s)": 1.540826 }, { "acc": 0.97813454, "epoch": 8.221232716194047, "grad_norm": 5.517277240753174, "learning_rate": 9.646118160436034e-06, "loss": 0.08862918, "memory(GiB)": 13.7, "step": 17540, "train_speed(iter/s)": 1.54085 }, { "acc": 0.97388964, "epoch": 8.223576283102883, "grad_norm": 2.2013816833496094, "learning_rate": 9.6458316733075e-06, "loss": 0.10952742, "memory(GiB)": 13.7, "step": 17545, "train_speed(iter/s)": 1.540853 }, { "acc": 0.9864584, "epoch": 8.225919850011717, "grad_norm": 3.794104814529419, "learning_rate": 9.645545074520452e-06, "loss": 0.1160898, "memory(GiB)": 13.7, "step": 17550, "train_speed(iter/s)": 1.540863 }, { "acc": 0.96665001, "epoch": 8.228263416920553, "grad_norm": 0.6174826622009277, "learning_rate": 9.645258364081782e-06, "loss": 0.17857678, "memory(GiB)": 13.7, "step": 17555, "train_speed(iter/s)": 1.540885 }, { "acc": 0.94887543, "epoch": 8.230606983829388, "grad_norm": 6.395421981811523, "learning_rate": 9.644971541998379e-06, "loss": 0.18148937, "memory(GiB)": 13.7, "step": 17560, "train_speed(iter/s)": 1.540905 }, { "acc": 0.95853891, "epoch": 8.232950550738224, "grad_norm": 3.7211358547210693, "learning_rate": 9.64468460827714e-06, "loss": 0.21224217, "memory(GiB)": 13.7, "step": 17565, "train_speed(iter/s)": 1.540917 }, { "acc": 0.98284931, "epoch": 8.235294117647058, "grad_norm": 5.514648914337158, "learning_rate": 9.644397562924958e-06, "loss": 0.08340567, "memory(GiB)": 13.7, "step": 17570, "train_speed(iter/s)": 1.540916 }, { "acc": 0.96708202, "epoch": 8.237637684555894, "grad_norm": 5.830240249633789, "learning_rate": 9.644110405948736e-06, "loss": 0.20705047, "memory(GiB)": 13.7, "step": 17575, "train_speed(iter/s)": 1.540925 }, { "acc": 0.9696023, "epoch": 8.23998125146473, "grad_norm": 8.45623779296875, "learning_rate": 9.643823137355376e-06, "loss": 0.16122525, "memory(GiB)": 13.7, "step": 17580, "train_speed(iter/s)": 1.540948 }, { "acc": 0.96832018, "epoch": 8.242324818373564, "grad_norm": 7.441836833953857, "learning_rate": 9.643535757151782e-06, "loss": 0.1328284, "memory(GiB)": 13.7, "step": 17585, "train_speed(iter/s)": 1.540937 }, { "acc": 0.98782196, "epoch": 8.2446683852824, "grad_norm": 2.6380984783172607, "learning_rate": 9.64324826534486e-06, "loss": 0.09713761, "memory(GiB)": 13.7, "step": 17590, "train_speed(iter/s)": 1.540952 }, { "acc": 0.96849375, "epoch": 8.247011952191235, "grad_norm": 1.977329134941101, "learning_rate": 9.642960661941525e-06, "loss": 0.11371939, "memory(GiB)": 13.7, "step": 17595, "train_speed(iter/s)": 1.540966 }, { "acc": 0.98343754, "epoch": 8.249355519100071, "grad_norm": 10.326642990112305, "learning_rate": 9.642672946948686e-06, "loss": 0.10094657, "memory(GiB)": 13.7, "step": 17600, "train_speed(iter/s)": 1.540969 }, { "acc": 0.99282293, "epoch": 8.251699086008905, "grad_norm": 0.46227529644966125, "learning_rate": 9.64238512037326e-06, "loss": 0.06062176, "memory(GiB)": 13.7, "step": 17605, "train_speed(iter/s)": 1.540966 }, { "acc": 0.9686842, "epoch": 8.254042652917741, "grad_norm": 40.12831497192383, "learning_rate": 9.642097182222165e-06, "loss": 0.19193527, "memory(GiB)": 13.7, "step": 17610, "train_speed(iter/s)": 1.540974 }, { "acc": 0.98225079, "epoch": 8.256386219826576, "grad_norm": 4.252617835998535, "learning_rate": 9.641809132502324e-06, "loss": 0.1625888, "memory(GiB)": 13.7, "step": 17615, "train_speed(iter/s)": 1.540965 }, { "acc": 0.97504425, "epoch": 8.258729786735412, "grad_norm": 0.11478795111179352, "learning_rate": 9.64152097122066e-06, "loss": 0.14204543, "memory(GiB)": 13.7, "step": 17620, "train_speed(iter/s)": 1.540965 }, { "acc": 0.97850695, "epoch": 8.261073353644246, "grad_norm": 11.992341041564941, "learning_rate": 9.641232698384097e-06, "loss": 0.09040736, "memory(GiB)": 13.7, "step": 17625, "train_speed(iter/s)": 1.540983 }, { "acc": 0.96799679, "epoch": 8.263416920553082, "grad_norm": 6.624368190765381, "learning_rate": 9.640944313999565e-06, "loss": 0.12652222, "memory(GiB)": 13.7, "step": 17630, "train_speed(iter/s)": 1.54101 }, { "acc": 0.96621103, "epoch": 8.265760487461916, "grad_norm": 4.417349815368652, "learning_rate": 9.640655818074e-06, "loss": 0.19396363, "memory(GiB)": 13.7, "step": 17635, "train_speed(iter/s)": 1.540981 }, { "acc": 0.98488102, "epoch": 8.268104054370752, "grad_norm": 8.830412864685059, "learning_rate": 9.640367210614332e-06, "loss": 0.07162523, "memory(GiB)": 13.7, "step": 17640, "train_speed(iter/s)": 1.540976 }, { "acc": 0.98005953, "epoch": 8.270447621279587, "grad_norm": 6.268989086151123, "learning_rate": 9.640078491627498e-06, "loss": 0.11516908, "memory(GiB)": 13.7, "step": 17645, "train_speed(iter/s)": 1.541002 }, { "acc": 0.96319027, "epoch": 8.272791188188423, "grad_norm": 4.222204685211182, "learning_rate": 9.63978966112044e-06, "loss": 0.24496403, "memory(GiB)": 13.7, "step": 17650, "train_speed(iter/s)": 1.541019 }, { "acc": 0.96822414, "epoch": 8.275134755097259, "grad_norm": 2.6055006980895996, "learning_rate": 9.6395007191001e-06, "loss": 0.23507166, "memory(GiB)": 13.7, "step": 17655, "train_speed(iter/s)": 1.541037 }, { "acc": 0.98284225, "epoch": 8.277478322006093, "grad_norm": 3.2698135375976562, "learning_rate": 9.639211665573422e-06, "loss": 0.10030265, "memory(GiB)": 13.7, "step": 17660, "train_speed(iter/s)": 1.541067 }, { "acc": 0.97766342, "epoch": 8.27982188891493, "grad_norm": 4.837192058563232, "learning_rate": 9.638922500547356e-06, "loss": 0.15147644, "memory(GiB)": 13.7, "step": 17665, "train_speed(iter/s)": 1.541083 }, { "acc": 0.97652779, "epoch": 8.282165455823764, "grad_norm": 6.799649715423584, "learning_rate": 9.63863322402885e-06, "loss": 0.09518633, "memory(GiB)": 13.7, "step": 17670, "train_speed(iter/s)": 1.541107 }, { "acc": 0.95790443, "epoch": 8.2845090227326, "grad_norm": 5.415162563323975, "learning_rate": 9.638343836024861e-06, "loss": 0.25263627, "memory(GiB)": 13.7, "step": 17675, "train_speed(iter/s)": 1.541123 }, { "acc": 0.9526042, "epoch": 8.286852589641434, "grad_norm": 4.535107612609863, "learning_rate": 9.638054336542342e-06, "loss": 0.16998966, "memory(GiB)": 13.7, "step": 17680, "train_speed(iter/s)": 1.54115 }, { "acc": 0.95612917, "epoch": 8.28919615655027, "grad_norm": 4.804324626922607, "learning_rate": 9.637764725588253e-06, "loss": 0.2345448, "memory(GiB)": 13.7, "step": 17685, "train_speed(iter/s)": 1.541176 }, { "acc": 0.97902241, "epoch": 8.291539723459104, "grad_norm": 0.951160728931427, "learning_rate": 9.637475003169554e-06, "loss": 0.13652658, "memory(GiB)": 13.7, "step": 17690, "train_speed(iter/s)": 1.541178 }, { "acc": 0.9755209, "epoch": 8.29388329036794, "grad_norm": 6.106442928314209, "learning_rate": 9.637185169293208e-06, "loss": 0.1058081, "memory(GiB)": 13.7, "step": 17695, "train_speed(iter/s)": 1.541193 }, { "acc": 0.97940617, "epoch": 8.296226857276775, "grad_norm": 5.453008651733398, "learning_rate": 9.636895223966184e-06, "loss": 0.08364804, "memory(GiB)": 13.7, "step": 17700, "train_speed(iter/s)": 1.541189 }, { "acc": 0.95645638, "epoch": 8.29857042418561, "grad_norm": 17.94881820678711, "learning_rate": 9.636605167195451e-06, "loss": 0.26614938, "memory(GiB)": 13.7, "step": 17705, "train_speed(iter/s)": 1.541203 }, { "acc": 0.98305063, "epoch": 8.300913991094445, "grad_norm": 5.703038215637207, "learning_rate": 9.636314998987981e-06, "loss": 0.06141715, "memory(GiB)": 13.7, "step": 17710, "train_speed(iter/s)": 1.541219 }, { "acc": 0.97486115, "epoch": 8.303257558003281, "grad_norm": 15.202123641967773, "learning_rate": 9.636024719350748e-06, "loss": 0.20522165, "memory(GiB)": 13.7, "step": 17715, "train_speed(iter/s)": 1.541205 }, { "acc": 0.96159229, "epoch": 8.305601124912116, "grad_norm": 5.432927131652832, "learning_rate": 9.635734328290729e-06, "loss": 0.20968194, "memory(GiB)": 13.7, "step": 17720, "train_speed(iter/s)": 1.5412 }, { "acc": 0.94836674, "epoch": 8.307944691820952, "grad_norm": 9.233139038085938, "learning_rate": 9.635443825814906e-06, "loss": 0.26277723, "memory(GiB)": 13.7, "step": 17725, "train_speed(iter/s)": 1.541222 }, { "acc": 0.96365023, "epoch": 8.310288258729786, "grad_norm": 7.019996643066406, "learning_rate": 9.635153211930258e-06, "loss": 0.17415066, "memory(GiB)": 13.7, "step": 17730, "train_speed(iter/s)": 1.541223 }, { "acc": 0.94738159, "epoch": 8.312631825638622, "grad_norm": 5.5291428565979, "learning_rate": 9.634862486643775e-06, "loss": 0.29340959, "memory(GiB)": 13.7, "step": 17735, "train_speed(iter/s)": 1.541208 }, { "acc": 0.9796813, "epoch": 8.314975392547456, "grad_norm": 6.73910665512085, "learning_rate": 9.634571649962441e-06, "loss": 0.06060613, "memory(GiB)": 13.7, "step": 17740, "train_speed(iter/s)": 1.541212 }, { "acc": 0.97321434, "epoch": 8.317318959456292, "grad_norm": 4.918234825134277, "learning_rate": 9.634280701893251e-06, "loss": 0.15358934, "memory(GiB)": 13.7, "step": 17745, "train_speed(iter/s)": 1.541209 }, { "acc": 0.9683897, "epoch": 8.319662526365128, "grad_norm": 11.423409461975098, "learning_rate": 9.633989642443196e-06, "loss": 0.1785929, "memory(GiB)": 13.7, "step": 17750, "train_speed(iter/s)": 1.541224 }, { "acc": 0.96593666, "epoch": 8.322006093273963, "grad_norm": 5.688299655914307, "learning_rate": 9.63369847161927e-06, "loss": 0.1808543, "memory(GiB)": 13.7, "step": 17755, "train_speed(iter/s)": 1.541235 }, { "acc": 0.97250004, "epoch": 8.324349660182799, "grad_norm": 2.290497303009033, "learning_rate": 9.633407189428474e-06, "loss": 0.17238008, "memory(GiB)": 13.7, "step": 17760, "train_speed(iter/s)": 1.541234 }, { "acc": 0.92876339, "epoch": 8.326693227091633, "grad_norm": 11.702651977539062, "learning_rate": 9.63311579587781e-06, "loss": 0.42911844, "memory(GiB)": 13.7, "step": 17765, "train_speed(iter/s)": 1.541266 }, { "acc": 0.96962299, "epoch": 8.32903679400047, "grad_norm": 5.473093509674072, "learning_rate": 9.632824290974284e-06, "loss": 0.13967469, "memory(GiB)": 13.7, "step": 17770, "train_speed(iter/s)": 1.541268 }, { "acc": 0.96038685, "epoch": 8.331380360909304, "grad_norm": 2.9993603229522705, "learning_rate": 9.632532674724898e-06, "loss": 0.21084323, "memory(GiB)": 13.7, "step": 17775, "train_speed(iter/s)": 1.541266 }, { "acc": 0.98323784, "epoch": 8.33372392781814, "grad_norm": 0.028575781732797623, "learning_rate": 9.632240947136665e-06, "loss": 0.13004777, "memory(GiB)": 13.7, "step": 17780, "train_speed(iter/s)": 1.541267 }, { "acc": 0.97663193, "epoch": 8.336067494726974, "grad_norm": 5.258497714996338, "learning_rate": 9.631949108216595e-06, "loss": 0.10376954, "memory(GiB)": 13.7, "step": 17785, "train_speed(iter/s)": 1.54129 }, { "acc": 0.96561012, "epoch": 8.33841106163581, "grad_norm": 21.42559814453125, "learning_rate": 9.631657157971704e-06, "loss": 0.23494453, "memory(GiB)": 13.7, "step": 17790, "train_speed(iter/s)": 1.541319 }, { "acc": 0.97246609, "epoch": 8.340754628544644, "grad_norm": 6.146519660949707, "learning_rate": 9.63136509640901e-06, "loss": 0.15761278, "memory(GiB)": 13.7, "step": 17795, "train_speed(iter/s)": 1.541324 }, { "acc": 0.96933308, "epoch": 8.34309819545348, "grad_norm": 7.126873016357422, "learning_rate": 9.631072923535533e-06, "loss": 0.12790267, "memory(GiB)": 13.7, "step": 17800, "train_speed(iter/s)": 1.541335 }, { "acc": 0.98121109, "epoch": 8.345441762362315, "grad_norm": 6.284426212310791, "learning_rate": 9.630780639358295e-06, "loss": 0.09647707, "memory(GiB)": 13.7, "step": 17805, "train_speed(iter/s)": 1.541347 }, { "acc": 0.9590889, "epoch": 8.34778532927115, "grad_norm": 11.08389949798584, "learning_rate": 9.630488243884322e-06, "loss": 0.18070228, "memory(GiB)": 13.7, "step": 17810, "train_speed(iter/s)": 1.541349 }, { "acc": 0.97215271, "epoch": 8.350128896179985, "grad_norm": 6.320909023284912, "learning_rate": 9.630195737120642e-06, "loss": 0.15873048, "memory(GiB)": 13.7, "step": 17815, "train_speed(iter/s)": 1.541363 }, { "acc": 0.96696424, "epoch": 8.352472463088821, "grad_norm": 19.88562774658203, "learning_rate": 9.629903119074286e-06, "loss": 0.14194633, "memory(GiB)": 13.7, "step": 17820, "train_speed(iter/s)": 1.541395 }, { "acc": 0.95934525, "epoch": 8.354816029997657, "grad_norm": 5.671276569366455, "learning_rate": 9.62961038975229e-06, "loss": 0.2298697, "memory(GiB)": 13.7, "step": 17825, "train_speed(iter/s)": 1.541421 }, { "acc": 0.9680398, "epoch": 8.357159596906492, "grad_norm": 10.647453308105469, "learning_rate": 9.629317549161685e-06, "loss": 0.20944002, "memory(GiB)": 13.7, "step": 17830, "train_speed(iter/s)": 1.541441 }, { "acc": 0.97920837, "epoch": 8.359503163815328, "grad_norm": 7.905130863189697, "learning_rate": 9.629024597309516e-06, "loss": 0.1400389, "memory(GiB)": 13.7, "step": 17835, "train_speed(iter/s)": 1.541443 }, { "acc": 0.94801283, "epoch": 8.361846730724162, "grad_norm": 8.033713340759277, "learning_rate": 9.62873153420282e-06, "loss": 0.31237702, "memory(GiB)": 13.7, "step": 17840, "train_speed(iter/s)": 1.541422 }, { "acc": 0.96139984, "epoch": 8.364190297632998, "grad_norm": 13.086355209350586, "learning_rate": 9.628438359848642e-06, "loss": 0.22364154, "memory(GiB)": 13.7, "step": 17845, "train_speed(iter/s)": 1.541402 }, { "acc": 0.97415209, "epoch": 8.366533864541832, "grad_norm": 2.875887155532837, "learning_rate": 9.62814507425403e-06, "loss": 0.15257827, "memory(GiB)": 13.7, "step": 17850, "train_speed(iter/s)": 1.541419 }, { "acc": 0.9710187, "epoch": 8.368877431450668, "grad_norm": 6.773133754730225, "learning_rate": 9.627851677426035e-06, "loss": 0.15379226, "memory(GiB)": 13.7, "step": 17855, "train_speed(iter/s)": 1.541437 }, { "acc": 0.98142586, "epoch": 8.371220998359503, "grad_norm": 3.998730182647705, "learning_rate": 9.627558169371706e-06, "loss": 0.12733859, "memory(GiB)": 13.7, "step": 17860, "train_speed(iter/s)": 1.54146 }, { "acc": 0.97164326, "epoch": 8.373564565268339, "grad_norm": 2.9441418647766113, "learning_rate": 9.6272645500981e-06, "loss": 0.15134797, "memory(GiB)": 13.7, "step": 17865, "train_speed(iter/s)": 1.54147 }, { "acc": 0.97367554, "epoch": 8.375908132177173, "grad_norm": 8.403104782104492, "learning_rate": 9.626970819612276e-06, "loss": 0.11302834, "memory(GiB)": 13.7, "step": 17870, "train_speed(iter/s)": 1.54149 }, { "acc": 0.96138391, "epoch": 8.37825169908601, "grad_norm": 10.076680183410645, "learning_rate": 9.626676977921293e-06, "loss": 0.19088688, "memory(GiB)": 13.7, "step": 17875, "train_speed(iter/s)": 1.541476 }, { "acc": 0.98642311, "epoch": 8.380595265994844, "grad_norm": 7.0649943351745605, "learning_rate": 9.62638302503221e-06, "loss": 0.10863807, "memory(GiB)": 13.7, "step": 17880, "train_speed(iter/s)": 1.541506 }, { "acc": 0.98208332, "epoch": 8.38293883290368, "grad_norm": 3.747199058532715, "learning_rate": 9.626088960952099e-06, "loss": 0.07564243, "memory(GiB)": 13.7, "step": 17885, "train_speed(iter/s)": 1.541506 }, { "acc": 0.98123512, "epoch": 8.385282399812514, "grad_norm": 3.1824042797088623, "learning_rate": 9.625794785688025e-06, "loss": 0.09100496, "memory(GiB)": 13.7, "step": 17890, "train_speed(iter/s)": 1.54152 }, { "acc": 0.9650135, "epoch": 8.38762596672135, "grad_norm": 5.401589870452881, "learning_rate": 9.62550049924706e-06, "loss": 0.25131469, "memory(GiB)": 13.7, "step": 17895, "train_speed(iter/s)": 1.541546 }, { "acc": 0.97023811, "epoch": 8.389969533630186, "grad_norm": 48.347965240478516, "learning_rate": 9.625206101636275e-06, "loss": 0.11948795, "memory(GiB)": 13.7, "step": 17900, "train_speed(iter/s)": 1.541588 }, { "acc": 0.9708004, "epoch": 8.39231310053902, "grad_norm": 6.325894832611084, "learning_rate": 9.624911592862751e-06, "loss": 0.14314094, "memory(GiB)": 13.7, "step": 17905, "train_speed(iter/s)": 1.541584 }, { "acc": 0.97997026, "epoch": 8.394656667447856, "grad_norm": 2.7149133682250977, "learning_rate": 9.624616972933564e-06, "loss": 0.15407592, "memory(GiB)": 13.7, "step": 17910, "train_speed(iter/s)": 1.541578 }, { "acc": 0.96409626, "epoch": 8.39700023435669, "grad_norm": 8.308164596557617, "learning_rate": 9.624322241855796e-06, "loss": 0.23005548, "memory(GiB)": 13.7, "step": 17915, "train_speed(iter/s)": 1.541573 }, { "acc": 0.97073002, "epoch": 8.399343801265527, "grad_norm": 7.783923625946045, "learning_rate": 9.624027399636533e-06, "loss": 0.10825351, "memory(GiB)": 13.7, "step": 17920, "train_speed(iter/s)": 1.541564 }, { "acc": 0.96969872, "epoch": 8.401687368174361, "grad_norm": 1.0029239654541016, "learning_rate": 9.623732446282859e-06, "loss": 0.13495047, "memory(GiB)": 13.7, "step": 17925, "train_speed(iter/s)": 1.541568 }, { "acc": 0.97138615, "epoch": 8.404030935083197, "grad_norm": 3.078578472137451, "learning_rate": 9.623437381801867e-06, "loss": 0.19195197, "memory(GiB)": 13.7, "step": 17930, "train_speed(iter/s)": 1.541581 }, { "acc": 0.96134605, "epoch": 8.406374501992032, "grad_norm": 5.094178676605225, "learning_rate": 9.623142206200648e-06, "loss": 0.18243048, "memory(GiB)": 13.7, "step": 17935, "train_speed(iter/s)": 1.541574 }, { "acc": 0.99209852, "epoch": 8.408718068900868, "grad_norm": 4.549040794372559, "learning_rate": 9.622846919486297e-06, "loss": 0.0403821, "memory(GiB)": 13.7, "step": 17940, "train_speed(iter/s)": 1.541587 }, { "acc": 0.96827879, "epoch": 8.411061635809702, "grad_norm": 12.403141975402832, "learning_rate": 9.622551521665909e-06, "loss": 0.16482557, "memory(GiB)": 13.7, "step": 17945, "train_speed(iter/s)": 1.541589 }, { "acc": 0.9758049, "epoch": 8.413405202718538, "grad_norm": 8.34697437286377, "learning_rate": 9.62225601274659e-06, "loss": 0.15684116, "memory(GiB)": 13.7, "step": 17950, "train_speed(iter/s)": 1.541585 }, { "acc": 0.97553024, "epoch": 8.415748769627372, "grad_norm": 14.466843605041504, "learning_rate": 9.62196039273544e-06, "loss": 0.13711698, "memory(GiB)": 13.7, "step": 17955, "train_speed(iter/s)": 1.5416 }, { "acc": 0.95009785, "epoch": 8.418092336536208, "grad_norm": 4.708561897277832, "learning_rate": 9.621664661639563e-06, "loss": 0.26284938, "memory(GiB)": 13.7, "step": 17960, "train_speed(iter/s)": 1.54159 }, { "acc": 0.95863094, "epoch": 8.420435903445043, "grad_norm": 3.739626884460449, "learning_rate": 9.62136881946607e-06, "loss": 0.25952718, "memory(GiB)": 13.7, "step": 17965, "train_speed(iter/s)": 1.541571 }, { "acc": 0.97095833, "epoch": 8.422779470353879, "grad_norm": 5.652414321899414, "learning_rate": 9.62107286622207e-06, "loss": 0.15861462, "memory(GiB)": 13.7, "step": 17970, "train_speed(iter/s)": 1.541569 }, { "acc": 0.97718754, "epoch": 8.425123037262713, "grad_norm": 2.499147653579712, "learning_rate": 9.62077680191468e-06, "loss": 0.13128566, "memory(GiB)": 13.7, "step": 17975, "train_speed(iter/s)": 1.541582 }, { "acc": 0.95187492, "epoch": 8.42746660417155, "grad_norm": 9.42238998413086, "learning_rate": 9.620480626551016e-06, "loss": 0.1729463, "memory(GiB)": 13.7, "step": 17980, "train_speed(iter/s)": 1.541604 }, { "acc": 0.96089783, "epoch": 8.429810171080383, "grad_norm": 15.623054504394531, "learning_rate": 9.620184340138192e-06, "loss": 0.18239939, "memory(GiB)": 13.7, "step": 17985, "train_speed(iter/s)": 1.541605 }, { "acc": 0.9680357, "epoch": 8.43215373798922, "grad_norm": 4.403812885284424, "learning_rate": 9.619887942683335e-06, "loss": 0.21211612, "memory(GiB)": 13.7, "step": 17990, "train_speed(iter/s)": 1.5416 }, { "acc": 0.98110695, "epoch": 8.434497304898056, "grad_norm": 6.481757640838623, "learning_rate": 9.619591434193568e-06, "loss": 0.15766187, "memory(GiB)": 13.7, "step": 17995, "train_speed(iter/s)": 1.541623 }, { "acc": 0.97767859, "epoch": 8.43684087180689, "grad_norm": 4.271130561828613, "learning_rate": 9.619294814676017e-06, "loss": 0.07016263, "memory(GiB)": 13.7, "step": 18000, "train_speed(iter/s)": 1.541627 }, { "acc": 0.96785622, "epoch": 8.439184438715726, "grad_norm": 15.273211479187012, "learning_rate": 9.618998084137812e-06, "loss": 0.19681411, "memory(GiB)": 13.7, "step": 18005, "train_speed(iter/s)": 1.54163 }, { "acc": 0.9758852, "epoch": 8.44152800562456, "grad_norm": 11.849180221557617, "learning_rate": 9.618701242586088e-06, "loss": 0.1132005, "memory(GiB)": 13.7, "step": 18010, "train_speed(iter/s)": 1.541646 }, { "acc": 0.96940651, "epoch": 8.443871572533396, "grad_norm": 16.544921875, "learning_rate": 9.618404290027978e-06, "loss": 0.15794303, "memory(GiB)": 13.7, "step": 18015, "train_speed(iter/s)": 1.541654 }, { "acc": 0.95636797, "epoch": 8.44621513944223, "grad_norm": 3.6025075912475586, "learning_rate": 9.61810722647062e-06, "loss": 0.17449063, "memory(GiB)": 13.7, "step": 18020, "train_speed(iter/s)": 1.541671 }, { "acc": 0.95479164, "epoch": 8.448558706351067, "grad_norm": 35.19188690185547, "learning_rate": 9.617810051921151e-06, "loss": 0.24472919, "memory(GiB)": 13.7, "step": 18025, "train_speed(iter/s)": 1.541685 }, { "acc": 0.95653954, "epoch": 8.450902273259901, "grad_norm": 4.70748233795166, "learning_rate": 9.61751276638672e-06, "loss": 0.20075505, "memory(GiB)": 13.7, "step": 18030, "train_speed(iter/s)": 1.541699 }, { "acc": 0.95976191, "epoch": 8.453245840168737, "grad_norm": 3.048233985900879, "learning_rate": 9.61721536987447e-06, "loss": 0.13085879, "memory(GiB)": 13.7, "step": 18035, "train_speed(iter/s)": 1.541718 }, { "acc": 0.97557297, "epoch": 8.455589407077571, "grad_norm": 5.9750213623046875, "learning_rate": 9.61691786239155e-06, "loss": 0.06806748, "memory(GiB)": 13.7, "step": 18040, "train_speed(iter/s)": 1.541735 }, { "acc": 0.98874998, "epoch": 8.457932973986408, "grad_norm": 1.2552777528762817, "learning_rate": 9.616620243945108e-06, "loss": 0.07238225, "memory(GiB)": 13.7, "step": 18045, "train_speed(iter/s)": 1.541743 }, { "acc": 0.9864584, "epoch": 8.460276540895242, "grad_norm": 3.0022313594818115, "learning_rate": 9.616322514542305e-06, "loss": 0.03742698, "memory(GiB)": 13.7, "step": 18050, "train_speed(iter/s)": 1.541785 }, { "acc": 0.98415184, "epoch": 8.462620107804078, "grad_norm": 4.692809104919434, "learning_rate": 9.61602467419029e-06, "loss": 0.09051547, "memory(GiB)": 13.7, "step": 18055, "train_speed(iter/s)": 1.541781 }, { "acc": 0.96561813, "epoch": 8.464963674712912, "grad_norm": 28.299253463745117, "learning_rate": 9.615726722896226e-06, "loss": 0.21271677, "memory(GiB)": 13.7, "step": 18060, "train_speed(iter/s)": 1.541771 }, { "acc": 0.98399553, "epoch": 8.467307241621748, "grad_norm": 4.344547271728516, "learning_rate": 9.61542866066727e-06, "loss": 0.12589376, "memory(GiB)": 13.7, "step": 18065, "train_speed(iter/s)": 1.541767 }, { "acc": 0.97417622, "epoch": 8.469650808530584, "grad_norm": 1.796405553817749, "learning_rate": 9.615130487510596e-06, "loss": 0.19074079, "memory(GiB)": 13.7, "step": 18070, "train_speed(iter/s)": 1.541785 }, { "acc": 0.96393642, "epoch": 8.471994375439419, "grad_norm": 0.7834996581077576, "learning_rate": 9.614832203433362e-06, "loss": 0.14469081, "memory(GiB)": 13.7, "step": 18075, "train_speed(iter/s)": 1.54183 }, { "acc": 0.9753274, "epoch": 8.474337942348255, "grad_norm": 2.232980966567993, "learning_rate": 9.614533808442742e-06, "loss": 0.14074835, "memory(GiB)": 13.7, "step": 18080, "train_speed(iter/s)": 1.541863 }, { "acc": 0.97966537, "epoch": 8.476681509257089, "grad_norm": 1.8285045623779297, "learning_rate": 9.614235302545907e-06, "loss": 0.12702811, "memory(GiB)": 13.7, "step": 18085, "train_speed(iter/s)": 1.54187 }, { "acc": 0.97322311, "epoch": 8.479025076165925, "grad_norm": 2.6182680130004883, "learning_rate": 9.613936685750032e-06, "loss": 0.12442342, "memory(GiB)": 13.7, "step": 18090, "train_speed(iter/s)": 1.541892 }, { "acc": 0.97407112, "epoch": 8.48136864307476, "grad_norm": 5.171781063079834, "learning_rate": 9.613637958062299e-06, "loss": 0.13380004, "memory(GiB)": 13.7, "step": 18095, "train_speed(iter/s)": 1.541896 }, { "acc": 0.97208338, "epoch": 8.483712209983596, "grad_norm": 9.571345329284668, "learning_rate": 9.613339119489882e-06, "loss": 0.16622289, "memory(GiB)": 13.7, "step": 18100, "train_speed(iter/s)": 1.541901 }, { "acc": 0.96984301, "epoch": 8.48605577689243, "grad_norm": 8.95651912689209, "learning_rate": 9.613040170039965e-06, "loss": 0.13183017, "memory(GiB)": 13.7, "step": 18105, "train_speed(iter/s)": 1.541916 }, { "acc": 0.97064257, "epoch": 8.488399343801266, "grad_norm": 7.68984842300415, "learning_rate": 9.61274110971974e-06, "loss": 0.18360336, "memory(GiB)": 13.7, "step": 18110, "train_speed(iter/s)": 1.541919 }, { "acc": 0.96987104, "epoch": 8.4907429107101, "grad_norm": 21.38983917236328, "learning_rate": 9.612441938536387e-06, "loss": 0.20715015, "memory(GiB)": 13.7, "step": 18115, "train_speed(iter/s)": 1.541907 }, { "acc": 0.9675808, "epoch": 8.493086477618936, "grad_norm": 86.26570892333984, "learning_rate": 9.612142656497103e-06, "loss": 0.23271112, "memory(GiB)": 13.7, "step": 18120, "train_speed(iter/s)": 1.541903 }, { "acc": 0.95864973, "epoch": 8.49543004452777, "grad_norm": 7.077566623687744, "learning_rate": 9.61184326360908e-06, "loss": 0.24445672, "memory(GiB)": 13.7, "step": 18125, "train_speed(iter/s)": 1.541896 }, { "acc": 0.9895833, "epoch": 8.497773611436607, "grad_norm": 2.0511367321014404, "learning_rate": 9.611543759879515e-06, "loss": 0.08259037, "memory(GiB)": 13.7, "step": 18130, "train_speed(iter/s)": 1.541926 }, { "acc": 0.97491474, "epoch": 8.500117178345441, "grad_norm": 5.195970058441162, "learning_rate": 9.611244145315606e-06, "loss": 0.11037169, "memory(GiB)": 13.7, "step": 18135, "train_speed(iter/s)": 1.541941 }, { "acc": 0.9567421, "epoch": 8.502460745254277, "grad_norm": 6.972622394561768, "learning_rate": 9.610944419924556e-06, "loss": 0.27580204, "memory(GiB)": 13.7, "step": 18140, "train_speed(iter/s)": 1.541965 }, { "acc": 0.98604164, "epoch": 8.504804312163113, "grad_norm": 17.742136001586914, "learning_rate": 9.610644583713568e-06, "loss": 0.16868742, "memory(GiB)": 13.7, "step": 18145, "train_speed(iter/s)": 1.541962 }, { "acc": 0.96187496, "epoch": 8.507147879071947, "grad_norm": 6.6497483253479, "learning_rate": 9.610344636689849e-06, "loss": 0.11228021, "memory(GiB)": 13.7, "step": 18150, "train_speed(iter/s)": 1.541974 }, { "acc": 0.98948097, "epoch": 8.509491445980784, "grad_norm": 0.9753205180168152, "learning_rate": 9.610044578860608e-06, "loss": 0.07297912, "memory(GiB)": 13.7, "step": 18155, "train_speed(iter/s)": 1.541977 }, { "acc": 0.9895834, "epoch": 8.511835012889618, "grad_norm": 5.357980251312256, "learning_rate": 9.609744410233062e-06, "loss": 0.11495156, "memory(GiB)": 13.7, "step": 18160, "train_speed(iter/s)": 1.541975 }, { "acc": 0.96955357, "epoch": 8.514178579798454, "grad_norm": 5.789769649505615, "learning_rate": 9.609444130814422e-06, "loss": 0.12425411, "memory(GiB)": 13.7, "step": 18165, "train_speed(iter/s)": 1.541977 }, { "acc": 0.96983891, "epoch": 8.516522146707288, "grad_norm": 0.07473531365394592, "learning_rate": 9.609143740611906e-06, "loss": 0.13894281, "memory(GiB)": 13.7, "step": 18170, "train_speed(iter/s)": 1.54199 }, { "acc": 0.95795135, "epoch": 8.518865713616124, "grad_norm": 6.935571193695068, "learning_rate": 9.608843239632735e-06, "loss": 0.1825115, "memory(GiB)": 13.7, "step": 18175, "train_speed(iter/s)": 1.542033 }, { "acc": 0.96814728, "epoch": 8.521209280524959, "grad_norm": 3.327470541000366, "learning_rate": 9.608542627884134e-06, "loss": 0.15926425, "memory(GiB)": 13.7, "step": 18180, "train_speed(iter/s)": 1.542025 }, { "acc": 0.96127682, "epoch": 8.523552847433795, "grad_norm": 5.930906295776367, "learning_rate": 9.608241905373325e-06, "loss": 0.18713802, "memory(GiB)": 13.7, "step": 18185, "train_speed(iter/s)": 1.542065 }, { "acc": 0.95414267, "epoch": 8.525896414342629, "grad_norm": 6.829648494720459, "learning_rate": 9.607941072107537e-06, "loss": 0.24747264, "memory(GiB)": 13.7, "step": 18190, "train_speed(iter/s)": 1.542067 }, { "acc": 0.9537447, "epoch": 8.528239981251465, "grad_norm": 13.248196601867676, "learning_rate": 9.607640128094003e-06, "loss": 0.18921667, "memory(GiB)": 13.7, "step": 18195, "train_speed(iter/s)": 1.542087 }, { "acc": 0.97791357, "epoch": 8.5305835481603, "grad_norm": 2.5714898109436035, "learning_rate": 9.607339073339958e-06, "loss": 0.16839708, "memory(GiB)": 13.7, "step": 18200, "train_speed(iter/s)": 1.54207 }, { "acc": 0.9734601, "epoch": 8.532927115069135, "grad_norm": 2.1705567836761475, "learning_rate": 9.607037907852636e-06, "loss": 0.21956255, "memory(GiB)": 13.7, "step": 18205, "train_speed(iter/s)": 1.542098 }, { "acc": 0.9786129, "epoch": 8.53527068197797, "grad_norm": 4.749450206756592, "learning_rate": 9.606736631639275e-06, "loss": 0.12077091, "memory(GiB)": 13.7, "step": 18210, "train_speed(iter/s)": 1.542124 }, { "acc": 0.97625008, "epoch": 8.537614248886806, "grad_norm": 33.23356246948242, "learning_rate": 9.606435244707118e-06, "loss": 0.16262798, "memory(GiB)": 13.7, "step": 18215, "train_speed(iter/s)": 1.542101 }, { "acc": 0.97763977, "epoch": 8.53995781579564, "grad_norm": 5.479976654052734, "learning_rate": 9.60613374706341e-06, "loss": 0.18357536, "memory(GiB)": 13.7, "step": 18220, "train_speed(iter/s)": 1.542083 }, { "acc": 0.97049599, "epoch": 8.542301382704476, "grad_norm": 2.919825792312622, "learning_rate": 9.605832138715397e-06, "loss": 0.15203979, "memory(GiB)": 13.7, "step": 18225, "train_speed(iter/s)": 1.542108 }, { "acc": 0.96863098, "epoch": 8.54464494961331, "grad_norm": 44.04548263549805, "learning_rate": 9.60553041967033e-06, "loss": 0.18959057, "memory(GiB)": 13.7, "step": 18230, "train_speed(iter/s)": 1.542133 }, { "acc": 0.96726646, "epoch": 8.546988516522147, "grad_norm": 11.231228828430176, "learning_rate": 9.605228589935458e-06, "loss": 0.11895502, "memory(GiB)": 13.7, "step": 18235, "train_speed(iter/s)": 1.542159 }, { "acc": 0.96693459, "epoch": 8.549332083430983, "grad_norm": 21.69557762145996, "learning_rate": 9.604926649518042e-06, "loss": 0.21565208, "memory(GiB)": 13.7, "step": 18240, "train_speed(iter/s)": 1.542163 }, { "acc": 0.96349211, "epoch": 8.551675650339817, "grad_norm": 15.665473937988281, "learning_rate": 9.604624598425333e-06, "loss": 0.17402101, "memory(GiB)": 13.7, "step": 18245, "train_speed(iter/s)": 1.54218 }, { "acc": 0.97492561, "epoch": 8.554019217248653, "grad_norm": 8.474967956542969, "learning_rate": 9.604322436664596e-06, "loss": 0.123706, "memory(GiB)": 13.7, "step": 18250, "train_speed(iter/s)": 1.542202 }, { "acc": 0.97075901, "epoch": 8.556362784157487, "grad_norm": 9.263864517211914, "learning_rate": 9.604020164243091e-06, "loss": 0.135654, "memory(GiB)": 13.7, "step": 18255, "train_speed(iter/s)": 1.542226 }, { "acc": 0.95855007, "epoch": 8.558706351066323, "grad_norm": 15.187080383300781, "learning_rate": 9.603717781168086e-06, "loss": 0.31838055, "memory(GiB)": 13.7, "step": 18260, "train_speed(iter/s)": 1.542224 }, { "acc": 0.96933613, "epoch": 8.561049917975158, "grad_norm": 6.174253940582275, "learning_rate": 9.603415287446847e-06, "loss": 0.15155342, "memory(GiB)": 13.7, "step": 18265, "train_speed(iter/s)": 1.54227 }, { "acc": 0.95997477, "epoch": 8.563393484883994, "grad_norm": 6.585067272186279, "learning_rate": 9.603112683086648e-06, "loss": 0.21818528, "memory(GiB)": 13.7, "step": 18270, "train_speed(iter/s)": 1.542294 }, { "acc": 0.94883251, "epoch": 8.565737051792828, "grad_norm": 7.859777927398682, "learning_rate": 9.60280996809476e-06, "loss": 0.25088885, "memory(GiB)": 13.7, "step": 18275, "train_speed(iter/s)": 1.54232 }, { "acc": 0.97603626, "epoch": 8.568080618701664, "grad_norm": 3.873197078704834, "learning_rate": 9.60250714247846e-06, "loss": 0.13340735, "memory(GiB)": 13.7, "step": 18280, "train_speed(iter/s)": 1.542318 }, { "acc": 0.96347218, "epoch": 8.570424185610499, "grad_norm": 8.88740062713623, "learning_rate": 9.602204206245028e-06, "loss": 0.27849329, "memory(GiB)": 13.7, "step": 18285, "train_speed(iter/s)": 1.542324 }, { "acc": 0.97459393, "epoch": 8.572767752519335, "grad_norm": 4.765229225158691, "learning_rate": 9.601901159401746e-06, "loss": 0.12759095, "memory(GiB)": 13.7, "step": 18290, "train_speed(iter/s)": 1.542322 }, { "acc": 0.98963795, "epoch": 8.575111319428169, "grad_norm": 2.661248207092285, "learning_rate": 9.601598001955895e-06, "loss": 0.10313714, "memory(GiB)": 13.7, "step": 18295, "train_speed(iter/s)": 1.542303 }, { "acc": 0.97581882, "epoch": 8.577454886337005, "grad_norm": 0.2826223075389862, "learning_rate": 9.601294733914763e-06, "loss": 0.10831475, "memory(GiB)": 13.7, "step": 18300, "train_speed(iter/s)": 1.542301 }, { "acc": 0.98934031, "epoch": 8.57979845324584, "grad_norm": 4.1914262771606445, "learning_rate": 9.60099135528564e-06, "loss": 0.04319526, "memory(GiB)": 13.7, "step": 18305, "train_speed(iter/s)": 1.542284 }, { "acc": 0.9624548, "epoch": 8.582142020154675, "grad_norm": 20.634363174438477, "learning_rate": 9.60068786607582e-06, "loss": 0.22877672, "memory(GiB)": 13.7, "step": 18310, "train_speed(iter/s)": 1.54229 }, { "acc": 0.96087303, "epoch": 8.584485587063512, "grad_norm": 7.761970043182373, "learning_rate": 9.600384266292596e-06, "loss": 0.17919619, "memory(GiB)": 13.7, "step": 18315, "train_speed(iter/s)": 1.5423 }, { "acc": 0.9610796, "epoch": 8.586829153972346, "grad_norm": 17.05516242980957, "learning_rate": 9.600080555943267e-06, "loss": 0.2279222, "memory(GiB)": 13.7, "step": 18320, "train_speed(iter/s)": 1.542294 }, { "acc": 0.98157196, "epoch": 8.589172720881182, "grad_norm": 4.8030171394348145, "learning_rate": 9.59977673503513e-06, "loss": 0.06111012, "memory(GiB)": 13.7, "step": 18325, "train_speed(iter/s)": 1.542304 }, { "acc": 0.98232718, "epoch": 8.591516287790016, "grad_norm": 6.318202495574951, "learning_rate": 9.599472803575492e-06, "loss": 0.10838134, "memory(GiB)": 13.7, "step": 18330, "train_speed(iter/s)": 1.542333 }, { "acc": 0.96508732, "epoch": 8.593859854698852, "grad_norm": 7.713627338409424, "learning_rate": 9.599168761571657e-06, "loss": 0.14179779, "memory(GiB)": 13.7, "step": 18335, "train_speed(iter/s)": 1.54236 }, { "acc": 0.96604538, "epoch": 8.596203421607687, "grad_norm": 6.817778587341309, "learning_rate": 9.598864609030931e-06, "loss": 0.10321975, "memory(GiB)": 13.7, "step": 18340, "train_speed(iter/s)": 1.54239 }, { "acc": 0.97402668, "epoch": 8.598546988516523, "grad_norm": 7.840151786804199, "learning_rate": 9.598560345960628e-06, "loss": 0.11030892, "memory(GiB)": 13.7, "step": 18345, "train_speed(iter/s)": 1.542383 }, { "acc": 0.9661047, "epoch": 8.600890555425357, "grad_norm": 7.406527519226074, "learning_rate": 9.59825597236806e-06, "loss": 0.2151824, "memory(GiB)": 13.7, "step": 18350, "train_speed(iter/s)": 1.542403 }, { "acc": 0.97337055, "epoch": 8.603234122334193, "grad_norm": 3.9447925090789795, "learning_rate": 9.597951488260543e-06, "loss": 0.17088159, "memory(GiB)": 13.7, "step": 18355, "train_speed(iter/s)": 1.542387 }, { "acc": 0.9833333, "epoch": 8.605577689243027, "grad_norm": 5.970498085021973, "learning_rate": 9.597646893645396e-06, "loss": 0.09032522, "memory(GiB)": 13.7, "step": 18360, "train_speed(iter/s)": 1.5424 }, { "acc": 0.97453442, "epoch": 8.607921256151863, "grad_norm": 7.472535610198975, "learning_rate": 9.59734218852994e-06, "loss": 0.10937247, "memory(GiB)": 13.7, "step": 18365, "train_speed(iter/s)": 1.542424 }, { "acc": 0.97182693, "epoch": 8.610264823060698, "grad_norm": 6.4722981452941895, "learning_rate": 9.597037372921501e-06, "loss": 0.11103601, "memory(GiB)": 13.7, "step": 18370, "train_speed(iter/s)": 1.542443 }, { "acc": 0.98859262, "epoch": 8.612608389969534, "grad_norm": 6.737087726593018, "learning_rate": 9.596732446827403e-06, "loss": 0.07266885, "memory(GiB)": 13.7, "step": 18375, "train_speed(iter/s)": 1.542447 }, { "acc": 0.95637903, "epoch": 8.614951956878368, "grad_norm": 7.00941801071167, "learning_rate": 9.596427410254976e-06, "loss": 0.31797268, "memory(GiB)": 13.7, "step": 18380, "train_speed(iter/s)": 1.542456 }, { "acc": 0.98559532, "epoch": 8.617295523787204, "grad_norm": 5.214661121368408, "learning_rate": 9.596122263211554e-06, "loss": 0.09738607, "memory(GiB)": 13.7, "step": 18385, "train_speed(iter/s)": 1.54243 }, { "acc": 0.97503853, "epoch": 8.61963909069604, "grad_norm": 10.783590316772461, "learning_rate": 9.595817005704472e-06, "loss": 0.12014693, "memory(GiB)": 13.7, "step": 18390, "train_speed(iter/s)": 1.542421 }, { "acc": 0.97011414, "epoch": 8.621982657604875, "grad_norm": 5.421189785003662, "learning_rate": 9.595511637741065e-06, "loss": 0.17789414, "memory(GiB)": 13.7, "step": 18395, "train_speed(iter/s)": 1.542436 }, { "acc": 0.96823864, "epoch": 8.62432622451371, "grad_norm": 9.699675559997559, "learning_rate": 9.595206159328673e-06, "loss": 0.1831748, "memory(GiB)": 13.7, "step": 18400, "train_speed(iter/s)": 1.542455 }, { "acc": 0.96465721, "epoch": 8.626669791422545, "grad_norm": 6.358250141143799, "learning_rate": 9.594900570474639e-06, "loss": 0.23800769, "memory(GiB)": 13.7, "step": 18405, "train_speed(iter/s)": 1.542502 }, { "acc": 0.98869047, "epoch": 8.629013358331381, "grad_norm": 0.939003586769104, "learning_rate": 9.59459487118631e-06, "loss": 0.0882698, "memory(GiB)": 13.7, "step": 18410, "train_speed(iter/s)": 1.542487 }, { "acc": 0.98276043, "epoch": 8.631356925240215, "grad_norm": 6.396338939666748, "learning_rate": 9.594289061471033e-06, "loss": 0.08335993, "memory(GiB)": 13.7, "step": 18415, "train_speed(iter/s)": 1.542499 }, { "acc": 0.96883926, "epoch": 8.633700492149051, "grad_norm": 12.954504013061523, "learning_rate": 9.59398314133616e-06, "loss": 0.23357317, "memory(GiB)": 13.7, "step": 18420, "train_speed(iter/s)": 1.54249 }, { "acc": 0.97145824, "epoch": 8.636044059057886, "grad_norm": 4.339768886566162, "learning_rate": 9.593677110789041e-06, "loss": 0.16510087, "memory(GiB)": 13.7, "step": 18425, "train_speed(iter/s)": 1.54249 }, { "acc": 0.96847219, "epoch": 8.638387625966722, "grad_norm": 1.3443931341171265, "learning_rate": 9.593370969837036e-06, "loss": 0.13455863, "memory(GiB)": 13.7, "step": 18430, "train_speed(iter/s)": 1.542509 }, { "acc": 0.97609577, "epoch": 8.640731192875556, "grad_norm": 2.473193645477295, "learning_rate": 9.593064718487501e-06, "loss": 0.14456036, "memory(GiB)": 13.7, "step": 18435, "train_speed(iter/s)": 1.542529 }, { "acc": 0.96012402, "epoch": 8.643074759784392, "grad_norm": 9.6154146194458, "learning_rate": 9.592758356747796e-06, "loss": 0.18155324, "memory(GiB)": 13.7, "step": 18440, "train_speed(iter/s)": 1.542536 }, { "acc": 0.95810251, "epoch": 8.645418326693227, "grad_norm": 9.226457595825195, "learning_rate": 9.59245188462529e-06, "loss": 0.21901202, "memory(GiB)": 13.7, "step": 18445, "train_speed(iter/s)": 1.542542 }, { "acc": 0.97868061, "epoch": 8.647761893602063, "grad_norm": 9.675589561462402, "learning_rate": 9.592145302127346e-06, "loss": 0.09224231, "memory(GiB)": 13.7, "step": 18450, "train_speed(iter/s)": 1.542566 }, { "acc": 0.96514883, "epoch": 8.650105460510897, "grad_norm": 6.007647514343262, "learning_rate": 9.591838609261332e-06, "loss": 0.20168979, "memory(GiB)": 13.7, "step": 18455, "train_speed(iter/s)": 1.542563 }, { "acc": 0.95111065, "epoch": 8.652449027419733, "grad_norm": 5.092126369476318, "learning_rate": 9.591531806034622e-06, "loss": 0.26621642, "memory(GiB)": 13.7, "step": 18460, "train_speed(iter/s)": 1.54257 }, { "acc": 0.96378965, "epoch": 8.654792594328567, "grad_norm": 3.837327480316162, "learning_rate": 9.59122489245459e-06, "loss": 0.09483758, "memory(GiB)": 13.7, "step": 18465, "train_speed(iter/s)": 1.54259 }, { "acc": 0.97585316, "epoch": 8.657136161237403, "grad_norm": 4.511785507202148, "learning_rate": 9.590917868528614e-06, "loss": 0.11009179, "memory(GiB)": 13.7, "step": 18470, "train_speed(iter/s)": 1.542596 }, { "acc": 0.97743053, "epoch": 8.659479728146238, "grad_norm": 8.327301979064941, "learning_rate": 9.590610734264071e-06, "loss": 0.10389051, "memory(GiB)": 13.7, "step": 18475, "train_speed(iter/s)": 1.54263 }, { "acc": 0.97154226, "epoch": 8.661823295055074, "grad_norm": 5.777171611785889, "learning_rate": 9.59030348966835e-06, "loss": 0.17587376, "memory(GiB)": 13.7, "step": 18480, "train_speed(iter/s)": 1.54263 }, { "acc": 0.95777779, "epoch": 8.66416686196391, "grad_norm": 41.782135009765625, "learning_rate": 9.589996134748827e-06, "loss": 0.19382555, "memory(GiB)": 13.7, "step": 18485, "train_speed(iter/s)": 1.542623 }, { "acc": 0.97332954, "epoch": 8.666510428872744, "grad_norm": 5.691554546356201, "learning_rate": 9.589688669512896e-06, "loss": 0.11056696, "memory(GiB)": 13.7, "step": 18490, "train_speed(iter/s)": 1.542644 }, { "acc": 0.96567345, "epoch": 8.66885399578158, "grad_norm": 11.568742752075195, "learning_rate": 9.58938109396795e-06, "loss": 0.1198529, "memory(GiB)": 13.7, "step": 18495, "train_speed(iter/s)": 1.542667 }, { "acc": 0.98207111, "epoch": 8.671197562690415, "grad_norm": 5.846282958984375, "learning_rate": 9.589073408121373e-06, "loss": 0.09891563, "memory(GiB)": 13.7, "step": 18500, "train_speed(iter/s)": 1.542673 }, { "acc": 0.96448002, "epoch": 8.67354112959925, "grad_norm": 6.969454765319824, "learning_rate": 9.588765611980567e-06, "loss": 0.1470063, "memory(GiB)": 13.7, "step": 18505, "train_speed(iter/s)": 1.54269 }, { "acc": 0.97885818, "epoch": 8.675884696508085, "grad_norm": 7.32813835144043, "learning_rate": 9.588457705552931e-06, "loss": 0.11933928, "memory(GiB)": 13.7, "step": 18510, "train_speed(iter/s)": 1.542692 }, { "acc": 0.98708334, "epoch": 8.678228263416921, "grad_norm": 7.217494487762451, "learning_rate": 9.588149688845864e-06, "loss": 0.03213225, "memory(GiB)": 13.7, "step": 18515, "train_speed(iter/s)": 1.542711 }, { "acc": 0.95913343, "epoch": 8.680571830325755, "grad_norm": 14.190954208374023, "learning_rate": 9.587841561866768e-06, "loss": 0.24163396, "memory(GiB)": 13.7, "step": 18520, "train_speed(iter/s)": 1.542718 }, { "acc": 0.96365442, "epoch": 8.682915397234591, "grad_norm": 9.593592643737793, "learning_rate": 9.587533324623054e-06, "loss": 0.16923649, "memory(GiB)": 13.7, "step": 18525, "train_speed(iter/s)": 1.54276 }, { "acc": 0.9661459, "epoch": 8.685258964143426, "grad_norm": 4.43090295791626, "learning_rate": 9.587224977122127e-06, "loss": 0.08970546, "memory(GiB)": 13.7, "step": 18530, "train_speed(iter/s)": 1.542765 }, { "acc": 0.95681629, "epoch": 8.687602531052262, "grad_norm": 6.687219142913818, "learning_rate": 9.586916519371401e-06, "loss": 0.14055471, "memory(GiB)": 13.7, "step": 18535, "train_speed(iter/s)": 1.542778 }, { "acc": 0.96691227, "epoch": 8.689946097961096, "grad_norm": 18.68897819519043, "learning_rate": 9.586607951378289e-06, "loss": 0.18241318, "memory(GiB)": 13.7, "step": 18540, "train_speed(iter/s)": 1.542783 }, { "acc": 0.98555918, "epoch": 8.692289664869932, "grad_norm": 4.515606880187988, "learning_rate": 9.586299273150206e-06, "loss": 0.06323272, "memory(GiB)": 13.7, "step": 18545, "train_speed(iter/s)": 1.542806 }, { "acc": 0.97730827, "epoch": 8.694633231778766, "grad_norm": 41.99276351928711, "learning_rate": 9.585990484694579e-06, "loss": 0.15893916, "memory(GiB)": 13.7, "step": 18550, "train_speed(iter/s)": 1.542778 }, { "acc": 0.9721096, "epoch": 8.696976798687603, "grad_norm": 4.770528316497803, "learning_rate": 9.585681586018823e-06, "loss": 0.12598504, "memory(GiB)": 13.7, "step": 18555, "train_speed(iter/s)": 1.542789 }, { "acc": 0.97053566, "epoch": 8.699320365596439, "grad_norm": 4.999110698699951, "learning_rate": 9.585372577130362e-06, "loss": 0.18574488, "memory(GiB)": 13.7, "step": 18560, "train_speed(iter/s)": 1.542791 }, { "acc": 0.96463509, "epoch": 8.701663932505273, "grad_norm": 6.6374711990356445, "learning_rate": 9.58506345803663e-06, "loss": 0.15359219, "memory(GiB)": 13.7, "step": 18565, "train_speed(iter/s)": 1.54279 }, { "acc": 0.9589798, "epoch": 8.704007499414109, "grad_norm": 5.8083696365356445, "learning_rate": 9.584754228745053e-06, "loss": 0.23150952, "memory(GiB)": 13.7, "step": 18570, "train_speed(iter/s)": 1.542793 }, { "acc": 0.9831356, "epoch": 8.706351066322943, "grad_norm": 2.285231113433838, "learning_rate": 9.584444889263065e-06, "loss": 0.07795817, "memory(GiB)": 13.7, "step": 18575, "train_speed(iter/s)": 1.542796 }, { "acc": 0.97493553, "epoch": 8.70869463323178, "grad_norm": 4.378653049468994, "learning_rate": 9.584135439598102e-06, "loss": 0.16002929, "memory(GiB)": 13.7, "step": 18580, "train_speed(iter/s)": 1.54279 }, { "acc": 0.98325386, "epoch": 8.711038200140614, "grad_norm": 0.16946832835674286, "learning_rate": 9.5838258797576e-06, "loss": 0.11587012, "memory(GiB)": 13.7, "step": 18585, "train_speed(iter/s)": 1.542815 }, { "acc": 0.97180023, "epoch": 8.71338176704945, "grad_norm": 5.465851783752441, "learning_rate": 9.583516209749003e-06, "loss": 0.10258534, "memory(GiB)": 13.7, "step": 18590, "train_speed(iter/s)": 1.542817 }, { "acc": 0.96758938, "epoch": 8.715725333958284, "grad_norm": 3.4900765419006348, "learning_rate": 9.583206429579754e-06, "loss": 0.2818012, "memory(GiB)": 13.7, "step": 18595, "train_speed(iter/s)": 1.54282 }, { "acc": 0.95257492, "epoch": 8.71806890086712, "grad_norm": 14.10095500946045, "learning_rate": 9.582896539257295e-06, "loss": 0.30066519, "memory(GiB)": 13.7, "step": 18600, "train_speed(iter/s)": 1.542829 }, { "acc": 0.96341639, "epoch": 8.720412467775954, "grad_norm": 5.282632350921631, "learning_rate": 9.582586538789078e-06, "loss": 0.25262761, "memory(GiB)": 13.7, "step": 18605, "train_speed(iter/s)": 1.542828 }, { "acc": 0.96570816, "epoch": 8.72275603468479, "grad_norm": 4.820457935333252, "learning_rate": 9.582276428182554e-06, "loss": 0.1968822, "memory(GiB)": 13.7, "step": 18610, "train_speed(iter/s)": 1.542846 }, { "acc": 0.96454449, "epoch": 8.725099601593625, "grad_norm": 8.867218971252441, "learning_rate": 9.581966207445177e-06, "loss": 0.15076714, "memory(GiB)": 13.7, "step": 18615, "train_speed(iter/s)": 1.542855 }, { "acc": 0.97625008, "epoch": 8.727443168502461, "grad_norm": 44.498435974121094, "learning_rate": 9.581655876584404e-06, "loss": 0.15966532, "memory(GiB)": 13.7, "step": 18620, "train_speed(iter/s)": 1.54285 }, { "acc": 0.98343086, "epoch": 8.729786735411295, "grad_norm": 3.0619287490844727, "learning_rate": 9.581345435607693e-06, "loss": 0.1346154, "memory(GiB)": 13.7, "step": 18625, "train_speed(iter/s)": 1.54284 }, { "acc": 0.96672249, "epoch": 8.732130302320131, "grad_norm": 8.17570972442627, "learning_rate": 9.581034884522507e-06, "loss": 0.13014524, "memory(GiB)": 13.7, "step": 18630, "train_speed(iter/s)": 1.542842 }, { "acc": 0.95741577, "epoch": 8.734473869228967, "grad_norm": 27.267200469970703, "learning_rate": 9.580724223336312e-06, "loss": 0.22647231, "memory(GiB)": 13.7, "step": 18635, "train_speed(iter/s)": 1.542843 }, { "acc": 0.96979618, "epoch": 8.736817436137802, "grad_norm": 8.740602493286133, "learning_rate": 9.580413452056572e-06, "loss": 0.13384855, "memory(GiB)": 13.7, "step": 18640, "train_speed(iter/s)": 1.542853 }, { "acc": 0.96652241, "epoch": 8.739161003046638, "grad_norm": 3.4515798091888428, "learning_rate": 9.58010257069076e-06, "loss": 0.18608553, "memory(GiB)": 13.7, "step": 18645, "train_speed(iter/s)": 1.542879 }, { "acc": 0.96794548, "epoch": 8.741504569955472, "grad_norm": 3.955540895462036, "learning_rate": 9.579791579246349e-06, "loss": 0.17086062, "memory(GiB)": 13.7, "step": 18650, "train_speed(iter/s)": 1.542876 }, { "acc": 0.97378473, "epoch": 8.743848136864308, "grad_norm": 3.510512590408325, "learning_rate": 9.57948047773081e-06, "loss": 0.10634048, "memory(GiB)": 13.7, "step": 18655, "train_speed(iter/s)": 1.542884 }, { "acc": 0.99293652, "epoch": 8.746191703773142, "grad_norm": 5.627201080322266, "learning_rate": 9.579169266151625e-06, "loss": 0.09825915, "memory(GiB)": 13.7, "step": 18660, "train_speed(iter/s)": 1.542917 }, { "acc": 0.96164074, "epoch": 8.748535270681979, "grad_norm": 6.734591484069824, "learning_rate": 9.578857944516271e-06, "loss": 0.23121619, "memory(GiB)": 13.7, "step": 18665, "train_speed(iter/s)": 1.542938 }, { "acc": 0.98509941, "epoch": 8.750878837590813, "grad_norm": 0.523788571357727, "learning_rate": 9.578546512832234e-06, "loss": 0.07013894, "memory(GiB)": 13.7, "step": 18670, "train_speed(iter/s)": 1.542944 }, { "acc": 0.97822781, "epoch": 8.753222404499649, "grad_norm": 6.037862777709961, "learning_rate": 9.578234971107e-06, "loss": 0.11937485, "memory(GiB)": 13.7, "step": 18675, "train_speed(iter/s)": 1.542943 }, { "acc": 0.9725893, "epoch": 8.755565971408483, "grad_norm": 4.760229110717773, "learning_rate": 9.577923319348057e-06, "loss": 0.16285903, "memory(GiB)": 13.7, "step": 18680, "train_speed(iter/s)": 1.542941 }, { "acc": 0.95564098, "epoch": 8.75790953831732, "grad_norm": 5.896570682525635, "learning_rate": 9.577611557562893e-06, "loss": 0.24729738, "memory(GiB)": 13.7, "step": 18685, "train_speed(iter/s)": 1.542961 }, { "acc": 0.9871726, "epoch": 8.760253105226154, "grad_norm": 6.419198513031006, "learning_rate": 9.577299685759005e-06, "loss": 0.08427302, "memory(GiB)": 13.7, "step": 18690, "train_speed(iter/s)": 1.542981 }, { "acc": 0.96875, "epoch": 8.76259667213499, "grad_norm": 4.8691487312316895, "learning_rate": 9.57698770394389e-06, "loss": 0.11564834, "memory(GiB)": 13.7, "step": 18695, "train_speed(iter/s)": 1.542974 }, { "acc": 0.95915852, "epoch": 8.764940239043824, "grad_norm": 5.2480363845825195, "learning_rate": 9.576675612125043e-06, "loss": 0.31570063, "memory(GiB)": 13.7, "step": 18700, "train_speed(iter/s)": 1.542999 }, { "acc": 0.97371111, "epoch": 8.76728380595266, "grad_norm": 11.9367036819458, "learning_rate": 9.576363410309969e-06, "loss": 0.16332334, "memory(GiB)": 13.7, "step": 18705, "train_speed(iter/s)": 1.543021 }, { "acc": 0.97065163, "epoch": 8.769627372861494, "grad_norm": 3.9824016094207764, "learning_rate": 9.576051098506172e-06, "loss": 0.21960373, "memory(GiB)": 13.7, "step": 18710, "train_speed(iter/s)": 1.543006 }, { "acc": 0.96854172, "epoch": 8.77197093977033, "grad_norm": 8.396676063537598, "learning_rate": 9.575738676721159e-06, "loss": 0.19908917, "memory(GiB)": 13.7, "step": 18715, "train_speed(iter/s)": 1.543011 }, { "acc": 0.98490524, "epoch": 8.774314506679165, "grad_norm": 0.49320438504219055, "learning_rate": 9.575426144962438e-06, "loss": 0.06510403, "memory(GiB)": 13.7, "step": 18720, "train_speed(iter/s)": 1.543007 }, { "acc": 0.96776247, "epoch": 8.776658073588, "grad_norm": 5.856011390686035, "learning_rate": 9.575113503237522e-06, "loss": 0.16835132, "memory(GiB)": 13.7, "step": 18725, "train_speed(iter/s)": 1.54301 }, { "acc": 0.97066727, "epoch": 8.779001640496837, "grad_norm": 6.64907169342041, "learning_rate": 9.574800751553927e-06, "loss": 0.10858917, "memory(GiB)": 13.7, "step": 18730, "train_speed(iter/s)": 1.543003 }, { "acc": 0.96812496, "epoch": 8.781345207405671, "grad_norm": 4.915771007537842, "learning_rate": 9.574487889919168e-06, "loss": 0.22969809, "memory(GiB)": 13.7, "step": 18735, "train_speed(iter/s)": 1.543025 }, { "acc": 0.96357718, "epoch": 8.783688774314507, "grad_norm": 5.386413097381592, "learning_rate": 9.57417491834077e-06, "loss": 0.23869178, "memory(GiB)": 13.7, "step": 18740, "train_speed(iter/s)": 1.543033 }, { "acc": 0.97720909, "epoch": 8.786032341223342, "grad_norm": 20.88081932067871, "learning_rate": 9.573861836826251e-06, "loss": 0.14910095, "memory(GiB)": 13.7, "step": 18745, "train_speed(iter/s)": 1.543041 }, { "acc": 0.98322306, "epoch": 8.788375908132178, "grad_norm": 0.04069098085165024, "learning_rate": 9.573548645383136e-06, "loss": 0.08999504, "memory(GiB)": 13.7, "step": 18750, "train_speed(iter/s)": 1.543051 }, { "acc": 0.97739048, "epoch": 8.790719475041012, "grad_norm": 1.6261290311813354, "learning_rate": 9.573235344018958e-06, "loss": 0.11108501, "memory(GiB)": 13.7, "step": 18755, "train_speed(iter/s)": 1.543041 }, { "acc": 0.97744045, "epoch": 8.793063041949848, "grad_norm": 7.722774505615234, "learning_rate": 9.572921932741243e-06, "loss": 0.05400994, "memory(GiB)": 13.7, "step": 18760, "train_speed(iter/s)": 1.543057 }, { "acc": 0.98516636, "epoch": 8.795406608858682, "grad_norm": 23.81444549560547, "learning_rate": 9.572608411557528e-06, "loss": 0.10485213, "memory(GiB)": 13.7, "step": 18765, "train_speed(iter/s)": 1.543052 }, { "acc": 0.96531744, "epoch": 8.797750175767518, "grad_norm": 5.414401054382324, "learning_rate": 9.572294780475348e-06, "loss": 0.17116374, "memory(GiB)": 13.7, "step": 18770, "train_speed(iter/s)": 1.543038 }, { "acc": 0.97781372, "epoch": 8.800093742676353, "grad_norm": 2.1282339096069336, "learning_rate": 9.57198103950224e-06, "loss": 0.12134414, "memory(GiB)": 13.7, "step": 18775, "train_speed(iter/s)": 1.543044 }, { "acc": 0.97660446, "epoch": 8.802437309585189, "grad_norm": 9.214323997497559, "learning_rate": 9.571667188645745e-06, "loss": 0.10600386, "memory(GiB)": 13.7, "step": 18780, "train_speed(iter/s)": 1.543052 }, { "acc": 0.95955019, "epoch": 8.804780876494023, "grad_norm": 7.6390180587768555, "learning_rate": 9.57135322791341e-06, "loss": 0.21695569, "memory(GiB)": 13.7, "step": 18785, "train_speed(iter/s)": 1.543084 }, { "acc": 0.96517315, "epoch": 8.80712444340286, "grad_norm": 6.870787143707275, "learning_rate": 9.571039157312779e-06, "loss": 0.2293992, "memory(GiB)": 13.7, "step": 18790, "train_speed(iter/s)": 1.543074 }, { "acc": 0.96701097, "epoch": 8.809468010311694, "grad_norm": 4.819853782653809, "learning_rate": 9.5707249768514e-06, "loss": 0.19526322, "memory(GiB)": 13.7, "step": 18795, "train_speed(iter/s)": 1.543074 }, { "acc": 0.9746727, "epoch": 8.81181157722053, "grad_norm": 2.114121198654175, "learning_rate": 9.57041068653683e-06, "loss": 0.14063435, "memory(GiB)": 13.7, "step": 18800, "train_speed(iter/s)": 1.543046 }, { "acc": 0.98142862, "epoch": 8.814155144129366, "grad_norm": 8.275214195251465, "learning_rate": 9.57009628637662e-06, "loss": 0.07691849, "memory(GiB)": 13.7, "step": 18805, "train_speed(iter/s)": 1.543048 }, { "acc": 0.96806545, "epoch": 8.8164987110382, "grad_norm": 0.22555892169475555, "learning_rate": 9.569781776378327e-06, "loss": 0.19420658, "memory(GiB)": 13.7, "step": 18810, "train_speed(iter/s)": 1.543057 }, { "acc": 0.9833334, "epoch": 8.818842277947036, "grad_norm": 10.502608299255371, "learning_rate": 9.569467156549512e-06, "loss": 0.06652109, "memory(GiB)": 13.7, "step": 18815, "train_speed(iter/s)": 1.543064 }, { "acc": 0.97863102, "epoch": 8.82118584485587, "grad_norm": 3.7574377059936523, "learning_rate": 9.569152426897736e-06, "loss": 0.08255463, "memory(GiB)": 13.7, "step": 18820, "train_speed(iter/s)": 1.543075 }, { "acc": 0.98388891, "epoch": 8.823529411764707, "grad_norm": 5.979493618011475, "learning_rate": 9.568837587430564e-06, "loss": 0.08596317, "memory(GiB)": 13.7, "step": 18825, "train_speed(iter/s)": 1.543085 }, { "acc": 0.97434435, "epoch": 8.82587297867354, "grad_norm": 4.9432549476623535, "learning_rate": 9.568522638155566e-06, "loss": 0.15581974, "memory(GiB)": 13.7, "step": 18830, "train_speed(iter/s)": 1.543084 }, { "acc": 0.97020292, "epoch": 8.828216545582377, "grad_norm": 4.300993919372559, "learning_rate": 9.56820757908031e-06, "loss": 0.10319548, "memory(GiB)": 13.7, "step": 18835, "train_speed(iter/s)": 1.543094 }, { "acc": 0.98897972, "epoch": 8.830560112491211, "grad_norm": 4.085690498352051, "learning_rate": 9.567892410212371e-06, "loss": 0.06572416, "memory(GiB)": 13.7, "step": 18840, "train_speed(iter/s)": 1.54312 }, { "acc": 0.971875, "epoch": 8.832903679400047, "grad_norm": 0.09820837527513504, "learning_rate": 9.567577131559323e-06, "loss": 0.12789761, "memory(GiB)": 13.7, "step": 18845, "train_speed(iter/s)": 1.54311 }, { "acc": 0.98285713, "epoch": 8.835247246308882, "grad_norm": 6.763129234313965, "learning_rate": 9.567261743128743e-06, "loss": 0.12811232, "memory(GiB)": 13.7, "step": 18850, "train_speed(iter/s)": 1.543104 }, { "acc": 0.95753956, "epoch": 8.837590813217718, "grad_norm": 7.053033828735352, "learning_rate": 9.566946244928215e-06, "loss": 0.18834193, "memory(GiB)": 13.7, "step": 18855, "train_speed(iter/s)": 1.543136 }, { "acc": 0.97610121, "epoch": 8.839934380126552, "grad_norm": 0.7420010566711426, "learning_rate": 9.566630636965322e-06, "loss": 0.13795377, "memory(GiB)": 13.7, "step": 18860, "train_speed(iter/s)": 1.543151 }, { "acc": 0.9777029, "epoch": 8.842277947035388, "grad_norm": 4.514733791351318, "learning_rate": 9.56631491924765e-06, "loss": 0.16991837, "memory(GiB)": 13.7, "step": 18865, "train_speed(iter/s)": 1.543152 }, { "acc": 0.97930059, "epoch": 8.844621513944222, "grad_norm": 3.7805445194244385, "learning_rate": 9.565999091782784e-06, "loss": 0.06211268, "memory(GiB)": 13.7, "step": 18870, "train_speed(iter/s)": 1.543168 }, { "acc": 0.96935101, "epoch": 8.846965080853058, "grad_norm": 3.537515640258789, "learning_rate": 9.565683154578323e-06, "loss": 0.14909208, "memory(GiB)": 13.7, "step": 18875, "train_speed(iter/s)": 1.543173 }, { "acc": 0.96095371, "epoch": 8.849308647761895, "grad_norm": 5.348008632659912, "learning_rate": 9.565367107641855e-06, "loss": 0.15236182, "memory(GiB)": 13.7, "step": 18880, "train_speed(iter/s)": 1.543189 }, { "acc": 0.96802578, "epoch": 8.851652214670729, "grad_norm": 6.09695291519165, "learning_rate": 9.565050950980977e-06, "loss": 0.18501337, "memory(GiB)": 13.7, "step": 18885, "train_speed(iter/s)": 1.54321 }, { "acc": 0.96782188, "epoch": 8.853995781579565, "grad_norm": 7.616329193115234, "learning_rate": 9.56473468460329e-06, "loss": 0.18335009, "memory(GiB)": 13.7, "step": 18890, "train_speed(iter/s)": 1.543216 }, { "acc": 0.9791667, "epoch": 8.8563393484884, "grad_norm": 2.8065319061279297, "learning_rate": 9.564418308516398e-06, "loss": 0.09075569, "memory(GiB)": 13.7, "step": 18895, "train_speed(iter/s)": 1.543225 }, { "acc": 0.94901075, "epoch": 8.858682915397235, "grad_norm": 7.668127059936523, "learning_rate": 9.5641018227279e-06, "loss": 0.24749999, "memory(GiB)": 13.7, "step": 18900, "train_speed(iter/s)": 1.543213 }, { "acc": 0.98664837, "epoch": 8.86102648230607, "grad_norm": 6.993311882019043, "learning_rate": 9.56378522724541e-06, "loss": 0.09609696, "memory(GiB)": 13.7, "step": 18905, "train_speed(iter/s)": 1.543248 }, { "acc": 0.95777254, "epoch": 8.863370049214906, "grad_norm": 2.884394884109497, "learning_rate": 9.563468522076533e-06, "loss": 0.28733871, "memory(GiB)": 13.7, "step": 18910, "train_speed(iter/s)": 1.543255 }, { "acc": 0.97348671, "epoch": 8.86571361612374, "grad_norm": 7.170350074768066, "learning_rate": 9.563151707228883e-06, "loss": 0.18433061, "memory(GiB)": 13.7, "step": 18915, "train_speed(iter/s)": 1.543259 }, { "acc": 0.96634083, "epoch": 8.868057183032576, "grad_norm": 82.20449829101562, "learning_rate": 9.562834782710077e-06, "loss": 0.21918364, "memory(GiB)": 13.7, "step": 18920, "train_speed(iter/s)": 1.543289 }, { "acc": 0.96626263, "epoch": 8.87040074994141, "grad_norm": 6.469664573669434, "learning_rate": 9.56251774852773e-06, "loss": 0.18107016, "memory(GiB)": 13.7, "step": 18925, "train_speed(iter/s)": 1.543289 }, { "acc": 0.95979176, "epoch": 8.872744316850246, "grad_norm": 6.604055881500244, "learning_rate": 9.562200604689465e-06, "loss": 0.16360298, "memory(GiB)": 13.7, "step": 18930, "train_speed(iter/s)": 1.543306 }, { "acc": 0.97157192, "epoch": 8.87508788375908, "grad_norm": 3.6606359481811523, "learning_rate": 9.561883351202904e-06, "loss": 0.12202531, "memory(GiB)": 13.7, "step": 18935, "train_speed(iter/s)": 1.543307 }, { "acc": 0.98812504, "epoch": 8.877431450667917, "grad_norm": 4.458623886108398, "learning_rate": 9.561565988075672e-06, "loss": 0.07125129, "memory(GiB)": 13.7, "step": 18940, "train_speed(iter/s)": 1.543322 }, { "acc": 0.97701387, "epoch": 8.879775017576751, "grad_norm": 6.682061195373535, "learning_rate": 9.561248515315396e-06, "loss": 0.13324122, "memory(GiB)": 13.7, "step": 18945, "train_speed(iter/s)": 1.543338 }, { "acc": 0.97466602, "epoch": 8.882118584485587, "grad_norm": 3.5901529788970947, "learning_rate": 9.560930932929712e-06, "loss": 0.13961667, "memory(GiB)": 13.7, "step": 18950, "train_speed(iter/s)": 1.543361 }, { "acc": 0.98323412, "epoch": 8.884462151394422, "grad_norm": 3.4992799758911133, "learning_rate": 9.560613240926248e-06, "loss": 0.07963706, "memory(GiB)": 13.7, "step": 18955, "train_speed(iter/s)": 1.54339 }, { "acc": 0.98695507, "epoch": 8.886805718303258, "grad_norm": 5.836501598358154, "learning_rate": 9.560295439312644e-06, "loss": 0.06344454, "memory(GiB)": 13.7, "step": 18960, "train_speed(iter/s)": 1.543398 }, { "acc": 0.9692708, "epoch": 8.889149285212092, "grad_norm": 8.156328201293945, "learning_rate": 9.559977528096539e-06, "loss": 0.15914001, "memory(GiB)": 13.7, "step": 18965, "train_speed(iter/s)": 1.543397 }, { "acc": 0.97250948, "epoch": 8.891492852120928, "grad_norm": 7.553549289703369, "learning_rate": 9.55965950728557e-06, "loss": 0.25261188, "memory(GiB)": 13.7, "step": 18970, "train_speed(iter/s)": 1.543388 }, { "acc": 0.96817284, "epoch": 8.893836419029764, "grad_norm": 16.484699249267578, "learning_rate": 9.559341376887388e-06, "loss": 0.26021047, "memory(GiB)": 13.7, "step": 18975, "train_speed(iter/s)": 1.543396 }, { "acc": 0.98086815, "epoch": 8.896179985938598, "grad_norm": 2.3862173557281494, "learning_rate": 9.559023136909632e-06, "loss": 0.10870171, "memory(GiB)": 13.7, "step": 18980, "train_speed(iter/s)": 1.543394 }, { "acc": 0.99040184, "epoch": 8.898523552847434, "grad_norm": 17.864919662475586, "learning_rate": 9.55870478735996e-06, "loss": 0.07390597, "memory(GiB)": 13.7, "step": 18985, "train_speed(iter/s)": 1.543411 }, { "acc": 0.97404766, "epoch": 8.900867119756269, "grad_norm": 13.481430053710938, "learning_rate": 9.558386328246018e-06, "loss": 0.14876143, "memory(GiB)": 13.7, "step": 18990, "train_speed(iter/s)": 1.543401 }, { "acc": 0.97536287, "epoch": 8.903210686665105, "grad_norm": 7.292857646942139, "learning_rate": 9.558067759575463e-06, "loss": 0.13744814, "memory(GiB)": 13.7, "step": 18995, "train_speed(iter/s)": 1.543391 }, { "acc": 0.97297621, "epoch": 8.90555425357394, "grad_norm": 33.640960693359375, "learning_rate": 9.55774908135595e-06, "loss": 0.12740036, "memory(GiB)": 13.7, "step": 19000, "train_speed(iter/s)": 1.543398 }, { "acc": 0.97505264, "epoch": 8.907897820482775, "grad_norm": 4.483416557312012, "learning_rate": 9.557430293595143e-06, "loss": 0.27843804, "memory(GiB)": 13.7, "step": 19005, "train_speed(iter/s)": 1.543438 }, { "acc": 0.97529764, "epoch": 8.91024138739161, "grad_norm": 1.5787811279296875, "learning_rate": 9.5571113963007e-06, "loss": 0.13339492, "memory(GiB)": 13.7, "step": 19010, "train_speed(iter/s)": 1.543447 }, { "acc": 0.98093758, "epoch": 8.912584954300446, "grad_norm": 5.850263595581055, "learning_rate": 9.556792389480292e-06, "loss": 0.16981132, "memory(GiB)": 13.7, "step": 19015, "train_speed(iter/s)": 1.543438 }, { "acc": 0.97654638, "epoch": 8.91492852120928, "grad_norm": 2.0011394023895264, "learning_rate": 9.556473273141581e-06, "loss": 0.12524781, "memory(GiB)": 13.7, "step": 19020, "train_speed(iter/s)": 1.543427 }, { "acc": 0.98099899, "epoch": 8.917272088118116, "grad_norm": 5.019211292266846, "learning_rate": 9.556154047292239e-06, "loss": 0.09123395, "memory(GiB)": 13.7, "step": 19025, "train_speed(iter/s)": 1.543433 }, { "acc": 0.95957794, "epoch": 8.91961565502695, "grad_norm": 6.973141193389893, "learning_rate": 9.555834711939943e-06, "loss": 0.15118086, "memory(GiB)": 13.7, "step": 19030, "train_speed(iter/s)": 1.543446 }, { "acc": 0.97591343, "epoch": 8.921959221935786, "grad_norm": 4.32768440246582, "learning_rate": 9.555515267092366e-06, "loss": 0.12112356, "memory(GiB)": 13.7, "step": 19035, "train_speed(iter/s)": 1.543466 }, { "acc": 0.95527306, "epoch": 8.92430278884462, "grad_norm": 6.3795576095581055, "learning_rate": 9.555195712757184e-06, "loss": 0.21201153, "memory(GiB)": 13.7, "step": 19040, "train_speed(iter/s)": 1.543462 }, { "acc": 0.979072, "epoch": 8.926646355753457, "grad_norm": 7.969493389129639, "learning_rate": 9.554876048942082e-06, "loss": 0.08397217, "memory(GiB)": 13.7, "step": 19045, "train_speed(iter/s)": 1.54346 }, { "acc": 0.97196426, "epoch": 8.928989922662293, "grad_norm": 6.233604907989502, "learning_rate": 9.554556275654741e-06, "loss": 0.15879431, "memory(GiB)": 13.7, "step": 19050, "train_speed(iter/s)": 1.543464 }, { "acc": 0.96848221, "epoch": 8.931333489571127, "grad_norm": 4.121875762939453, "learning_rate": 9.554236392902852e-06, "loss": 0.11806104, "memory(GiB)": 13.7, "step": 19055, "train_speed(iter/s)": 1.543475 }, { "acc": 0.98251896, "epoch": 8.933677056479963, "grad_norm": 0.12290371209383011, "learning_rate": 9.553916400694096e-06, "loss": 0.08368401, "memory(GiB)": 13.7, "step": 19060, "train_speed(iter/s)": 1.543484 }, { "acc": 0.97231159, "epoch": 8.936020623388798, "grad_norm": 8.590888977050781, "learning_rate": 9.553596299036171e-06, "loss": 0.1972133, "memory(GiB)": 13.7, "step": 19065, "train_speed(iter/s)": 1.54349 }, { "acc": 0.97723141, "epoch": 8.938364190297634, "grad_norm": 3.8698511123657227, "learning_rate": 9.553276087936768e-06, "loss": 0.11185791, "memory(GiB)": 13.7, "step": 19070, "train_speed(iter/s)": 1.54349 }, { "acc": 0.98770294, "epoch": 8.940707757206468, "grad_norm": 2.917992353439331, "learning_rate": 9.552955767403585e-06, "loss": 0.07503729, "memory(GiB)": 13.7, "step": 19075, "train_speed(iter/s)": 1.543492 }, { "acc": 0.96264439, "epoch": 8.943051324115304, "grad_norm": 5.945765018463135, "learning_rate": 9.552635337444321e-06, "loss": 0.12760527, "memory(GiB)": 13.7, "step": 19080, "train_speed(iter/s)": 1.543495 }, { "acc": 0.97524948, "epoch": 8.945394891024138, "grad_norm": 3.752086877822876, "learning_rate": 9.552314798066679e-06, "loss": 0.10794389, "memory(GiB)": 13.7, "step": 19085, "train_speed(iter/s)": 1.543503 }, { "acc": 0.97482643, "epoch": 8.947738457932974, "grad_norm": 9.167360305786133, "learning_rate": 9.551994149278364e-06, "loss": 0.13407831, "memory(GiB)": 13.7, "step": 19090, "train_speed(iter/s)": 1.543504 }, { "acc": 0.97860699, "epoch": 8.950082024841809, "grad_norm": 6.155630111694336, "learning_rate": 9.551673391087082e-06, "loss": 0.15646256, "memory(GiB)": 13.7, "step": 19095, "train_speed(iter/s)": 1.543514 }, { "acc": 0.96956129, "epoch": 8.952425591750645, "grad_norm": 4.777044296264648, "learning_rate": 9.551352523500543e-06, "loss": 0.18647401, "memory(GiB)": 13.7, "step": 19100, "train_speed(iter/s)": 1.543516 }, { "acc": 0.96932545, "epoch": 8.954769158659479, "grad_norm": 8.662226676940918, "learning_rate": 9.55103154652646e-06, "loss": 0.13953145, "memory(GiB)": 13.7, "step": 19105, "train_speed(iter/s)": 1.543527 }, { "acc": 0.97440386, "epoch": 8.957112725568315, "grad_norm": 8.14672565460205, "learning_rate": 9.550710460172548e-06, "loss": 0.1154842, "memory(GiB)": 13.7, "step": 19110, "train_speed(iter/s)": 1.543518 }, { "acc": 0.97541666, "epoch": 8.95945629247715, "grad_norm": 0.7013137936592102, "learning_rate": 9.550389264446525e-06, "loss": 0.18727162, "memory(GiB)": 13.7, "step": 19115, "train_speed(iter/s)": 1.543511 }, { "acc": 0.98079357, "epoch": 8.961799859385986, "grad_norm": 5.053638935089111, "learning_rate": 9.550067959356113e-06, "loss": 0.12014601, "memory(GiB)": 13.7, "step": 19120, "train_speed(iter/s)": 1.543535 }, { "acc": 0.96149178, "epoch": 8.964143426294822, "grad_norm": 9.072854042053223, "learning_rate": 9.549746544909033e-06, "loss": 0.17805567, "memory(GiB)": 13.7, "step": 19125, "train_speed(iter/s)": 1.543526 }, { "acc": 0.98722897, "epoch": 8.966486993203656, "grad_norm": 1.0813578367233276, "learning_rate": 9.549425021113012e-06, "loss": 0.06257493, "memory(GiB)": 13.7, "step": 19130, "train_speed(iter/s)": 1.543526 }, { "acc": 0.95567398, "epoch": 8.968830560112492, "grad_norm": 4.931115627288818, "learning_rate": 9.549103387975775e-06, "loss": 0.22710717, "memory(GiB)": 13.7, "step": 19135, "train_speed(iter/s)": 1.54354 }, { "acc": 0.95245037, "epoch": 8.971174127021326, "grad_norm": 12.336677551269531, "learning_rate": 9.548781645505058e-06, "loss": 0.18089583, "memory(GiB)": 13.7, "step": 19140, "train_speed(iter/s)": 1.543555 }, { "acc": 0.97342472, "epoch": 8.973517693930162, "grad_norm": 9.514662742614746, "learning_rate": 9.548459793708592e-06, "loss": 0.16539736, "memory(GiB)": 13.7, "step": 19145, "train_speed(iter/s)": 1.543582 }, { "acc": 0.97342262, "epoch": 8.975861260838997, "grad_norm": 4.0336503982543945, "learning_rate": 9.548137832594116e-06, "loss": 0.12182709, "memory(GiB)": 13.7, "step": 19150, "train_speed(iter/s)": 1.543598 }, { "acc": 0.95047073, "epoch": 8.978204827747833, "grad_norm": 20.071441650390625, "learning_rate": 9.547815762169365e-06, "loss": 0.24012845, "memory(GiB)": 13.7, "step": 19155, "train_speed(iter/s)": 1.543614 }, { "acc": 0.97684183, "epoch": 8.980548394656667, "grad_norm": 6.229187965393066, "learning_rate": 9.547493582442081e-06, "loss": 0.13679738, "memory(GiB)": 13.7, "step": 19160, "train_speed(iter/s)": 1.543622 }, { "acc": 0.97424679, "epoch": 8.982891961565503, "grad_norm": 6.04890775680542, "learning_rate": 9.54717129342001e-06, "loss": 0.11593816, "memory(GiB)": 13.7, "step": 19165, "train_speed(iter/s)": 1.543628 }, { "acc": 0.97722225, "epoch": 8.985235528474337, "grad_norm": 3.182572603225708, "learning_rate": 9.5468488951109e-06, "loss": 0.12656275, "memory(GiB)": 13.7, "step": 19170, "train_speed(iter/s)": 1.543617 }, { "acc": 0.9443717, "epoch": 8.987579095383174, "grad_norm": 12.988298416137695, "learning_rate": 9.546526387522497e-06, "loss": 0.34466696, "memory(GiB)": 13.7, "step": 19175, "train_speed(iter/s)": 1.54362 }, { "acc": 0.98336315, "epoch": 8.989922662292008, "grad_norm": 7.305063724517822, "learning_rate": 9.546203770662556e-06, "loss": 0.1427645, "memory(GiB)": 13.7, "step": 19180, "train_speed(iter/s)": 1.543619 }, { "acc": 0.96652775, "epoch": 8.992266229200844, "grad_norm": 0.7983732223510742, "learning_rate": 9.545881044538828e-06, "loss": 0.16576856, "memory(GiB)": 13.7, "step": 19185, "train_speed(iter/s)": 1.543617 }, { "acc": 0.96895828, "epoch": 8.994609796109678, "grad_norm": 5.745577335357666, "learning_rate": 9.545558209159073e-06, "loss": 0.17714071, "memory(GiB)": 13.7, "step": 19190, "train_speed(iter/s)": 1.543632 }, { "acc": 0.96600647, "epoch": 8.996953363018514, "grad_norm": 7.310431003570557, "learning_rate": 9.54523526453105e-06, "loss": 0.17844946, "memory(GiB)": 13.7, "step": 19195, "train_speed(iter/s)": 1.54364 }, { "acc": 0.96871281, "epoch": 8.999296929927349, "grad_norm": 4.745477199554443, "learning_rate": 9.544912210662522e-06, "loss": 0.14087663, "memory(GiB)": 13.7, "step": 19200, "train_speed(iter/s)": 1.543624 }, { "acc": 0.98006535, "epoch": 9.001640496836185, "grad_norm": 5.880277156829834, "learning_rate": 9.544589047561254e-06, "loss": 0.14410191, "memory(GiB)": 13.7, "step": 19205, "train_speed(iter/s)": 1.543567 }, { "acc": 0.96342258, "epoch": 9.00398406374502, "grad_norm": 5.403604507446289, "learning_rate": 9.544265775235015e-06, "loss": 0.20353251, "memory(GiB)": 13.7, "step": 19210, "train_speed(iter/s)": 1.54358 }, { "acc": 0.97975025, "epoch": 9.006327630653855, "grad_norm": 2.8321783542633057, "learning_rate": 9.543942393691573e-06, "loss": 0.16908404, "memory(GiB)": 13.7, "step": 19215, "train_speed(iter/s)": 1.543582 }, { "acc": 0.978125, "epoch": 9.008671197562691, "grad_norm": 7.4018120765686035, "learning_rate": 9.543618902938703e-06, "loss": 0.10764153, "memory(GiB)": 13.7, "step": 19220, "train_speed(iter/s)": 1.543598 }, { "acc": 0.97993984, "epoch": 9.011014764471525, "grad_norm": 18.70636558532715, "learning_rate": 9.543295302984178e-06, "loss": 0.11595023, "memory(GiB)": 13.7, "step": 19225, "train_speed(iter/s)": 1.543599 }, { "acc": 0.96683912, "epoch": 9.013358331380362, "grad_norm": 1.7893425226211548, "learning_rate": 9.54297159383578e-06, "loss": 0.21294675, "memory(GiB)": 13.7, "step": 19230, "train_speed(iter/s)": 1.543598 }, { "acc": 0.95566473, "epoch": 9.015701898289196, "grad_norm": 4.108914375305176, "learning_rate": 9.542647775501287e-06, "loss": 0.28442304, "memory(GiB)": 13.7, "step": 19235, "train_speed(iter/s)": 1.543587 }, { "acc": 0.96860523, "epoch": 9.018045465198032, "grad_norm": 4.574261665344238, "learning_rate": 9.542323847988485e-06, "loss": 0.18808677, "memory(GiB)": 13.7, "step": 19240, "train_speed(iter/s)": 1.543586 }, { "acc": 0.96388893, "epoch": 9.020389032106866, "grad_norm": 6.941440105438232, "learning_rate": 9.541999811305159e-06, "loss": 0.13094831, "memory(GiB)": 13.7, "step": 19245, "train_speed(iter/s)": 1.543611 }, { "acc": 0.97181664, "epoch": 9.022732599015702, "grad_norm": 6.827473163604736, "learning_rate": 9.541675665459096e-06, "loss": 0.0976126, "memory(GiB)": 13.7, "step": 19250, "train_speed(iter/s)": 1.543609 }, { "acc": 0.97687492, "epoch": 9.025076165924537, "grad_norm": 4.928205966949463, "learning_rate": 9.541351410458088e-06, "loss": 0.17852259, "memory(GiB)": 13.7, "step": 19255, "train_speed(iter/s)": 1.543599 }, { "acc": 0.97974205, "epoch": 9.027419732833373, "grad_norm": 6.953197479248047, "learning_rate": 9.541027046309933e-06, "loss": 0.11770022, "memory(GiB)": 13.7, "step": 19260, "train_speed(iter/s)": 1.543631 }, { "acc": 0.97948866, "epoch": 9.029763299742207, "grad_norm": 3.9487171173095703, "learning_rate": 9.540702573022421e-06, "loss": 0.10629822, "memory(GiB)": 13.7, "step": 19265, "train_speed(iter/s)": 1.543637 }, { "acc": 0.95830364, "epoch": 9.032106866651043, "grad_norm": 5.758917808532715, "learning_rate": 9.540377990603358e-06, "loss": 0.21891379, "memory(GiB)": 13.7, "step": 19270, "train_speed(iter/s)": 1.543644 }, { "acc": 0.97267361, "epoch": 9.034450433559877, "grad_norm": 4.147839069366455, "learning_rate": 9.54005329906054e-06, "loss": 0.08901376, "memory(GiB)": 13.7, "step": 19275, "train_speed(iter/s)": 1.543658 }, { "acc": 0.97661934, "epoch": 9.036794000468714, "grad_norm": 10.48944091796875, "learning_rate": 9.539728498401775e-06, "loss": 0.15256356, "memory(GiB)": 13.7, "step": 19280, "train_speed(iter/s)": 1.543674 }, { "acc": 0.97518425, "epoch": 9.039137567377548, "grad_norm": 1.1133826971054077, "learning_rate": 9.539403588634873e-06, "loss": 0.16293954, "memory(GiB)": 13.7, "step": 19285, "train_speed(iter/s)": 1.543668 }, { "acc": 0.98425598, "epoch": 9.041481134286384, "grad_norm": 4.495029449462891, "learning_rate": 9.539078569767635e-06, "loss": 0.08722304, "memory(GiB)": 13.7, "step": 19290, "train_speed(iter/s)": 1.543678 }, { "acc": 0.9746645, "epoch": 9.04382470119522, "grad_norm": 12.536709785461426, "learning_rate": 9.538753441807883e-06, "loss": 0.17997718, "memory(GiB)": 13.7, "step": 19295, "train_speed(iter/s)": 1.543656 }, { "acc": 0.97424164, "epoch": 9.046168268104054, "grad_norm": 6.51239538192749, "learning_rate": 9.538428204763425e-06, "loss": 0.13101313, "memory(GiB)": 13.7, "step": 19300, "train_speed(iter/s)": 1.543661 }, { "acc": 0.97639437, "epoch": 9.04851183501289, "grad_norm": 12.613271713256836, "learning_rate": 9.538102858642083e-06, "loss": 0.177948, "memory(GiB)": 13.7, "step": 19305, "train_speed(iter/s)": 1.543689 }, { "acc": 0.9565897, "epoch": 9.050855401921725, "grad_norm": 5.32885217666626, "learning_rate": 9.537777403451673e-06, "loss": 0.21585467, "memory(GiB)": 13.7, "step": 19310, "train_speed(iter/s)": 1.543691 }, { "acc": 0.97592258, "epoch": 9.05319896883056, "grad_norm": 6.4165730476379395, "learning_rate": 9.537451839200023e-06, "loss": 0.1415009, "memory(GiB)": 13.7, "step": 19315, "train_speed(iter/s)": 1.543677 }, { "acc": 0.9770833, "epoch": 9.055542535739395, "grad_norm": 8.147539138793945, "learning_rate": 9.537126165894953e-06, "loss": 0.08178157, "memory(GiB)": 13.7, "step": 19320, "train_speed(iter/s)": 1.543684 }, { "acc": 0.97458344, "epoch": 9.057886102648231, "grad_norm": 1.8125042915344238, "learning_rate": 9.536800383544296e-06, "loss": 0.0619408, "memory(GiB)": 13.7, "step": 19325, "train_speed(iter/s)": 1.543693 }, { "acc": 0.98366718, "epoch": 9.060229669557065, "grad_norm": 3.6036317348480225, "learning_rate": 9.53647449215588e-06, "loss": 0.06972965, "memory(GiB)": 13.7, "step": 19330, "train_speed(iter/s)": 1.543699 }, { "acc": 0.95765629, "epoch": 9.062573236465902, "grad_norm": 1.161455512046814, "learning_rate": 9.536148491737539e-06, "loss": 0.19429574, "memory(GiB)": 13.7, "step": 19335, "train_speed(iter/s)": 1.543719 }, { "acc": 0.9710268, "epoch": 9.064916803374736, "grad_norm": 7.844280242919922, "learning_rate": 9.535822382297109e-06, "loss": 0.14889857, "memory(GiB)": 13.7, "step": 19340, "train_speed(iter/s)": 1.543703 }, { "acc": 0.98173218, "epoch": 9.067260370283572, "grad_norm": 1.4192909002304077, "learning_rate": 9.535496163842428e-06, "loss": 0.11772461, "memory(GiB)": 13.7, "step": 19345, "train_speed(iter/s)": 1.543737 }, { "acc": 0.98823862, "epoch": 9.069603937192406, "grad_norm": 4.2613205909729, "learning_rate": 9.53516983638134e-06, "loss": 0.07988847, "memory(GiB)": 13.7, "step": 19350, "train_speed(iter/s)": 1.543758 }, { "acc": 0.97747898, "epoch": 9.071947504101242, "grad_norm": 11.540045738220215, "learning_rate": 9.534843399921685e-06, "loss": 0.16922253, "memory(GiB)": 13.7, "step": 19355, "train_speed(iter/s)": 1.543765 }, { "acc": 0.96915798, "epoch": 9.074291071010077, "grad_norm": 254.70278930664062, "learning_rate": 9.534516854471311e-06, "loss": 0.19562594, "memory(GiB)": 13.7, "step": 19360, "train_speed(iter/s)": 1.543789 }, { "acc": 0.97857151, "epoch": 9.076634637918913, "grad_norm": 4.076645851135254, "learning_rate": 9.534190200038068e-06, "loss": 0.09473487, "memory(GiB)": 13.7, "step": 19365, "train_speed(iter/s)": 1.543793 }, { "acc": 0.98458338, "epoch": 9.078978204827747, "grad_norm": 4.133539199829102, "learning_rate": 9.533863436629805e-06, "loss": 0.12629707, "memory(GiB)": 13.7, "step": 19370, "train_speed(iter/s)": 1.543786 }, { "acc": 0.96768303, "epoch": 9.081321771736583, "grad_norm": 3.787583827972412, "learning_rate": 9.53353656425438e-06, "loss": 0.14124832, "memory(GiB)": 13.7, "step": 19375, "train_speed(iter/s)": 1.543803 }, { "acc": 0.98544741, "epoch": 9.08366533864542, "grad_norm": 2.7494935989379883, "learning_rate": 9.533209582919648e-06, "loss": 0.05752255, "memory(GiB)": 13.7, "step": 19380, "train_speed(iter/s)": 1.54381 }, { "acc": 0.96305256, "epoch": 9.086008905554253, "grad_norm": 21.299678802490234, "learning_rate": 9.53288249263347e-06, "loss": 0.22234254, "memory(GiB)": 13.7, "step": 19385, "train_speed(iter/s)": 1.543823 }, { "acc": 0.98666668, "epoch": 9.08835247246309, "grad_norm": 4.939836025238037, "learning_rate": 9.532555293403705e-06, "loss": 0.13004102, "memory(GiB)": 13.7, "step": 19390, "train_speed(iter/s)": 1.543826 }, { "acc": 0.96654758, "epoch": 9.090696039371924, "grad_norm": 10.170251846313477, "learning_rate": 9.53222798523822e-06, "loss": 0.12642808, "memory(GiB)": 13.7, "step": 19395, "train_speed(iter/s)": 1.543826 }, { "acc": 0.97932472, "epoch": 9.09303960628076, "grad_norm": 8.088216781616211, "learning_rate": 9.531900568144882e-06, "loss": 0.15400596, "memory(GiB)": 13.7, "step": 19400, "train_speed(iter/s)": 1.54383 }, { "acc": 0.97041893, "epoch": 9.095383173189594, "grad_norm": 3.4482436180114746, "learning_rate": 9.531573042131563e-06, "loss": 0.11004028, "memory(GiB)": 13.7, "step": 19405, "train_speed(iter/s)": 1.543837 }, { "acc": 0.95105715, "epoch": 9.09772674009843, "grad_norm": 10.740950584411621, "learning_rate": 9.531245407206132e-06, "loss": 0.24123774, "memory(GiB)": 13.7, "step": 19410, "train_speed(iter/s)": 1.543856 }, { "acc": 0.97716913, "epoch": 9.100070307007265, "grad_norm": 7.43019437789917, "learning_rate": 9.530917663376465e-06, "loss": 0.19388064, "memory(GiB)": 13.7, "step": 19415, "train_speed(iter/s)": 1.543834 }, { "acc": 0.97784729, "epoch": 9.1024138739161, "grad_norm": 9.568093299865723, "learning_rate": 9.530589810650444e-06, "loss": 0.15823522, "memory(GiB)": 13.7, "step": 19420, "train_speed(iter/s)": 1.54384 }, { "acc": 0.97127972, "epoch": 9.104757440824935, "grad_norm": 7.617964267730713, "learning_rate": 9.530261849035944e-06, "loss": 0.14914563, "memory(GiB)": 13.7, "step": 19425, "train_speed(iter/s)": 1.543841 }, { "acc": 0.97484207, "epoch": 9.107101007733771, "grad_norm": 2.5997374057769775, "learning_rate": 9.529933778540852e-06, "loss": 0.14078612, "memory(GiB)": 13.7, "step": 19430, "train_speed(iter/s)": 1.54383 }, { "acc": 0.95682287, "epoch": 9.109444574642605, "grad_norm": 9.454814910888672, "learning_rate": 9.52960559917305e-06, "loss": 0.26431403, "memory(GiB)": 13.7, "step": 19435, "train_speed(iter/s)": 1.54383 }, { "acc": 0.97261724, "epoch": 9.111788141551441, "grad_norm": 9.566763877868652, "learning_rate": 9.52927731094043e-06, "loss": 0.17775127, "memory(GiB)": 13.7, "step": 19440, "train_speed(iter/s)": 1.543836 }, { "acc": 0.9770834, "epoch": 9.114131708460276, "grad_norm": 0.05183006823062897, "learning_rate": 9.528948913850883e-06, "loss": 0.12766237, "memory(GiB)": 13.7, "step": 19445, "train_speed(iter/s)": 1.543835 }, { "acc": 0.97113094, "epoch": 9.116475275369112, "grad_norm": 8.856588363647461, "learning_rate": 9.5286204079123e-06, "loss": 0.13984042, "memory(GiB)": 13.7, "step": 19450, "train_speed(iter/s)": 1.543848 }, { "acc": 0.9692709, "epoch": 9.118818842277946, "grad_norm": 7.768045902252197, "learning_rate": 9.528291793132579e-06, "loss": 0.09582436, "memory(GiB)": 13.7, "step": 19455, "train_speed(iter/s)": 1.543873 }, { "acc": 0.96167622, "epoch": 9.121162409186782, "grad_norm": 23.271024703979492, "learning_rate": 9.527963069519617e-06, "loss": 0.24376855, "memory(GiB)": 13.7, "step": 19460, "train_speed(iter/s)": 1.543885 }, { "acc": 0.9755209, "epoch": 9.123505976095618, "grad_norm": 7.340002059936523, "learning_rate": 9.527634237081318e-06, "loss": 0.12988284, "memory(GiB)": 13.7, "step": 19465, "train_speed(iter/s)": 1.543902 }, { "acc": 0.97019348, "epoch": 9.125849543004453, "grad_norm": 3.154703378677368, "learning_rate": 9.527305295825586e-06, "loss": 0.128895, "memory(GiB)": 13.7, "step": 19470, "train_speed(iter/s)": 1.543926 }, { "acc": 0.97885599, "epoch": 9.128193109913289, "grad_norm": 2.5853283405303955, "learning_rate": 9.526976245760323e-06, "loss": 0.10862865, "memory(GiB)": 13.7, "step": 19475, "train_speed(iter/s)": 1.543941 }, { "acc": 0.98236847, "epoch": 9.130536676822123, "grad_norm": 34.19994354248047, "learning_rate": 9.526647086893444e-06, "loss": 0.14843112, "memory(GiB)": 13.7, "step": 19480, "train_speed(iter/s)": 1.54395 }, { "acc": 0.97337065, "epoch": 9.132880243730959, "grad_norm": 5.4314093589782715, "learning_rate": 9.526317819232857e-06, "loss": 0.10326679, "memory(GiB)": 13.7, "step": 19485, "train_speed(iter/s)": 1.543936 }, { "acc": 0.97936954, "epoch": 9.135223810639793, "grad_norm": 8.019084930419922, "learning_rate": 9.525988442786479e-06, "loss": 0.12868118, "memory(GiB)": 13.7, "step": 19490, "train_speed(iter/s)": 1.543944 }, { "acc": 0.9702652, "epoch": 9.13756737754863, "grad_norm": 6.6408491134643555, "learning_rate": 9.525658957562227e-06, "loss": 0.10864629, "memory(GiB)": 13.7, "step": 19495, "train_speed(iter/s)": 1.543957 }, { "acc": 0.98714485, "epoch": 9.139910944457464, "grad_norm": 5.446089744567871, "learning_rate": 9.52532936356802e-06, "loss": 0.10077643, "memory(GiB)": 13.7, "step": 19500, "train_speed(iter/s)": 1.543955 }, { "acc": 0.96367559, "epoch": 9.1422545113663, "grad_norm": 7.053323268890381, "learning_rate": 9.524999660811778e-06, "loss": 0.22706528, "memory(GiB)": 13.7, "step": 19505, "train_speed(iter/s)": 1.543961 }, { "acc": 0.97092257, "epoch": 9.144598078275134, "grad_norm": 0.06895940750837326, "learning_rate": 9.52466984930143e-06, "loss": 0.18622885, "memory(GiB)": 13.7, "step": 19510, "train_speed(iter/s)": 1.543938 }, { "acc": 0.96811962, "epoch": 9.14694164518397, "grad_norm": 6.5868730545043945, "learning_rate": 9.524339929044903e-06, "loss": 0.12311705, "memory(GiB)": 13.7, "step": 19515, "train_speed(iter/s)": 1.54397 }, { "acc": 0.96712751, "epoch": 9.149285212092805, "grad_norm": 2.139209747314453, "learning_rate": 9.524009900050124e-06, "loss": 0.13414018, "memory(GiB)": 13.7, "step": 19520, "train_speed(iter/s)": 1.543987 }, { "acc": 0.97032242, "epoch": 9.15162877900164, "grad_norm": 4.2667951583862305, "learning_rate": 9.523679762325027e-06, "loss": 0.15663625, "memory(GiB)": 13.7, "step": 19525, "train_speed(iter/s)": 1.543996 }, { "acc": 0.95634737, "epoch": 9.153972345910475, "grad_norm": 5.957104682922363, "learning_rate": 9.52334951587755e-06, "loss": 0.16362386, "memory(GiB)": 13.7, "step": 19530, "train_speed(iter/s)": 1.543994 }, { "acc": 0.98740654, "epoch": 9.156315912819311, "grad_norm": 16.79030990600586, "learning_rate": 9.52301916071563e-06, "loss": 0.07782651, "memory(GiB)": 13.7, "step": 19535, "train_speed(iter/s)": 1.544019 }, { "acc": 0.97038689, "epoch": 9.158659479728147, "grad_norm": 5.729870796203613, "learning_rate": 9.522688696847206e-06, "loss": 0.15749149, "memory(GiB)": 13.7, "step": 19540, "train_speed(iter/s)": 1.544034 }, { "acc": 0.96016493, "epoch": 9.161003046636981, "grad_norm": 4.252513885498047, "learning_rate": 9.522358124280224e-06, "loss": 0.22883668, "memory(GiB)": 13.7, "step": 19545, "train_speed(iter/s)": 1.544034 }, { "acc": 0.97974968, "epoch": 9.163346613545817, "grad_norm": 5.699305057525635, "learning_rate": 9.522027443022626e-06, "loss": 0.08983247, "memory(GiB)": 13.7, "step": 19550, "train_speed(iter/s)": 1.544063 }, { "acc": 0.97770824, "epoch": 9.165690180454652, "grad_norm": 6.683788299560547, "learning_rate": 9.521696653082363e-06, "loss": 0.14887255, "memory(GiB)": 13.7, "step": 19555, "train_speed(iter/s)": 1.544076 }, { "acc": 0.97612181, "epoch": 9.168033747363488, "grad_norm": 3.343799114227295, "learning_rate": 9.521365754467387e-06, "loss": 0.16809895, "memory(GiB)": 13.7, "step": 19560, "train_speed(iter/s)": 1.544076 }, { "acc": 0.97732143, "epoch": 9.170377314272322, "grad_norm": 4.7439398765563965, "learning_rate": 9.52103474718565e-06, "loss": 0.11602798, "memory(GiB)": 13.7, "step": 19565, "train_speed(iter/s)": 1.544093 }, { "acc": 0.97407198, "epoch": 9.172720881181158, "grad_norm": 7.901774883270264, "learning_rate": 9.520703631245107e-06, "loss": 0.14328769, "memory(GiB)": 13.7, "step": 19570, "train_speed(iter/s)": 1.544093 }, { "acc": 0.96386404, "epoch": 9.175064448089993, "grad_norm": 6.419782638549805, "learning_rate": 9.520372406653721e-06, "loss": 0.14989476, "memory(GiB)": 13.7, "step": 19575, "train_speed(iter/s)": 1.544098 }, { "acc": 0.97704868, "epoch": 9.177408014998829, "grad_norm": 8.956696510314941, "learning_rate": 9.52004107341945e-06, "loss": 0.12047163, "memory(GiB)": 13.7, "step": 19580, "train_speed(iter/s)": 1.544106 }, { "acc": 0.95939846, "epoch": 9.179751581907663, "grad_norm": 4.725755214691162, "learning_rate": 9.519709631550258e-06, "loss": 0.22509298, "memory(GiB)": 13.7, "step": 19585, "train_speed(iter/s)": 1.544114 }, { "acc": 0.97833328, "epoch": 9.182095148816499, "grad_norm": 3.3434784412384033, "learning_rate": 9.519378081054116e-06, "loss": 0.12360637, "memory(GiB)": 13.7, "step": 19590, "train_speed(iter/s)": 1.544124 }, { "acc": 0.97885418, "epoch": 9.184438715725333, "grad_norm": 3.7129809856414795, "learning_rate": 9.51904642193899e-06, "loss": 0.10962615, "memory(GiB)": 13.7, "step": 19595, "train_speed(iter/s)": 1.544138 }, { "acc": 0.99192705, "epoch": 9.18678228263417, "grad_norm": 5.635781288146973, "learning_rate": 9.518714654212852e-06, "loss": 0.05200916, "memory(GiB)": 13.7, "step": 19600, "train_speed(iter/s)": 1.544143 }, { "acc": 0.97817163, "epoch": 9.189125849543004, "grad_norm": 1.7207870483398438, "learning_rate": 9.518382777883677e-06, "loss": 0.1369367, "memory(GiB)": 13.7, "step": 19605, "train_speed(iter/s)": 1.544164 }, { "acc": 0.95971956, "epoch": 9.19146941645184, "grad_norm": 3.2306671142578125, "learning_rate": 9.518050792959442e-06, "loss": 0.18605363, "memory(GiB)": 13.7, "step": 19610, "train_speed(iter/s)": 1.544157 }, { "acc": 0.98051548, "epoch": 9.193812983360674, "grad_norm": 0.6745383143424988, "learning_rate": 9.51771869944813e-06, "loss": 0.08276119, "memory(GiB)": 13.7, "step": 19615, "train_speed(iter/s)": 1.544164 }, { "acc": 0.9734375, "epoch": 9.19615655026951, "grad_norm": 8.442132949829102, "learning_rate": 9.517386497357719e-06, "loss": 0.165223, "memory(GiB)": 13.7, "step": 19620, "train_speed(iter/s)": 1.544205 }, { "acc": 0.98842258, "epoch": 9.198500117178346, "grad_norm": 6.63440465927124, "learning_rate": 9.517054186696194e-06, "loss": 0.04288132, "memory(GiB)": 13.7, "step": 19625, "train_speed(iter/s)": 1.544219 }, { "acc": 0.97810183, "epoch": 9.20084368408718, "grad_norm": 3.0988030433654785, "learning_rate": 9.516721767471546e-06, "loss": 0.1207651, "memory(GiB)": 13.7, "step": 19630, "train_speed(iter/s)": 1.544229 }, { "acc": 0.97295456, "epoch": 9.203187250996017, "grad_norm": 3.464245080947876, "learning_rate": 9.516389239691763e-06, "loss": 0.12459786, "memory(GiB)": 13.7, "step": 19635, "train_speed(iter/s)": 1.544232 }, { "acc": 0.95661287, "epoch": 9.205530817904851, "grad_norm": 5.904688835144043, "learning_rate": 9.516056603364841e-06, "loss": 0.26512823, "memory(GiB)": 13.7, "step": 19640, "train_speed(iter/s)": 1.544245 }, { "acc": 0.98242512, "epoch": 9.207874384813687, "grad_norm": 9.490765571594238, "learning_rate": 9.51572385849877e-06, "loss": 0.13857033, "memory(GiB)": 13.7, "step": 19645, "train_speed(iter/s)": 1.544253 }, { "acc": 0.96999998, "epoch": 9.210217951722521, "grad_norm": 2.9623401165008545, "learning_rate": 9.515391005101551e-06, "loss": 0.14931803, "memory(GiB)": 13.7, "step": 19650, "train_speed(iter/s)": 1.54423 }, { "acc": 0.99024925, "epoch": 9.212561518631357, "grad_norm": 5.076980113983154, "learning_rate": 9.515058043181187e-06, "loss": 0.07508259, "memory(GiB)": 13.7, "step": 19655, "train_speed(iter/s)": 1.544223 }, { "acc": 0.96996527, "epoch": 9.214905085540192, "grad_norm": 4.737968444824219, "learning_rate": 9.514724972745677e-06, "loss": 0.17893293, "memory(GiB)": 13.7, "step": 19660, "train_speed(iter/s)": 1.544191 }, { "acc": 0.98008928, "epoch": 9.217248652449028, "grad_norm": 1.0512651205062866, "learning_rate": 9.514391793803032e-06, "loss": 0.10251064, "memory(GiB)": 13.7, "step": 19665, "train_speed(iter/s)": 1.544195 }, { "acc": 0.97733135, "epoch": 9.219592219357862, "grad_norm": 2.22749924659729, "learning_rate": 9.514058506361254e-06, "loss": 0.14437296, "memory(GiB)": 13.7, "step": 19670, "train_speed(iter/s)": 1.54422 }, { "acc": 0.97942095, "epoch": 9.221935786266698, "grad_norm": 3.181903600692749, "learning_rate": 9.51372511042836e-06, "loss": 0.10926311, "memory(GiB)": 13.7, "step": 19675, "train_speed(iter/s)": 1.544217 }, { "acc": 0.96693459, "epoch": 9.224279353175532, "grad_norm": 6.052920341491699, "learning_rate": 9.513391606012361e-06, "loss": 0.14036934, "memory(GiB)": 13.7, "step": 19680, "train_speed(iter/s)": 1.544242 }, { "acc": 0.97853479, "epoch": 9.226622920084369, "grad_norm": 5.646217346191406, "learning_rate": 9.513057993121275e-06, "loss": 0.13720641, "memory(GiB)": 13.7, "step": 19685, "train_speed(iter/s)": 1.544255 }, { "acc": 0.97614584, "epoch": 9.228966486993203, "grad_norm": 8.235145568847656, "learning_rate": 9.512724271763117e-06, "loss": 0.16008272, "memory(GiB)": 13.7, "step": 19690, "train_speed(iter/s)": 1.544282 }, { "acc": 0.96218758, "epoch": 9.231310053902039, "grad_norm": 9.097421646118164, "learning_rate": 9.512390441945913e-06, "loss": 0.19396105, "memory(GiB)": 13.7, "step": 19695, "train_speed(iter/s)": 1.544284 }, { "acc": 0.96937494, "epoch": 9.233653620810873, "grad_norm": 5.714480400085449, "learning_rate": 9.512056503677684e-06, "loss": 0.12095613, "memory(GiB)": 13.7, "step": 19700, "train_speed(iter/s)": 1.544243 }, { "acc": 0.95259132, "epoch": 9.23599718771971, "grad_norm": 13.96630859375, "learning_rate": 9.51172245696646e-06, "loss": 0.22743654, "memory(GiB)": 13.7, "step": 19705, "train_speed(iter/s)": 1.544251 }, { "acc": 0.97956009, "epoch": 9.238340754628545, "grad_norm": 11.532400131225586, "learning_rate": 9.511388301820267e-06, "loss": 0.09695007, "memory(GiB)": 13.7, "step": 19710, "train_speed(iter/s)": 1.544245 }, { "acc": 0.95984612, "epoch": 9.24068432153738, "grad_norm": 6.016643047332764, "learning_rate": 9.511054038247141e-06, "loss": 0.20135045, "memory(GiB)": 13.7, "step": 19715, "train_speed(iter/s)": 1.544262 }, { "acc": 0.95528536, "epoch": 9.243027888446216, "grad_norm": 3.011949062347412, "learning_rate": 9.510719666255112e-06, "loss": 0.19732869, "memory(GiB)": 13.7, "step": 19720, "train_speed(iter/s)": 1.544261 }, { "acc": 0.96809673, "epoch": 9.24537145535505, "grad_norm": 8.260052680969238, "learning_rate": 9.51038518585222e-06, "loss": 0.17048819, "memory(GiB)": 13.7, "step": 19725, "train_speed(iter/s)": 1.544263 }, { "acc": 0.96412907, "epoch": 9.247715022263886, "grad_norm": 5.7625603675842285, "learning_rate": 9.510050597046506e-06, "loss": 0.27532778, "memory(GiB)": 13.7, "step": 19730, "train_speed(iter/s)": 1.544274 }, { "acc": 0.95377979, "epoch": 9.25005858917272, "grad_norm": 16.456693649291992, "learning_rate": 9.509715899846008e-06, "loss": 0.21248119, "memory(GiB)": 13.7, "step": 19735, "train_speed(iter/s)": 1.544291 }, { "acc": 0.97188492, "epoch": 9.252402156081557, "grad_norm": 4.989431381225586, "learning_rate": 9.509381094258776e-06, "loss": 0.12314045, "memory(GiB)": 13.7, "step": 19740, "train_speed(iter/s)": 1.544308 }, { "acc": 0.96224213, "epoch": 9.254745722990391, "grad_norm": 7.801374435424805, "learning_rate": 9.509046180292854e-06, "loss": 0.17530259, "memory(GiB)": 13.7, "step": 19745, "train_speed(iter/s)": 1.544292 }, { "acc": 0.97965279, "epoch": 9.257089289899227, "grad_norm": 4.338682651519775, "learning_rate": 9.508711157956294e-06, "loss": 0.09341823, "memory(GiB)": 13.7, "step": 19750, "train_speed(iter/s)": 1.544301 }, { "acc": 0.98180227, "epoch": 9.259432856808061, "grad_norm": 7.932986259460449, "learning_rate": 9.508376027257147e-06, "loss": 0.13017951, "memory(GiB)": 13.7, "step": 19755, "train_speed(iter/s)": 1.544314 }, { "acc": 0.978125, "epoch": 9.261776423716897, "grad_norm": 6.284205436706543, "learning_rate": 9.508040788203472e-06, "loss": 0.14703575, "memory(GiB)": 13.7, "step": 19760, "train_speed(iter/s)": 1.544302 }, { "acc": 0.96606846, "epoch": 9.264119990625732, "grad_norm": 7.8723883628845215, "learning_rate": 9.507705440803324e-06, "loss": 0.16423318, "memory(GiB)": 13.7, "step": 19765, "train_speed(iter/s)": 1.544297 }, { "acc": 0.97547512, "epoch": 9.266463557534568, "grad_norm": 4.488730430603027, "learning_rate": 9.507369985064764e-06, "loss": 0.10540853, "memory(GiB)": 13.7, "step": 19770, "train_speed(iter/s)": 1.544305 }, { "acc": 0.97128391, "epoch": 9.268807124443402, "grad_norm": 6.552639484405518, "learning_rate": 9.507034420995856e-06, "loss": 0.15059074, "memory(GiB)": 13.7, "step": 19775, "train_speed(iter/s)": 1.544309 }, { "acc": 0.96958332, "epoch": 9.271150691352238, "grad_norm": 5.32758092880249, "learning_rate": 9.506698748604666e-06, "loss": 0.11487107, "memory(GiB)": 13.7, "step": 19780, "train_speed(iter/s)": 1.544321 }, { "acc": 0.94770832, "epoch": 9.273494258261074, "grad_norm": 93.08036804199219, "learning_rate": 9.506362967899262e-06, "loss": 0.30297413, "memory(GiB)": 13.7, "step": 19785, "train_speed(iter/s)": 1.544328 }, { "acc": 0.97105885, "epoch": 9.275837825169909, "grad_norm": 4.150873184204102, "learning_rate": 9.506027078887715e-06, "loss": 0.18150144, "memory(GiB)": 13.7, "step": 19790, "train_speed(iter/s)": 1.544342 }, { "acc": 0.97868147, "epoch": 9.278181392078745, "grad_norm": 7.87045955657959, "learning_rate": 9.505691081578099e-06, "loss": 0.08973886, "memory(GiB)": 13.7, "step": 19795, "train_speed(iter/s)": 1.544349 }, { "acc": 0.9696578, "epoch": 9.280524958987579, "grad_norm": 7.427701473236084, "learning_rate": 9.50535497597849e-06, "loss": 0.10280869, "memory(GiB)": 13.7, "step": 19800, "train_speed(iter/s)": 1.544334 }, { "acc": 0.97881947, "epoch": 9.282868525896415, "grad_norm": 3.4097797870635986, "learning_rate": 9.505018762096966e-06, "loss": 0.11448538, "memory(GiB)": 13.7, "step": 19805, "train_speed(iter/s)": 1.544344 }, { "acc": 0.97473593, "epoch": 9.28521209280525, "grad_norm": 3.849301815032959, "learning_rate": 9.504682439941611e-06, "loss": 0.09220828, "memory(GiB)": 13.7, "step": 19810, "train_speed(iter/s)": 1.544338 }, { "acc": 0.97092266, "epoch": 9.287555659714085, "grad_norm": 4.6668477058410645, "learning_rate": 9.504346009520507e-06, "loss": 0.14468107, "memory(GiB)": 13.7, "step": 19815, "train_speed(iter/s)": 1.544337 }, { "acc": 0.97022352, "epoch": 9.28989922662292, "grad_norm": 23.028730392456055, "learning_rate": 9.504009470841741e-06, "loss": 0.15979407, "memory(GiB)": 13.7, "step": 19820, "train_speed(iter/s)": 1.544333 }, { "acc": 0.96732826, "epoch": 9.292242793531756, "grad_norm": 13.429519653320312, "learning_rate": 9.503672823913403e-06, "loss": 0.19733976, "memory(GiB)": 13.7, "step": 19825, "train_speed(iter/s)": 1.544329 }, { "acc": 0.98405285, "epoch": 9.29458636044059, "grad_norm": 73.96424102783203, "learning_rate": 9.503336068743586e-06, "loss": 0.09893924, "memory(GiB)": 13.7, "step": 19830, "train_speed(iter/s)": 1.54433 }, { "acc": 0.96514034, "epoch": 9.296929927349426, "grad_norm": 6.223281383514404, "learning_rate": 9.50299920534038e-06, "loss": 0.12728004, "memory(GiB)": 13.7, "step": 19835, "train_speed(iter/s)": 1.54433 }, { "acc": 0.96899042, "epoch": 9.29927349425826, "grad_norm": 6.963283061981201, "learning_rate": 9.502662233711887e-06, "loss": 0.20074542, "memory(GiB)": 13.7, "step": 19840, "train_speed(iter/s)": 1.544342 }, { "acc": 0.96672306, "epoch": 9.301617061167097, "grad_norm": 2.001380205154419, "learning_rate": 9.502325153866202e-06, "loss": 0.24005795, "memory(GiB)": 13.7, "step": 19845, "train_speed(iter/s)": 1.544362 }, { "acc": 0.96436014, "epoch": 9.30396062807593, "grad_norm": 6.086975574493408, "learning_rate": 9.501987965811431e-06, "loss": 0.19478564, "memory(GiB)": 13.7, "step": 19850, "train_speed(iter/s)": 1.544364 }, { "acc": 0.98370991, "epoch": 9.306304194984767, "grad_norm": 0.771225094795227, "learning_rate": 9.50165066955568e-06, "loss": 0.11263182, "memory(GiB)": 13.7, "step": 19855, "train_speed(iter/s)": 1.544368 }, { "acc": 0.96702461, "epoch": 9.308647761893601, "grad_norm": 6.581176280975342, "learning_rate": 9.501313265107052e-06, "loss": 0.132356, "memory(GiB)": 13.7, "step": 19860, "train_speed(iter/s)": 1.544394 }, { "acc": 0.96791239, "epoch": 9.310991328802437, "grad_norm": 4.746651649475098, "learning_rate": 9.50097575247366e-06, "loss": 0.15373938, "memory(GiB)": 13.7, "step": 19865, "train_speed(iter/s)": 1.544397 }, { "acc": 0.96394348, "epoch": 9.313334895711273, "grad_norm": 10.600115776062012, "learning_rate": 9.500638131663617e-06, "loss": 0.21606593, "memory(GiB)": 13.7, "step": 19870, "train_speed(iter/s)": 1.544415 }, { "acc": 0.96632843, "epoch": 9.315678462620108, "grad_norm": 9.563669204711914, "learning_rate": 9.500300402685036e-06, "loss": 0.20034757, "memory(GiB)": 13.7, "step": 19875, "train_speed(iter/s)": 1.544397 }, { "acc": 0.9714426, "epoch": 9.318022029528944, "grad_norm": 5.456902027130127, "learning_rate": 9.499962565546037e-06, "loss": 0.16324838, "memory(GiB)": 13.7, "step": 19880, "train_speed(iter/s)": 1.544386 }, { "acc": 0.97754374, "epoch": 9.320365596437778, "grad_norm": 3.2643673419952393, "learning_rate": 9.49962462025474e-06, "loss": 0.11560243, "memory(GiB)": 13.7, "step": 19885, "train_speed(iter/s)": 1.544386 }, { "acc": 0.9718833, "epoch": 9.322709163346614, "grad_norm": 2.782153367996216, "learning_rate": 9.499286566819267e-06, "loss": 0.14093575, "memory(GiB)": 13.7, "step": 19890, "train_speed(iter/s)": 1.54438 }, { "acc": 0.96300735, "epoch": 9.325052730255448, "grad_norm": 8.85940170288086, "learning_rate": 9.498948405247744e-06, "loss": 0.19985261, "memory(GiB)": 13.7, "step": 19895, "train_speed(iter/s)": 1.544377 }, { "acc": 0.97663631, "epoch": 9.327396297164285, "grad_norm": 7.087442398071289, "learning_rate": 9.498610135548302e-06, "loss": 0.1469187, "memory(GiB)": 13.7, "step": 19900, "train_speed(iter/s)": 1.544391 }, { "acc": 0.98265629, "epoch": 9.329739864073119, "grad_norm": 5.488898277282715, "learning_rate": 9.49827175772907e-06, "loss": 0.0980633, "memory(GiB)": 13.7, "step": 19905, "train_speed(iter/s)": 1.544397 }, { "acc": 0.96999998, "epoch": 9.332083430981955, "grad_norm": 0.7021119594573975, "learning_rate": 9.497933271798178e-06, "loss": 0.12965071, "memory(GiB)": 13.7, "step": 19910, "train_speed(iter/s)": 1.544427 }, { "acc": 0.98161459, "epoch": 9.33442699789079, "grad_norm": 15.910423278808594, "learning_rate": 9.49759467776377e-06, "loss": 0.07997187, "memory(GiB)": 13.7, "step": 19915, "train_speed(iter/s)": 1.544449 }, { "acc": 0.9794445, "epoch": 9.336770564799625, "grad_norm": 1.1112029552459717, "learning_rate": 9.497255975633977e-06, "loss": 0.06455598, "memory(GiB)": 13.7, "step": 19920, "train_speed(iter/s)": 1.544446 }, { "acc": 0.97122021, "epoch": 9.33911413170846, "grad_norm": 4.671875, "learning_rate": 9.496917165416943e-06, "loss": 0.13194422, "memory(GiB)": 13.7, "step": 19925, "train_speed(iter/s)": 1.544474 }, { "acc": 0.97163191, "epoch": 9.341457698617296, "grad_norm": 2.890885353088379, "learning_rate": 9.496578247120813e-06, "loss": 0.1437429, "memory(GiB)": 13.7, "step": 19930, "train_speed(iter/s)": 1.544487 }, { "acc": 0.98041668, "epoch": 9.34380126552613, "grad_norm": 3.8308866024017334, "learning_rate": 9.496239220753732e-06, "loss": 0.16140379, "memory(GiB)": 13.7, "step": 19935, "train_speed(iter/s)": 1.544498 }, { "acc": 0.96249084, "epoch": 9.346144832434966, "grad_norm": 17.33932113647461, "learning_rate": 9.495900086323852e-06, "loss": 0.15340586, "memory(GiB)": 13.7, "step": 19940, "train_speed(iter/s)": 1.544494 }, { "acc": 0.98971233, "epoch": 9.3484883993438, "grad_norm": 0.6002985835075378, "learning_rate": 9.495560843839321e-06, "loss": 0.0649191, "memory(GiB)": 13.7, "step": 19945, "train_speed(iter/s)": 1.544515 }, { "acc": 0.98277378, "epoch": 9.350831966252636, "grad_norm": 4.358609199523926, "learning_rate": 9.495221493308295e-06, "loss": 0.0923253, "memory(GiB)": 13.7, "step": 19950, "train_speed(iter/s)": 1.54452 }, { "acc": 0.97331133, "epoch": 9.353175533161473, "grad_norm": 11.644659996032715, "learning_rate": 9.49488203473893e-06, "loss": 0.17935171, "memory(GiB)": 13.7, "step": 19955, "train_speed(iter/s)": 1.544509 }, { "acc": 0.97666664, "epoch": 9.355519100070307, "grad_norm": 7.490176677703857, "learning_rate": 9.494542468139388e-06, "loss": 0.1899712, "memory(GiB)": 13.7, "step": 19960, "train_speed(iter/s)": 1.544529 }, { "acc": 0.97479172, "epoch": 9.357862666979143, "grad_norm": 14.258448600769043, "learning_rate": 9.494202793517828e-06, "loss": 0.09207025, "memory(GiB)": 13.7, "step": 19965, "train_speed(iter/s)": 1.544533 }, { "acc": 0.95583725, "epoch": 9.360206233887977, "grad_norm": 4.167896270751953, "learning_rate": 9.493863010882414e-06, "loss": 0.23675847, "memory(GiB)": 13.7, "step": 19970, "train_speed(iter/s)": 1.544518 }, { "acc": 0.98049107, "epoch": 9.362549800796813, "grad_norm": 1.8850980997085571, "learning_rate": 9.493523120241315e-06, "loss": 0.09869788, "memory(GiB)": 13.7, "step": 19975, "train_speed(iter/s)": 1.544524 }, { "acc": 0.95683231, "epoch": 9.364893367705648, "grad_norm": 9.212564468383789, "learning_rate": 9.493183121602704e-06, "loss": 0.22182169, "memory(GiB)": 13.7, "step": 19980, "train_speed(iter/s)": 1.544526 }, { "acc": 0.95130444, "epoch": 9.367236934614484, "grad_norm": 5.874067306518555, "learning_rate": 9.492843014974749e-06, "loss": 0.13248293, "memory(GiB)": 13.7, "step": 19985, "train_speed(iter/s)": 1.544541 }, { "acc": 0.96659231, "epoch": 9.369580501523318, "grad_norm": 24.00330924987793, "learning_rate": 9.492502800365626e-06, "loss": 0.28412032, "memory(GiB)": 13.7, "step": 19990, "train_speed(iter/s)": 1.544518 }, { "acc": 0.96328373, "epoch": 9.371924068432154, "grad_norm": 15.209607124328613, "learning_rate": 9.492162477783513e-06, "loss": 0.17626736, "memory(GiB)": 13.7, "step": 19995, "train_speed(iter/s)": 1.544518 }, { "acc": 0.95814095, "epoch": 9.374267635340988, "grad_norm": 9.626358032226562, "learning_rate": 9.491822047236591e-06, "loss": 0.26189711, "memory(GiB)": 13.7, "step": 20000, "train_speed(iter/s)": 1.54452 }, { "epoch": 9.374267635340988, "eval_acc": 0.7678222162271532, "eval_loss": 1.027672290802002, "eval_runtime": 144.9732, "eval_samples_per_second": 55.652, "eval_steps_per_second": 6.96, "step": 20000 }, { "acc": 0.99018116, "epoch": 9.376611202249824, "grad_norm": 3.6240620613098145, "learning_rate": 9.491481508733042e-06, "loss": 0.07137825, "memory(GiB)": 13.7, "step": 20005, "train_speed(iter/s)": 1.524327 }, { "acc": 0.95682545, "epoch": 9.378954769158659, "grad_norm": 5.219970703125, "learning_rate": 9.49114086228105e-06, "loss": 0.25614955, "memory(GiB)": 13.7, "step": 20010, "train_speed(iter/s)": 1.524345 }, { "acc": 0.98562498, "epoch": 9.381298336067495, "grad_norm": 2.4738762378692627, "learning_rate": 9.490800107888806e-06, "loss": 0.05917104, "memory(GiB)": 13.7, "step": 20015, "train_speed(iter/s)": 1.524354 }, { "acc": 0.96902771, "epoch": 9.38364190297633, "grad_norm": 4.68032693862915, "learning_rate": 9.490459245564499e-06, "loss": 0.19555118, "memory(GiB)": 13.7, "step": 20020, "train_speed(iter/s)": 1.524364 }, { "acc": 0.98022251, "epoch": 9.385985469885165, "grad_norm": 19.56778335571289, "learning_rate": 9.490118275316323e-06, "loss": 0.11950696, "memory(GiB)": 13.7, "step": 20025, "train_speed(iter/s)": 1.524365 }, { "acc": 0.98168564, "epoch": 9.388329036794001, "grad_norm": 11.848997116088867, "learning_rate": 9.489777197152472e-06, "loss": 0.12053188, "memory(GiB)": 13.7, "step": 20030, "train_speed(iter/s)": 1.524384 }, { "acc": 0.96187, "epoch": 9.390672603702836, "grad_norm": 3.019839286804199, "learning_rate": 9.489436011081145e-06, "loss": 0.18055288, "memory(GiB)": 13.7, "step": 20035, "train_speed(iter/s)": 1.524382 }, { "acc": 0.98065968, "epoch": 9.393016170611672, "grad_norm": 4.968120574951172, "learning_rate": 9.489094717110547e-06, "loss": 0.1032145, "memory(GiB)": 13.7, "step": 20040, "train_speed(iter/s)": 1.524399 }, { "acc": 0.95714779, "epoch": 9.395359737520506, "grad_norm": 10.473690032958984, "learning_rate": 9.488753315248876e-06, "loss": 0.14052304, "memory(GiB)": 13.7, "step": 20045, "train_speed(iter/s)": 1.524423 }, { "acc": 0.96953373, "epoch": 9.397703304429342, "grad_norm": 7.922396183013916, "learning_rate": 9.488411805504339e-06, "loss": 0.17082148, "memory(GiB)": 13.7, "step": 20050, "train_speed(iter/s)": 1.52445 }, { "acc": 0.97950039, "epoch": 9.400046871338176, "grad_norm": 7.454995155334473, "learning_rate": 9.48807018788515e-06, "loss": 0.10312535, "memory(GiB)": 13.7, "step": 20055, "train_speed(iter/s)": 1.524446 }, { "acc": 0.95428677, "epoch": 9.402390438247012, "grad_norm": 78.93402099609375, "learning_rate": 9.487728462399511e-06, "loss": 0.22941909, "memory(GiB)": 13.7, "step": 20060, "train_speed(iter/s)": 1.524467 }, { "acc": 0.97133837, "epoch": 9.404734005155847, "grad_norm": 15.55823802947998, "learning_rate": 9.487386629055646e-06, "loss": 0.16893432, "memory(GiB)": 13.7, "step": 20065, "train_speed(iter/s)": 1.524481 }, { "acc": 0.94457388, "epoch": 9.407077572064683, "grad_norm": 22.423070907592773, "learning_rate": 9.487044687861765e-06, "loss": 0.24946065, "memory(GiB)": 13.7, "step": 20070, "train_speed(iter/s)": 1.524521 }, { "acc": 0.95734377, "epoch": 9.409421138973517, "grad_norm": 17.349515914916992, "learning_rate": 9.486702638826088e-06, "loss": 0.19101785, "memory(GiB)": 13.7, "step": 20075, "train_speed(iter/s)": 1.524539 }, { "acc": 0.95153446, "epoch": 9.411764705882353, "grad_norm": 9.669500350952148, "learning_rate": 9.48636048195684e-06, "loss": 0.19778476, "memory(GiB)": 13.7, "step": 20080, "train_speed(iter/s)": 1.524559 }, { "acc": 0.98380947, "epoch": 9.414108272791188, "grad_norm": 32.29861068725586, "learning_rate": 9.486018217262243e-06, "loss": 0.14900583, "memory(GiB)": 13.7, "step": 20085, "train_speed(iter/s)": 1.524579 }, { "acc": 0.98125286, "epoch": 9.416451839700024, "grad_norm": 4.7641987800598145, "learning_rate": 9.485675844750523e-06, "loss": 0.10534971, "memory(GiB)": 13.7, "step": 20090, "train_speed(iter/s)": 1.524605 }, { "acc": 0.98065109, "epoch": 9.418795406608858, "grad_norm": 2.702688217163086, "learning_rate": 9.485333364429912e-06, "loss": 0.07932582, "memory(GiB)": 13.7, "step": 20095, "train_speed(iter/s)": 1.524618 }, { "acc": 0.97675056, "epoch": 9.421138973517694, "grad_norm": 22.2756290435791, "learning_rate": 9.48499077630864e-06, "loss": 0.14462363, "memory(GiB)": 13.7, "step": 20100, "train_speed(iter/s)": 1.524617 }, { "acc": 0.96508932, "epoch": 9.423482540426528, "grad_norm": 4.296273708343506, "learning_rate": 9.484648080394943e-06, "loss": 0.18223898, "memory(GiB)": 13.7, "step": 20105, "train_speed(iter/s)": 1.524637 }, { "acc": 0.9699543, "epoch": 9.425826107335364, "grad_norm": 1.6939916610717773, "learning_rate": 9.484305276697057e-06, "loss": 0.18437152, "memory(GiB)": 13.7, "step": 20110, "train_speed(iter/s)": 1.524649 }, { "acc": 0.97796707, "epoch": 9.4281696742442, "grad_norm": 6.260369300842285, "learning_rate": 9.483962365223223e-06, "loss": 0.13229746, "memory(GiB)": 13.7, "step": 20115, "train_speed(iter/s)": 1.524644 }, { "acc": 0.97214851, "epoch": 9.430513241153035, "grad_norm": 5.628987789154053, "learning_rate": 9.483619345981684e-06, "loss": 0.16097844, "memory(GiB)": 13.7, "step": 20120, "train_speed(iter/s)": 1.52464 }, { "acc": 0.98456554, "epoch": 9.43285680806187, "grad_norm": 4.787600040435791, "learning_rate": 9.483276218980682e-06, "loss": 0.10169586, "memory(GiB)": 13.7, "step": 20125, "train_speed(iter/s)": 1.52463 }, { "acc": 0.95743427, "epoch": 9.435200374970705, "grad_norm": 5.8953070640563965, "learning_rate": 9.482932984228467e-06, "loss": 0.22234378, "memory(GiB)": 13.7, "step": 20130, "train_speed(iter/s)": 1.524632 }, { "acc": 0.97614088, "epoch": 9.437543941879541, "grad_norm": 5.04838752746582, "learning_rate": 9.48258964173329e-06, "loss": 0.16516595, "memory(GiB)": 13.7, "step": 20135, "train_speed(iter/s)": 1.524639 }, { "acc": 0.95254116, "epoch": 9.439887508788376, "grad_norm": 20.971202850341797, "learning_rate": 9.482246191503402e-06, "loss": 0.17545217, "memory(GiB)": 13.7, "step": 20140, "train_speed(iter/s)": 1.524649 }, { "acc": 0.98321428, "epoch": 9.442231075697212, "grad_norm": 8.482405662536621, "learning_rate": 9.481902633547062e-06, "loss": 0.0982582, "memory(GiB)": 13.7, "step": 20145, "train_speed(iter/s)": 1.524661 }, { "acc": 0.97412243, "epoch": 9.444574642606046, "grad_norm": 8.251609802246094, "learning_rate": 9.481558967872522e-06, "loss": 0.10741944, "memory(GiB)": 13.7, "step": 20150, "train_speed(iter/s)": 1.524663 }, { "acc": 0.97443857, "epoch": 9.446918209514882, "grad_norm": 5.601214408874512, "learning_rate": 9.481215194488046e-06, "loss": 0.17565395, "memory(GiB)": 13.7, "step": 20155, "train_speed(iter/s)": 1.524665 }, { "acc": 0.96977844, "epoch": 9.449261776423716, "grad_norm": 3.2966854572296143, "learning_rate": 9.480871313401897e-06, "loss": 0.15084871, "memory(GiB)": 13.7, "step": 20160, "train_speed(iter/s)": 1.524687 }, { "acc": 0.9600893, "epoch": 9.451605343332552, "grad_norm": 14.912198066711426, "learning_rate": 9.480527324622344e-06, "loss": 0.20411584, "memory(GiB)": 13.7, "step": 20165, "train_speed(iter/s)": 1.524714 }, { "acc": 0.95961304, "epoch": 9.453948910241387, "grad_norm": 7.882043838500977, "learning_rate": 9.480183228157648e-06, "loss": 0.12295884, "memory(GiB)": 13.7, "step": 20170, "train_speed(iter/s)": 1.524711 }, { "acc": 0.96043653, "epoch": 9.456292477150223, "grad_norm": 2.3998308181762695, "learning_rate": 9.479839024016086e-06, "loss": 0.19156346, "memory(GiB)": 13.7, "step": 20175, "train_speed(iter/s)": 1.52473 }, { "acc": 0.96388788, "epoch": 9.458636044059057, "grad_norm": 2.4870011806488037, "learning_rate": 9.47949471220593e-06, "loss": 0.21660128, "memory(GiB)": 13.7, "step": 20180, "train_speed(iter/s)": 1.524724 }, { "acc": 0.98038197, "epoch": 9.460979610967893, "grad_norm": 9.295825958251953, "learning_rate": 9.479150292735456e-06, "loss": 0.14430308, "memory(GiB)": 13.7, "step": 20185, "train_speed(iter/s)": 1.524741 }, { "acc": 0.97128563, "epoch": 9.463323177876727, "grad_norm": 66.12226867675781, "learning_rate": 9.47880576561294e-06, "loss": 0.09737682, "memory(GiB)": 13.7, "step": 20190, "train_speed(iter/s)": 1.524751 }, { "acc": 0.97354164, "epoch": 9.465666744785564, "grad_norm": 5.4587202072143555, "learning_rate": 9.47846113084667e-06, "loss": 0.07698045, "memory(GiB)": 13.7, "step": 20195, "train_speed(iter/s)": 1.524745 }, { "acc": 0.97921219, "epoch": 9.4680103116944, "grad_norm": 5.728312015533447, "learning_rate": 9.478116388444922e-06, "loss": 0.16317426, "memory(GiB)": 13.7, "step": 20200, "train_speed(iter/s)": 1.524732 }, { "acc": 0.97198849, "epoch": 9.470353878603234, "grad_norm": 2.8442418575286865, "learning_rate": 9.477771538415988e-06, "loss": 0.14163837, "memory(GiB)": 13.7, "step": 20205, "train_speed(iter/s)": 1.524715 }, { "acc": 0.98249998, "epoch": 9.47269744551207, "grad_norm": 3.616356134414673, "learning_rate": 9.477426580768155e-06, "loss": 0.08472639, "memory(GiB)": 13.7, "step": 20210, "train_speed(iter/s)": 1.524723 }, { "acc": 0.98429928, "epoch": 9.475041012420904, "grad_norm": 14.310918807983398, "learning_rate": 9.477081515509714e-06, "loss": 0.12907376, "memory(GiB)": 13.7, "step": 20215, "train_speed(iter/s)": 1.524727 }, { "acc": 0.97666664, "epoch": 9.47738457932974, "grad_norm": 7.09453010559082, "learning_rate": 9.476736342648961e-06, "loss": 0.09148655, "memory(GiB)": 13.7, "step": 20220, "train_speed(iter/s)": 1.524738 }, { "acc": 0.9692009, "epoch": 9.479728146238575, "grad_norm": 2.733539581298828, "learning_rate": 9.476391062194192e-06, "loss": 0.16764699, "memory(GiB)": 13.7, "step": 20225, "train_speed(iter/s)": 1.524742 }, { "acc": 0.95903273, "epoch": 9.48207171314741, "grad_norm": 3.142831802368164, "learning_rate": 9.476045674153704e-06, "loss": 0.20699944, "memory(GiB)": 13.7, "step": 20230, "train_speed(iter/s)": 1.524751 }, { "acc": 0.96520824, "epoch": 9.484415280056245, "grad_norm": 6.231169700622559, "learning_rate": 9.475700178535804e-06, "loss": 0.18211091, "memory(GiB)": 13.7, "step": 20235, "train_speed(iter/s)": 1.524752 }, { "acc": 0.96590776, "epoch": 9.486758846965081, "grad_norm": 9.907700538635254, "learning_rate": 9.475354575348791e-06, "loss": 0.15270731, "memory(GiB)": 13.7, "step": 20240, "train_speed(iter/s)": 1.524774 }, { "acc": 0.9437974, "epoch": 9.489102413873916, "grad_norm": 2.244220733642578, "learning_rate": 9.475008864600978e-06, "loss": 0.25384467, "memory(GiB)": 13.7, "step": 20245, "train_speed(iter/s)": 1.524792 }, { "acc": 0.98152962, "epoch": 9.491445980782752, "grad_norm": 3.1108810901641846, "learning_rate": 9.47466304630067e-06, "loss": 0.08947908, "memory(GiB)": 13.7, "step": 20250, "train_speed(iter/s)": 1.524819 }, { "acc": 0.96840439, "epoch": 9.493789547691586, "grad_norm": 2.779536247253418, "learning_rate": 9.474317120456183e-06, "loss": 0.14395475, "memory(GiB)": 13.7, "step": 20255, "train_speed(iter/s)": 1.524822 }, { "acc": 0.96748676, "epoch": 9.496133114600422, "grad_norm": 7.1884379386901855, "learning_rate": 9.473971087075829e-06, "loss": 0.23928781, "memory(GiB)": 13.7, "step": 20260, "train_speed(iter/s)": 1.524836 }, { "acc": 0.97130718, "epoch": 9.498476681509256, "grad_norm": 7.8312153816223145, "learning_rate": 9.473624946167926e-06, "loss": 0.21408134, "memory(GiB)": 13.7, "step": 20265, "train_speed(iter/s)": 1.524858 }, { "acc": 0.97727985, "epoch": 9.500820248418092, "grad_norm": 6.943735599517822, "learning_rate": 9.473278697740796e-06, "loss": 0.09173239, "memory(GiB)": 13.7, "step": 20270, "train_speed(iter/s)": 1.524868 }, { "acc": 0.96348209, "epoch": 9.503163815326928, "grad_norm": 4.1548075675964355, "learning_rate": 9.472932341802758e-06, "loss": 0.14771783, "memory(GiB)": 13.7, "step": 20275, "train_speed(iter/s)": 1.524888 }, { "acc": 0.96964283, "epoch": 9.505507382235763, "grad_norm": 10.26558780670166, "learning_rate": 9.472585878362142e-06, "loss": 0.20046089, "memory(GiB)": 13.7, "step": 20280, "train_speed(iter/s)": 1.524885 }, { "acc": 0.97548161, "epoch": 9.507850949144599, "grad_norm": 7.103627681732178, "learning_rate": 9.472239307427271e-06, "loss": 0.15409195, "memory(GiB)": 13.7, "step": 20285, "train_speed(iter/s)": 1.524886 }, { "acc": 0.97323322, "epoch": 9.510194516053433, "grad_norm": 2.853492259979248, "learning_rate": 9.47189262900648e-06, "loss": 0.12799778, "memory(GiB)": 13.7, "step": 20290, "train_speed(iter/s)": 1.524898 }, { "acc": 0.97391024, "epoch": 9.51253808296227, "grad_norm": 9.711610794067383, "learning_rate": 9.4715458431081e-06, "loss": 0.13801287, "memory(GiB)": 13.7, "step": 20295, "train_speed(iter/s)": 1.524931 }, { "acc": 0.97351589, "epoch": 9.514881649871104, "grad_norm": 49.399574279785156, "learning_rate": 9.471198949740466e-06, "loss": 0.12843471, "memory(GiB)": 13.7, "step": 20300, "train_speed(iter/s)": 1.52495 }, { "acc": 0.96827736, "epoch": 9.51722521677994, "grad_norm": 7.166558742523193, "learning_rate": 9.470851948911916e-06, "loss": 0.14837071, "memory(GiB)": 13.7, "step": 20305, "train_speed(iter/s)": 1.524976 }, { "acc": 0.96972218, "epoch": 9.519568783688774, "grad_norm": 3.04170298576355, "learning_rate": 9.47050484063079e-06, "loss": 0.16680162, "memory(GiB)": 13.7, "step": 20310, "train_speed(iter/s)": 1.524977 }, { "acc": 0.98358135, "epoch": 9.52191235059761, "grad_norm": 4.66951322555542, "learning_rate": 9.470157624905437e-06, "loss": 0.15949209, "memory(GiB)": 13.7, "step": 20315, "train_speed(iter/s)": 1.524992 }, { "acc": 0.97599087, "epoch": 9.524255917506444, "grad_norm": 3.6505095958709717, "learning_rate": 9.469810301744194e-06, "loss": 0.13730063, "memory(GiB)": 13.7, "step": 20320, "train_speed(iter/s)": 1.525002 }, { "acc": 0.96187496, "epoch": 9.52659948441528, "grad_norm": 10.804503440856934, "learning_rate": 9.469462871155416e-06, "loss": 0.23607626, "memory(GiB)": 13.7, "step": 20325, "train_speed(iter/s)": 1.525016 }, { "acc": 0.98811283, "epoch": 9.528943051324115, "grad_norm": 5.565262317657471, "learning_rate": 9.469115333147453e-06, "loss": 0.05676982, "memory(GiB)": 13.7, "step": 20330, "train_speed(iter/s)": 1.525029 }, { "acc": 0.97549114, "epoch": 9.53128661823295, "grad_norm": 1.3812750577926636, "learning_rate": 9.468767687728657e-06, "loss": 0.10597533, "memory(GiB)": 13.7, "step": 20335, "train_speed(iter/s)": 1.525035 }, { "acc": 0.98023806, "epoch": 9.533630185141785, "grad_norm": 14.525623321533203, "learning_rate": 9.468419934907387e-06, "loss": 0.10680346, "memory(GiB)": 13.7, "step": 20340, "train_speed(iter/s)": 1.525061 }, { "acc": 0.97832108, "epoch": 9.535973752050621, "grad_norm": 3.421175718307495, "learning_rate": 9.468072074691997e-06, "loss": 0.1269529, "memory(GiB)": 13.7, "step": 20345, "train_speed(iter/s)": 1.52507 }, { "acc": 0.95841274, "epoch": 9.538317318959457, "grad_norm": 9.579959869384766, "learning_rate": 9.467724107090855e-06, "loss": 0.29345126, "memory(GiB)": 13.7, "step": 20350, "train_speed(iter/s)": 1.52508 }, { "acc": 0.96955223, "epoch": 9.540660885868292, "grad_norm": 7.167972087860107, "learning_rate": 9.467376032112318e-06, "loss": 0.16276681, "memory(GiB)": 13.7, "step": 20355, "train_speed(iter/s)": 1.525091 }, { "acc": 0.98364267, "epoch": 9.543004452777128, "grad_norm": 5.639312744140625, "learning_rate": 9.467027849764757e-06, "loss": 0.12517986, "memory(GiB)": 13.7, "step": 20360, "train_speed(iter/s)": 1.525117 }, { "acc": 0.98302994, "epoch": 9.545348019685962, "grad_norm": 5.601049900054932, "learning_rate": 9.466679560056543e-06, "loss": 0.08432223, "memory(GiB)": 13.7, "step": 20365, "train_speed(iter/s)": 1.525123 }, { "acc": 0.9572917, "epoch": 9.547691586594798, "grad_norm": 5.895474910736084, "learning_rate": 9.46633116299604e-06, "loss": 0.19643908, "memory(GiB)": 13.7, "step": 20370, "train_speed(iter/s)": 1.525134 }, { "acc": 0.97758923, "epoch": 9.550035153503632, "grad_norm": 17.70886993408203, "learning_rate": 9.465982658591631e-06, "loss": 0.11643585, "memory(GiB)": 13.7, "step": 20375, "train_speed(iter/s)": 1.525145 }, { "acc": 0.97767859, "epoch": 9.552378720412468, "grad_norm": 4.261454105377197, "learning_rate": 9.465634046851689e-06, "loss": 0.09907148, "memory(GiB)": 13.7, "step": 20380, "train_speed(iter/s)": 1.525143 }, { "acc": 0.98595114, "epoch": 9.554722287321303, "grad_norm": 6.179068088531494, "learning_rate": 9.465285327784591e-06, "loss": 0.06723504, "memory(GiB)": 13.7, "step": 20385, "train_speed(iter/s)": 1.525151 }, { "acc": 0.9745779, "epoch": 9.557065854230139, "grad_norm": 0.17852281033992767, "learning_rate": 9.464936501398724e-06, "loss": 0.14044096, "memory(GiB)": 13.7, "step": 20390, "train_speed(iter/s)": 1.525158 }, { "acc": 0.98720245, "epoch": 9.559409421138973, "grad_norm": 4.990512847900391, "learning_rate": 9.464587567702468e-06, "loss": 0.07206551, "memory(GiB)": 13.7, "step": 20395, "train_speed(iter/s)": 1.525169 }, { "acc": 0.97148809, "epoch": 9.56175298804781, "grad_norm": 6.442343235015869, "learning_rate": 9.464238526704217e-06, "loss": 0.14189606, "memory(GiB)": 13.7, "step": 20400, "train_speed(iter/s)": 1.525188 }, { "acc": 0.95864582, "epoch": 9.564096554956643, "grad_norm": 6.63049840927124, "learning_rate": 9.463889378412351e-06, "loss": 0.17218733, "memory(GiB)": 13.7, "step": 20405, "train_speed(iter/s)": 1.525197 }, { "acc": 0.97234507, "epoch": 9.56644012186548, "grad_norm": 5.322672367095947, "learning_rate": 9.46354012283527e-06, "loss": 0.1138173, "memory(GiB)": 13.7, "step": 20410, "train_speed(iter/s)": 1.525205 }, { "acc": 0.98001127, "epoch": 9.568783688774314, "grad_norm": 2.4403159618377686, "learning_rate": 9.463190759981368e-06, "loss": 0.09529571, "memory(GiB)": 13.7, "step": 20415, "train_speed(iter/s)": 1.525228 }, { "acc": 0.98675594, "epoch": 9.57112725568315, "grad_norm": 0.14079970121383667, "learning_rate": 9.46284128985904e-06, "loss": 0.05079913, "memory(GiB)": 13.7, "step": 20420, "train_speed(iter/s)": 1.52524 }, { "acc": 0.95077114, "epoch": 9.573470822591984, "grad_norm": 3.6625049114227295, "learning_rate": 9.462491712476686e-06, "loss": 0.15221988, "memory(GiB)": 13.7, "step": 20425, "train_speed(iter/s)": 1.52526 }, { "acc": 0.9777647, "epoch": 9.57581438950082, "grad_norm": 7.480146884918213, "learning_rate": 9.462142027842712e-06, "loss": 0.08268633, "memory(GiB)": 13.7, "step": 20430, "train_speed(iter/s)": 1.525273 }, { "acc": 0.96496925, "epoch": 9.578157956409655, "grad_norm": 3.543565034866333, "learning_rate": 9.46179223596552e-06, "loss": 0.18251522, "memory(GiB)": 13.7, "step": 20435, "train_speed(iter/s)": 1.525298 }, { "acc": 0.94457912, "epoch": 9.58050152331849, "grad_norm": 3.3371243476867676, "learning_rate": 9.461442336853521e-06, "loss": 0.34890909, "memory(GiB)": 13.7, "step": 20440, "train_speed(iter/s)": 1.525313 }, { "acc": 0.95243053, "epoch": 9.582845090227327, "grad_norm": 10.800511360168457, "learning_rate": 9.461092330515122e-06, "loss": 0.2957819, "memory(GiB)": 13.7, "step": 20445, "train_speed(iter/s)": 1.525339 }, { "acc": 0.97653332, "epoch": 9.585188657136161, "grad_norm": 7.037672996520996, "learning_rate": 9.460742216958737e-06, "loss": 0.07096222, "memory(GiB)": 13.7, "step": 20450, "train_speed(iter/s)": 1.525345 }, { "acc": 0.96464052, "epoch": 9.587532224044997, "grad_norm": 8.34251594543457, "learning_rate": 9.460391996192783e-06, "loss": 0.22270992, "memory(GiB)": 13.7, "step": 20455, "train_speed(iter/s)": 1.525351 }, { "acc": 0.98590279, "epoch": 9.589875790953831, "grad_norm": 1.8235983848571777, "learning_rate": 9.460041668225678e-06, "loss": 0.08749887, "memory(GiB)": 13.7, "step": 20460, "train_speed(iter/s)": 1.525343 }, { "acc": 0.96870041, "epoch": 9.592219357862668, "grad_norm": 9.169364929199219, "learning_rate": 9.459691233065843e-06, "loss": 0.16068964, "memory(GiB)": 13.7, "step": 20465, "train_speed(iter/s)": 1.52536 }, { "acc": 0.96953182, "epoch": 9.594562924771502, "grad_norm": 5.803351879119873, "learning_rate": 9.459340690721698e-06, "loss": 0.1888908, "memory(GiB)": 13.7, "step": 20470, "train_speed(iter/s)": 1.525386 }, { "acc": 0.98537769, "epoch": 9.596906491680338, "grad_norm": 5.036245346069336, "learning_rate": 9.458990041201671e-06, "loss": 0.05932909, "memory(GiB)": 13.7, "step": 20475, "train_speed(iter/s)": 1.525392 }, { "acc": 0.95510149, "epoch": 9.599250058589172, "grad_norm": 8.409035682678223, "learning_rate": 9.458639284514192e-06, "loss": 0.16251768, "memory(GiB)": 13.7, "step": 20480, "train_speed(iter/s)": 1.525402 }, { "acc": 0.96234627, "epoch": 9.601593625498008, "grad_norm": 4.370921611785889, "learning_rate": 9.458288420667691e-06, "loss": 0.21136422, "memory(GiB)": 13.7, "step": 20485, "train_speed(iter/s)": 1.525403 }, { "acc": 0.96991844, "epoch": 9.603937192406843, "grad_norm": 18.183412551879883, "learning_rate": 9.457937449670599e-06, "loss": 0.15675306, "memory(GiB)": 13.7, "step": 20490, "train_speed(iter/s)": 1.525398 }, { "acc": 0.98621216, "epoch": 9.606280759315679, "grad_norm": 8.397316932678223, "learning_rate": 9.457586371531357e-06, "loss": 0.07979382, "memory(GiB)": 13.7, "step": 20495, "train_speed(iter/s)": 1.525413 }, { "acc": 0.98234844, "epoch": 9.608624326224513, "grad_norm": 5.293055057525635, "learning_rate": 9.4572351862584e-06, "loss": 0.1592144, "memory(GiB)": 13.7, "step": 20500, "train_speed(iter/s)": 1.525429 }, { "acc": 0.97613096, "epoch": 9.610967893133349, "grad_norm": 4.753604412078857, "learning_rate": 9.456883893860172e-06, "loss": 0.15267138, "memory(GiB)": 13.7, "step": 20505, "train_speed(iter/s)": 1.525432 }, { "acc": 0.96628933, "epoch": 9.613311460042183, "grad_norm": 8.06873607635498, "learning_rate": 9.456532494345114e-06, "loss": 0.16382535, "memory(GiB)": 13.7, "step": 20510, "train_speed(iter/s)": 1.525437 }, { "acc": 0.97188768, "epoch": 9.61565502695102, "grad_norm": 4.976583480834961, "learning_rate": 9.456180987721675e-06, "loss": 0.14556041, "memory(GiB)": 13.7, "step": 20515, "train_speed(iter/s)": 1.525446 }, { "acc": 0.96379223, "epoch": 9.617998593859856, "grad_norm": 10.339786529541016, "learning_rate": 9.455829373998302e-06, "loss": 0.15180104, "memory(GiB)": 13.7, "step": 20520, "train_speed(iter/s)": 1.525459 }, { "acc": 0.96635418, "epoch": 9.62034216076869, "grad_norm": 5.703136444091797, "learning_rate": 9.455477653183448e-06, "loss": 0.1993804, "memory(GiB)": 13.7, "step": 20525, "train_speed(iter/s)": 1.52547 }, { "acc": 0.97572918, "epoch": 9.622685727677526, "grad_norm": 6.674088954925537, "learning_rate": 9.455125825285568e-06, "loss": 0.09007158, "memory(GiB)": 13.7, "step": 20530, "train_speed(iter/s)": 1.525488 }, { "acc": 0.96933784, "epoch": 9.62502929458636, "grad_norm": 8.5940580368042, "learning_rate": 9.454773890313118e-06, "loss": 0.12689552, "memory(GiB)": 13.7, "step": 20535, "train_speed(iter/s)": 1.525484 }, { "acc": 0.97057114, "epoch": 9.627372861495196, "grad_norm": 4.7793803215026855, "learning_rate": 9.454421848274557e-06, "loss": 0.11984665, "memory(GiB)": 13.7, "step": 20540, "train_speed(iter/s)": 1.5255 }, { "acc": 0.94360485, "epoch": 9.62971642840403, "grad_norm": 10.32119083404541, "learning_rate": 9.454069699178345e-06, "loss": 0.28597283, "memory(GiB)": 13.7, "step": 20545, "train_speed(iter/s)": 1.525504 }, { "acc": 0.97590332, "epoch": 9.632059995312867, "grad_norm": 5.759645462036133, "learning_rate": 9.45371744303295e-06, "loss": 0.18603089, "memory(GiB)": 13.7, "step": 20550, "train_speed(iter/s)": 1.525515 }, { "acc": 0.98079681, "epoch": 9.634403562221701, "grad_norm": 7.19189977645874, "learning_rate": 9.453365079846837e-06, "loss": 0.10691009, "memory(GiB)": 13.7, "step": 20555, "train_speed(iter/s)": 1.525517 }, { "acc": 0.95436954, "epoch": 9.636747129130537, "grad_norm": 9.626317024230957, "learning_rate": 9.453012609628478e-06, "loss": 0.19581668, "memory(GiB)": 13.7, "step": 20560, "train_speed(iter/s)": 1.525536 }, { "acc": 0.97379541, "epoch": 9.639090696039371, "grad_norm": 5.705848693847656, "learning_rate": 9.452660032386341e-06, "loss": 0.14535328, "memory(GiB)": 13.7, "step": 20565, "train_speed(iter/s)": 1.525549 }, { "acc": 0.9812212, "epoch": 9.641434262948207, "grad_norm": 7.138511657714844, "learning_rate": 9.452307348128907e-06, "loss": 0.13425207, "memory(GiB)": 13.7, "step": 20570, "train_speed(iter/s)": 1.52557 }, { "acc": 0.96810093, "epoch": 9.643777829857042, "grad_norm": 55.09284210205078, "learning_rate": 9.451954556864646e-06, "loss": 0.24562163, "memory(GiB)": 13.7, "step": 20575, "train_speed(iter/s)": 1.525599 }, { "acc": 0.97897816, "epoch": 9.646121396765878, "grad_norm": 3.184866189956665, "learning_rate": 9.451601658602042e-06, "loss": 0.16707212, "memory(GiB)": 13.7, "step": 20580, "train_speed(iter/s)": 1.525624 }, { "acc": 0.97499886, "epoch": 9.648464963674712, "grad_norm": 10.371197700500488, "learning_rate": 9.45124865334958e-06, "loss": 0.17087605, "memory(GiB)": 13.7, "step": 20585, "train_speed(iter/s)": 1.525632 }, { "acc": 0.9697998, "epoch": 9.650808530583548, "grad_norm": 10.57352352142334, "learning_rate": 9.450895541115739e-06, "loss": 0.18502216, "memory(GiB)": 13.7, "step": 20590, "train_speed(iter/s)": 1.525634 }, { "acc": 0.969697, "epoch": 9.653152097492384, "grad_norm": 10.81667709350586, "learning_rate": 9.45054232190901e-06, "loss": 0.09308746, "memory(GiB)": 13.7, "step": 20595, "train_speed(iter/s)": 1.525639 }, { "acc": 0.97824087, "epoch": 9.655495664401219, "grad_norm": 7.082833290100098, "learning_rate": 9.450188995737886e-06, "loss": 0.1180318, "memory(GiB)": 13.7, "step": 20600, "train_speed(iter/s)": 1.525648 }, { "acc": 0.96767941, "epoch": 9.657839231310055, "grad_norm": 5.807763576507568, "learning_rate": 9.449835562610855e-06, "loss": 0.1398414, "memory(GiB)": 13.7, "step": 20605, "train_speed(iter/s)": 1.525666 }, { "acc": 0.9760088, "epoch": 9.660182798218889, "grad_norm": 4.119714736938477, "learning_rate": 9.449482022536413e-06, "loss": 0.10679921, "memory(GiB)": 13.7, "step": 20610, "train_speed(iter/s)": 1.525667 }, { "acc": 0.97062492, "epoch": 9.662526365127725, "grad_norm": 0.23040057718753815, "learning_rate": 9.44912837552306e-06, "loss": 0.09066066, "memory(GiB)": 13.7, "step": 20615, "train_speed(iter/s)": 1.525666 }, { "acc": 0.96710815, "epoch": 9.66486993203656, "grad_norm": 4.471925258636475, "learning_rate": 9.448774621579298e-06, "loss": 0.13798203, "memory(GiB)": 13.7, "step": 20620, "train_speed(iter/s)": 1.52567 }, { "acc": 0.98108597, "epoch": 9.667213498945396, "grad_norm": 2.296783685684204, "learning_rate": 9.448420760713626e-06, "loss": 0.09815534, "memory(GiB)": 13.7, "step": 20625, "train_speed(iter/s)": 1.525672 }, { "acc": 0.97723885, "epoch": 9.66955706585423, "grad_norm": 0.1997383087873459, "learning_rate": 9.448066792934551e-06, "loss": 0.16492727, "memory(GiB)": 13.7, "step": 20630, "train_speed(iter/s)": 1.525693 }, { "acc": 0.95707111, "epoch": 9.671900632763066, "grad_norm": 4.718719959259033, "learning_rate": 9.447712718250582e-06, "loss": 0.1741279, "memory(GiB)": 13.7, "step": 20635, "train_speed(iter/s)": 1.525694 }, { "acc": 0.97479858, "epoch": 9.6742441996719, "grad_norm": 4.26163911819458, "learning_rate": 9.44735853667023e-06, "loss": 0.11540328, "memory(GiB)": 13.7, "step": 20640, "train_speed(iter/s)": 1.525681 }, { "acc": 0.95749454, "epoch": 9.676587766580736, "grad_norm": 6.173704147338867, "learning_rate": 9.44700424820201e-06, "loss": 0.18794589, "memory(GiB)": 13.7, "step": 20645, "train_speed(iter/s)": 1.525707 }, { "acc": 0.97609272, "epoch": 9.67893133348957, "grad_norm": 8.630911827087402, "learning_rate": 9.446649852854431e-06, "loss": 0.14086378, "memory(GiB)": 13.7, "step": 20650, "train_speed(iter/s)": 1.52571 }, { "acc": 0.98035717, "epoch": 9.681274900398407, "grad_norm": 2.627652406692505, "learning_rate": 9.446295350636018e-06, "loss": 0.08958355, "memory(GiB)": 13.7, "step": 20655, "train_speed(iter/s)": 1.525724 }, { "acc": 0.9718092, "epoch": 9.683618467307241, "grad_norm": 5.2832231521606445, "learning_rate": 9.445940741555294e-06, "loss": 0.1254326, "memory(GiB)": 13.7, "step": 20660, "train_speed(iter/s)": 1.525745 }, { "acc": 0.9741457, "epoch": 9.685962034216077, "grad_norm": 8.009071350097656, "learning_rate": 9.445586025620775e-06, "loss": 0.12439418, "memory(GiB)": 13.7, "step": 20665, "train_speed(iter/s)": 1.52576 }, { "acc": 0.96980114, "epoch": 9.688305601124911, "grad_norm": 4.456503868103027, "learning_rate": 9.445231202840993e-06, "loss": 0.1394292, "memory(GiB)": 13.7, "step": 20670, "train_speed(iter/s)": 1.525761 }, { "acc": 0.9626709, "epoch": 9.690649168033747, "grad_norm": 1.3883423805236816, "learning_rate": 9.444876273224473e-06, "loss": 0.1904816, "memory(GiB)": 13.7, "step": 20675, "train_speed(iter/s)": 1.525771 }, { "acc": 0.96308765, "epoch": 9.692992734942582, "grad_norm": 5.771895885467529, "learning_rate": 9.44452123677975e-06, "loss": 0.09695642, "memory(GiB)": 13.7, "step": 20680, "train_speed(iter/s)": 1.525809 }, { "acc": 0.97217264, "epoch": 9.695336301851418, "grad_norm": 3.250631093978882, "learning_rate": 9.444166093515355e-06, "loss": 0.1048661, "memory(GiB)": 13.7, "step": 20685, "train_speed(iter/s)": 1.525793 }, { "acc": 0.96664267, "epoch": 9.697679868760254, "grad_norm": 3.2424156665802, "learning_rate": 9.443810843439827e-06, "loss": 0.14477811, "memory(GiB)": 13.7, "step": 20690, "train_speed(iter/s)": 1.525791 }, { "acc": 0.98045464, "epoch": 9.700023435669088, "grad_norm": 5.8885722160339355, "learning_rate": 9.443455486561702e-06, "loss": 0.12190199, "memory(GiB)": 13.7, "step": 20695, "train_speed(iter/s)": 1.52578 }, { "acc": 0.96020832, "epoch": 9.702367002577924, "grad_norm": 3.233217239379883, "learning_rate": 9.443100022889525e-06, "loss": 0.18401921, "memory(GiB)": 13.7, "step": 20700, "train_speed(iter/s)": 1.525796 }, { "acc": 0.95291672, "epoch": 9.704710569486759, "grad_norm": 5.864372253417969, "learning_rate": 9.442744452431836e-06, "loss": 0.23877811, "memory(GiB)": 13.7, "step": 20705, "train_speed(iter/s)": 1.525798 }, { "acc": 0.9766573, "epoch": 9.707054136395595, "grad_norm": 7.611325263977051, "learning_rate": 9.442388775197188e-06, "loss": 0.13310366, "memory(GiB)": 13.7, "step": 20710, "train_speed(iter/s)": 1.525804 }, { "acc": 0.95741663, "epoch": 9.709397703304429, "grad_norm": 6.884674549102783, "learning_rate": 9.442032991194126e-06, "loss": 0.14515972, "memory(GiB)": 13.7, "step": 20715, "train_speed(iter/s)": 1.525808 }, { "acc": 0.98313885, "epoch": 9.711741270213265, "grad_norm": 11.268611907958984, "learning_rate": 9.441677100431201e-06, "loss": 0.13315891, "memory(GiB)": 13.7, "step": 20720, "train_speed(iter/s)": 1.525789 }, { "acc": 0.97075157, "epoch": 9.7140848371221, "grad_norm": 5.0284223556518555, "learning_rate": 9.441321102916968e-06, "loss": 0.15701058, "memory(GiB)": 13.7, "step": 20725, "train_speed(iter/s)": 1.525802 }, { "acc": 0.97685404, "epoch": 9.716428404030935, "grad_norm": 5.44786262512207, "learning_rate": 9.440964998659988e-06, "loss": 0.13121312, "memory(GiB)": 13.7, "step": 20730, "train_speed(iter/s)": 1.525793 }, { "acc": 0.97657776, "epoch": 9.71877197093977, "grad_norm": 3.7581050395965576, "learning_rate": 9.440608787668813e-06, "loss": 0.12619019, "memory(GiB)": 13.7, "step": 20735, "train_speed(iter/s)": 1.525811 }, { "acc": 0.98455353, "epoch": 9.721115537848606, "grad_norm": 0.9590499997138977, "learning_rate": 9.440252469952013e-06, "loss": 0.09360545, "memory(GiB)": 13.7, "step": 20740, "train_speed(iter/s)": 1.525821 }, { "acc": 0.95788698, "epoch": 9.72345910475744, "grad_norm": 0.13735060393810272, "learning_rate": 9.43989604551815e-06, "loss": 0.18424307, "memory(GiB)": 13.7, "step": 20745, "train_speed(iter/s)": 1.52582 }, { "acc": 0.97795954, "epoch": 9.725802671666276, "grad_norm": 14.2318754196167, "learning_rate": 9.439539514375787e-06, "loss": 0.12339491, "memory(GiB)": 13.7, "step": 20750, "train_speed(iter/s)": 1.525842 }, { "acc": 0.98969746, "epoch": 9.72814623857511, "grad_norm": 73.00533294677734, "learning_rate": 9.4391828765335e-06, "loss": 0.16628435, "memory(GiB)": 13.7, "step": 20755, "train_speed(iter/s)": 1.525866 }, { "acc": 0.96516819, "epoch": 9.730489805483947, "grad_norm": 2.7218527793884277, "learning_rate": 9.438826131999859e-06, "loss": 0.21385601, "memory(GiB)": 13.7, "step": 20760, "train_speed(iter/s)": 1.525878 }, { "acc": 0.96531754, "epoch": 9.732833372392783, "grad_norm": 7.307945728302002, "learning_rate": 9.438469280783438e-06, "loss": 0.14147826, "memory(GiB)": 13.7, "step": 20765, "train_speed(iter/s)": 1.525897 }, { "acc": 0.96054173, "epoch": 9.735176939301617, "grad_norm": 7.606388092041016, "learning_rate": 9.438112322892815e-06, "loss": 0.23594069, "memory(GiB)": 13.7, "step": 20770, "train_speed(iter/s)": 1.525926 }, { "acc": 0.97770834, "epoch": 9.737520506210453, "grad_norm": 17.215576171875, "learning_rate": 9.437755258336572e-06, "loss": 0.11691713, "memory(GiB)": 13.7, "step": 20775, "train_speed(iter/s)": 1.525941 }, { "acc": 0.9735714, "epoch": 9.739864073119287, "grad_norm": 4.6188435554504395, "learning_rate": 9.437398087123288e-06, "loss": 0.09942652, "memory(GiB)": 13.7, "step": 20780, "train_speed(iter/s)": 1.525949 }, { "acc": 0.98557291, "epoch": 9.742207640028123, "grad_norm": 6.629056930541992, "learning_rate": 9.437040809261552e-06, "loss": 0.10113878, "memory(GiB)": 13.7, "step": 20785, "train_speed(iter/s)": 1.525949 }, { "acc": 0.97981148, "epoch": 9.744551206936958, "grad_norm": 10.791213035583496, "learning_rate": 9.436683424759951e-06, "loss": 0.0949122, "memory(GiB)": 13.7, "step": 20790, "train_speed(iter/s)": 1.525945 }, { "acc": 0.9894886, "epoch": 9.746894773845794, "grad_norm": 3.2271721363067627, "learning_rate": 9.436325933627073e-06, "loss": 0.06107789, "memory(GiB)": 13.7, "step": 20795, "train_speed(iter/s)": 1.525956 }, { "acc": 0.97988129, "epoch": 9.749238340754628, "grad_norm": 13.941884994506836, "learning_rate": 9.435968335871513e-06, "loss": 0.12317908, "memory(GiB)": 13.7, "step": 20800, "train_speed(iter/s)": 1.525976 }, { "acc": 0.97354164, "epoch": 9.751581907663464, "grad_norm": 4.677137851715088, "learning_rate": 9.435610631501864e-06, "loss": 0.15929343, "memory(GiB)": 13.7, "step": 20805, "train_speed(iter/s)": 1.525997 }, { "acc": 0.96776743, "epoch": 9.753925474572299, "grad_norm": 6.810543537139893, "learning_rate": 9.435252820526728e-06, "loss": 0.15709566, "memory(GiB)": 13.7, "step": 20810, "train_speed(iter/s)": 1.526037 }, { "acc": 0.97598085, "epoch": 9.756269041481135, "grad_norm": 5.630967617034912, "learning_rate": 9.434894902954701e-06, "loss": 0.12247185, "memory(GiB)": 13.7, "step": 20815, "train_speed(iter/s)": 1.526042 }, { "acc": 0.9657238, "epoch": 9.758612608389969, "grad_norm": 4.892515182495117, "learning_rate": 9.43453687879439e-06, "loss": 0.20140245, "memory(GiB)": 13.7, "step": 20820, "train_speed(iter/s)": 1.526033 }, { "acc": 0.96861115, "epoch": 9.760956175298805, "grad_norm": 2.741915702819824, "learning_rate": 9.4341787480544e-06, "loss": 0.20376952, "memory(GiB)": 13.7, "step": 20825, "train_speed(iter/s)": 1.526048 }, { "acc": 0.97404766, "epoch": 9.76329974220764, "grad_norm": 10.330068588256836, "learning_rate": 9.43382051074334e-06, "loss": 0.12414113, "memory(GiB)": 13.7, "step": 20830, "train_speed(iter/s)": 1.526036 }, { "acc": 0.97022972, "epoch": 9.765643309116475, "grad_norm": 4.6533203125, "learning_rate": 9.433462166869817e-06, "loss": 0.12001674, "memory(GiB)": 13.7, "step": 20835, "train_speed(iter/s)": 1.526055 }, { "acc": 0.96697302, "epoch": 9.767986876025311, "grad_norm": 9.779380798339844, "learning_rate": 9.433103716442448e-06, "loss": 0.20763688, "memory(GiB)": 13.7, "step": 20840, "train_speed(iter/s)": 1.526072 }, { "acc": 0.97153988, "epoch": 9.770330442934146, "grad_norm": 7.73596715927124, "learning_rate": 9.432745159469848e-06, "loss": 0.15594997, "memory(GiB)": 13.7, "step": 20845, "train_speed(iter/s)": 1.526075 }, { "acc": 0.96687508, "epoch": 9.772674009842982, "grad_norm": 1.7278344631195068, "learning_rate": 9.432386495960635e-06, "loss": 0.23961954, "memory(GiB)": 13.7, "step": 20850, "train_speed(iter/s)": 1.526069 }, { "acc": 0.95375996, "epoch": 9.775017576751816, "grad_norm": 47.19973373413086, "learning_rate": 9.43202772592343e-06, "loss": 0.22956598, "memory(GiB)": 13.7, "step": 20855, "train_speed(iter/s)": 1.526085 }, { "acc": 0.95523815, "epoch": 9.777361143660652, "grad_norm": 11.708260536193848, "learning_rate": 9.431668849366857e-06, "loss": 0.28733559, "memory(GiB)": 13.7, "step": 20860, "train_speed(iter/s)": 1.526097 }, { "acc": 0.9809226, "epoch": 9.779704710569487, "grad_norm": 2.530890464782715, "learning_rate": 9.431309866299544e-06, "loss": 0.06301605, "memory(GiB)": 13.7, "step": 20865, "train_speed(iter/s)": 1.526101 }, { "acc": 0.97321434, "epoch": 9.782048277478323, "grad_norm": 0.818809449672699, "learning_rate": 9.430950776730117e-06, "loss": 0.12942333, "memory(GiB)": 13.7, "step": 20870, "train_speed(iter/s)": 1.526103 }, { "acc": 0.96486607, "epoch": 9.784391844387157, "grad_norm": 2.7900640964508057, "learning_rate": 9.430591580667209e-06, "loss": 0.17736998, "memory(GiB)": 13.7, "step": 20875, "train_speed(iter/s)": 1.526116 }, { "acc": 0.98279839, "epoch": 9.786735411295993, "grad_norm": 5.027184963226318, "learning_rate": 9.430232278119454e-06, "loss": 0.08388652, "memory(GiB)": 13.7, "step": 20880, "train_speed(iter/s)": 1.526128 }, { "acc": 0.98500576, "epoch": 9.789078978204827, "grad_norm": 3.7824759483337402, "learning_rate": 9.429872869095487e-06, "loss": 0.09156154, "memory(GiB)": 13.7, "step": 20885, "train_speed(iter/s)": 1.526127 }, { "acc": 0.97930803, "epoch": 9.791422545113663, "grad_norm": 4.1506757736206055, "learning_rate": 9.429513353603947e-06, "loss": 0.13235457, "memory(GiB)": 13.7, "step": 20890, "train_speed(iter/s)": 1.526166 }, { "acc": 0.9705101, "epoch": 9.793766112022498, "grad_norm": 5.533106803894043, "learning_rate": 9.42915373165348e-06, "loss": 0.21726871, "memory(GiB)": 13.7, "step": 20895, "train_speed(iter/s)": 1.526208 }, { "acc": 0.97312908, "epoch": 9.796109678931334, "grad_norm": 0.04105933755636215, "learning_rate": 9.428794003252722e-06, "loss": 0.09225596, "memory(GiB)": 13.7, "step": 20900, "train_speed(iter/s)": 1.526211 }, { "acc": 0.97691336, "epoch": 9.798453245840168, "grad_norm": 5.768761157989502, "learning_rate": 9.428434168410324e-06, "loss": 0.07754816, "memory(GiB)": 13.7, "step": 20905, "train_speed(iter/s)": 1.526224 }, { "acc": 0.9729579, "epoch": 9.800796812749004, "grad_norm": 28.380794525146484, "learning_rate": 9.428074227134938e-06, "loss": 0.13895743, "memory(GiB)": 13.7, "step": 20910, "train_speed(iter/s)": 1.526243 }, { "acc": 0.97750301, "epoch": 9.803140379657838, "grad_norm": 4.94752311706543, "learning_rate": 9.427714179435213e-06, "loss": 0.15593898, "memory(GiB)": 13.7, "step": 20915, "train_speed(iter/s)": 1.526258 }, { "acc": 0.98002234, "epoch": 9.805483946566675, "grad_norm": 3.219820499420166, "learning_rate": 9.427354025319803e-06, "loss": 0.11932708, "memory(GiB)": 13.7, "step": 20920, "train_speed(iter/s)": 1.526272 }, { "acc": 0.9677084, "epoch": 9.807827513475509, "grad_norm": 10.323718070983887, "learning_rate": 9.426993764797367e-06, "loss": 0.21413975, "memory(GiB)": 13.7, "step": 20925, "train_speed(iter/s)": 1.526291 }, { "acc": 0.94946432, "epoch": 9.810171080384345, "grad_norm": 4.000551700592041, "learning_rate": 9.426633397876562e-06, "loss": 0.19762495, "memory(GiB)": 13.7, "step": 20930, "train_speed(iter/s)": 1.526288 }, { "acc": 0.97233334, "epoch": 9.812514647293181, "grad_norm": 3.1249425411224365, "learning_rate": 9.42627292456605e-06, "loss": 0.13691539, "memory(GiB)": 13.7, "step": 20935, "train_speed(iter/s)": 1.526301 }, { "acc": 0.97277775, "epoch": 9.814858214202015, "grad_norm": 2.7358834743499756, "learning_rate": 9.4259123448745e-06, "loss": 0.20215428, "memory(GiB)": 13.7, "step": 20940, "train_speed(iter/s)": 1.526327 }, { "acc": 0.95854759, "epoch": 9.817201781110851, "grad_norm": 6.97645378112793, "learning_rate": 9.425551658810574e-06, "loss": 0.23164654, "memory(GiB)": 13.7, "step": 20945, "train_speed(iter/s)": 1.526341 }, { "acc": 0.97353535, "epoch": 9.819545348019686, "grad_norm": 4.23280143737793, "learning_rate": 9.425190866382942e-06, "loss": 0.19867425, "memory(GiB)": 13.7, "step": 20950, "train_speed(iter/s)": 1.526341 }, { "acc": 0.99005451, "epoch": 9.821888914928522, "grad_norm": 7.283763408660889, "learning_rate": 9.42482996760028e-06, "loss": 0.06308399, "memory(GiB)": 13.7, "step": 20955, "train_speed(iter/s)": 1.526352 }, { "acc": 0.9790575, "epoch": 9.824232481837356, "grad_norm": 4.043984889984131, "learning_rate": 9.424468962471258e-06, "loss": 0.05710458, "memory(GiB)": 13.7, "step": 20960, "train_speed(iter/s)": 1.526388 }, { "acc": 0.97563858, "epoch": 9.826576048746192, "grad_norm": 12.073421478271484, "learning_rate": 9.424107851004559e-06, "loss": 0.21717019, "memory(GiB)": 13.7, "step": 20965, "train_speed(iter/s)": 1.526387 }, { "acc": 0.96951571, "epoch": 9.828919615655026, "grad_norm": 3.3839306831359863, "learning_rate": 9.423746633208858e-06, "loss": 0.16070822, "memory(GiB)": 13.7, "step": 20970, "train_speed(iter/s)": 1.526409 }, { "acc": 0.98335314, "epoch": 9.831263182563863, "grad_norm": 11.785985946655273, "learning_rate": 9.42338530909284e-06, "loss": 0.06250702, "memory(GiB)": 13.7, "step": 20975, "train_speed(iter/s)": 1.52644 }, { "acc": 0.96155987, "epoch": 9.833606749472697, "grad_norm": 26.24551010131836, "learning_rate": 9.42302387866519e-06, "loss": 0.18346778, "memory(GiB)": 13.7, "step": 20980, "train_speed(iter/s)": 1.526438 }, { "acc": 0.98067369, "epoch": 9.835950316381533, "grad_norm": 3.1191928386688232, "learning_rate": 9.422662341934593e-06, "loss": 0.14605531, "memory(GiB)": 13.7, "step": 20985, "train_speed(iter/s)": 1.526452 }, { "acc": 0.96467266, "epoch": 9.838293883290367, "grad_norm": 6.359214782714844, "learning_rate": 9.422300698909746e-06, "loss": 0.20380936, "memory(GiB)": 13.7, "step": 20990, "train_speed(iter/s)": 1.526461 }, { "acc": 0.9765398, "epoch": 9.840637450199203, "grad_norm": 2.1618008613586426, "learning_rate": 9.421938949599333e-06, "loss": 0.11756282, "memory(GiB)": 13.7, "step": 20995, "train_speed(iter/s)": 1.526476 }, { "acc": 0.97634878, "epoch": 9.842981017108038, "grad_norm": 8.037763595581055, "learning_rate": 9.421577094012055e-06, "loss": 0.17733823, "memory(GiB)": 13.7, "step": 21000, "train_speed(iter/s)": 1.526494 }, { "acc": 0.95775566, "epoch": 9.845324584016874, "grad_norm": 7.986381530761719, "learning_rate": 9.421215132156606e-06, "loss": 0.19581348, "memory(GiB)": 13.7, "step": 21005, "train_speed(iter/s)": 1.526502 }, { "acc": 0.97562504, "epoch": 9.84766815092571, "grad_norm": 6.13142728805542, "learning_rate": 9.420853064041689e-06, "loss": 0.10791534, "memory(GiB)": 13.7, "step": 21010, "train_speed(iter/s)": 1.526509 }, { "acc": 0.97672749, "epoch": 9.850011717834544, "grad_norm": 4.406730651855469, "learning_rate": 9.420490889676006e-06, "loss": 0.12973011, "memory(GiB)": 13.7, "step": 21015, "train_speed(iter/s)": 1.52651 }, { "acc": 0.95940742, "epoch": 9.85235528474338, "grad_norm": 18.497642517089844, "learning_rate": 9.420128609068265e-06, "loss": 0.24591272, "memory(GiB)": 13.7, "step": 21020, "train_speed(iter/s)": 1.526535 }, { "acc": 0.97426586, "epoch": 9.854698851652214, "grad_norm": 1.5219604969024658, "learning_rate": 9.41976622222717e-06, "loss": 0.13342917, "memory(GiB)": 13.7, "step": 21025, "train_speed(iter/s)": 1.526532 }, { "acc": 0.96180058, "epoch": 9.85704241856105, "grad_norm": 6.2833571434021, "learning_rate": 9.419403729161433e-06, "loss": 0.19874288, "memory(GiB)": 13.7, "step": 21030, "train_speed(iter/s)": 1.526535 }, { "acc": 0.97477064, "epoch": 9.859385985469885, "grad_norm": 3.0251495838165283, "learning_rate": 9.419041129879768e-06, "loss": 0.14213529, "memory(GiB)": 13.7, "step": 21035, "train_speed(iter/s)": 1.526554 }, { "acc": 0.98130627, "epoch": 9.861729552378721, "grad_norm": 5.317221164703369, "learning_rate": 9.418678424390891e-06, "loss": 0.07280417, "memory(GiB)": 13.7, "step": 21040, "train_speed(iter/s)": 1.526564 }, { "acc": 0.97448864, "epoch": 9.864073119287555, "grad_norm": 3.213352680206299, "learning_rate": 9.41831561270352e-06, "loss": 0.09055532, "memory(GiB)": 13.7, "step": 21045, "train_speed(iter/s)": 1.526575 }, { "acc": 0.95410709, "epoch": 9.866416686196391, "grad_norm": 6.913238525390625, "learning_rate": 9.417952694826374e-06, "loss": 0.21650131, "memory(GiB)": 13.7, "step": 21050, "train_speed(iter/s)": 1.526605 }, { "acc": 0.97727947, "epoch": 9.868760253105226, "grad_norm": 9.876241683959961, "learning_rate": 9.417589670768179e-06, "loss": 0.14680688, "memory(GiB)": 13.7, "step": 21055, "train_speed(iter/s)": 1.52663 }, { "acc": 0.98214283, "epoch": 9.871103820014062, "grad_norm": 4.54339075088501, "learning_rate": 9.417226540537658e-06, "loss": 0.1007935, "memory(GiB)": 13.7, "step": 21060, "train_speed(iter/s)": 1.526642 }, { "acc": 0.98306637, "epoch": 9.873447386922896, "grad_norm": 3.5865590572357178, "learning_rate": 9.416863304143543e-06, "loss": 0.1339168, "memory(GiB)": 13.7, "step": 21065, "train_speed(iter/s)": 1.526642 }, { "acc": 0.95770292, "epoch": 9.875790953831732, "grad_norm": 9.448100090026855, "learning_rate": 9.416499961594561e-06, "loss": 0.19673258, "memory(GiB)": 13.7, "step": 21070, "train_speed(iter/s)": 1.526675 }, { "acc": 0.9706852, "epoch": 9.878134520740566, "grad_norm": 5.0869855880737305, "learning_rate": 9.41613651289945e-06, "loss": 0.14671133, "memory(GiB)": 13.7, "step": 21075, "train_speed(iter/s)": 1.526659 }, { "acc": 0.96511869, "epoch": 9.880478087649402, "grad_norm": 5.961461067199707, "learning_rate": 9.415772958066941e-06, "loss": 0.12929335, "memory(GiB)": 13.7, "step": 21080, "train_speed(iter/s)": 1.526661 }, { "acc": 0.97734222, "epoch": 9.882821654558239, "grad_norm": 7.69157600402832, "learning_rate": 9.415409297105776e-06, "loss": 0.09981126, "memory(GiB)": 13.7, "step": 21085, "train_speed(iter/s)": 1.526676 }, { "acc": 0.95819998, "epoch": 9.885165221467073, "grad_norm": 15.602187156677246, "learning_rate": 9.415045530024697e-06, "loss": 0.20257597, "memory(GiB)": 13.7, "step": 21090, "train_speed(iter/s)": 1.526691 }, { "acc": 0.98192539, "epoch": 9.887508788375909, "grad_norm": 14.89999771118164, "learning_rate": 9.414681656832446e-06, "loss": 0.12403955, "memory(GiB)": 13.7, "step": 21095, "train_speed(iter/s)": 1.526706 }, { "acc": 0.97684526, "epoch": 9.889852355284743, "grad_norm": 3.503176689147949, "learning_rate": 9.414317677537769e-06, "loss": 0.12112819, "memory(GiB)": 13.7, "step": 21100, "train_speed(iter/s)": 1.526693 }, { "acc": 0.97309532, "epoch": 9.89219592219358, "grad_norm": 4.683319091796875, "learning_rate": 9.413953592149415e-06, "loss": 0.10775533, "memory(GiB)": 13.7, "step": 21105, "train_speed(iter/s)": 1.526701 }, { "acc": 0.9720089, "epoch": 9.894539489102414, "grad_norm": 6.893039226531982, "learning_rate": 9.413589400676136e-06, "loss": 0.16603544, "memory(GiB)": 13.7, "step": 21110, "train_speed(iter/s)": 1.526725 }, { "acc": 0.97736111, "epoch": 9.89688305601125, "grad_norm": 2.452022075653076, "learning_rate": 9.413225103126688e-06, "loss": 0.1323204, "memory(GiB)": 13.7, "step": 21115, "train_speed(iter/s)": 1.526737 }, { "acc": 0.97469158, "epoch": 9.899226622920084, "grad_norm": 6.715700626373291, "learning_rate": 9.412860699509825e-06, "loss": 0.09102227, "memory(GiB)": 13.7, "step": 21120, "train_speed(iter/s)": 1.526766 }, { "acc": 0.98351192, "epoch": 9.90157018982892, "grad_norm": 2.336768388748169, "learning_rate": 9.412496189834304e-06, "loss": 0.09032291, "memory(GiB)": 13.7, "step": 21125, "train_speed(iter/s)": 1.526755 }, { "acc": 0.9801136, "epoch": 9.903913756737754, "grad_norm": 0.41029587388038635, "learning_rate": 9.41213157410889e-06, "loss": 0.15821857, "memory(GiB)": 13.7, "step": 21130, "train_speed(iter/s)": 1.526764 }, { "acc": 0.97650757, "epoch": 9.90625732364659, "grad_norm": 1.2044503688812256, "learning_rate": 9.411766852342349e-06, "loss": 0.12187335, "memory(GiB)": 13.7, "step": 21135, "train_speed(iter/s)": 1.526787 }, { "acc": 0.98050594, "epoch": 9.908600890555425, "grad_norm": 6.637199878692627, "learning_rate": 9.411402024543442e-06, "loss": 0.12811763, "memory(GiB)": 13.7, "step": 21140, "train_speed(iter/s)": 1.526784 }, { "acc": 0.97264843, "epoch": 9.910944457464261, "grad_norm": 31.268980026245117, "learning_rate": 9.411037090720942e-06, "loss": 0.09544264, "memory(GiB)": 13.7, "step": 21145, "train_speed(iter/s)": 1.526785 }, { "acc": 0.97668648, "epoch": 9.913288024373095, "grad_norm": 4.090563774108887, "learning_rate": 9.410672050883622e-06, "loss": 0.11851292, "memory(GiB)": 13.7, "step": 21150, "train_speed(iter/s)": 1.526805 }, { "acc": 0.95774221, "epoch": 9.915631591281931, "grad_norm": 10.597102165222168, "learning_rate": 9.410306905040252e-06, "loss": 0.24670286, "memory(GiB)": 13.7, "step": 21155, "train_speed(iter/s)": 1.526803 }, { "acc": 0.98083057, "epoch": 9.917975158190766, "grad_norm": 6.810575485229492, "learning_rate": 9.409941653199613e-06, "loss": 0.08463639, "memory(GiB)": 13.7, "step": 21160, "train_speed(iter/s)": 1.526821 }, { "acc": 0.9844223, "epoch": 9.920318725099602, "grad_norm": 7.077512264251709, "learning_rate": 9.40957629537048e-06, "loss": 0.0494327, "memory(GiB)": 13.7, "step": 21165, "train_speed(iter/s)": 1.526817 }, { "acc": 0.97947464, "epoch": 9.922662292008436, "grad_norm": 4.669286251068115, "learning_rate": 9.40921083156164e-06, "loss": 0.11000141, "memory(GiB)": 13.7, "step": 21170, "train_speed(iter/s)": 1.526825 }, { "acc": 0.97824812, "epoch": 9.925005858917272, "grad_norm": 11.770954132080078, "learning_rate": 9.408845261781875e-06, "loss": 0.19477553, "memory(GiB)": 13.7, "step": 21175, "train_speed(iter/s)": 1.526849 }, { "acc": 0.97111111, "epoch": 9.927349425826108, "grad_norm": 5.83735466003418, "learning_rate": 9.408479586039974e-06, "loss": 0.10363696, "memory(GiB)": 13.7, "step": 21180, "train_speed(iter/s)": 1.526862 }, { "acc": 0.975, "epoch": 9.929692992734942, "grad_norm": 3.4465384483337402, "learning_rate": 9.408113804344722e-06, "loss": 0.07088858, "memory(GiB)": 13.7, "step": 21185, "train_speed(iter/s)": 1.526874 }, { "acc": 0.97800598, "epoch": 9.932036559643779, "grad_norm": 2.2503607273101807, "learning_rate": 9.407747916704917e-06, "loss": 0.11400096, "memory(GiB)": 13.7, "step": 21190, "train_speed(iter/s)": 1.526887 }, { "acc": 0.96023312, "epoch": 9.934380126552613, "grad_norm": 9.12656021118164, "learning_rate": 9.40738192312935e-06, "loss": 0.28095465, "memory(GiB)": 13.7, "step": 21195, "train_speed(iter/s)": 1.526897 }, { "acc": 0.9791667, "epoch": 9.936723693461449, "grad_norm": 0.43525031208992004, "learning_rate": 9.407015823626819e-06, "loss": 0.11620908, "memory(GiB)": 13.7, "step": 21200, "train_speed(iter/s)": 1.526924 }, { "acc": 0.96092262, "epoch": 9.939067260370283, "grad_norm": 8.865437507629395, "learning_rate": 9.406649618206126e-06, "loss": 0.14010346, "memory(GiB)": 13.7, "step": 21205, "train_speed(iter/s)": 1.526929 }, { "acc": 0.97645836, "epoch": 9.94141082727912, "grad_norm": 8.282184600830078, "learning_rate": 9.40628330687607e-06, "loss": 0.1583073, "memory(GiB)": 13.7, "step": 21210, "train_speed(iter/s)": 1.52693 }, { "acc": 0.97778282, "epoch": 9.943754394187954, "grad_norm": 15.813570022583008, "learning_rate": 9.405916889645455e-06, "loss": 0.145788, "memory(GiB)": 13.7, "step": 21215, "train_speed(iter/s)": 1.526962 }, { "acc": 0.97666664, "epoch": 9.94609796109679, "grad_norm": 42.99184799194336, "learning_rate": 9.405550366523094e-06, "loss": 0.10335604, "memory(GiB)": 13.7, "step": 21220, "train_speed(iter/s)": 1.526977 }, { "acc": 0.99125004, "epoch": 9.948441528005624, "grad_norm": 31.107107162475586, "learning_rate": 9.405183737517794e-06, "loss": 0.04783472, "memory(GiB)": 13.7, "step": 21225, "train_speed(iter/s)": 1.526998 }, { "acc": 0.98062496, "epoch": 9.95078509491446, "grad_norm": 5.532943248748779, "learning_rate": 9.404817002638365e-06, "loss": 0.07793344, "memory(GiB)": 13.7, "step": 21230, "train_speed(iter/s)": 1.527011 }, { "acc": 0.97704372, "epoch": 9.953128661823294, "grad_norm": 6.767888069152832, "learning_rate": 9.404450161893627e-06, "loss": 0.13022727, "memory(GiB)": 13.7, "step": 21235, "train_speed(iter/s)": 1.527 }, { "acc": 0.97020836, "epoch": 9.95547222873213, "grad_norm": 19.927946090698242, "learning_rate": 9.404083215292392e-06, "loss": 0.15615008, "memory(GiB)": 13.7, "step": 21240, "train_speed(iter/s)": 1.527006 }, { "acc": 0.96657486, "epoch": 9.957815795640965, "grad_norm": 6.125767707824707, "learning_rate": 9.403716162843488e-06, "loss": 0.16891989, "memory(GiB)": 13.7, "step": 21245, "train_speed(iter/s)": 1.527027 }, { "acc": 0.9609623, "epoch": 9.9601593625498, "grad_norm": 0.1925879716873169, "learning_rate": 9.40334900455573e-06, "loss": 0.17728627, "memory(GiB)": 13.7, "step": 21250, "train_speed(iter/s)": 1.527055 }, { "acc": 0.95107145, "epoch": 9.962502929458637, "grad_norm": 13.02177906036377, "learning_rate": 9.402981740437948e-06, "loss": 0.18339157, "memory(GiB)": 13.7, "step": 21255, "train_speed(iter/s)": 1.527095 }, { "acc": 0.97572918, "epoch": 9.964846496367471, "grad_norm": 3.7856600284576416, "learning_rate": 9.402614370498967e-06, "loss": 0.10376197, "memory(GiB)": 13.7, "step": 21260, "train_speed(iter/s)": 1.527097 }, { "acc": 0.9778059, "epoch": 9.967190063276307, "grad_norm": 9.231016159057617, "learning_rate": 9.402246894747618e-06, "loss": 0.11570549, "memory(GiB)": 13.7, "step": 21265, "train_speed(iter/s)": 1.527131 }, { "acc": 0.97844133, "epoch": 9.969533630185142, "grad_norm": 1.717246651649475, "learning_rate": 9.401879313192738e-06, "loss": 0.12934377, "memory(GiB)": 13.7, "step": 21270, "train_speed(iter/s)": 1.527139 }, { "acc": 0.96779766, "epoch": 9.971877197093978, "grad_norm": 10.981429100036621, "learning_rate": 9.401511625843156e-06, "loss": 0.08273094, "memory(GiB)": 13.7, "step": 21275, "train_speed(iter/s)": 1.527148 }, { "acc": 0.96331339, "epoch": 9.974220764002812, "grad_norm": 10.674371719360352, "learning_rate": 9.401143832707712e-06, "loss": 0.16571245, "memory(GiB)": 13.7, "step": 21280, "train_speed(iter/s)": 1.527165 }, { "acc": 0.96791401, "epoch": 9.976564330911648, "grad_norm": 7.670598983764648, "learning_rate": 9.40077593379525e-06, "loss": 0.10640504, "memory(GiB)": 13.7, "step": 21285, "train_speed(iter/s)": 1.527179 }, { "acc": 0.98997021, "epoch": 9.978907897820482, "grad_norm": 2.260169506072998, "learning_rate": 9.40040792911461e-06, "loss": 0.07326318, "memory(GiB)": 13.7, "step": 21290, "train_speed(iter/s)": 1.5272 }, { "acc": 0.9768364, "epoch": 9.981251464729318, "grad_norm": 4.5783371925354, "learning_rate": 9.400039818674638e-06, "loss": 0.16794691, "memory(GiB)": 13.7, "step": 21295, "train_speed(iter/s)": 1.527213 }, { "acc": 0.98570395, "epoch": 9.983595031638153, "grad_norm": 9.204254150390625, "learning_rate": 9.399671602484181e-06, "loss": 0.13417943, "memory(GiB)": 13.7, "step": 21300, "train_speed(iter/s)": 1.527234 }, { "acc": 0.9561553, "epoch": 9.985938598546989, "grad_norm": 14.48151683807373, "learning_rate": 9.399303280552093e-06, "loss": 0.18804681, "memory(GiB)": 13.7, "step": 21305, "train_speed(iter/s)": 1.52725 }, { "acc": 0.9750845, "epoch": 9.988282165455823, "grad_norm": 3.7107505798339844, "learning_rate": 9.398934852887224e-06, "loss": 0.15287971, "memory(GiB)": 13.7, "step": 21310, "train_speed(iter/s)": 1.527283 }, { "acc": 0.98105164, "epoch": 9.99062573236466, "grad_norm": 0.8636452555656433, "learning_rate": 9.398566319498433e-06, "loss": 0.12648504, "memory(GiB)": 13.7, "step": 21315, "train_speed(iter/s)": 1.527309 }, { "acc": 0.97565479, "epoch": 9.992969299273494, "grad_norm": 2.163492202758789, "learning_rate": 9.398197680394574e-06, "loss": 0.09314674, "memory(GiB)": 13.7, "step": 21320, "train_speed(iter/s)": 1.527308 }, { "acc": 0.95847301, "epoch": 9.99531286618233, "grad_norm": 6.652392864227295, "learning_rate": 9.39782893558451e-06, "loss": 0.24039605, "memory(GiB)": 13.7, "step": 21325, "train_speed(iter/s)": 1.52732 }, { "acc": 0.99384193, "epoch": 9.997656433091166, "grad_norm": 5.159750461578369, "learning_rate": 9.397460085077107e-06, "loss": 0.03817814, "memory(GiB)": 13.7, "step": 21330, "train_speed(iter/s)": 1.527319 }, { "acc": 0.95285664, "epoch": 10.0, "grad_norm": 7.594409465789795, "learning_rate": 9.397091128881229e-06, "loss": 0.23337688, "memory(GiB)": 13.7, "step": 21335, "train_speed(iter/s)": 1.527283 }, { "acc": 0.97840271, "epoch": 10.002343566908836, "grad_norm": 90.10566711425781, "learning_rate": 9.396722067005744e-06, "loss": 0.15815378, "memory(GiB)": 13.7, "step": 21340, "train_speed(iter/s)": 1.527239 }, { "acc": 0.98165646, "epoch": 10.00468713381767, "grad_norm": 5.424487590789795, "learning_rate": 9.39635289945952e-06, "loss": 0.12707819, "memory(GiB)": 13.7, "step": 21345, "train_speed(iter/s)": 1.527257 }, { "acc": 0.97995262, "epoch": 10.007030700726506, "grad_norm": 4.323120594024658, "learning_rate": 9.395983626251438e-06, "loss": 0.13699915, "memory(GiB)": 13.7, "step": 21350, "train_speed(iter/s)": 1.527258 }, { "acc": 0.98535709, "epoch": 10.00937426763534, "grad_norm": 2.7798612117767334, "learning_rate": 9.395614247390367e-06, "loss": 0.09544166, "memory(GiB)": 13.7, "step": 21355, "train_speed(iter/s)": 1.527264 }, { "acc": 0.96613321, "epoch": 10.011717834544177, "grad_norm": 9.662989616394043, "learning_rate": 9.395244762885189e-06, "loss": 0.21864002, "memory(GiB)": 13.7, "step": 21360, "train_speed(iter/s)": 1.527273 }, { "acc": 0.98313446, "epoch": 10.014061401453011, "grad_norm": 4.0913543701171875, "learning_rate": 9.394875172744786e-06, "loss": 0.09171106, "memory(GiB)": 13.7, "step": 21365, "train_speed(iter/s)": 1.527299 }, { "acc": 0.9852766, "epoch": 10.016404968361847, "grad_norm": 0.054294321686029434, "learning_rate": 9.39450547697804e-06, "loss": 0.07816238, "memory(GiB)": 13.7, "step": 21370, "train_speed(iter/s)": 1.527318 }, { "acc": 0.96540184, "epoch": 10.018748535270682, "grad_norm": 9.11764144897461, "learning_rate": 9.394135675593837e-06, "loss": 0.24577713, "memory(GiB)": 13.7, "step": 21375, "train_speed(iter/s)": 1.527336 }, { "acc": 0.98061285, "epoch": 10.021092102179518, "grad_norm": 3.653952121734619, "learning_rate": 9.393765768601068e-06, "loss": 0.10866566, "memory(GiB)": 13.7, "step": 21380, "train_speed(iter/s)": 1.527356 }, { "acc": 0.96926823, "epoch": 10.023435669088352, "grad_norm": 3.299206495285034, "learning_rate": 9.393395756008622e-06, "loss": 0.20044599, "memory(GiB)": 13.7, "step": 21385, "train_speed(iter/s)": 1.527357 }, { "acc": 0.97210236, "epoch": 10.025779235997188, "grad_norm": 7.108066558837891, "learning_rate": 9.393025637825394e-06, "loss": 0.15515152, "memory(GiB)": 13.7, "step": 21390, "train_speed(iter/s)": 1.527385 }, { "acc": 0.97331696, "epoch": 10.028122802906022, "grad_norm": 7.018868923187256, "learning_rate": 9.39265541406028e-06, "loss": 0.11486943, "memory(GiB)": 13.7, "step": 21395, "train_speed(iter/s)": 1.527408 }, { "acc": 0.98383923, "epoch": 10.030466369814858, "grad_norm": 6.990059852600098, "learning_rate": 9.39228508472218e-06, "loss": 0.10237633, "memory(GiB)": 13.7, "step": 21400, "train_speed(iter/s)": 1.527434 }, { "acc": 0.98187885, "epoch": 10.032809936723693, "grad_norm": 3.9275784492492676, "learning_rate": 9.391914649819993e-06, "loss": 0.10063586, "memory(GiB)": 13.7, "step": 21405, "train_speed(iter/s)": 1.527452 }, { "acc": 0.95890875, "epoch": 10.035153503632529, "grad_norm": 4.425084114074707, "learning_rate": 9.391544109362625e-06, "loss": 0.33308458, "memory(GiB)": 13.7, "step": 21410, "train_speed(iter/s)": 1.52746 }, { "acc": 0.97624998, "epoch": 10.037497070541363, "grad_norm": 4.600484371185303, "learning_rate": 9.391173463358983e-06, "loss": 0.10800945, "memory(GiB)": 13.7, "step": 21415, "train_speed(iter/s)": 1.527456 }, { "acc": 0.9738636, "epoch": 10.0398406374502, "grad_norm": 6.322240352630615, "learning_rate": 9.390802711817976e-06, "loss": 0.16542867, "memory(GiB)": 13.7, "step": 21420, "train_speed(iter/s)": 1.527465 }, { "acc": 0.98836536, "epoch": 10.042184204359035, "grad_norm": 2.1145503520965576, "learning_rate": 9.390431854748515e-06, "loss": 0.09095145, "memory(GiB)": 13.7, "step": 21425, "train_speed(iter/s)": 1.527479 }, { "acc": 0.96535721, "epoch": 10.04452777126787, "grad_norm": 10.37281322479248, "learning_rate": 9.390060892159514e-06, "loss": 0.20781779, "memory(GiB)": 13.7, "step": 21430, "train_speed(iter/s)": 1.52751 }, { "acc": 0.97043018, "epoch": 10.046871338176706, "grad_norm": 1.9329484701156616, "learning_rate": 9.389689824059888e-06, "loss": 0.15733299, "memory(GiB)": 13.7, "step": 21435, "train_speed(iter/s)": 1.527541 }, { "acc": 0.9613657, "epoch": 10.04921490508554, "grad_norm": 8.520631790161133, "learning_rate": 9.389318650458561e-06, "loss": 0.18610024, "memory(GiB)": 13.7, "step": 21440, "train_speed(iter/s)": 1.527545 }, { "acc": 0.97569714, "epoch": 10.051558471994376, "grad_norm": 6.622961044311523, "learning_rate": 9.388947371364451e-06, "loss": 0.12609566, "memory(GiB)": 13.7, "step": 21445, "train_speed(iter/s)": 1.527566 }, { "acc": 0.96875, "epoch": 10.05390203890321, "grad_norm": 8.459921836853027, "learning_rate": 9.388575986786484e-06, "loss": 0.140012, "memory(GiB)": 13.7, "step": 21450, "train_speed(iter/s)": 1.527568 }, { "acc": 0.97666664, "epoch": 10.056245605812046, "grad_norm": 14.355389595031738, "learning_rate": 9.388204496733587e-06, "loss": 0.16357685, "memory(GiB)": 13.7, "step": 21455, "train_speed(iter/s)": 1.527609 }, { "acc": 0.9681284, "epoch": 10.05858917272088, "grad_norm": 14.888544082641602, "learning_rate": 9.387832901214686e-06, "loss": 0.14922717, "memory(GiB)": 13.7, "step": 21460, "train_speed(iter/s)": 1.527609 }, { "acc": 0.96862068, "epoch": 10.060932739629717, "grad_norm": 5.476988792419434, "learning_rate": 9.387461200238717e-06, "loss": 0.21258545, "memory(GiB)": 13.7, "step": 21465, "train_speed(iter/s)": 1.527621 }, { "acc": 0.97807541, "epoch": 10.063276306538551, "grad_norm": 8.478410720825195, "learning_rate": 9.387089393814613e-06, "loss": 0.09924889, "memory(GiB)": 13.7, "step": 21470, "train_speed(iter/s)": 1.527645 }, { "acc": 0.98055916, "epoch": 10.065619873447387, "grad_norm": 2.2252190113067627, "learning_rate": 9.386717481951309e-06, "loss": 0.05655856, "memory(GiB)": 13.7, "step": 21475, "train_speed(iter/s)": 1.527648 }, { "acc": 0.96736603, "epoch": 10.067963440356221, "grad_norm": 11.161073684692383, "learning_rate": 9.386345464657748e-06, "loss": 0.17740586, "memory(GiB)": 13.7, "step": 21480, "train_speed(iter/s)": 1.527659 }, { "acc": 0.96873512, "epoch": 10.070307007265058, "grad_norm": 4.7933807373046875, "learning_rate": 9.385973341942873e-06, "loss": 0.12396219, "memory(GiB)": 13.7, "step": 21485, "train_speed(iter/s)": 1.527675 }, { "acc": 0.98952389, "epoch": 10.072650574173892, "grad_norm": 2.9170732498168945, "learning_rate": 9.385601113815624e-06, "loss": 0.05262355, "memory(GiB)": 13.7, "step": 21490, "train_speed(iter/s)": 1.527698 }, { "acc": 0.96744728, "epoch": 10.074994141082728, "grad_norm": 22.651884078979492, "learning_rate": 9.385228780284951e-06, "loss": 0.1584635, "memory(GiB)": 13.7, "step": 21495, "train_speed(iter/s)": 1.527706 }, { "acc": 0.98555546, "epoch": 10.077337707991564, "grad_norm": 3.2410972118377686, "learning_rate": 9.384856341359802e-06, "loss": 0.09043774, "memory(GiB)": 13.7, "step": 21500, "train_speed(iter/s)": 1.527722 }, { "acc": 0.95893898, "epoch": 10.079681274900398, "grad_norm": 11.089756965637207, "learning_rate": 9.384483797049131e-06, "loss": 0.18000679, "memory(GiB)": 13.7, "step": 21505, "train_speed(iter/s)": 1.527716 }, { "acc": 0.97027245, "epoch": 10.082024841809234, "grad_norm": 4.091814041137695, "learning_rate": 9.384111147361894e-06, "loss": 0.20782876, "memory(GiB)": 13.7, "step": 21510, "train_speed(iter/s)": 1.527733 }, { "acc": 0.98007383, "epoch": 10.084368408718069, "grad_norm": 2.72873854637146, "learning_rate": 9.383738392307043e-06, "loss": 0.11251367, "memory(GiB)": 13.7, "step": 21515, "train_speed(iter/s)": 1.527743 }, { "acc": 0.95590277, "epoch": 10.086711975626905, "grad_norm": 6.27126407623291, "learning_rate": 9.383365531893543e-06, "loss": 0.19877846, "memory(GiB)": 13.7, "step": 21520, "train_speed(iter/s)": 1.527757 }, { "acc": 0.97658682, "epoch": 10.089055542535739, "grad_norm": 11.526254653930664, "learning_rate": 9.382992566130353e-06, "loss": 0.13423517, "memory(GiB)": 13.7, "step": 21525, "train_speed(iter/s)": 1.527778 }, { "acc": 0.97925749, "epoch": 10.091399109444575, "grad_norm": 1.099038004875183, "learning_rate": 9.382619495026442e-06, "loss": 0.11149569, "memory(GiB)": 13.7, "step": 21530, "train_speed(iter/s)": 1.527773 }, { "acc": 0.97029762, "epoch": 10.09374267635341, "grad_norm": 6.318192958831787, "learning_rate": 9.382246318590773e-06, "loss": 0.18711786, "memory(GiB)": 13.7, "step": 21535, "train_speed(iter/s)": 1.527794 }, { "acc": 0.9793745, "epoch": 10.096086243262246, "grad_norm": 5.848226547241211, "learning_rate": 9.381873036832318e-06, "loss": 0.14442174, "memory(GiB)": 13.7, "step": 21540, "train_speed(iter/s)": 1.527798 }, { "acc": 0.95520363, "epoch": 10.09842981017108, "grad_norm": 7.45211935043335, "learning_rate": 9.381499649760047e-06, "loss": 0.19923279, "memory(GiB)": 13.7, "step": 21545, "train_speed(iter/s)": 1.527806 }, { "acc": 0.96687498, "epoch": 10.100773377079916, "grad_norm": 4.990202903747559, "learning_rate": 9.38112615738294e-06, "loss": 0.20065777, "memory(GiB)": 13.7, "step": 21550, "train_speed(iter/s)": 1.52783 }, { "acc": 0.96294022, "epoch": 10.10311694398875, "grad_norm": 7.018986225128174, "learning_rate": 9.38075255970997e-06, "loss": 0.2374186, "memory(GiB)": 13.7, "step": 21555, "train_speed(iter/s)": 1.527854 }, { "acc": 0.97501755, "epoch": 10.105460510897586, "grad_norm": 7.703036308288574, "learning_rate": 9.380378856750118e-06, "loss": 0.12673421, "memory(GiB)": 13.7, "step": 21560, "train_speed(iter/s)": 1.52784 }, { "acc": 0.975, "epoch": 10.10780407780642, "grad_norm": 1.6994107961654663, "learning_rate": 9.380005048512369e-06, "loss": 0.15605593, "memory(GiB)": 13.7, "step": 21565, "train_speed(iter/s)": 1.52785 }, { "acc": 0.97288933, "epoch": 10.110147644715257, "grad_norm": 4.14284086227417, "learning_rate": 9.379631135005707e-06, "loss": 0.16129656, "memory(GiB)": 13.7, "step": 21570, "train_speed(iter/s)": 1.527849 }, { "acc": 0.97702675, "epoch": 10.112491211624091, "grad_norm": 7.067286491394043, "learning_rate": 9.379257116239118e-06, "loss": 0.13420523, "memory(GiB)": 13.7, "step": 21575, "train_speed(iter/s)": 1.52785 }, { "acc": 0.97513885, "epoch": 10.114834778532927, "grad_norm": 5.06044864654541, "learning_rate": 9.378882992221594e-06, "loss": 0.13288914, "memory(GiB)": 13.7, "step": 21580, "train_speed(iter/s)": 1.527872 }, { "acc": 0.97858419, "epoch": 10.117178345441763, "grad_norm": 5.227412223815918, "learning_rate": 9.378508762962126e-06, "loss": 0.14268005, "memory(GiB)": 13.7, "step": 21585, "train_speed(iter/s)": 1.527885 }, { "acc": 0.98129778, "epoch": 10.119521912350598, "grad_norm": 6.391787528991699, "learning_rate": 9.378134428469711e-06, "loss": 0.15716774, "memory(GiB)": 13.7, "step": 21590, "train_speed(iter/s)": 1.527913 }, { "acc": 0.98518524, "epoch": 10.121865479259434, "grad_norm": 3.122859001159668, "learning_rate": 9.377759988753345e-06, "loss": 0.15035937, "memory(GiB)": 13.7, "step": 21595, "train_speed(iter/s)": 1.52791 }, { "acc": 0.96662769, "epoch": 10.124209046168268, "grad_norm": 6.338395118713379, "learning_rate": 9.377385443822029e-06, "loss": 0.1937228, "memory(GiB)": 13.7, "step": 21600, "train_speed(iter/s)": 1.527911 }, { "acc": 0.97072926, "epoch": 10.126552613077104, "grad_norm": 2.0520060062408447, "learning_rate": 9.377010793684767e-06, "loss": 0.12109474, "memory(GiB)": 13.7, "step": 21605, "train_speed(iter/s)": 1.527932 }, { "acc": 0.96196461, "epoch": 10.128896179985938, "grad_norm": 12.309786796569824, "learning_rate": 9.376636038350563e-06, "loss": 0.21863263, "memory(GiB)": 13.7, "step": 21610, "train_speed(iter/s)": 1.527954 }, { "acc": 0.99258928, "epoch": 10.131239746894774, "grad_norm": 0.7609867453575134, "learning_rate": 9.376261177828427e-06, "loss": 0.11646656, "memory(GiB)": 13.7, "step": 21615, "train_speed(iter/s)": 1.527974 }, { "acc": 0.97016201, "epoch": 10.133583313803609, "grad_norm": 1.4649910926818848, "learning_rate": 9.375886212127365e-06, "loss": 0.12705532, "memory(GiB)": 13.7, "step": 21620, "train_speed(iter/s)": 1.527979 }, { "acc": 0.98325005, "epoch": 10.135926880712445, "grad_norm": 3.9928934574127197, "learning_rate": 9.375511141256395e-06, "loss": 0.08472096, "memory(GiB)": 13.7, "step": 21625, "train_speed(iter/s)": 1.527974 }, { "acc": 0.98276291, "epoch": 10.138270447621279, "grad_norm": 1.6830925941467285, "learning_rate": 9.375135965224528e-06, "loss": 0.05421842, "memory(GiB)": 13.7, "step": 21630, "train_speed(iter/s)": 1.527985 }, { "acc": 0.97578869, "epoch": 10.140614014530115, "grad_norm": 3.614928722381592, "learning_rate": 9.374760684040787e-06, "loss": 0.08132324, "memory(GiB)": 13.7, "step": 21635, "train_speed(iter/s)": 1.527986 }, { "acc": 0.96788692, "epoch": 10.14295758143895, "grad_norm": 7.454095840454102, "learning_rate": 9.374385297714188e-06, "loss": 0.12788608, "memory(GiB)": 13.7, "step": 21640, "train_speed(iter/s)": 1.528001 }, { "acc": 0.97759876, "epoch": 10.145301148347786, "grad_norm": 3.329817533493042, "learning_rate": 9.374009806253757e-06, "loss": 0.10395354, "memory(GiB)": 13.7, "step": 21645, "train_speed(iter/s)": 1.528001 }, { "acc": 0.9723958, "epoch": 10.14764471525662, "grad_norm": 3.2673306465148926, "learning_rate": 9.373634209668516e-06, "loss": 0.11700683, "memory(GiB)": 13.7, "step": 21650, "train_speed(iter/s)": 1.528015 }, { "acc": 0.99511356, "epoch": 10.149988282165456, "grad_norm": 0.5865054130554199, "learning_rate": 9.373258507967497e-06, "loss": 0.0320991, "memory(GiB)": 13.7, "step": 21655, "train_speed(iter/s)": 1.528029 }, { "acc": 0.98033409, "epoch": 10.15233184907429, "grad_norm": 4.71046257019043, "learning_rate": 9.37288270115973e-06, "loss": 0.11796298, "memory(GiB)": 13.7, "step": 21660, "train_speed(iter/s)": 1.528055 }, { "acc": 0.98510141, "epoch": 10.154675415983126, "grad_norm": 0.6174371838569641, "learning_rate": 9.372506789254245e-06, "loss": 0.0888224, "memory(GiB)": 13.7, "step": 21665, "train_speed(iter/s)": 1.528068 }, { "acc": 0.96379375, "epoch": 10.157018982891962, "grad_norm": 11.410951614379883, "learning_rate": 9.372130772260082e-06, "loss": 0.25445623, "memory(GiB)": 13.7, "step": 21670, "train_speed(iter/s)": 1.528086 }, { "acc": 0.97189627, "epoch": 10.159362549800797, "grad_norm": 5.219573974609375, "learning_rate": 9.371754650186276e-06, "loss": 0.09536023, "memory(GiB)": 13.7, "step": 21675, "train_speed(iter/s)": 1.528101 }, { "acc": 0.98676472, "epoch": 10.161706116709633, "grad_norm": 4.618861675262451, "learning_rate": 9.371378423041869e-06, "loss": 0.10452951, "memory(GiB)": 13.7, "step": 21680, "train_speed(iter/s)": 1.528112 }, { "acc": 0.96232147, "epoch": 10.164049683618467, "grad_norm": 5.571445941925049, "learning_rate": 9.371002090835906e-06, "loss": 0.23022585, "memory(GiB)": 13.7, "step": 21685, "train_speed(iter/s)": 1.528128 }, { "acc": 0.98298607, "epoch": 10.166393250527303, "grad_norm": 3.311952829360962, "learning_rate": 9.370625653577429e-06, "loss": 0.0833066, "memory(GiB)": 13.7, "step": 21690, "train_speed(iter/s)": 1.52813 }, { "acc": 0.98298063, "epoch": 10.168736817436137, "grad_norm": 3.744823455810547, "learning_rate": 9.370249111275489e-06, "loss": 0.13242825, "memory(GiB)": 13.7, "step": 21695, "train_speed(iter/s)": 1.528128 }, { "acc": 0.97389803, "epoch": 10.171080384344974, "grad_norm": 7.835707664489746, "learning_rate": 9.369872463939136e-06, "loss": 0.11303698, "memory(GiB)": 13.7, "step": 21700, "train_speed(iter/s)": 1.52816 }, { "acc": 0.97049685, "epoch": 10.173423951253808, "grad_norm": 0.2043524533510208, "learning_rate": 9.369495711577422e-06, "loss": 0.11628419, "memory(GiB)": 13.7, "step": 21705, "train_speed(iter/s)": 1.528172 }, { "acc": 0.96786709, "epoch": 10.175767518162644, "grad_norm": 5.379176139831543, "learning_rate": 9.369118854199406e-06, "loss": 0.18658395, "memory(GiB)": 13.7, "step": 21710, "train_speed(iter/s)": 1.528179 }, { "acc": 0.98093262, "epoch": 10.178111085071478, "grad_norm": 6.358339786529541, "learning_rate": 9.368741891814144e-06, "loss": 0.09633647, "memory(GiB)": 13.7, "step": 21715, "train_speed(iter/s)": 1.528185 }, { "acc": 0.96590023, "epoch": 10.180454651980314, "grad_norm": 5.445833683013916, "learning_rate": 9.368364824430696e-06, "loss": 0.19269627, "memory(GiB)": 13.7, "step": 21720, "train_speed(iter/s)": 1.528193 }, { "acc": 0.9816186, "epoch": 10.182798218889149, "grad_norm": 3.929683208465576, "learning_rate": 9.36798765205813e-06, "loss": 0.08047475, "memory(GiB)": 13.7, "step": 21725, "train_speed(iter/s)": 1.528211 }, { "acc": 0.98475199, "epoch": 10.185141785797985, "grad_norm": 41.6182861328125, "learning_rate": 9.36761037470551e-06, "loss": 0.10028248, "memory(GiB)": 13.7, "step": 21730, "train_speed(iter/s)": 1.528223 }, { "acc": 0.99091473, "epoch": 10.187485352706819, "grad_norm": 5.175098896026611, "learning_rate": 9.367232992381902e-06, "loss": 0.06819309, "memory(GiB)": 13.7, "step": 21735, "train_speed(iter/s)": 1.52823 }, { "acc": 0.97961311, "epoch": 10.189828919615655, "grad_norm": 7.9892072677612305, "learning_rate": 9.366855505096379e-06, "loss": 0.10824165, "memory(GiB)": 13.7, "step": 21740, "train_speed(iter/s)": 1.528242 }, { "acc": 0.96232147, "epoch": 10.192172486524491, "grad_norm": 7.832851409912109, "learning_rate": 9.366477912858013e-06, "loss": 0.25351801, "memory(GiB)": 13.7, "step": 21745, "train_speed(iter/s)": 1.528248 }, { "acc": 0.97875004, "epoch": 10.194516053433325, "grad_norm": 5.216420650482178, "learning_rate": 9.366100215675881e-06, "loss": 0.09259695, "memory(GiB)": 13.7, "step": 21750, "train_speed(iter/s)": 1.528259 }, { "acc": 0.96583958, "epoch": 10.196859620342162, "grad_norm": 7.002534866333008, "learning_rate": 9.365722413559065e-06, "loss": 0.16144893, "memory(GiB)": 13.7, "step": 21755, "train_speed(iter/s)": 1.528263 }, { "acc": 0.97139254, "epoch": 10.199203187250996, "grad_norm": 6.161751747131348, "learning_rate": 9.365344506516641e-06, "loss": 0.14800093, "memory(GiB)": 13.7, "step": 21760, "train_speed(iter/s)": 1.528271 }, { "acc": 0.95936871, "epoch": 10.201546754159832, "grad_norm": 2.9720194339752197, "learning_rate": 9.364966494557694e-06, "loss": 0.21482754, "memory(GiB)": 13.7, "step": 21765, "train_speed(iter/s)": 1.528271 }, { "acc": 0.98666668, "epoch": 10.203890321068666, "grad_norm": 0.010581349022686481, "learning_rate": 9.364588377691314e-06, "loss": 0.05636116, "memory(GiB)": 13.7, "step": 21770, "train_speed(iter/s)": 1.528276 }, { "acc": 0.9754365, "epoch": 10.206233887977502, "grad_norm": 4.385372161865234, "learning_rate": 9.364210155926583e-06, "loss": 0.16739407, "memory(GiB)": 13.7, "step": 21775, "train_speed(iter/s)": 1.528276 }, { "acc": 0.97807484, "epoch": 10.208577454886337, "grad_norm": 3.621262311935425, "learning_rate": 9.363831829272597e-06, "loss": 0.09582714, "memory(GiB)": 13.7, "step": 21780, "train_speed(iter/s)": 1.52828 }, { "acc": 0.98445892, "epoch": 10.210921021795173, "grad_norm": 3.041315793991089, "learning_rate": 9.363453397738448e-06, "loss": 0.09042507, "memory(GiB)": 13.7, "step": 21785, "train_speed(iter/s)": 1.528293 }, { "acc": 0.96826696, "epoch": 10.213264588704007, "grad_norm": 7.195246696472168, "learning_rate": 9.363074861333234e-06, "loss": 0.13901219, "memory(GiB)": 13.7, "step": 21790, "train_speed(iter/s)": 1.528303 }, { "acc": 0.97328377, "epoch": 10.215608155612843, "grad_norm": 4.032414436340332, "learning_rate": 9.362696220066053e-06, "loss": 0.09550323, "memory(GiB)": 13.7, "step": 21795, "train_speed(iter/s)": 1.528324 }, { "acc": 0.95572433, "epoch": 10.217951722521677, "grad_norm": 5.3113226890563965, "learning_rate": 9.362317473946003e-06, "loss": 0.20874624, "memory(GiB)": 13.7, "step": 21800, "train_speed(iter/s)": 1.528335 }, { "acc": 0.97197914, "epoch": 10.220295289430513, "grad_norm": 4.605255126953125, "learning_rate": 9.361938622982192e-06, "loss": 0.1416346, "memory(GiB)": 13.7, "step": 21805, "train_speed(iter/s)": 1.528344 }, { "acc": 0.95508938, "epoch": 10.222638856339348, "grad_norm": 10.996089935302734, "learning_rate": 9.361559667183725e-06, "loss": 0.20502689, "memory(GiB)": 13.7, "step": 21810, "train_speed(iter/s)": 1.52834 }, { "acc": 0.97540588, "epoch": 10.224982423248184, "grad_norm": 3.460256576538086, "learning_rate": 9.361180606559712e-06, "loss": 0.13413459, "memory(GiB)": 13.7, "step": 21815, "train_speed(iter/s)": 1.528336 }, { "acc": 0.95745945, "epoch": 10.227325990157018, "grad_norm": 9.363420486450195, "learning_rate": 9.360801441119261e-06, "loss": 0.15782754, "memory(GiB)": 13.7, "step": 21820, "train_speed(iter/s)": 1.528337 }, { "acc": 0.98892078, "epoch": 10.229669557065854, "grad_norm": 0.8107113838195801, "learning_rate": 9.36042217087149e-06, "loss": 0.06869048, "memory(GiB)": 13.7, "step": 21825, "train_speed(iter/s)": 1.528339 }, { "acc": 0.98579979, "epoch": 10.23201312397469, "grad_norm": 4.508003234863281, "learning_rate": 9.360042795825513e-06, "loss": 0.06503991, "memory(GiB)": 13.7, "step": 21830, "train_speed(iter/s)": 1.528378 }, { "acc": 0.97676907, "epoch": 10.234356690883525, "grad_norm": 6.430930137634277, "learning_rate": 9.359663315990449e-06, "loss": 0.11570309, "memory(GiB)": 13.7, "step": 21835, "train_speed(iter/s)": 1.528395 }, { "acc": 0.96922626, "epoch": 10.23670025779236, "grad_norm": 7.085422992706299, "learning_rate": 9.35928373137542e-06, "loss": 0.16097772, "memory(GiB)": 13.7, "step": 21840, "train_speed(iter/s)": 1.528425 }, { "acc": 0.97952385, "epoch": 10.239043824701195, "grad_norm": 3.124495506286621, "learning_rate": 9.35890404198955e-06, "loss": 0.09752062, "memory(GiB)": 13.7, "step": 21845, "train_speed(iter/s)": 1.52843 }, { "acc": 0.98104172, "epoch": 10.241387391610031, "grad_norm": 6.908413887023926, "learning_rate": 9.358524247841963e-06, "loss": 0.06595869, "memory(GiB)": 13.7, "step": 21850, "train_speed(iter/s)": 1.528449 }, { "acc": 0.96529074, "epoch": 10.243730958518865, "grad_norm": 4.3731184005737305, "learning_rate": 9.358144348941794e-06, "loss": 0.19279706, "memory(GiB)": 13.7, "step": 21855, "train_speed(iter/s)": 1.528457 }, { "acc": 0.96924934, "epoch": 10.246074525427701, "grad_norm": 6.990886688232422, "learning_rate": 9.357764345298168e-06, "loss": 0.18382934, "memory(GiB)": 13.7, "step": 21860, "train_speed(iter/s)": 1.528489 }, { "acc": 0.96532202, "epoch": 10.248418092336536, "grad_norm": 2.288313388824463, "learning_rate": 9.357384236920224e-06, "loss": 0.1244557, "memory(GiB)": 13.7, "step": 21865, "train_speed(iter/s)": 1.528509 }, { "acc": 0.9805006, "epoch": 10.250761659245372, "grad_norm": 1.1240471601486206, "learning_rate": 9.357004023817094e-06, "loss": 0.12547698, "memory(GiB)": 13.7, "step": 21870, "train_speed(iter/s)": 1.528512 }, { "acc": 0.9783989, "epoch": 10.253105226154206, "grad_norm": 7.651578903198242, "learning_rate": 9.356623705997922e-06, "loss": 0.1272015, "memory(GiB)": 13.7, "step": 21875, "train_speed(iter/s)": 1.528507 }, { "acc": 0.97477932, "epoch": 10.255448793063042, "grad_norm": 4.206801891326904, "learning_rate": 9.356243283471846e-06, "loss": 0.06323818, "memory(GiB)": 13.7, "step": 21880, "train_speed(iter/s)": 1.528509 }, { "acc": 0.98329859, "epoch": 10.257792359971877, "grad_norm": 2.644882917404175, "learning_rate": 9.35586275624801e-06, "loss": 0.10140327, "memory(GiB)": 13.7, "step": 21885, "train_speed(iter/s)": 1.528524 }, { "acc": 0.96737185, "epoch": 10.260135926880713, "grad_norm": 4.124617576599121, "learning_rate": 9.355482124335563e-06, "loss": 0.18192019, "memory(GiB)": 13.7, "step": 21890, "train_speed(iter/s)": 1.528537 }, { "acc": 0.97983637, "epoch": 10.262479493789547, "grad_norm": 7.2047576904296875, "learning_rate": 9.355101387743654e-06, "loss": 0.09777195, "memory(GiB)": 13.7, "step": 21895, "train_speed(iter/s)": 1.528542 }, { "acc": 0.9814682, "epoch": 10.264823060698383, "grad_norm": 2.833019256591797, "learning_rate": 9.354720546481433e-06, "loss": 0.08869247, "memory(GiB)": 13.7, "step": 21900, "train_speed(iter/s)": 1.528559 }, { "acc": 0.97414265, "epoch": 10.267166627607217, "grad_norm": 8.741393089294434, "learning_rate": 9.354339600558053e-06, "loss": 0.10243075, "memory(GiB)": 13.7, "step": 21905, "train_speed(iter/s)": 1.528554 }, { "acc": 0.97535706, "epoch": 10.269510194516053, "grad_norm": 13.000746726989746, "learning_rate": 9.353958549982674e-06, "loss": 0.14668113, "memory(GiB)": 13.7, "step": 21910, "train_speed(iter/s)": 1.528573 }, { "acc": 0.98142853, "epoch": 10.27185376142489, "grad_norm": 6.411056995391846, "learning_rate": 9.353577394764454e-06, "loss": 0.09707189, "memory(GiB)": 13.7, "step": 21915, "train_speed(iter/s)": 1.528585 }, { "acc": 0.98154764, "epoch": 10.274197328333724, "grad_norm": 4.89524507522583, "learning_rate": 9.353196134912555e-06, "loss": 0.10480061, "memory(GiB)": 13.7, "step": 21920, "train_speed(iter/s)": 1.52859 }, { "acc": 0.96001987, "epoch": 10.27654089524256, "grad_norm": 6.128920078277588, "learning_rate": 9.352814770436137e-06, "loss": 0.21529856, "memory(GiB)": 13.7, "step": 21925, "train_speed(iter/s)": 1.528585 }, { "acc": 0.97091036, "epoch": 10.278884462151394, "grad_norm": 8.015707969665527, "learning_rate": 9.352433301344374e-06, "loss": 0.12349466, "memory(GiB)": 13.7, "step": 21930, "train_speed(iter/s)": 1.528613 }, { "acc": 0.96630602, "epoch": 10.28122802906023, "grad_norm": 11.061930656433105, "learning_rate": 9.35205172764643e-06, "loss": 0.20534606, "memory(GiB)": 13.7, "step": 21935, "train_speed(iter/s)": 1.528623 }, { "acc": 0.97683239, "epoch": 10.283571595969065, "grad_norm": 4.00913667678833, "learning_rate": 9.35167004935148e-06, "loss": 0.07615309, "memory(GiB)": 13.7, "step": 21940, "train_speed(iter/s)": 1.528606 }, { "acc": 0.98020287, "epoch": 10.2859151628779, "grad_norm": 1.6429585218429565, "learning_rate": 9.351288266468695e-06, "loss": 0.09940427, "memory(GiB)": 13.7, "step": 21945, "train_speed(iter/s)": 1.528605 }, { "acc": 0.95565481, "epoch": 10.288258729786735, "grad_norm": 21.557695388793945, "learning_rate": 9.350906379007256e-06, "loss": 0.21249952, "memory(GiB)": 13.7, "step": 21950, "train_speed(iter/s)": 1.528599 }, { "acc": 0.96747475, "epoch": 10.290602296695571, "grad_norm": 14.333905220031738, "learning_rate": 9.350524386976337e-06, "loss": 0.14760768, "memory(GiB)": 13.7, "step": 21955, "train_speed(iter/s)": 1.528609 }, { "acc": 0.97957439, "epoch": 10.292945863604405, "grad_norm": 3.957771062850952, "learning_rate": 9.350142290385124e-06, "loss": 0.09866701, "memory(GiB)": 13.7, "step": 21960, "train_speed(iter/s)": 1.528625 }, { "acc": 0.97599707, "epoch": 10.295289430513241, "grad_norm": 4.105682373046875, "learning_rate": 9.349760089242799e-06, "loss": 0.12444477, "memory(GiB)": 13.7, "step": 21965, "train_speed(iter/s)": 1.528629 }, { "acc": 0.97823868, "epoch": 10.297632997422076, "grad_norm": 4.939192771911621, "learning_rate": 9.349377783558552e-06, "loss": 0.10045798, "memory(GiB)": 13.7, "step": 21970, "train_speed(iter/s)": 1.528645 }, { "acc": 0.95913773, "epoch": 10.299976564330912, "grad_norm": 10.2457275390625, "learning_rate": 9.348995373341568e-06, "loss": 0.22910881, "memory(GiB)": 13.7, "step": 21975, "train_speed(iter/s)": 1.528649 }, { "acc": 0.97147732, "epoch": 10.302320131239746, "grad_norm": 3.846968412399292, "learning_rate": 9.348612858601042e-06, "loss": 0.18717217, "memory(GiB)": 13.7, "step": 21980, "train_speed(iter/s)": 1.528658 }, { "acc": 0.95372677, "epoch": 10.304663698148582, "grad_norm": 12.557352066040039, "learning_rate": 9.348230239346166e-06, "loss": 0.16249032, "memory(GiB)": 13.7, "step": 21985, "train_speed(iter/s)": 1.528657 }, { "acc": 0.96540184, "epoch": 10.307007265057418, "grad_norm": 3.962092399597168, "learning_rate": 9.347847515586142e-06, "loss": 0.23630018, "memory(GiB)": 13.7, "step": 21990, "train_speed(iter/s)": 1.528656 }, { "acc": 0.96937504, "epoch": 10.309350831966253, "grad_norm": 2.0656991004943848, "learning_rate": 9.347464687330163e-06, "loss": 0.145435, "memory(GiB)": 13.7, "step": 21995, "train_speed(iter/s)": 1.528654 }, { "acc": 0.96497021, "epoch": 10.311694398875089, "grad_norm": 5.86992883682251, "learning_rate": 9.347081754587435e-06, "loss": 0.1367754, "memory(GiB)": 13.7, "step": 22000, "train_speed(iter/s)": 1.528656 }, { "acc": 0.98361111, "epoch": 10.314037965783923, "grad_norm": 3.961226224899292, "learning_rate": 9.346698717367161e-06, "loss": 0.0737395, "memory(GiB)": 13.7, "step": 22005, "train_speed(iter/s)": 1.528639 }, { "acc": 0.95653448, "epoch": 10.316381532692759, "grad_norm": 2.5991811752319336, "learning_rate": 9.346315575678548e-06, "loss": 0.18436579, "memory(GiB)": 13.7, "step": 22010, "train_speed(iter/s)": 1.528649 }, { "acc": 0.97658806, "epoch": 10.318725099601593, "grad_norm": 1.2177366018295288, "learning_rate": 9.345932329530806e-06, "loss": 0.11480291, "memory(GiB)": 13.7, "step": 22015, "train_speed(iter/s)": 1.528662 }, { "acc": 0.96550922, "epoch": 10.32106866651043, "grad_norm": 2.4860217571258545, "learning_rate": 9.345548978933145e-06, "loss": 0.14207691, "memory(GiB)": 13.7, "step": 22020, "train_speed(iter/s)": 1.528665 }, { "acc": 0.96289368, "epoch": 10.323412233419264, "grad_norm": 8.078161239624023, "learning_rate": 9.345165523894783e-06, "loss": 0.2278173, "memory(GiB)": 13.7, "step": 22025, "train_speed(iter/s)": 1.528683 }, { "acc": 0.98077383, "epoch": 10.3257558003281, "grad_norm": 1.3462275266647339, "learning_rate": 9.344781964424936e-06, "loss": 0.1191993, "memory(GiB)": 13.7, "step": 22030, "train_speed(iter/s)": 1.528701 }, { "acc": 0.98847218, "epoch": 10.328099367236934, "grad_norm": 7.870317459106445, "learning_rate": 9.34439830053282e-06, "loss": 0.0741025, "memory(GiB)": 13.7, "step": 22035, "train_speed(iter/s)": 1.528708 }, { "acc": 0.96808758, "epoch": 10.33044293414577, "grad_norm": 16.063217163085938, "learning_rate": 9.344014532227663e-06, "loss": 0.17493427, "memory(GiB)": 13.7, "step": 22040, "train_speed(iter/s)": 1.528701 }, { "acc": 0.96208305, "epoch": 10.332786501054604, "grad_norm": 23.253419876098633, "learning_rate": 9.343630659518684e-06, "loss": 0.18801295, "memory(GiB)": 13.7, "step": 22045, "train_speed(iter/s)": 1.528696 }, { "acc": 0.9854167, "epoch": 10.33513006796344, "grad_norm": 3.513692855834961, "learning_rate": 9.343246682415113e-06, "loss": 0.09829246, "memory(GiB)": 13.7, "step": 22050, "train_speed(iter/s)": 1.528693 }, { "acc": 0.97597713, "epoch": 10.337473634872275, "grad_norm": 6.25679349899292, "learning_rate": 9.342862600926177e-06, "loss": 0.13216798, "memory(GiB)": 13.7, "step": 22055, "train_speed(iter/s)": 1.528692 }, { "acc": 0.989217, "epoch": 10.339817201781111, "grad_norm": 3.0510265827178955, "learning_rate": 9.342478415061112e-06, "loss": 0.07135441, "memory(GiB)": 13.7, "step": 22060, "train_speed(iter/s)": 1.52871 }, { "acc": 0.98895836, "epoch": 10.342160768689945, "grad_norm": 3.8631558418273926, "learning_rate": 9.342094124829148e-06, "loss": 0.05397546, "memory(GiB)": 13.7, "step": 22065, "train_speed(iter/s)": 1.528711 }, { "acc": 0.98315716, "epoch": 10.344504335598781, "grad_norm": 6.2218852043151855, "learning_rate": 9.341709730239527e-06, "loss": 0.09502987, "memory(GiB)": 13.7, "step": 22070, "train_speed(iter/s)": 1.528707 }, { "acc": 0.98309517, "epoch": 10.346847902507617, "grad_norm": 1.5740495920181274, "learning_rate": 9.341325231301486e-06, "loss": 0.11614296, "memory(GiB)": 13.7, "step": 22075, "train_speed(iter/s)": 1.528713 }, { "acc": 0.97597466, "epoch": 10.349191469416452, "grad_norm": 3.7162675857543945, "learning_rate": 9.340940628024266e-06, "loss": 0.17449889, "memory(GiB)": 13.7, "step": 22080, "train_speed(iter/s)": 1.528714 }, { "acc": 0.9951952, "epoch": 10.351535036325288, "grad_norm": 0.6975557208061218, "learning_rate": 9.340555920417112e-06, "loss": 0.04866943, "memory(GiB)": 13.7, "step": 22085, "train_speed(iter/s)": 1.528704 }, { "acc": 0.98274078, "epoch": 10.353878603234122, "grad_norm": 4.961399078369141, "learning_rate": 9.340171108489272e-06, "loss": 0.07541298, "memory(GiB)": 13.7, "step": 22090, "train_speed(iter/s)": 1.528736 }, { "acc": 0.9825695, "epoch": 10.356222170142958, "grad_norm": 6.712469100952148, "learning_rate": 9.339786192249997e-06, "loss": 0.0708276, "memory(GiB)": 13.7, "step": 22095, "train_speed(iter/s)": 1.528732 }, { "acc": 0.97692957, "epoch": 10.358565737051793, "grad_norm": 4.545907497406006, "learning_rate": 9.339401171708537e-06, "loss": 0.10427229, "memory(GiB)": 13.7, "step": 22100, "train_speed(iter/s)": 1.528735 }, { "acc": 0.96098213, "epoch": 10.360909303960629, "grad_norm": 28.00369644165039, "learning_rate": 9.339016046874146e-06, "loss": 0.18201892, "memory(GiB)": 13.7, "step": 22105, "train_speed(iter/s)": 1.528744 }, { "acc": 0.97735958, "epoch": 10.363252870869463, "grad_norm": 4.935308933258057, "learning_rate": 9.338630817756085e-06, "loss": 0.10480089, "memory(GiB)": 13.7, "step": 22110, "train_speed(iter/s)": 1.528759 }, { "acc": 0.97935762, "epoch": 10.365596437778299, "grad_norm": 3.9177091121673584, "learning_rate": 9.33824548436361e-06, "loss": 0.13137388, "memory(GiB)": 13.7, "step": 22115, "train_speed(iter/s)": 1.528774 }, { "acc": 0.97384109, "epoch": 10.367940004687133, "grad_norm": 7.349381923675537, "learning_rate": 9.337860046705983e-06, "loss": 0.07213444, "memory(GiB)": 13.7, "step": 22120, "train_speed(iter/s)": 1.528784 }, { "acc": 0.9700799, "epoch": 10.37028357159597, "grad_norm": 6.061888217926025, "learning_rate": 9.33747450479247e-06, "loss": 0.15560863, "memory(GiB)": 13.7, "step": 22125, "train_speed(iter/s)": 1.528795 }, { "acc": 0.97142887, "epoch": 10.372627138504804, "grad_norm": 6.848188877105713, "learning_rate": 9.33708885863234e-06, "loss": 0.14994544, "memory(GiB)": 13.7, "step": 22130, "train_speed(iter/s)": 1.528813 }, { "acc": 0.96908236, "epoch": 10.37497070541364, "grad_norm": 34.97276306152344, "learning_rate": 9.336703108234857e-06, "loss": 0.15095108, "memory(GiB)": 13.7, "step": 22135, "train_speed(iter/s)": 1.528822 }, { "acc": 0.9804018, "epoch": 10.377314272322474, "grad_norm": 6.901594638824463, "learning_rate": 9.3363172536093e-06, "loss": 0.16034365, "memory(GiB)": 13.7, "step": 22140, "train_speed(iter/s)": 1.528829 }, { "acc": 0.97728634, "epoch": 10.37965783923131, "grad_norm": 4.3932647705078125, "learning_rate": 9.33593129476494e-06, "loss": 0.0839951, "memory(GiB)": 13.7, "step": 22145, "train_speed(iter/s)": 1.52882 }, { "acc": 0.97270832, "epoch": 10.382001406140144, "grad_norm": 3.2077884674072266, "learning_rate": 9.335545231711052e-06, "loss": 0.14466193, "memory(GiB)": 13.7, "step": 22150, "train_speed(iter/s)": 1.52885 }, { "acc": 0.98553028, "epoch": 10.38434497304898, "grad_norm": 5.576082706451416, "learning_rate": 9.335159064456921e-06, "loss": 0.07053018, "memory(GiB)": 13.7, "step": 22155, "train_speed(iter/s)": 1.528858 }, { "acc": 0.97813454, "epoch": 10.386688539957817, "grad_norm": 3.4402103424072266, "learning_rate": 9.334772793011828e-06, "loss": 0.0755381, "memory(GiB)": 13.7, "step": 22160, "train_speed(iter/s)": 1.528887 }, { "acc": 0.96669645, "epoch": 10.389032106866651, "grad_norm": 23.609956741333008, "learning_rate": 9.334386417385053e-06, "loss": 0.23974752, "memory(GiB)": 13.7, "step": 22165, "train_speed(iter/s)": 1.528913 }, { "acc": 0.95898943, "epoch": 10.391375673775487, "grad_norm": 9.553361892700195, "learning_rate": 9.333999937585888e-06, "loss": 0.27108517, "memory(GiB)": 13.7, "step": 22170, "train_speed(iter/s)": 1.528923 }, { "acc": 0.97770834, "epoch": 10.393719240684321, "grad_norm": 9.813516616821289, "learning_rate": 9.33361335362362e-06, "loss": 0.14505924, "memory(GiB)": 13.7, "step": 22175, "train_speed(iter/s)": 1.528936 }, { "acc": 0.96836309, "epoch": 10.396062807593157, "grad_norm": 7.787837028503418, "learning_rate": 9.333226665507544e-06, "loss": 0.20980105, "memory(GiB)": 13.7, "step": 22180, "train_speed(iter/s)": 1.528928 }, { "acc": 0.96548805, "epoch": 10.398406374501992, "grad_norm": 6.225345134735107, "learning_rate": 9.332839873246952e-06, "loss": 0.19963567, "memory(GiB)": 13.7, "step": 22185, "train_speed(iter/s)": 1.528951 }, { "acc": 0.97703915, "epoch": 10.400749941410828, "grad_norm": 9.049843788146973, "learning_rate": 9.332452976851142e-06, "loss": 0.17307911, "memory(GiB)": 13.7, "step": 22190, "train_speed(iter/s)": 1.528961 }, { "acc": 0.97045383, "epoch": 10.403093508319662, "grad_norm": 112.33319854736328, "learning_rate": 9.332065976329416e-06, "loss": 0.16621826, "memory(GiB)": 13.7, "step": 22195, "train_speed(iter/s)": 1.528959 }, { "acc": 0.97458334, "epoch": 10.405437075228498, "grad_norm": 4.390615463256836, "learning_rate": 9.331678871691073e-06, "loss": 0.1204916, "memory(GiB)": 13.7, "step": 22200, "train_speed(iter/s)": 1.528957 }, { "acc": 0.97362175, "epoch": 10.407780642137332, "grad_norm": 5.477278232574463, "learning_rate": 9.331291662945417e-06, "loss": 0.12600119, "memory(GiB)": 13.7, "step": 22205, "train_speed(iter/s)": 1.528969 }, { "acc": 0.98588943, "epoch": 10.410124209046169, "grad_norm": 2.5150206089019775, "learning_rate": 9.33090435010176e-06, "loss": 0.11436734, "memory(GiB)": 13.7, "step": 22210, "train_speed(iter/s)": 1.528983 }, { "acc": 0.97718754, "epoch": 10.412467775955003, "grad_norm": 4.82045316696167, "learning_rate": 9.330516933169408e-06, "loss": 0.0714852, "memory(GiB)": 13.7, "step": 22215, "train_speed(iter/s)": 1.528996 }, { "acc": 0.97246704, "epoch": 10.414811342863839, "grad_norm": 0.9072556495666504, "learning_rate": 9.330129412157674e-06, "loss": 0.09512427, "memory(GiB)": 13.7, "step": 22220, "train_speed(iter/s)": 1.528996 }, { "acc": 0.97428036, "epoch": 10.417154909772673, "grad_norm": 4.595922470092773, "learning_rate": 9.329741787075873e-06, "loss": 0.17310677, "memory(GiB)": 13.7, "step": 22225, "train_speed(iter/s)": 1.528997 }, { "acc": 0.99189396, "epoch": 10.41949847668151, "grad_norm": 6.028005599975586, "learning_rate": 9.329354057933322e-06, "loss": 0.08418031, "memory(GiB)": 13.7, "step": 22230, "train_speed(iter/s)": 1.528986 }, { "acc": 0.9778595, "epoch": 10.421842043590345, "grad_norm": 7.223360061645508, "learning_rate": 9.32896622473934e-06, "loss": 0.10333067, "memory(GiB)": 13.7, "step": 22235, "train_speed(iter/s)": 1.528987 }, { "acc": 0.97292099, "epoch": 10.42418561049918, "grad_norm": 5.383302688598633, "learning_rate": 9.32857828750325e-06, "loss": 0.17114471, "memory(GiB)": 13.7, "step": 22240, "train_speed(iter/s)": 1.52899 }, { "acc": 0.97111492, "epoch": 10.426529177408016, "grad_norm": 11.17439079284668, "learning_rate": 9.32819024623438e-06, "loss": 0.24335725, "memory(GiB)": 13.7, "step": 22245, "train_speed(iter/s)": 1.529004 }, { "acc": 0.97615738, "epoch": 10.42887274431685, "grad_norm": 86.19841003417969, "learning_rate": 9.32780210094205e-06, "loss": 0.14152293, "memory(GiB)": 13.7, "step": 22250, "train_speed(iter/s)": 1.529011 }, { "acc": 0.97660217, "epoch": 10.431216311225686, "grad_norm": 5.189091205596924, "learning_rate": 9.327413851635594e-06, "loss": 0.14830033, "memory(GiB)": 13.7, "step": 22255, "train_speed(iter/s)": 1.529031 }, { "acc": 0.98341722, "epoch": 10.43355987813452, "grad_norm": 5.546778202056885, "learning_rate": 9.327025498324346e-06, "loss": 0.11000804, "memory(GiB)": 13.7, "step": 22260, "train_speed(iter/s)": 1.529032 }, { "acc": 0.98447914, "epoch": 10.435903445043357, "grad_norm": 48.39866256713867, "learning_rate": 9.32663704101764e-06, "loss": 0.09871215, "memory(GiB)": 13.7, "step": 22265, "train_speed(iter/s)": 1.529057 }, { "acc": 0.97873783, "epoch": 10.43824701195219, "grad_norm": 1.1063119173049927, "learning_rate": 9.32624847972481e-06, "loss": 0.16155555, "memory(GiB)": 13.7, "step": 22270, "train_speed(iter/s)": 1.529069 }, { "acc": 0.96418457, "epoch": 10.440590578861027, "grad_norm": 11.151185035705566, "learning_rate": 9.325859814455197e-06, "loss": 0.15407119, "memory(GiB)": 13.7, "step": 22275, "train_speed(iter/s)": 1.529068 }, { "acc": 0.96312943, "epoch": 10.442934145769861, "grad_norm": 10.743012428283691, "learning_rate": 9.325471045218144e-06, "loss": 0.14996991, "memory(GiB)": 13.7, "step": 22280, "train_speed(iter/s)": 1.529068 }, { "acc": 0.96336784, "epoch": 10.445277712678697, "grad_norm": 3.333379030227661, "learning_rate": 9.325082172022999e-06, "loss": 0.1521227, "memory(GiB)": 13.7, "step": 22285, "train_speed(iter/s)": 1.52908 }, { "acc": 0.97601185, "epoch": 10.447621279587532, "grad_norm": 0.3731667101383209, "learning_rate": 9.324693194879101e-06, "loss": 0.13851609, "memory(GiB)": 13.7, "step": 22290, "train_speed(iter/s)": 1.529083 }, { "acc": 0.95631905, "epoch": 10.449964846496368, "grad_norm": 3.7247538566589355, "learning_rate": 9.32430411379581e-06, "loss": 0.19696485, "memory(GiB)": 13.7, "step": 22295, "train_speed(iter/s)": 1.529075 }, { "acc": 0.98036804, "epoch": 10.452308413405202, "grad_norm": 2.873870611190796, "learning_rate": 9.32391492878247e-06, "loss": 0.10434942, "memory(GiB)": 13.7, "step": 22300, "train_speed(iter/s)": 1.529058 }, { "acc": 0.97841492, "epoch": 10.454651980314038, "grad_norm": 1.2144029140472412, "learning_rate": 9.323525639848437e-06, "loss": 0.1376345, "memory(GiB)": 13.7, "step": 22305, "train_speed(iter/s)": 1.529059 }, { "acc": 0.95819206, "epoch": 10.456995547222872, "grad_norm": 12.347920417785645, "learning_rate": 9.323136247003073e-06, "loss": 0.21054513, "memory(GiB)": 13.7, "step": 22310, "train_speed(iter/s)": 1.529062 }, { "acc": 0.98993063, "epoch": 10.459339114131708, "grad_norm": 2.6254634857177734, "learning_rate": 9.322746750255732e-06, "loss": 0.05828965, "memory(GiB)": 13.7, "step": 22315, "train_speed(iter/s)": 1.529075 }, { "acc": 0.97600555, "epoch": 10.461682681040545, "grad_norm": 8.769267082214355, "learning_rate": 9.32235714961578e-06, "loss": 0.17336391, "memory(GiB)": 13.7, "step": 22320, "train_speed(iter/s)": 1.529087 }, { "acc": 0.9825695, "epoch": 10.464026247949379, "grad_norm": 5.246912479400635, "learning_rate": 9.321967445092581e-06, "loss": 0.05395709, "memory(GiB)": 13.7, "step": 22325, "train_speed(iter/s)": 1.52908 }, { "acc": 0.9794445, "epoch": 10.466369814858215, "grad_norm": 2.3893659114837646, "learning_rate": 9.321577636695501e-06, "loss": 0.09332718, "memory(GiB)": 13.7, "step": 22330, "train_speed(iter/s)": 1.529095 }, { "acc": 0.96984844, "epoch": 10.46871338176705, "grad_norm": 4.363649845123291, "learning_rate": 9.321187724433911e-06, "loss": 0.16217294, "memory(GiB)": 13.7, "step": 22335, "train_speed(iter/s)": 1.529101 }, { "acc": 0.98087463, "epoch": 10.471056948675885, "grad_norm": 13.508277893066406, "learning_rate": 9.320797708317183e-06, "loss": 0.07001401, "memory(GiB)": 13.7, "step": 22340, "train_speed(iter/s)": 1.529116 }, { "acc": 0.97881947, "epoch": 10.47340051558472, "grad_norm": 32.97750473022461, "learning_rate": 9.320407588354691e-06, "loss": 0.10146762, "memory(GiB)": 13.7, "step": 22345, "train_speed(iter/s)": 1.529125 }, { "acc": 0.97915134, "epoch": 10.475744082493556, "grad_norm": 23.40004539489746, "learning_rate": 9.320017364555812e-06, "loss": 0.09941577, "memory(GiB)": 13.7, "step": 22350, "train_speed(iter/s)": 1.529133 }, { "acc": 0.9702178, "epoch": 10.47808764940239, "grad_norm": 11.42516040802002, "learning_rate": 9.319627036929927e-06, "loss": 0.11958163, "memory(GiB)": 13.7, "step": 22355, "train_speed(iter/s)": 1.529143 }, { "acc": 0.96748257, "epoch": 10.480431216311226, "grad_norm": 5.907776355743408, "learning_rate": 9.319236605486417e-06, "loss": 0.13878238, "memory(GiB)": 13.7, "step": 22360, "train_speed(iter/s)": 1.529145 }, { "acc": 0.95826836, "epoch": 10.48277478322006, "grad_norm": 3.6172420978546143, "learning_rate": 9.318846070234666e-06, "loss": 0.22266281, "memory(GiB)": 13.7, "step": 22365, "train_speed(iter/s)": 1.52914 }, { "acc": 0.98497028, "epoch": 10.485118350128896, "grad_norm": 7.808123588562012, "learning_rate": 9.318455431184064e-06, "loss": 0.06503925, "memory(GiB)": 13.7, "step": 22370, "train_speed(iter/s)": 1.529152 }, { "acc": 0.97203979, "epoch": 10.48746191703773, "grad_norm": 5.690512180328369, "learning_rate": 9.318064688343999e-06, "loss": 0.12354643, "memory(GiB)": 13.7, "step": 22375, "train_speed(iter/s)": 1.529159 }, { "acc": 0.97359343, "epoch": 10.489805483946567, "grad_norm": 3.046956777572632, "learning_rate": 9.317673841723862e-06, "loss": 0.14528058, "memory(GiB)": 13.7, "step": 22380, "train_speed(iter/s)": 1.529177 }, { "acc": 0.99227028, "epoch": 10.492149050855401, "grad_norm": 3.0859241485595703, "learning_rate": 9.31728289133305e-06, "loss": 0.05623277, "memory(GiB)": 13.7, "step": 22385, "train_speed(iter/s)": 1.52919 }, { "acc": 0.9723958, "epoch": 10.494492617764237, "grad_norm": 9.685206413269043, "learning_rate": 9.316891837180957e-06, "loss": 0.0780954, "memory(GiB)": 13.7, "step": 22390, "train_speed(iter/s)": 1.529212 }, { "acc": 0.95327148, "epoch": 10.496836184673072, "grad_norm": 8.387893676757812, "learning_rate": 9.31650067927699e-06, "loss": 0.23350029, "memory(GiB)": 13.7, "step": 22395, "train_speed(iter/s)": 1.529235 }, { "acc": 0.96495333, "epoch": 10.499179751581908, "grad_norm": 10.735162734985352, "learning_rate": 9.31610941763054e-06, "loss": 0.1803309, "memory(GiB)": 13.7, "step": 22400, "train_speed(iter/s)": 1.52924 }, { "acc": 0.97180061, "epoch": 10.501523318490744, "grad_norm": 5.903664588928223, "learning_rate": 9.315718052251018e-06, "loss": 0.16601409, "memory(GiB)": 13.7, "step": 22405, "train_speed(iter/s)": 1.529251 }, { "acc": 0.96620045, "epoch": 10.503866885399578, "grad_norm": 1.3132513761520386, "learning_rate": 9.315326583147835e-06, "loss": 0.12972554, "memory(GiB)": 13.7, "step": 22410, "train_speed(iter/s)": 1.529258 }, { "acc": 0.97605114, "epoch": 10.506210452308414, "grad_norm": 4.029806137084961, "learning_rate": 9.314935010330392e-06, "loss": 0.13868947, "memory(GiB)": 13.7, "step": 22415, "train_speed(iter/s)": 1.529276 }, { "acc": 0.9663868, "epoch": 10.508554019217248, "grad_norm": 7.086440563201904, "learning_rate": 9.314543333808108e-06, "loss": 0.26079462, "memory(GiB)": 13.7, "step": 22420, "train_speed(iter/s)": 1.529277 }, { "acc": 0.9760685, "epoch": 10.510897586126084, "grad_norm": 6.847945690155029, "learning_rate": 9.314151553590394e-06, "loss": 0.13025113, "memory(GiB)": 13.7, "step": 22425, "train_speed(iter/s)": 1.529301 }, { "acc": 0.96677217, "epoch": 10.513241153034919, "grad_norm": 4.6352996826171875, "learning_rate": 9.313759669686667e-06, "loss": 0.14157147, "memory(GiB)": 13.7, "step": 22430, "train_speed(iter/s)": 1.529314 }, { "acc": 0.9729167, "epoch": 10.515584719943755, "grad_norm": 5.622723579406738, "learning_rate": 9.31336768210635e-06, "loss": 0.14303699, "memory(GiB)": 13.7, "step": 22435, "train_speed(iter/s)": 1.529334 }, { "acc": 0.9635066, "epoch": 10.51792828685259, "grad_norm": 4.886333465576172, "learning_rate": 9.312975590858861e-06, "loss": 0.19320853, "memory(GiB)": 13.7, "step": 22440, "train_speed(iter/s)": 1.52934 }, { "acc": 0.96177082, "epoch": 10.520271853761425, "grad_norm": 7.966178894042969, "learning_rate": 9.312583395953627e-06, "loss": 0.17814491, "memory(GiB)": 13.7, "step": 22445, "train_speed(iter/s)": 1.529337 }, { "acc": 0.97269344, "epoch": 10.52261542067026, "grad_norm": 7.587819576263428, "learning_rate": 9.312191097400074e-06, "loss": 0.1392761, "memory(GiB)": 13.7, "step": 22450, "train_speed(iter/s)": 1.529352 }, { "acc": 0.97538643, "epoch": 10.524958987579096, "grad_norm": 4.593592643737793, "learning_rate": 9.311798695207634e-06, "loss": 0.12085505, "memory(GiB)": 13.7, "step": 22455, "train_speed(iter/s)": 1.529363 }, { "acc": 0.9854167, "epoch": 10.52730255448793, "grad_norm": 10.545083045959473, "learning_rate": 9.311406189385734e-06, "loss": 0.05165112, "memory(GiB)": 13.7, "step": 22460, "train_speed(iter/s)": 1.529362 }, { "acc": 0.96677084, "epoch": 10.529646121396766, "grad_norm": 9.51018238067627, "learning_rate": 9.311013579943813e-06, "loss": 0.20611987, "memory(GiB)": 13.7, "step": 22465, "train_speed(iter/s)": 1.52939 }, { "acc": 0.9608429, "epoch": 10.5319896883056, "grad_norm": 7.563323974609375, "learning_rate": 9.310620866891306e-06, "loss": 0.18810122, "memory(GiB)": 13.7, "step": 22470, "train_speed(iter/s)": 1.5294 }, { "acc": 0.97529755, "epoch": 10.534333255214436, "grad_norm": 4.996985912322998, "learning_rate": 9.310228050237654e-06, "loss": 0.10685033, "memory(GiB)": 13.7, "step": 22475, "train_speed(iter/s)": 1.529421 }, { "acc": 0.98392859, "epoch": 10.536676822123273, "grad_norm": 0.29667404294013977, "learning_rate": 9.3098351299923e-06, "loss": 0.11943853, "memory(GiB)": 13.7, "step": 22480, "train_speed(iter/s)": 1.529434 }, { "acc": 0.97076178, "epoch": 10.539020389032107, "grad_norm": 10.743009567260742, "learning_rate": 9.309442106164683e-06, "loss": 0.15538759, "memory(GiB)": 13.7, "step": 22485, "train_speed(iter/s)": 1.529467 }, { "acc": 0.96706152, "epoch": 10.541363955940943, "grad_norm": 1.8150018453598022, "learning_rate": 9.309048978764256e-06, "loss": 0.20668612, "memory(GiB)": 13.7, "step": 22490, "train_speed(iter/s)": 1.52948 }, { "acc": 0.97614584, "epoch": 10.543707522849777, "grad_norm": 4.574336528778076, "learning_rate": 9.308655747800466e-06, "loss": 0.11469576, "memory(GiB)": 13.7, "step": 22495, "train_speed(iter/s)": 1.529492 }, { "acc": 0.9666666, "epoch": 10.546051089758613, "grad_norm": 5.381813049316406, "learning_rate": 9.308262413282765e-06, "loss": 0.11372484, "memory(GiB)": 13.7, "step": 22500, "train_speed(iter/s)": 1.52951 }, { "acc": 0.95322914, "epoch": 10.548394656667448, "grad_norm": 5.294894695281982, "learning_rate": 9.307868975220608e-06, "loss": 0.27627492, "memory(GiB)": 13.7, "step": 22505, "train_speed(iter/s)": 1.52953 }, { "acc": 0.98244057, "epoch": 10.550738223576284, "grad_norm": 4.321737766265869, "learning_rate": 9.307475433623452e-06, "loss": 0.10570848, "memory(GiB)": 13.7, "step": 22510, "train_speed(iter/s)": 1.529539 }, { "acc": 0.96984959, "epoch": 10.553081790485118, "grad_norm": 5.586087703704834, "learning_rate": 9.307081788500756e-06, "loss": 0.18954082, "memory(GiB)": 13.7, "step": 22515, "train_speed(iter/s)": 1.529545 }, { "acc": 0.98008928, "epoch": 10.555425357393954, "grad_norm": 4.471372127532959, "learning_rate": 9.306688039861981e-06, "loss": 0.09119454, "memory(GiB)": 13.7, "step": 22520, "train_speed(iter/s)": 1.529561 }, { "acc": 0.9730093, "epoch": 10.557768924302788, "grad_norm": 0.5465129017829895, "learning_rate": 9.306294187716592e-06, "loss": 0.10862862, "memory(GiB)": 13.7, "step": 22525, "train_speed(iter/s)": 1.529559 }, { "acc": 0.97179346, "epoch": 10.560112491211624, "grad_norm": 10.18949031829834, "learning_rate": 9.305900232074058e-06, "loss": 0.18725269, "memory(GiB)": 13.7, "step": 22530, "train_speed(iter/s)": 1.529559 }, { "acc": 0.98205395, "epoch": 10.562456058120459, "grad_norm": 6.6209869384765625, "learning_rate": 9.305506172943847e-06, "loss": 0.11573497, "memory(GiB)": 13.7, "step": 22535, "train_speed(iter/s)": 1.529552 }, { "acc": 0.97960224, "epoch": 10.564799625029295, "grad_norm": 4.48914098739624, "learning_rate": 9.305112010335428e-06, "loss": 0.10920761, "memory(GiB)": 13.7, "step": 22540, "train_speed(iter/s)": 1.529576 }, { "acc": 0.97078047, "epoch": 10.56714319193813, "grad_norm": 8.291394233703613, "learning_rate": 9.30471774425828e-06, "loss": 0.16095141, "memory(GiB)": 13.7, "step": 22545, "train_speed(iter/s)": 1.529616 }, { "acc": 0.95706749, "epoch": 10.569486758846965, "grad_norm": 3.0070114135742188, "learning_rate": 9.304323374721878e-06, "loss": 0.15529858, "memory(GiB)": 13.7, "step": 22550, "train_speed(iter/s)": 1.529635 }, { "acc": 0.98523808, "epoch": 10.5718303257558, "grad_norm": 15.231505393981934, "learning_rate": 9.303928901735701e-06, "loss": 0.10824596, "memory(GiB)": 13.7, "step": 22555, "train_speed(iter/s)": 1.529642 }, { "acc": 0.97005043, "epoch": 10.574173892664636, "grad_norm": 5.125782012939453, "learning_rate": 9.30353432530923e-06, "loss": 0.12215419, "memory(GiB)": 13.7, "step": 22560, "train_speed(iter/s)": 1.529637 }, { "acc": 0.98134918, "epoch": 10.57651745957347, "grad_norm": 2.2009449005126953, "learning_rate": 9.303139645451951e-06, "loss": 0.09459219, "memory(GiB)": 13.7, "step": 22565, "train_speed(iter/s)": 1.529634 }, { "acc": 0.97422123, "epoch": 10.578861026482306, "grad_norm": 1.9165680408477783, "learning_rate": 9.302744862173351e-06, "loss": 0.10662189, "memory(GiB)": 13.7, "step": 22570, "train_speed(iter/s)": 1.529633 }, { "acc": 0.96767855, "epoch": 10.581204593391142, "grad_norm": 9.672743797302246, "learning_rate": 9.302349975482918e-06, "loss": 0.16179199, "memory(GiB)": 13.7, "step": 22575, "train_speed(iter/s)": 1.529621 }, { "acc": 0.96084652, "epoch": 10.583548160299976, "grad_norm": 3.8180325031280518, "learning_rate": 9.301954985390144e-06, "loss": 0.21664901, "memory(GiB)": 13.7, "step": 22580, "train_speed(iter/s)": 1.529625 }, { "acc": 0.9592803, "epoch": 10.585891727208812, "grad_norm": 9.097458839416504, "learning_rate": 9.301559891904526e-06, "loss": 0.18621671, "memory(GiB)": 13.7, "step": 22585, "train_speed(iter/s)": 1.529646 }, { "acc": 0.9731945, "epoch": 10.588235294117647, "grad_norm": 16.288169860839844, "learning_rate": 9.301164695035555e-06, "loss": 0.15366824, "memory(GiB)": 13.7, "step": 22590, "train_speed(iter/s)": 1.529646 }, { "acc": 0.94712067, "epoch": 10.590578861026483, "grad_norm": 5.294785022735596, "learning_rate": 9.300769394792734e-06, "loss": 0.26170125, "memory(GiB)": 13.7, "step": 22595, "train_speed(iter/s)": 1.529671 }, { "acc": 0.95827465, "epoch": 10.592922427935317, "grad_norm": 7.438730239868164, "learning_rate": 9.300373991185566e-06, "loss": 0.23305259, "memory(GiB)": 13.7, "step": 22600, "train_speed(iter/s)": 1.52965 }, { "acc": 0.96223221, "epoch": 10.595265994844153, "grad_norm": 7.218382835388184, "learning_rate": 9.29997848422355e-06, "loss": 0.18118516, "memory(GiB)": 13.7, "step": 22605, "train_speed(iter/s)": 1.529644 }, { "acc": 0.96032782, "epoch": 10.597609561752988, "grad_norm": 5.421754837036133, "learning_rate": 9.2995828739162e-06, "loss": 0.21272147, "memory(GiB)": 13.7, "step": 22610, "train_speed(iter/s)": 1.529641 }, { "acc": 0.97336311, "epoch": 10.599953128661824, "grad_norm": 24.805177688598633, "learning_rate": 9.299187160273018e-06, "loss": 0.156564, "memory(GiB)": 13.7, "step": 22615, "train_speed(iter/s)": 1.52965 }, { "acc": 0.95643711, "epoch": 10.602296695570658, "grad_norm": 43.849082946777344, "learning_rate": 9.298791343303521e-06, "loss": 0.21260822, "memory(GiB)": 13.7, "step": 22620, "train_speed(iter/s)": 1.529648 }, { "acc": 0.95213718, "epoch": 10.604640262479494, "grad_norm": 11.332223892211914, "learning_rate": 9.29839542301722e-06, "loss": 0.24734235, "memory(GiB)": 13.7, "step": 22625, "train_speed(iter/s)": 1.529645 }, { "acc": 0.97383938, "epoch": 10.606983829388328, "grad_norm": 5.663230895996094, "learning_rate": 9.297999399423635e-06, "loss": 0.07948215, "memory(GiB)": 13.7, "step": 22630, "train_speed(iter/s)": 1.529659 }, { "acc": 0.98418407, "epoch": 10.609327396297164, "grad_norm": 7.3812408447265625, "learning_rate": 9.29760327253228e-06, "loss": 0.08493795, "memory(GiB)": 13.7, "step": 22635, "train_speed(iter/s)": 1.52963 }, { "acc": 0.98191929, "epoch": 10.611670963205999, "grad_norm": 2.777250289916992, "learning_rate": 9.297207042352681e-06, "loss": 0.09499665, "memory(GiB)": 13.7, "step": 22640, "train_speed(iter/s)": 1.529654 }, { "acc": 0.98386364, "epoch": 10.614014530114835, "grad_norm": 8.19686222076416, "learning_rate": 9.29681070889436e-06, "loss": 0.08374552, "memory(GiB)": 13.7, "step": 22645, "train_speed(iter/s)": 1.529682 }, { "acc": 0.9791214, "epoch": 10.61635809702367, "grad_norm": 4.7171549797058105, "learning_rate": 9.296414272166844e-06, "loss": 0.08172662, "memory(GiB)": 13.7, "step": 22650, "train_speed(iter/s)": 1.529673 }, { "acc": 0.96567678, "epoch": 10.618701663932505, "grad_norm": 3.880603551864624, "learning_rate": 9.296017732179662e-06, "loss": 0.15827458, "memory(GiB)": 13.7, "step": 22655, "train_speed(iter/s)": 1.529681 }, { "acc": 0.97524033, "epoch": 10.621045230841341, "grad_norm": 14.04401969909668, "learning_rate": 9.295621088942348e-06, "loss": 0.10719581, "memory(GiB)": 13.7, "step": 22660, "train_speed(iter/s)": 1.5297 }, { "acc": 0.97548494, "epoch": 10.623388797750176, "grad_norm": 5.519829273223877, "learning_rate": 9.295224342464429e-06, "loss": 0.09744645, "memory(GiB)": 13.7, "step": 22665, "train_speed(iter/s)": 1.529707 }, { "acc": 0.97984581, "epoch": 10.625732364659012, "grad_norm": 3.6874852180480957, "learning_rate": 9.294827492755449e-06, "loss": 0.07892965, "memory(GiB)": 13.7, "step": 22670, "train_speed(iter/s)": 1.529721 }, { "acc": 0.99236107, "epoch": 10.628075931567846, "grad_norm": 88.11918640136719, "learning_rate": 9.294430539824945e-06, "loss": 0.0444707, "memory(GiB)": 13.7, "step": 22675, "train_speed(iter/s)": 1.529733 }, { "acc": 0.97550945, "epoch": 10.630419498476682, "grad_norm": 5.9360222816467285, "learning_rate": 9.294033483682453e-06, "loss": 0.13623449, "memory(GiB)": 13.7, "step": 22680, "train_speed(iter/s)": 1.529725 }, { "acc": 0.98311014, "epoch": 10.632763065385516, "grad_norm": 7.480101108551025, "learning_rate": 9.293636324337527e-06, "loss": 0.11288116, "memory(GiB)": 13.7, "step": 22685, "train_speed(iter/s)": 1.529734 }, { "acc": 0.98334932, "epoch": 10.635106632294352, "grad_norm": 5.488226890563965, "learning_rate": 9.293239061799705e-06, "loss": 0.12465938, "memory(GiB)": 13.7, "step": 22690, "train_speed(iter/s)": 1.52976 }, { "acc": 0.96789265, "epoch": 10.637450199203187, "grad_norm": 8.344247817993164, "learning_rate": 9.292841696078538e-06, "loss": 0.16584085, "memory(GiB)": 13.7, "step": 22695, "train_speed(iter/s)": 1.529761 }, { "acc": 0.9558279, "epoch": 10.639793766112023, "grad_norm": 10.39801025390625, "learning_rate": 9.292444227183579e-06, "loss": 0.23302231, "memory(GiB)": 13.7, "step": 22700, "train_speed(iter/s)": 1.52976 }, { "acc": 0.97895088, "epoch": 10.642137333020857, "grad_norm": 2.9040729999542236, "learning_rate": 9.29204665512438e-06, "loss": 0.11212249, "memory(GiB)": 13.7, "step": 22705, "train_speed(iter/s)": 1.529771 }, { "acc": 0.96951389, "epoch": 10.644480899929693, "grad_norm": 4.19234561920166, "learning_rate": 9.291648979910501e-06, "loss": 0.15721707, "memory(GiB)": 13.7, "step": 22710, "train_speed(iter/s)": 1.52976 }, { "acc": 0.97286854, "epoch": 10.646824466838527, "grad_norm": 14.093188285827637, "learning_rate": 9.291251201551496e-06, "loss": 0.12633013, "memory(GiB)": 13.7, "step": 22715, "train_speed(iter/s)": 1.529762 }, { "acc": 0.96756954, "epoch": 10.649168033747364, "grad_norm": 5.661900997161865, "learning_rate": 9.290853320056928e-06, "loss": 0.12075295, "memory(GiB)": 13.7, "step": 22720, "train_speed(iter/s)": 1.529778 }, { "acc": 0.96974287, "epoch": 10.6515116006562, "grad_norm": 6.490275859832764, "learning_rate": 9.29045533543636e-06, "loss": 0.14604816, "memory(GiB)": 13.7, "step": 22725, "train_speed(iter/s)": 1.529786 }, { "acc": 0.98055553, "epoch": 10.653855167565034, "grad_norm": 8.860939979553223, "learning_rate": 9.290057247699363e-06, "loss": 0.08550173, "memory(GiB)": 13.7, "step": 22730, "train_speed(iter/s)": 1.529789 }, { "acc": 0.96718616, "epoch": 10.65619873447387, "grad_norm": 3.1705739498138428, "learning_rate": 9.289659056855499e-06, "loss": 0.14265156, "memory(GiB)": 13.7, "step": 22735, "train_speed(iter/s)": 1.529798 }, { "acc": 0.98893719, "epoch": 10.658542301382704, "grad_norm": 2.985839605331421, "learning_rate": 9.289260762914342e-06, "loss": 0.08355759, "memory(GiB)": 13.7, "step": 22740, "train_speed(iter/s)": 1.529817 }, { "acc": 0.95655766, "epoch": 10.66088586829154, "grad_norm": 5.966794013977051, "learning_rate": 9.288862365885468e-06, "loss": 0.27762461, "memory(GiB)": 13.7, "step": 22745, "train_speed(iter/s)": 1.529826 }, { "acc": 0.96779766, "epoch": 10.663229435200375, "grad_norm": 5.379351615905762, "learning_rate": 9.28846386577845e-06, "loss": 0.10063102, "memory(GiB)": 13.7, "step": 22750, "train_speed(iter/s)": 1.529834 }, { "acc": 0.94877357, "epoch": 10.66557300210921, "grad_norm": 11.252799034118652, "learning_rate": 9.288065262602868e-06, "loss": 0.31150193, "memory(GiB)": 13.7, "step": 22755, "train_speed(iter/s)": 1.52984 }, { "acc": 0.96138887, "epoch": 10.667916569018045, "grad_norm": 5.525753498077393, "learning_rate": 9.287666556368304e-06, "loss": 0.08485955, "memory(GiB)": 13.7, "step": 22760, "train_speed(iter/s)": 1.529866 }, { "acc": 0.96381893, "epoch": 10.670260135926881, "grad_norm": 6.216396808624268, "learning_rate": 9.287267747084338e-06, "loss": 0.26559005, "memory(GiB)": 13.7, "step": 22765, "train_speed(iter/s)": 1.529894 }, { "acc": 0.97932959, "epoch": 10.672603702835715, "grad_norm": 2.4931347370147705, "learning_rate": 9.286868834760561e-06, "loss": 0.10663431, "memory(GiB)": 13.7, "step": 22770, "train_speed(iter/s)": 1.529894 }, { "acc": 0.96314507, "epoch": 10.674947269744552, "grad_norm": 6.834762096405029, "learning_rate": 9.286469819406556e-06, "loss": 0.23496408, "memory(GiB)": 13.7, "step": 22775, "train_speed(iter/s)": 1.529917 }, { "acc": 0.98169641, "epoch": 10.677290836653386, "grad_norm": 2.0625386238098145, "learning_rate": 9.28607070103192e-06, "loss": 0.09063713, "memory(GiB)": 13.7, "step": 22780, "train_speed(iter/s)": 1.529931 }, { "acc": 0.97277937, "epoch": 10.679634403562222, "grad_norm": 7.139448165893555, "learning_rate": 9.28567147964624e-06, "loss": 0.12406685, "memory(GiB)": 13.7, "step": 22785, "train_speed(iter/s)": 1.529926 }, { "acc": 0.96930513, "epoch": 10.681977970471056, "grad_norm": 2.8982319831848145, "learning_rate": 9.285272155259119e-06, "loss": 0.1566421, "memory(GiB)": 13.7, "step": 22790, "train_speed(iter/s)": 1.529921 }, { "acc": 0.97195816, "epoch": 10.684321537379892, "grad_norm": 24.993816375732422, "learning_rate": 9.284872727880152e-06, "loss": 0.15599773, "memory(GiB)": 13.7, "step": 22795, "train_speed(iter/s)": 1.529938 }, { "acc": 0.97520828, "epoch": 10.686665104288727, "grad_norm": 3.9983367919921875, "learning_rate": 9.284473197518937e-06, "loss": 0.16066084, "memory(GiB)": 13.7, "step": 22800, "train_speed(iter/s)": 1.529939 }, { "acc": 0.97662039, "epoch": 10.689008671197563, "grad_norm": 5.365805149078369, "learning_rate": 9.284073564185083e-06, "loss": 0.10451214, "memory(GiB)": 13.7, "step": 22805, "train_speed(iter/s)": 1.529948 }, { "acc": 0.97190552, "epoch": 10.691352238106397, "grad_norm": 2.810164451599121, "learning_rate": 9.283673827888192e-06, "loss": 0.17556663, "memory(GiB)": 13.7, "step": 22810, "train_speed(iter/s)": 1.529944 }, { "acc": 0.97432299, "epoch": 10.693695805015233, "grad_norm": 1.5290921926498413, "learning_rate": 9.283273988637873e-06, "loss": 0.20798419, "memory(GiB)": 13.7, "step": 22815, "train_speed(iter/s)": 1.529944 }, { "acc": 0.97300053, "epoch": 10.69603937192407, "grad_norm": 5.655052185058594, "learning_rate": 9.282874046443739e-06, "loss": 0.15846819, "memory(GiB)": 13.7, "step": 22820, "train_speed(iter/s)": 1.529951 }, { "acc": 0.97671394, "epoch": 10.698382938832903, "grad_norm": 8.426778793334961, "learning_rate": 9.282474001315398e-06, "loss": 0.11208396, "memory(GiB)": 13.7, "step": 22825, "train_speed(iter/s)": 1.529966 }, { "acc": 0.97661858, "epoch": 10.70072650574174, "grad_norm": 5.77233362197876, "learning_rate": 9.282073853262473e-06, "loss": 0.10116323, "memory(GiB)": 13.7, "step": 22830, "train_speed(iter/s)": 1.529968 }, { "acc": 0.98362789, "epoch": 10.703070072650574, "grad_norm": 1.706594467163086, "learning_rate": 9.281673602294577e-06, "loss": 0.09640098, "memory(GiB)": 13.7, "step": 22835, "train_speed(iter/s)": 1.52999 }, { "acc": 0.97590275, "epoch": 10.70541363955941, "grad_norm": 13.315584182739258, "learning_rate": 9.281273248421333e-06, "loss": 0.12880588, "memory(GiB)": 13.7, "step": 22840, "train_speed(iter/s)": 1.530006 }, { "acc": 0.96844692, "epoch": 10.707757206468244, "grad_norm": 11.88234806060791, "learning_rate": 9.280872791652364e-06, "loss": 0.12579961, "memory(GiB)": 13.7, "step": 22845, "train_speed(iter/s)": 1.530004 }, { "acc": 0.98639565, "epoch": 10.71010077337708, "grad_norm": 2.8416152000427246, "learning_rate": 9.280472231997293e-06, "loss": 0.10193143, "memory(GiB)": 13.7, "step": 22850, "train_speed(iter/s)": 1.529994 }, { "acc": 0.96424103, "epoch": 10.712444340285915, "grad_norm": 16.10240936279297, "learning_rate": 9.280071569465753e-06, "loss": 0.16350721, "memory(GiB)": 13.7, "step": 22855, "train_speed(iter/s)": 1.529984 }, { "acc": 0.95567951, "epoch": 10.71478790719475, "grad_norm": 3.1080424785614014, "learning_rate": 9.27967080406737e-06, "loss": 0.17047255, "memory(GiB)": 13.7, "step": 22860, "train_speed(iter/s)": 1.52998 }, { "acc": 0.97455349, "epoch": 10.717131474103585, "grad_norm": 4.284236907958984, "learning_rate": 9.279269935811781e-06, "loss": 0.13524222, "memory(GiB)": 13.7, "step": 22865, "train_speed(iter/s)": 1.529982 }, { "acc": 0.98227673, "epoch": 10.719475041012421, "grad_norm": 5.741376876831055, "learning_rate": 9.278868964708618e-06, "loss": 0.12318611, "memory(GiB)": 13.7, "step": 22870, "train_speed(iter/s)": 1.529987 }, { "acc": 0.96957703, "epoch": 10.721818607921255, "grad_norm": 6.254085540771484, "learning_rate": 9.27846789076752e-06, "loss": 0.17689402, "memory(GiB)": 13.7, "step": 22875, "train_speed(iter/s)": 1.529979 }, { "acc": 0.96411591, "epoch": 10.724162174830091, "grad_norm": 5.489358425140381, "learning_rate": 9.278066713998131e-06, "loss": 0.20966911, "memory(GiB)": 13.7, "step": 22880, "train_speed(iter/s)": 1.529983 }, { "acc": 0.98996105, "epoch": 10.726505741738926, "grad_norm": 0.8377634882926941, "learning_rate": 9.277665434410088e-06, "loss": 0.07330087, "memory(GiB)": 13.7, "step": 22885, "train_speed(iter/s)": 1.529996 }, { "acc": 0.9744792, "epoch": 10.728849308647762, "grad_norm": 5.141735076904297, "learning_rate": 9.277264052013042e-06, "loss": 0.12348932, "memory(GiB)": 13.7, "step": 22890, "train_speed(iter/s)": 1.530009 }, { "acc": 0.97232151, "epoch": 10.731192875556598, "grad_norm": 9.477287292480469, "learning_rate": 9.276862566816637e-06, "loss": 0.13049709, "memory(GiB)": 13.7, "step": 22895, "train_speed(iter/s)": 1.530019 }, { "acc": 0.98413773, "epoch": 10.733536442465432, "grad_norm": 3.76010799407959, "learning_rate": 9.276460978830524e-06, "loss": 0.11386166, "memory(GiB)": 13.7, "step": 22900, "train_speed(iter/s)": 1.530046 }, { "acc": 0.97615929, "epoch": 10.735880009374268, "grad_norm": 8.435423851013184, "learning_rate": 9.276059288064359e-06, "loss": 0.08967313, "memory(GiB)": 13.7, "step": 22905, "train_speed(iter/s)": 1.530049 }, { "acc": 0.98136673, "epoch": 10.738223576283103, "grad_norm": 2.3523707389831543, "learning_rate": 9.275657494527793e-06, "loss": 0.14074233, "memory(GiB)": 13.7, "step": 22910, "train_speed(iter/s)": 1.530061 }, { "acc": 0.97276764, "epoch": 10.740567143191939, "grad_norm": 5.572427749633789, "learning_rate": 9.275255598230488e-06, "loss": 0.13778298, "memory(GiB)": 13.7, "step": 22915, "train_speed(iter/s)": 1.530071 }, { "acc": 0.96958542, "epoch": 10.742910710100773, "grad_norm": 2.4074811935424805, "learning_rate": 9.274853599182101e-06, "loss": 0.23464673, "memory(GiB)": 13.7, "step": 22920, "train_speed(iter/s)": 1.530082 }, { "acc": 0.97354164, "epoch": 10.745254277009609, "grad_norm": 0.2533523142337799, "learning_rate": 9.274451497392295e-06, "loss": 0.15027478, "memory(GiB)": 13.7, "step": 22925, "train_speed(iter/s)": 1.530086 }, { "acc": 0.96936283, "epoch": 10.747597843918443, "grad_norm": 6.925436019897461, "learning_rate": 9.274049292870736e-06, "loss": 0.1408375, "memory(GiB)": 13.7, "step": 22930, "train_speed(iter/s)": 1.530101 }, { "acc": 0.97057886, "epoch": 10.74994141082728, "grad_norm": 5.302249431610107, "learning_rate": 9.273646985627092e-06, "loss": 0.16274197, "memory(GiB)": 13.7, "step": 22935, "train_speed(iter/s)": 1.530114 }, { "acc": 0.9647049, "epoch": 10.752284977736114, "grad_norm": 1.555508017539978, "learning_rate": 9.273244575671035e-06, "loss": 0.12304082, "memory(GiB)": 13.7, "step": 22940, "train_speed(iter/s)": 1.530117 }, { "acc": 0.95755386, "epoch": 10.75462854464495, "grad_norm": 7.810141086578369, "learning_rate": 9.272842063012231e-06, "loss": 0.20111039, "memory(GiB)": 13.7, "step": 22945, "train_speed(iter/s)": 1.530123 }, { "acc": 0.97199812, "epoch": 10.756972111553784, "grad_norm": 3.162309408187866, "learning_rate": 9.272439447660361e-06, "loss": 0.10743294, "memory(GiB)": 13.7, "step": 22950, "train_speed(iter/s)": 1.530151 }, { "acc": 0.96888943, "epoch": 10.75931567846262, "grad_norm": 2.091843605041504, "learning_rate": 9.272036729625104e-06, "loss": 0.22998204, "memory(GiB)": 13.7, "step": 22955, "train_speed(iter/s)": 1.530153 }, { "acc": 0.96805553, "epoch": 10.761659245371455, "grad_norm": 7.804628372192383, "learning_rate": 9.271633908916137e-06, "loss": 0.11925671, "memory(GiB)": 13.7, "step": 22960, "train_speed(iter/s)": 1.530165 }, { "acc": 0.9827898, "epoch": 10.76400281228029, "grad_norm": 4.010807991027832, "learning_rate": 9.27123098554314e-06, "loss": 0.04890724, "memory(GiB)": 13.7, "step": 22965, "train_speed(iter/s)": 1.530164 }, { "acc": 0.97412434, "epoch": 10.766346379189127, "grad_norm": 14.602533340454102, "learning_rate": 9.270827959515802e-06, "loss": 0.138079, "memory(GiB)": 13.7, "step": 22970, "train_speed(iter/s)": 1.530165 }, { "acc": 0.9725893, "epoch": 10.768689946097961, "grad_norm": 5.704775333404541, "learning_rate": 9.270424830843807e-06, "loss": 0.09735747, "memory(GiB)": 13.7, "step": 22975, "train_speed(iter/s)": 1.530151 }, { "acc": 0.96585226, "epoch": 10.771033513006797, "grad_norm": 9.707941055297852, "learning_rate": 9.270021599536849e-06, "loss": 0.14944843, "memory(GiB)": 13.7, "step": 22980, "train_speed(iter/s)": 1.530145 }, { "acc": 0.96912575, "epoch": 10.773377079915631, "grad_norm": 7.243259906768799, "learning_rate": 9.269618265604617e-06, "loss": 0.1665854, "memory(GiB)": 13.7, "step": 22985, "train_speed(iter/s)": 1.530165 }, { "acc": 0.95708332, "epoch": 10.775720646824468, "grad_norm": 9.526726722717285, "learning_rate": 9.269214829056807e-06, "loss": 0.21716392, "memory(GiB)": 13.7, "step": 22990, "train_speed(iter/s)": 1.530168 }, { "acc": 0.98662777, "epoch": 10.778064213733302, "grad_norm": 7.46481990814209, "learning_rate": 9.268811289903118e-06, "loss": 0.05254281, "memory(GiB)": 13.7, "step": 22995, "train_speed(iter/s)": 1.530171 }, { "acc": 0.9875, "epoch": 10.780407780642138, "grad_norm": 4.3859639167785645, "learning_rate": 9.268407648153245e-06, "loss": 0.10086322, "memory(GiB)": 13.7, "step": 23000, "train_speed(iter/s)": 1.530171 }, { "acc": 0.97207108, "epoch": 10.782751347550972, "grad_norm": 2.9723610877990723, "learning_rate": 9.268003903816896e-06, "loss": 0.08420591, "memory(GiB)": 13.7, "step": 23005, "train_speed(iter/s)": 1.530178 }, { "acc": 0.97707787, "epoch": 10.785094914459808, "grad_norm": 6.224844932556152, "learning_rate": 9.26760005690377e-06, "loss": 0.09592964, "memory(GiB)": 13.7, "step": 23010, "train_speed(iter/s)": 1.53021 }, { "acc": 0.99750004, "epoch": 10.787438481368643, "grad_norm": 2.6394340991973877, "learning_rate": 9.267196107423582e-06, "loss": 0.019783, "memory(GiB)": 13.7, "step": 23015, "train_speed(iter/s)": 1.530212 }, { "acc": 0.97181997, "epoch": 10.789782048277479, "grad_norm": 3.8908920288085938, "learning_rate": 9.266792055386033e-06, "loss": 0.11447262, "memory(GiB)": 13.7, "step": 23020, "train_speed(iter/s)": 1.530241 }, { "acc": 0.97969704, "epoch": 10.792125615186313, "grad_norm": 5.905426979064941, "learning_rate": 9.266387900800841e-06, "loss": 0.08166431, "memory(GiB)": 13.7, "step": 23025, "train_speed(iter/s)": 1.530246 }, { "acc": 0.98143311, "epoch": 10.794469182095149, "grad_norm": 5.6016387939453125, "learning_rate": 9.265983643677717e-06, "loss": 0.10274515, "memory(GiB)": 13.7, "step": 23030, "train_speed(iter/s)": 1.53025 }, { "acc": 0.98165178, "epoch": 10.796812749003983, "grad_norm": 4.375644207000732, "learning_rate": 9.265579284026379e-06, "loss": 0.09477527, "memory(GiB)": 13.7, "step": 23035, "train_speed(iter/s)": 1.530277 }, { "acc": 0.97047615, "epoch": 10.79915631591282, "grad_norm": 5.908674240112305, "learning_rate": 9.26517482185655e-06, "loss": 0.21406384, "memory(GiB)": 13.7, "step": 23040, "train_speed(iter/s)": 1.530283 }, { "acc": 0.98442955, "epoch": 10.801499882821654, "grad_norm": 3.124063014984131, "learning_rate": 9.264770257177945e-06, "loss": 0.09527844, "memory(GiB)": 13.7, "step": 23045, "train_speed(iter/s)": 1.5303 }, { "acc": 0.96064577, "epoch": 10.80384344973049, "grad_norm": 2.584285020828247, "learning_rate": 9.264365590000294e-06, "loss": 0.27595649, "memory(GiB)": 13.7, "step": 23050, "train_speed(iter/s)": 1.530314 }, { "acc": 0.9824297, "epoch": 10.806187016639324, "grad_norm": 3.981586217880249, "learning_rate": 9.263960820333322e-06, "loss": 0.08975447, "memory(GiB)": 13.7, "step": 23055, "train_speed(iter/s)": 1.530323 }, { "acc": 0.97417011, "epoch": 10.80853058354816, "grad_norm": 5.825989723205566, "learning_rate": 9.263555948186758e-06, "loss": 0.10785931, "memory(GiB)": 13.7, "step": 23060, "train_speed(iter/s)": 1.53032 }, { "acc": 0.97003469, "epoch": 10.810874150456996, "grad_norm": 8.54672908782959, "learning_rate": 9.263150973570335e-06, "loss": 0.24951603, "memory(GiB)": 13.7, "step": 23065, "train_speed(iter/s)": 1.530318 }, { "acc": 0.97051468, "epoch": 10.81321771736583, "grad_norm": 11.116098403930664, "learning_rate": 9.262745896493785e-06, "loss": 0.1813374, "memory(GiB)": 13.7, "step": 23070, "train_speed(iter/s)": 1.530312 }, { "acc": 0.97355156, "epoch": 10.815561284274667, "grad_norm": 2.6934521198272705, "learning_rate": 9.262340716966848e-06, "loss": 0.15779619, "memory(GiB)": 13.7, "step": 23075, "train_speed(iter/s)": 1.530305 }, { "acc": 0.97509174, "epoch": 10.817904851183501, "grad_norm": 0.023273754864931107, "learning_rate": 9.261935434999261e-06, "loss": 0.19163477, "memory(GiB)": 13.7, "step": 23080, "train_speed(iter/s)": 1.530319 }, { "acc": 0.97374992, "epoch": 10.820248418092337, "grad_norm": 2.911362648010254, "learning_rate": 9.261530050600764e-06, "loss": 0.16333638, "memory(GiB)": 13.7, "step": 23085, "train_speed(iter/s)": 1.530311 }, { "acc": 0.98666668, "epoch": 10.822591985001171, "grad_norm": 4.91956090927124, "learning_rate": 9.261124563781105e-06, "loss": 0.06666059, "memory(GiB)": 13.7, "step": 23090, "train_speed(iter/s)": 1.530332 }, { "acc": 0.97782536, "epoch": 10.824935551910007, "grad_norm": 5.724135398864746, "learning_rate": 9.260718974550029e-06, "loss": 0.12149935, "memory(GiB)": 13.7, "step": 23095, "train_speed(iter/s)": 1.530347 }, { "acc": 0.97207031, "epoch": 10.827279118818842, "grad_norm": 2.7702598571777344, "learning_rate": 9.260313282917284e-06, "loss": 0.11648669, "memory(GiB)": 13.7, "step": 23100, "train_speed(iter/s)": 1.530372 }, { "acc": 0.97454443, "epoch": 10.829622685727678, "grad_norm": 3.1815786361694336, "learning_rate": 9.259907488892623e-06, "loss": 0.11776226, "memory(GiB)": 13.7, "step": 23105, "train_speed(iter/s)": 1.530388 }, { "acc": 0.97724161, "epoch": 10.831966252636512, "grad_norm": 2.8282418251037598, "learning_rate": 9.259501592485799e-06, "loss": 0.0737411, "memory(GiB)": 13.7, "step": 23110, "train_speed(iter/s)": 1.530379 }, { "acc": 0.96510782, "epoch": 10.834309819545348, "grad_norm": 5.257465362548828, "learning_rate": 9.259095593706567e-06, "loss": 0.144059, "memory(GiB)": 13.7, "step": 23115, "train_speed(iter/s)": 1.530364 }, { "acc": 0.95645714, "epoch": 10.836653386454183, "grad_norm": 0.5922459959983826, "learning_rate": 9.258689492564688e-06, "loss": 0.2032218, "memory(GiB)": 13.7, "step": 23120, "train_speed(iter/s)": 1.530359 }, { "acc": 0.97048607, "epoch": 10.838996953363019, "grad_norm": 4.168833255767822, "learning_rate": 9.258283289069925e-06, "loss": 0.21949916, "memory(GiB)": 13.7, "step": 23125, "train_speed(iter/s)": 1.530362 }, { "acc": 0.97691927, "epoch": 10.841340520271853, "grad_norm": 2.9799532890319824, "learning_rate": 9.257876983232038e-06, "loss": 0.09355341, "memory(GiB)": 13.7, "step": 23130, "train_speed(iter/s)": 1.530364 }, { "acc": 0.97541666, "epoch": 10.843684087180689, "grad_norm": 8.105499267578125, "learning_rate": 9.257470575060795e-06, "loss": 0.06686482, "memory(GiB)": 13.7, "step": 23135, "train_speed(iter/s)": 1.530361 }, { "acc": 0.97621536, "epoch": 10.846027654089525, "grad_norm": 1.9876461029052734, "learning_rate": 9.257064064565965e-06, "loss": 0.17852789, "memory(GiB)": 13.7, "step": 23140, "train_speed(iter/s)": 1.530365 }, { "acc": 0.97353086, "epoch": 10.84837122099836, "grad_norm": 1.6704847812652588, "learning_rate": 9.25665745175732e-06, "loss": 0.15515714, "memory(GiB)": 13.7, "step": 23145, "train_speed(iter/s)": 1.530376 }, { "acc": 0.9833334, "epoch": 10.850714787907195, "grad_norm": 7.892304420471191, "learning_rate": 9.25625073664463e-06, "loss": 0.06609319, "memory(GiB)": 13.7, "step": 23150, "train_speed(iter/s)": 1.530377 }, { "acc": 0.97732735, "epoch": 10.85305835481603, "grad_norm": 7.41526985168457, "learning_rate": 9.255843919237674e-06, "loss": 0.09921627, "memory(GiB)": 13.7, "step": 23155, "train_speed(iter/s)": 1.530358 }, { "acc": 0.97875004, "epoch": 10.855401921724866, "grad_norm": 520.9395751953125, "learning_rate": 9.25543699954623e-06, "loss": 0.0926754, "memory(GiB)": 13.7, "step": 23160, "train_speed(iter/s)": 1.530372 }, { "acc": 0.98208332, "epoch": 10.8577454886337, "grad_norm": 5.854686260223389, "learning_rate": 9.25502997758008e-06, "loss": 0.12593441, "memory(GiB)": 13.7, "step": 23165, "train_speed(iter/s)": 1.530373 }, { "acc": 0.97163658, "epoch": 10.860089055542536, "grad_norm": 1.1594054698944092, "learning_rate": 9.254622853349007e-06, "loss": 0.10385208, "memory(GiB)": 13.7, "step": 23170, "train_speed(iter/s)": 1.530391 }, { "acc": 0.98271894, "epoch": 10.86243262245137, "grad_norm": 9.615450859069824, "learning_rate": 9.254215626862795e-06, "loss": 0.13071315, "memory(GiB)": 13.7, "step": 23175, "train_speed(iter/s)": 1.530417 }, { "acc": 0.97562389, "epoch": 10.864776189360207, "grad_norm": 3.3397161960601807, "learning_rate": 9.253808298131235e-06, "loss": 0.1571274, "memory(GiB)": 13.7, "step": 23180, "train_speed(iter/s)": 1.530439 }, { "acc": 0.96869497, "epoch": 10.867119756269041, "grad_norm": 4.089237213134766, "learning_rate": 9.253400867164115e-06, "loss": 0.21017163, "memory(GiB)": 13.7, "step": 23185, "train_speed(iter/s)": 1.530459 }, { "acc": 0.95687008, "epoch": 10.869463323177877, "grad_norm": 12.200071334838867, "learning_rate": 9.252993333971232e-06, "loss": 0.19891291, "memory(GiB)": 13.7, "step": 23190, "train_speed(iter/s)": 1.53047 }, { "acc": 0.98422394, "epoch": 10.871806890086711, "grad_norm": 3.4210331439971924, "learning_rate": 9.25258569856238e-06, "loss": 0.05053621, "memory(GiB)": 13.7, "step": 23195, "train_speed(iter/s)": 1.530496 }, { "acc": 0.98120041, "epoch": 10.874150456995547, "grad_norm": 5.050715923309326, "learning_rate": 9.252177960947355e-06, "loss": 0.09703386, "memory(GiB)": 13.7, "step": 23200, "train_speed(iter/s)": 1.530516 }, { "acc": 0.94273672, "epoch": 10.876494023904382, "grad_norm": 4.807704448699951, "learning_rate": 9.251770121135962e-06, "loss": 0.27780147, "memory(GiB)": 13.7, "step": 23205, "train_speed(iter/s)": 1.530528 }, { "acc": 0.97279758, "epoch": 10.878837590813218, "grad_norm": 6.599029541015625, "learning_rate": 9.251362179138e-06, "loss": 0.11971461, "memory(GiB)": 13.7, "step": 23210, "train_speed(iter/s)": 1.530532 }, { "acc": 0.980966, "epoch": 10.881181157722054, "grad_norm": 4.971469879150391, "learning_rate": 9.250954134963276e-06, "loss": 0.07405674, "memory(GiB)": 13.7, "step": 23215, "train_speed(iter/s)": 1.530537 }, { "acc": 0.97566967, "epoch": 10.883524724630888, "grad_norm": 0.11494815349578857, "learning_rate": 9.2505459886216e-06, "loss": 0.13364509, "memory(GiB)": 13.7, "step": 23220, "train_speed(iter/s)": 1.530544 }, { "acc": 0.97687502, "epoch": 10.885868291539724, "grad_norm": 10.351961135864258, "learning_rate": 9.25013774012278e-06, "loss": 0.16219735, "memory(GiB)": 13.7, "step": 23225, "train_speed(iter/s)": 1.53056 }, { "acc": 0.97848434, "epoch": 10.888211858448559, "grad_norm": 0.27040982246398926, "learning_rate": 9.24972938947663e-06, "loss": 0.16193976, "memory(GiB)": 13.7, "step": 23230, "train_speed(iter/s)": 1.530583 }, { "acc": 0.97955265, "epoch": 10.890555425357395, "grad_norm": 5.6420817375183105, "learning_rate": 9.249320936692966e-06, "loss": 0.09357982, "memory(GiB)": 13.7, "step": 23235, "train_speed(iter/s)": 1.530612 }, { "acc": 0.96713791, "epoch": 10.892898992266229, "grad_norm": 12.047388076782227, "learning_rate": 9.248912381781603e-06, "loss": 0.18807323, "memory(GiB)": 13.7, "step": 23240, "train_speed(iter/s)": 1.530618 }, { "acc": 0.97907944, "epoch": 10.895242559175065, "grad_norm": 6.096090793609619, "learning_rate": 9.248503724752365e-06, "loss": 0.09632473, "memory(GiB)": 13.7, "step": 23245, "train_speed(iter/s)": 1.530611 }, { "acc": 0.95820847, "epoch": 10.8975861260839, "grad_norm": 7.190823554992676, "learning_rate": 9.248094965615074e-06, "loss": 0.17095938, "memory(GiB)": 13.7, "step": 23250, "train_speed(iter/s)": 1.530625 }, { "acc": 0.98978624, "epoch": 10.899929692992735, "grad_norm": 1.8248891830444336, "learning_rate": 9.247686104379555e-06, "loss": 0.05126325, "memory(GiB)": 13.7, "step": 23255, "train_speed(iter/s)": 1.530626 }, { "acc": 0.98716011, "epoch": 10.90227325990157, "grad_norm": 0.16406552493572235, "learning_rate": 9.247277141055633e-06, "loss": 0.09167297, "memory(GiB)": 13.7, "step": 23260, "train_speed(iter/s)": 1.530618 }, { "acc": 0.97658386, "epoch": 10.904616826810406, "grad_norm": 6.4595208168029785, "learning_rate": 9.246868075653141e-06, "loss": 0.08220272, "memory(GiB)": 13.7, "step": 23265, "train_speed(iter/s)": 1.53062 }, { "acc": 0.95470486, "epoch": 10.90696039371924, "grad_norm": 8.84541130065918, "learning_rate": 9.246458908181911e-06, "loss": 0.26539259, "memory(GiB)": 13.7, "step": 23270, "train_speed(iter/s)": 1.53062 }, { "acc": 0.96585903, "epoch": 10.909303960628076, "grad_norm": 1.0789073705673218, "learning_rate": 9.246049638651778e-06, "loss": 0.16051748, "memory(GiB)": 13.7, "step": 23275, "train_speed(iter/s)": 1.530617 }, { "acc": 0.96333332, "epoch": 10.91164752753691, "grad_norm": 7.716754913330078, "learning_rate": 9.24564026707258e-06, "loss": 0.16607869, "memory(GiB)": 13.7, "step": 23280, "train_speed(iter/s)": 1.530615 }, { "acc": 0.96560917, "epoch": 10.913991094445747, "grad_norm": 6.773292541503906, "learning_rate": 9.245230793454156e-06, "loss": 0.1590055, "memory(GiB)": 13.7, "step": 23285, "train_speed(iter/s)": 1.530619 }, { "acc": 0.97392864, "epoch": 10.91633466135458, "grad_norm": 14.616795539855957, "learning_rate": 9.244821217806348e-06, "loss": 0.10974693, "memory(GiB)": 13.7, "step": 23290, "train_speed(iter/s)": 1.530632 }, { "acc": 0.96873779, "epoch": 10.918678228263417, "grad_norm": 4.689450740814209, "learning_rate": 9.244411540139001e-06, "loss": 0.14567308, "memory(GiB)": 13.7, "step": 23295, "train_speed(iter/s)": 1.530655 }, { "acc": 0.98081846, "epoch": 10.921021795172251, "grad_norm": 5.9259819984436035, "learning_rate": 9.244001760461963e-06, "loss": 0.09989406, "memory(GiB)": 13.7, "step": 23300, "train_speed(iter/s)": 1.530673 }, { "acc": 0.98027601, "epoch": 10.923365362081087, "grad_norm": 1.742710828781128, "learning_rate": 9.243591878785086e-06, "loss": 0.08710078, "memory(GiB)": 13.7, "step": 23305, "train_speed(iter/s)": 1.530694 }, { "acc": 0.9514698, "epoch": 10.925708928989923, "grad_norm": 56.97003936767578, "learning_rate": 9.243181895118218e-06, "loss": 0.26097636, "memory(GiB)": 13.7, "step": 23310, "train_speed(iter/s)": 1.530711 }, { "acc": 0.97376986, "epoch": 10.928052495898758, "grad_norm": 9.253066062927246, "learning_rate": 9.242771809471215e-06, "loss": 0.10554763, "memory(GiB)": 13.7, "step": 23315, "train_speed(iter/s)": 1.530724 }, { "acc": 0.98525257, "epoch": 10.930396062807594, "grad_norm": 3.2498300075531006, "learning_rate": 9.242361621853937e-06, "loss": 0.07497808, "memory(GiB)": 13.7, "step": 23320, "train_speed(iter/s)": 1.530748 }, { "acc": 0.97660675, "epoch": 10.932739629716428, "grad_norm": 5.9460272789001465, "learning_rate": 9.24195133227624e-06, "loss": 0.16097525, "memory(GiB)": 13.7, "step": 23325, "train_speed(iter/s)": 1.530749 }, { "acc": 0.98211136, "epoch": 10.935083196625264, "grad_norm": 1.1743214130401611, "learning_rate": 9.241540940747985e-06, "loss": 0.07063258, "memory(GiB)": 13.7, "step": 23330, "train_speed(iter/s)": 1.530758 }, { "acc": 0.98465271, "epoch": 10.937426763534098, "grad_norm": 4.09226131439209, "learning_rate": 9.241130447279042e-06, "loss": 0.05641279, "memory(GiB)": 13.7, "step": 23335, "train_speed(iter/s)": 1.530758 }, { "acc": 0.984375, "epoch": 10.939770330442935, "grad_norm": 6.5910868644714355, "learning_rate": 9.240719851879273e-06, "loss": 0.09971153, "memory(GiB)": 13.7, "step": 23340, "train_speed(iter/s)": 1.530772 }, { "acc": 0.98983707, "epoch": 10.942113897351769, "grad_norm": 1.188628911972046, "learning_rate": 9.240309154558549e-06, "loss": 0.08668901, "memory(GiB)": 13.7, "step": 23345, "train_speed(iter/s)": 1.530798 }, { "acc": 0.96635418, "epoch": 10.944457464260605, "grad_norm": 11.745500564575195, "learning_rate": 9.239898355326742e-06, "loss": 0.1481007, "memory(GiB)": 13.7, "step": 23350, "train_speed(iter/s)": 1.530818 }, { "acc": 0.98055553, "epoch": 10.94680103116944, "grad_norm": 4.781788349151611, "learning_rate": 9.239487454193726e-06, "loss": 0.07725706, "memory(GiB)": 13.7, "step": 23355, "train_speed(iter/s)": 1.530825 }, { "acc": 0.97673607, "epoch": 10.949144598078275, "grad_norm": 5.8857951164245605, "learning_rate": 9.239076451169379e-06, "loss": 0.08979089, "memory(GiB)": 13.7, "step": 23360, "train_speed(iter/s)": 1.530831 }, { "acc": 0.97182693, "epoch": 10.95148816498711, "grad_norm": 9.982088088989258, "learning_rate": 9.238665346263578e-06, "loss": 0.12921255, "memory(GiB)": 13.7, "step": 23365, "train_speed(iter/s)": 1.53083 }, { "acc": 0.9791666, "epoch": 10.953831731895946, "grad_norm": 5.804405689239502, "learning_rate": 9.238254139486204e-06, "loss": 0.07256918, "memory(GiB)": 13.7, "step": 23370, "train_speed(iter/s)": 1.530841 }, { "acc": 0.98363094, "epoch": 10.95617529880478, "grad_norm": 1.4204596281051636, "learning_rate": 9.237842830847145e-06, "loss": 0.09782169, "memory(GiB)": 13.7, "step": 23375, "train_speed(iter/s)": 1.530847 }, { "acc": 0.99300594, "epoch": 10.958518865713616, "grad_norm": 3.3751635551452637, "learning_rate": 9.237431420356284e-06, "loss": 0.03185892, "memory(GiB)": 13.7, "step": 23380, "train_speed(iter/s)": 1.530867 }, { "acc": 0.97634802, "epoch": 10.960862432622452, "grad_norm": 6.376513957977295, "learning_rate": 9.23701990802351e-06, "loss": 0.11532526, "memory(GiB)": 13.7, "step": 23385, "train_speed(iter/s)": 1.530867 }, { "acc": 0.97135448, "epoch": 10.963205999531286, "grad_norm": 4.3038225173950195, "learning_rate": 9.236608293858717e-06, "loss": 0.17517042, "memory(GiB)": 13.7, "step": 23390, "train_speed(iter/s)": 1.530883 }, { "acc": 0.97396908, "epoch": 10.965549566440123, "grad_norm": 4.075022220611572, "learning_rate": 9.236196577871795e-06, "loss": 0.10122268, "memory(GiB)": 13.7, "step": 23395, "train_speed(iter/s)": 1.530874 }, { "acc": 0.9844305, "epoch": 10.967893133348957, "grad_norm": 5.7906951904296875, "learning_rate": 9.235784760072647e-06, "loss": 0.0830716, "memory(GiB)": 13.7, "step": 23400, "train_speed(iter/s)": 1.530868 }, { "acc": 0.98175554, "epoch": 10.970236700257793, "grad_norm": 1.8687975406646729, "learning_rate": 9.235372840471166e-06, "loss": 0.09699577, "memory(GiB)": 13.7, "step": 23405, "train_speed(iter/s)": 1.530887 }, { "acc": 0.97876129, "epoch": 10.972580267166627, "grad_norm": 5.152183532714844, "learning_rate": 9.234960819077251e-06, "loss": 0.12233323, "memory(GiB)": 13.7, "step": 23410, "train_speed(iter/s)": 1.530906 }, { "acc": 0.97192822, "epoch": 10.974923834075463, "grad_norm": 5.245786666870117, "learning_rate": 9.234548695900811e-06, "loss": 0.10492631, "memory(GiB)": 13.7, "step": 23415, "train_speed(iter/s)": 1.530913 }, { "acc": 0.97875004, "epoch": 10.977267400984298, "grad_norm": 3.6619794368743896, "learning_rate": 9.23413647095175e-06, "loss": 0.1247769, "memory(GiB)": 13.7, "step": 23420, "train_speed(iter/s)": 1.530923 }, { "acc": 0.97277775, "epoch": 10.979610967893134, "grad_norm": 7.340280055999756, "learning_rate": 9.23372414423998e-06, "loss": 0.10614723, "memory(GiB)": 13.7, "step": 23425, "train_speed(iter/s)": 1.530921 }, { "acc": 0.97780628, "epoch": 10.981954534801968, "grad_norm": 2.213123083114624, "learning_rate": 9.233311715775406e-06, "loss": 0.13480085, "memory(GiB)": 13.7, "step": 23430, "train_speed(iter/s)": 1.530925 }, { "acc": 0.97133389, "epoch": 10.984298101710804, "grad_norm": 6.466525554656982, "learning_rate": 9.232899185567946e-06, "loss": 0.16216166, "memory(GiB)": 13.7, "step": 23435, "train_speed(iter/s)": 1.530931 }, { "acc": 0.97636356, "epoch": 10.986641668619638, "grad_norm": 4.376628398895264, "learning_rate": 9.23248655362751e-06, "loss": 0.12639551, "memory(GiB)": 13.7, "step": 23440, "train_speed(iter/s)": 1.530934 }, { "acc": 0.97966185, "epoch": 10.988985235528475, "grad_norm": 4.475575923919678, "learning_rate": 9.232073819964024e-06, "loss": 0.1553293, "memory(GiB)": 13.7, "step": 23445, "train_speed(iter/s)": 1.530938 }, { "acc": 0.97183609, "epoch": 10.991328802437309, "grad_norm": 2.7925262451171875, "learning_rate": 9.231660984587404e-06, "loss": 0.09924026, "memory(GiB)": 13.7, "step": 23450, "train_speed(iter/s)": 1.530951 }, { "acc": 0.985322, "epoch": 10.993672369346145, "grad_norm": 0.708737850189209, "learning_rate": 9.231248047507574e-06, "loss": 0.0656297, "memory(GiB)": 13.7, "step": 23455, "train_speed(iter/s)": 1.530956 }, { "acc": 0.98557692, "epoch": 10.996015936254981, "grad_norm": 0.46482840180397034, "learning_rate": 9.230835008734462e-06, "loss": 0.07287858, "memory(GiB)": 13.7, "step": 23460, "train_speed(iter/s)": 1.530951 }, { "acc": 0.9605958, "epoch": 10.998359503163815, "grad_norm": 5.997359275817871, "learning_rate": 9.230421868277992e-06, "loss": 0.14758811, "memory(GiB)": 13.7, "step": 23465, "train_speed(iter/s)": 1.530969 }, { "acc": 0.99029102, "epoch": 11.000703070072651, "grad_norm": 6.630102157592773, "learning_rate": 9.230008626148096e-06, "loss": 0.08901902, "memory(GiB)": 13.7, "step": 23470, "train_speed(iter/s)": 1.530941 }, { "acc": 0.97478628, "epoch": 11.003046636981486, "grad_norm": 5.423348903656006, "learning_rate": 9.22959528235471e-06, "loss": 0.08237275, "memory(GiB)": 13.7, "step": 23475, "train_speed(iter/s)": 1.530935 }, { "acc": 0.9854167, "epoch": 11.005390203890322, "grad_norm": 5.286520957946777, "learning_rate": 9.229181836907767e-06, "loss": 0.08575972, "memory(GiB)": 13.7, "step": 23480, "train_speed(iter/s)": 1.530934 }, { "acc": 0.98687496, "epoch": 11.007733770799156, "grad_norm": 5.840988636016846, "learning_rate": 9.228768289817203e-06, "loss": 0.06610092, "memory(GiB)": 13.7, "step": 23485, "train_speed(iter/s)": 1.530942 }, { "acc": 0.97945881, "epoch": 11.010077337707992, "grad_norm": 5.129784107208252, "learning_rate": 9.228354641092962e-06, "loss": 0.12885056, "memory(GiB)": 13.7, "step": 23490, "train_speed(iter/s)": 1.530967 }, { "acc": 0.9676754, "epoch": 11.012420904616826, "grad_norm": 18.09170150756836, "learning_rate": 9.227940890744985e-06, "loss": 0.2114502, "memory(GiB)": 13.7, "step": 23495, "train_speed(iter/s)": 1.530978 }, { "acc": 0.97488098, "epoch": 11.014764471525663, "grad_norm": 7.037392616271973, "learning_rate": 9.227527038783218e-06, "loss": 0.12865212, "memory(GiB)": 13.7, "step": 23500, "train_speed(iter/s)": 1.530984 }, { "acc": 0.97323856, "epoch": 11.017108038434497, "grad_norm": 4.640435695648193, "learning_rate": 9.227113085217604e-06, "loss": 0.09452927, "memory(GiB)": 13.7, "step": 23505, "train_speed(iter/s)": 1.530992 }, { "acc": 0.97846594, "epoch": 11.019451605343333, "grad_norm": 9.306025505065918, "learning_rate": 9.226699030058099e-06, "loss": 0.14314735, "memory(GiB)": 13.7, "step": 23510, "train_speed(iter/s)": 1.531001 }, { "acc": 0.96929283, "epoch": 11.021795172252167, "grad_norm": 8.0936861038208, "learning_rate": 9.226284873314655e-06, "loss": 0.14517914, "memory(GiB)": 13.7, "step": 23515, "train_speed(iter/s)": 1.53101 }, { "acc": 0.97902966, "epoch": 11.024138739161003, "grad_norm": 1.4690155982971191, "learning_rate": 9.225870614997223e-06, "loss": 0.12194142, "memory(GiB)": 13.7, "step": 23520, "train_speed(iter/s)": 1.531021 }, { "acc": 0.95587654, "epoch": 11.026482306069838, "grad_norm": 5.861593723297119, "learning_rate": 9.225456255115764e-06, "loss": 0.21461687, "memory(GiB)": 13.7, "step": 23525, "train_speed(iter/s)": 1.531024 }, { "acc": 0.9812355, "epoch": 11.028825872978674, "grad_norm": 4.545754909515381, "learning_rate": 9.225041793680237e-06, "loss": 0.07253644, "memory(GiB)": 13.7, "step": 23530, "train_speed(iter/s)": 1.531023 }, { "acc": 0.98386364, "epoch": 11.031169439887508, "grad_norm": 4.461450099945068, "learning_rate": 9.224627230700604e-06, "loss": 0.09612087, "memory(GiB)": 13.7, "step": 23535, "train_speed(iter/s)": 1.531036 }, { "acc": 0.97147188, "epoch": 11.033513006796344, "grad_norm": 7.5377726554870605, "learning_rate": 9.224212566186829e-06, "loss": 0.10922375, "memory(GiB)": 13.7, "step": 23540, "train_speed(iter/s)": 1.531044 }, { "acc": 0.97705364, "epoch": 11.03585657370518, "grad_norm": 1.4437289237976074, "learning_rate": 9.22379780014888e-06, "loss": 0.11683815, "memory(GiB)": 13.7, "step": 23545, "train_speed(iter/s)": 1.531038 }, { "acc": 0.97976189, "epoch": 11.038200140614014, "grad_norm": 5.660226345062256, "learning_rate": 9.223382932596727e-06, "loss": 0.08504319, "memory(GiB)": 13.7, "step": 23550, "train_speed(iter/s)": 1.531031 }, { "acc": 0.96885414, "epoch": 11.04054370752285, "grad_norm": 10.059395790100098, "learning_rate": 9.222967963540341e-06, "loss": 0.19263513, "memory(GiB)": 13.7, "step": 23555, "train_speed(iter/s)": 1.531019 }, { "acc": 0.97496109, "epoch": 11.042887274431685, "grad_norm": 3.0765888690948486, "learning_rate": 9.222552892989698e-06, "loss": 0.10922859, "memory(GiB)": 13.7, "step": 23560, "train_speed(iter/s)": 1.531035 }, { "acc": 0.98451767, "epoch": 11.045230841340521, "grad_norm": 4.579072952270508, "learning_rate": 9.222137720954774e-06, "loss": 0.08991365, "memory(GiB)": 13.7, "step": 23565, "train_speed(iter/s)": 1.531025 }, { "acc": 0.98301468, "epoch": 11.047574408249355, "grad_norm": 4.727758407592773, "learning_rate": 9.221722447445546e-06, "loss": 0.09301724, "memory(GiB)": 13.7, "step": 23570, "train_speed(iter/s)": 1.531041 }, { "acc": 0.97677383, "epoch": 11.049917975158191, "grad_norm": 7.060082912445068, "learning_rate": 9.221307072472002e-06, "loss": 0.102903, "memory(GiB)": 13.7, "step": 23575, "train_speed(iter/s)": 1.53104 }, { "acc": 0.96134806, "epoch": 11.052261542067026, "grad_norm": 50.988685607910156, "learning_rate": 9.220891596044119e-06, "loss": 0.18088228, "memory(GiB)": 13.7, "step": 23580, "train_speed(iter/s)": 1.531042 }, { "acc": 0.96082802, "epoch": 11.054605108975862, "grad_norm": 1.6393412351608276, "learning_rate": 9.22047601817189e-06, "loss": 0.13130647, "memory(GiB)": 13.7, "step": 23585, "train_speed(iter/s)": 1.531026 }, { "acc": 0.97920284, "epoch": 11.056948675884696, "grad_norm": 0.18423916399478912, "learning_rate": 9.2200603388653e-06, "loss": 0.06494448, "memory(GiB)": 13.7, "step": 23590, "train_speed(iter/s)": 1.531046 }, { "acc": 0.98728628, "epoch": 11.059292242793532, "grad_norm": 5.680213928222656, "learning_rate": 9.21964455813434e-06, "loss": 0.04887204, "memory(GiB)": 13.7, "step": 23595, "train_speed(iter/s)": 1.531072 }, { "acc": 0.97877216, "epoch": 11.061635809702366, "grad_norm": 4.117061138153076, "learning_rate": 9.219228675989007e-06, "loss": 0.10546407, "memory(GiB)": 13.7, "step": 23600, "train_speed(iter/s)": 1.531078 }, { "acc": 0.97645836, "epoch": 11.063979376611202, "grad_norm": 3.4322078227996826, "learning_rate": 9.218812692439295e-06, "loss": 0.07837344, "memory(GiB)": 13.7, "step": 23605, "train_speed(iter/s)": 1.531091 }, { "acc": 0.97345829, "epoch": 11.066322943520037, "grad_norm": 4.39431619644165, "learning_rate": 9.218396607495207e-06, "loss": 0.10160526, "memory(GiB)": 13.7, "step": 23610, "train_speed(iter/s)": 1.531109 }, { "acc": 0.9776042, "epoch": 11.068666510428873, "grad_norm": 5.815738677978516, "learning_rate": 9.217980421166739e-06, "loss": 0.10839458, "memory(GiB)": 13.7, "step": 23615, "train_speed(iter/s)": 1.531113 }, { "acc": 0.97408009, "epoch": 11.071010077337707, "grad_norm": 5.76481294631958, "learning_rate": 9.2175641334639e-06, "loss": 0.12419782, "memory(GiB)": 13.7, "step": 23620, "train_speed(iter/s)": 1.53113 }, { "acc": 0.99104166, "epoch": 11.073353644246543, "grad_norm": 3.7744851112365723, "learning_rate": 9.21714774439669e-06, "loss": 0.06667394, "memory(GiB)": 13.7, "step": 23625, "train_speed(iter/s)": 1.531133 }, { "acc": 0.98922615, "epoch": 11.07569721115538, "grad_norm": 7.853800296783447, "learning_rate": 9.216731253975122e-06, "loss": 0.06002103, "memory(GiB)": 13.7, "step": 23630, "train_speed(iter/s)": 1.531148 }, { "acc": 0.97645292, "epoch": 11.078040778064214, "grad_norm": 0.5284954309463501, "learning_rate": 9.216314662209206e-06, "loss": 0.16617894, "memory(GiB)": 13.7, "step": 23635, "train_speed(iter/s)": 1.531144 }, { "acc": 0.96881561, "epoch": 11.08038434497305, "grad_norm": 7.2019147872924805, "learning_rate": 9.215897969108956e-06, "loss": 0.16169752, "memory(GiB)": 13.7, "step": 23640, "train_speed(iter/s)": 1.531162 }, { "acc": 0.96959829, "epoch": 11.082727911881884, "grad_norm": 8.154960632324219, "learning_rate": 9.215481174684387e-06, "loss": 0.12843547, "memory(GiB)": 13.7, "step": 23645, "train_speed(iter/s)": 1.531189 }, { "acc": 0.97332611, "epoch": 11.08507147879072, "grad_norm": 5.562686443328857, "learning_rate": 9.21506427894552e-06, "loss": 0.15599746, "memory(GiB)": 13.7, "step": 23650, "train_speed(iter/s)": 1.531191 }, { "acc": 0.97075405, "epoch": 11.087415045699554, "grad_norm": 5.184345245361328, "learning_rate": 9.214647281902372e-06, "loss": 0.12613845, "memory(GiB)": 13.7, "step": 23655, "train_speed(iter/s)": 1.531202 }, { "acc": 0.9809226, "epoch": 11.08975861260839, "grad_norm": 7.48801326751709, "learning_rate": 9.214230183564966e-06, "loss": 0.08750397, "memory(GiB)": 13.7, "step": 23660, "train_speed(iter/s)": 1.531207 }, { "acc": 0.95888348, "epoch": 11.092102179517225, "grad_norm": 5.019938945770264, "learning_rate": 9.213812983943332e-06, "loss": 0.2276504, "memory(GiB)": 13.7, "step": 23665, "train_speed(iter/s)": 1.531214 }, { "acc": 0.99343138, "epoch": 11.09444574642606, "grad_norm": 1.8075178861618042, "learning_rate": 9.213395683047496e-06, "loss": 0.07298395, "memory(GiB)": 13.7, "step": 23670, "train_speed(iter/s)": 1.531237 }, { "acc": 0.98434582, "epoch": 11.096789313334895, "grad_norm": 1.4209089279174805, "learning_rate": 9.212978280887487e-06, "loss": 0.0841979, "memory(GiB)": 13.7, "step": 23675, "train_speed(iter/s)": 1.531248 }, { "acc": 0.97896824, "epoch": 11.099132880243731, "grad_norm": 2.015834331512451, "learning_rate": 9.212560777473339e-06, "loss": 0.09067791, "memory(GiB)": 13.7, "step": 23680, "train_speed(iter/s)": 1.53127 }, { "acc": 0.96974707, "epoch": 11.101476447152566, "grad_norm": 18.020244598388672, "learning_rate": 9.21214317281509e-06, "loss": 0.14233807, "memory(GiB)": 13.7, "step": 23685, "train_speed(iter/s)": 1.531279 }, { "acc": 0.96129465, "epoch": 11.103820014061402, "grad_norm": 8.31956672668457, "learning_rate": 9.211725466922772e-06, "loss": 0.20190325, "memory(GiB)": 13.7, "step": 23690, "train_speed(iter/s)": 1.531292 }, { "acc": 0.97892857, "epoch": 11.106163580970236, "grad_norm": 7.068849086761475, "learning_rate": 9.211307659806431e-06, "loss": 0.09984404, "memory(GiB)": 13.7, "step": 23695, "train_speed(iter/s)": 1.531307 }, { "acc": 0.98520832, "epoch": 11.108507147879072, "grad_norm": 3.025449275970459, "learning_rate": 9.210889751476106e-06, "loss": 0.10917909, "memory(GiB)": 13.7, "step": 23700, "train_speed(iter/s)": 1.531315 }, { "acc": 0.9775198, "epoch": 11.110850714787908, "grad_norm": 2.1616971492767334, "learning_rate": 9.210471741941844e-06, "loss": 0.11024632, "memory(GiB)": 13.7, "step": 23705, "train_speed(iter/s)": 1.531336 }, { "acc": 0.96072426, "epoch": 11.113194281696742, "grad_norm": 9.28225326538086, "learning_rate": 9.210053631213694e-06, "loss": 0.29178553, "memory(GiB)": 13.7, "step": 23710, "train_speed(iter/s)": 1.531361 }, { "acc": 0.97495537, "epoch": 11.115537848605578, "grad_norm": 23.307859420776367, "learning_rate": 9.209635419301702e-06, "loss": 0.16691149, "memory(GiB)": 13.7, "step": 23715, "train_speed(iter/s)": 1.531377 }, { "acc": 0.97907982, "epoch": 11.117881415514413, "grad_norm": 0.312174916267395, "learning_rate": 9.209217106215924e-06, "loss": 0.11267612, "memory(GiB)": 13.7, "step": 23720, "train_speed(iter/s)": 1.531368 }, { "acc": 0.9640564, "epoch": 11.120224982423249, "grad_norm": 16.229692459106445, "learning_rate": 9.20879869196641e-06, "loss": 0.18792231, "memory(GiB)": 13.7, "step": 23725, "train_speed(iter/s)": 1.531382 }, { "acc": 0.9792263, "epoch": 11.122568549332083, "grad_norm": 15.751068115234375, "learning_rate": 9.208380176563225e-06, "loss": 0.1228385, "memory(GiB)": 13.7, "step": 23730, "train_speed(iter/s)": 1.531385 }, { "acc": 0.97583332, "epoch": 11.12491211624092, "grad_norm": 2.7823431491851807, "learning_rate": 9.207961560016423e-06, "loss": 0.1103272, "memory(GiB)": 13.7, "step": 23735, "train_speed(iter/s)": 1.531397 }, { "acc": 0.98613091, "epoch": 11.127255683149754, "grad_norm": 19.534095764160156, "learning_rate": 9.20754284233607e-06, "loss": 0.12068036, "memory(GiB)": 13.7, "step": 23740, "train_speed(iter/s)": 1.531411 }, { "acc": 0.96817226, "epoch": 11.12959925005859, "grad_norm": 6.066197395324707, "learning_rate": 9.207124023532223e-06, "loss": 0.11036525, "memory(GiB)": 13.7, "step": 23745, "train_speed(iter/s)": 1.531419 }, { "acc": 0.97835321, "epoch": 11.131942816967424, "grad_norm": 12.940177917480469, "learning_rate": 9.206705103614958e-06, "loss": 0.15026861, "memory(GiB)": 13.7, "step": 23750, "train_speed(iter/s)": 1.531433 }, { "acc": 0.96852684, "epoch": 11.13428638387626, "grad_norm": 0.7552690505981445, "learning_rate": 9.20628608259434e-06, "loss": 0.2355722, "memory(GiB)": 13.7, "step": 23755, "train_speed(iter/s)": 1.531433 }, { "acc": 0.98050594, "epoch": 11.136629950785094, "grad_norm": 5.682160377502441, "learning_rate": 9.205866960480442e-06, "loss": 0.07584798, "memory(GiB)": 13.7, "step": 23760, "train_speed(iter/s)": 1.531442 }, { "acc": 0.97827454, "epoch": 11.13897351769393, "grad_norm": 6.464139461517334, "learning_rate": 9.205447737283337e-06, "loss": 0.11124113, "memory(GiB)": 13.7, "step": 23765, "train_speed(iter/s)": 1.531452 }, { "acc": 0.97306547, "epoch": 11.141317084602765, "grad_norm": 6.01619815826416, "learning_rate": 9.205028413013103e-06, "loss": 0.16215184, "memory(GiB)": 13.7, "step": 23770, "train_speed(iter/s)": 1.531473 }, { "acc": 0.97148762, "epoch": 11.1436606515116, "grad_norm": 3.396003007888794, "learning_rate": 9.20460898767982e-06, "loss": 0.11186604, "memory(GiB)": 13.7, "step": 23775, "train_speed(iter/s)": 1.531482 }, { "acc": 0.96958332, "epoch": 11.146004218420435, "grad_norm": 32.159332275390625, "learning_rate": 9.204189461293566e-06, "loss": 0.14805386, "memory(GiB)": 13.7, "step": 23780, "train_speed(iter/s)": 1.531493 }, { "acc": 0.96941156, "epoch": 11.148347785329271, "grad_norm": 2.8074870109558105, "learning_rate": 9.203769833864431e-06, "loss": 0.10320868, "memory(GiB)": 13.7, "step": 23785, "train_speed(iter/s)": 1.531505 }, { "acc": 0.97001858, "epoch": 11.150691352238107, "grad_norm": 2.5730957984924316, "learning_rate": 9.203350105402497e-06, "loss": 0.13460813, "memory(GiB)": 13.7, "step": 23790, "train_speed(iter/s)": 1.531505 }, { "acc": 0.98425598, "epoch": 11.153034919146942, "grad_norm": 5.859164237976074, "learning_rate": 9.202930275917854e-06, "loss": 0.082795, "memory(GiB)": 13.7, "step": 23795, "train_speed(iter/s)": 1.531501 }, { "acc": 0.96299982, "epoch": 11.155378486055778, "grad_norm": 8.490839004516602, "learning_rate": 9.202510345420591e-06, "loss": 0.17241155, "memory(GiB)": 13.7, "step": 23800, "train_speed(iter/s)": 1.531528 }, { "acc": 0.97010422, "epoch": 11.157722052964612, "grad_norm": 5.579826354980469, "learning_rate": 9.202090313920806e-06, "loss": 0.12918041, "memory(GiB)": 13.7, "step": 23805, "train_speed(iter/s)": 1.531526 }, { "acc": 0.96603622, "epoch": 11.160065619873448, "grad_norm": 7.428760528564453, "learning_rate": 9.201670181428592e-06, "loss": 0.18002453, "memory(GiB)": 13.7, "step": 23810, "train_speed(iter/s)": 1.53154 }, { "acc": 0.97104168, "epoch": 11.162409186782282, "grad_norm": 4.492711544036865, "learning_rate": 9.20124994795405e-06, "loss": 0.12806447, "memory(GiB)": 13.7, "step": 23815, "train_speed(iter/s)": 1.531549 }, { "acc": 0.98361111, "epoch": 11.164752753691118, "grad_norm": 3.6788532733917236, "learning_rate": 9.200829613507278e-06, "loss": 0.09924067, "memory(GiB)": 13.7, "step": 23820, "train_speed(iter/s)": 1.531561 }, { "acc": 0.97831354, "epoch": 11.167096320599953, "grad_norm": 6.009144306182861, "learning_rate": 9.200409178098383e-06, "loss": 0.1525319, "memory(GiB)": 13.7, "step": 23825, "train_speed(iter/s)": 1.531558 }, { "acc": 0.98620539, "epoch": 11.169439887508789, "grad_norm": 5.450589179992676, "learning_rate": 9.199988641737469e-06, "loss": 0.1121195, "memory(GiB)": 13.7, "step": 23830, "train_speed(iter/s)": 1.531575 }, { "acc": 0.98640881, "epoch": 11.171783454417623, "grad_norm": 4.180819034576416, "learning_rate": 9.199568004434644e-06, "loss": 0.07126801, "memory(GiB)": 13.7, "step": 23835, "train_speed(iter/s)": 1.531598 }, { "acc": 0.97983637, "epoch": 11.17412702132646, "grad_norm": 4.113857746124268, "learning_rate": 9.199147266200017e-06, "loss": 0.10549405, "memory(GiB)": 13.7, "step": 23840, "train_speed(iter/s)": 1.531602 }, { "acc": 0.97314587, "epoch": 11.176470588235293, "grad_norm": 7.22974967956543, "learning_rate": 9.198726427043707e-06, "loss": 0.12453772, "memory(GiB)": 13.7, "step": 23845, "train_speed(iter/s)": 1.531614 }, { "acc": 0.97117424, "epoch": 11.17881415514413, "grad_norm": 6.578171730041504, "learning_rate": 9.198305486975823e-06, "loss": 0.09870404, "memory(GiB)": 13.7, "step": 23850, "train_speed(iter/s)": 1.531639 }, { "acc": 0.96952457, "epoch": 11.181157722052964, "grad_norm": 8.132396697998047, "learning_rate": 9.19788444600649e-06, "loss": 0.17300856, "memory(GiB)": 13.7, "step": 23855, "train_speed(iter/s)": 1.531663 }, { "acc": 0.9942709, "epoch": 11.1835012889618, "grad_norm": 3.573596954345703, "learning_rate": 9.197463304145821e-06, "loss": 0.06326357, "memory(GiB)": 13.7, "step": 23860, "train_speed(iter/s)": 1.531693 }, { "acc": 0.98325901, "epoch": 11.185844855870634, "grad_norm": 3.693394660949707, "learning_rate": 9.197042061403943e-06, "loss": 0.10770947, "memory(GiB)": 13.7, "step": 23865, "train_speed(iter/s)": 1.53172 }, { "acc": 0.9806448, "epoch": 11.18818842277947, "grad_norm": 12.534482955932617, "learning_rate": 9.196620717790979e-06, "loss": 0.10305678, "memory(GiB)": 13.7, "step": 23870, "train_speed(iter/s)": 1.531725 }, { "acc": 0.96134186, "epoch": 11.190531989688306, "grad_norm": 2.143690586090088, "learning_rate": 9.19619927331706e-06, "loss": 0.22882009, "memory(GiB)": 13.7, "step": 23875, "train_speed(iter/s)": 1.53176 }, { "acc": 0.97684975, "epoch": 11.19287555659714, "grad_norm": 13.529401779174805, "learning_rate": 9.195777727992314e-06, "loss": 0.09366081, "memory(GiB)": 13.7, "step": 23880, "train_speed(iter/s)": 1.531755 }, { "acc": 0.97094975, "epoch": 11.195219123505977, "grad_norm": 12.097784996032715, "learning_rate": 9.195356081826876e-06, "loss": 0.18959959, "memory(GiB)": 13.7, "step": 23885, "train_speed(iter/s)": 1.531754 }, { "acc": 0.97320833, "epoch": 11.197562690414811, "grad_norm": 0.9555598497390747, "learning_rate": 9.194934334830876e-06, "loss": 0.11033452, "memory(GiB)": 13.7, "step": 23890, "train_speed(iter/s)": 1.531752 }, { "acc": 0.97197914, "epoch": 11.199906257323647, "grad_norm": 9.04755973815918, "learning_rate": 9.194512487014457e-06, "loss": 0.15407394, "memory(GiB)": 13.7, "step": 23895, "train_speed(iter/s)": 1.531742 }, { "acc": 0.97583332, "epoch": 11.202249824232482, "grad_norm": 8.510970115661621, "learning_rate": 9.194090538387753e-06, "loss": 0.15776725, "memory(GiB)": 13.7, "step": 23900, "train_speed(iter/s)": 1.531742 }, { "acc": 0.97215271, "epoch": 11.204593391141318, "grad_norm": 3.712535858154297, "learning_rate": 9.193668488960911e-06, "loss": 0.12195365, "memory(GiB)": 13.7, "step": 23905, "train_speed(iter/s)": 1.531735 }, { "acc": 0.97064476, "epoch": 11.206936958050152, "grad_norm": 3.3285470008850098, "learning_rate": 9.193246338744076e-06, "loss": 0.15744581, "memory(GiB)": 13.7, "step": 23910, "train_speed(iter/s)": 1.531747 }, { "acc": 0.98795242, "epoch": 11.209280524958988, "grad_norm": 5.361177444458008, "learning_rate": 9.192824087747389e-06, "loss": 0.12170866, "memory(GiB)": 13.7, "step": 23915, "train_speed(iter/s)": 1.531753 }, { "acc": 0.97739048, "epoch": 11.211624091867822, "grad_norm": 5.816941261291504, "learning_rate": 9.192401735981008e-06, "loss": 0.11652278, "memory(GiB)": 13.7, "step": 23920, "train_speed(iter/s)": 1.531771 }, { "acc": 0.97020836, "epoch": 11.213967658776658, "grad_norm": 5.184878349304199, "learning_rate": 9.191979283455078e-06, "loss": 0.08440582, "memory(GiB)": 13.7, "step": 23925, "train_speed(iter/s)": 1.531771 }, { "acc": 0.96503468, "epoch": 11.216311225685493, "grad_norm": 27.196218490600586, "learning_rate": 9.191556730179758e-06, "loss": 0.17319201, "memory(GiB)": 13.7, "step": 23930, "train_speed(iter/s)": 1.531764 }, { "acc": 0.97437496, "epoch": 11.218654792594329, "grad_norm": 3.1919097900390625, "learning_rate": 9.191134076165203e-06, "loss": 0.11993814, "memory(GiB)": 13.7, "step": 23935, "train_speed(iter/s)": 1.531758 }, { "acc": 0.97363091, "epoch": 11.220998359503163, "grad_norm": 0.5557793378829956, "learning_rate": 9.190711321421572e-06, "loss": 0.19749489, "memory(GiB)": 13.7, "step": 23940, "train_speed(iter/s)": 1.531777 }, { "acc": 0.9875, "epoch": 11.223341926412, "grad_norm": 5.190140247344971, "learning_rate": 9.190288465959024e-06, "loss": 0.1080458, "memory(GiB)": 13.7, "step": 23945, "train_speed(iter/s)": 1.531795 }, { "acc": 0.9916667, "epoch": 11.225685493320835, "grad_norm": 1.787993311882019, "learning_rate": 9.189865509787729e-06, "loss": 0.0148524, "memory(GiB)": 13.7, "step": 23950, "train_speed(iter/s)": 1.531805 }, { "acc": 0.98482151, "epoch": 11.22802906022967, "grad_norm": 2.8274452686309814, "learning_rate": 9.18944245291785e-06, "loss": 0.0841341, "memory(GiB)": 13.7, "step": 23955, "train_speed(iter/s)": 1.531804 }, { "acc": 0.98549604, "epoch": 11.230372627138506, "grad_norm": 5.428183555603027, "learning_rate": 9.189019295359553e-06, "loss": 0.08509155, "memory(GiB)": 13.7, "step": 23960, "train_speed(iter/s)": 1.531822 }, { "acc": 0.98340778, "epoch": 11.23271619404734, "grad_norm": 3.6529810428619385, "learning_rate": 9.188596037123016e-06, "loss": 0.08061991, "memory(GiB)": 13.7, "step": 23965, "train_speed(iter/s)": 1.531854 }, { "acc": 0.95571365, "epoch": 11.235059760956176, "grad_norm": 3.1455695629119873, "learning_rate": 9.188172678218408e-06, "loss": 0.14075041, "memory(GiB)": 13.7, "step": 23970, "train_speed(iter/s)": 1.531871 }, { "acc": 0.97531815, "epoch": 11.23740332786501, "grad_norm": 5.221853256225586, "learning_rate": 9.187749218655906e-06, "loss": 0.08661612, "memory(GiB)": 13.7, "step": 23975, "train_speed(iter/s)": 1.531873 }, { "acc": 0.96862183, "epoch": 11.239746894773846, "grad_norm": 3.2284903526306152, "learning_rate": 9.187325658445689e-06, "loss": 0.11835927, "memory(GiB)": 13.7, "step": 23980, "train_speed(iter/s)": 1.531888 }, { "acc": 0.98135424, "epoch": 11.24209046168268, "grad_norm": 17.583049774169922, "learning_rate": 9.186901997597939e-06, "loss": 0.10832897, "memory(GiB)": 13.7, "step": 23985, "train_speed(iter/s)": 1.531903 }, { "acc": 0.97579823, "epoch": 11.244434028591517, "grad_norm": 5.873970031738281, "learning_rate": 9.186478236122837e-06, "loss": 0.12370844, "memory(GiB)": 13.7, "step": 23990, "train_speed(iter/s)": 1.531914 }, { "acc": 0.96377602, "epoch": 11.246777595500351, "grad_norm": 73.37757110595703, "learning_rate": 9.18605437403057e-06, "loss": 0.15175303, "memory(GiB)": 13.7, "step": 23995, "train_speed(iter/s)": 1.531929 }, { "acc": 0.98819447, "epoch": 11.249121162409187, "grad_norm": 3.036137580871582, "learning_rate": 9.185630411331328e-06, "loss": 0.05144409, "memory(GiB)": 13.7, "step": 24000, "train_speed(iter/s)": 1.531944 }, { "acc": 0.98019352, "epoch": 11.251464729318021, "grad_norm": 6.740403175354004, "learning_rate": 9.1852063480353e-06, "loss": 0.13218819, "memory(GiB)": 13.7, "step": 24005, "train_speed(iter/s)": 1.531966 }, { "acc": 0.96917067, "epoch": 11.253808296226858, "grad_norm": 1.5963904857635498, "learning_rate": 9.184782184152679e-06, "loss": 0.14141986, "memory(GiB)": 13.7, "step": 24010, "train_speed(iter/s)": 1.53199 }, { "acc": 0.96645832, "epoch": 11.256151863135692, "grad_norm": 0.1591866910457611, "learning_rate": 9.18435791969366e-06, "loss": 0.12061561, "memory(GiB)": 13.7, "step": 24015, "train_speed(iter/s)": 1.531998 }, { "acc": 0.97717266, "epoch": 11.258495430044528, "grad_norm": 9.461668014526367, "learning_rate": 9.183933554668442e-06, "loss": 0.11170831, "memory(GiB)": 13.7, "step": 24020, "train_speed(iter/s)": 1.531996 }, { "acc": 0.98035717, "epoch": 11.260838996953362, "grad_norm": 8.641637802124023, "learning_rate": 9.183509089087226e-06, "loss": 0.18837569, "memory(GiB)": 13.7, "step": 24025, "train_speed(iter/s)": 1.531986 }, { "acc": 0.98045635, "epoch": 11.263182563862198, "grad_norm": 2.1943836212158203, "learning_rate": 9.183084522960212e-06, "loss": 0.08483737, "memory(GiB)": 13.7, "step": 24030, "train_speed(iter/s)": 1.532009 }, { "acc": 0.97510414, "epoch": 11.265526130771033, "grad_norm": 6.050080299377441, "learning_rate": 9.182659856297608e-06, "loss": 0.10757105, "memory(GiB)": 13.7, "step": 24035, "train_speed(iter/s)": 1.532034 }, { "acc": 0.9636734, "epoch": 11.267869697679869, "grad_norm": 2.727468252182007, "learning_rate": 9.18223508910962e-06, "loss": 0.19447966, "memory(GiB)": 13.7, "step": 24040, "train_speed(iter/s)": 1.532044 }, { "acc": 0.97367172, "epoch": 11.270213264588705, "grad_norm": 2.1522977352142334, "learning_rate": 9.181810221406459e-06, "loss": 0.10004618, "memory(GiB)": 13.7, "step": 24045, "train_speed(iter/s)": 1.532054 }, { "acc": 0.97425594, "epoch": 11.272556831497539, "grad_norm": 6.67037296295166, "learning_rate": 9.181385253198337e-06, "loss": 0.1032339, "memory(GiB)": 13.7, "step": 24050, "train_speed(iter/s)": 1.532068 }, { "acc": 0.9802084, "epoch": 11.274900398406375, "grad_norm": 9.77157211303711, "learning_rate": 9.180960184495468e-06, "loss": 0.07089461, "memory(GiB)": 13.7, "step": 24055, "train_speed(iter/s)": 1.532069 }, { "acc": 0.98726196, "epoch": 11.27724396531521, "grad_norm": 5.524475574493408, "learning_rate": 9.180535015308071e-06, "loss": 0.13659182, "memory(GiB)": 13.7, "step": 24060, "train_speed(iter/s)": 1.532071 }, { "acc": 0.98520832, "epoch": 11.279587532224046, "grad_norm": 2.506647825241089, "learning_rate": 9.180109745646363e-06, "loss": 0.10911683, "memory(GiB)": 13.7, "step": 24065, "train_speed(iter/s)": 1.532082 }, { "acc": 0.9731945, "epoch": 11.28193109913288, "grad_norm": 9.42082405090332, "learning_rate": 9.17968437552057e-06, "loss": 0.15823805, "memory(GiB)": 13.7, "step": 24070, "train_speed(iter/s)": 1.532109 }, { "acc": 0.97567921, "epoch": 11.284274666041716, "grad_norm": 13.209737777709961, "learning_rate": 9.179258904940913e-06, "loss": 0.14950473, "memory(GiB)": 13.7, "step": 24075, "train_speed(iter/s)": 1.532102 }, { "acc": 0.97473221, "epoch": 11.28661823295055, "grad_norm": 3.800384521484375, "learning_rate": 9.178833333917621e-06, "loss": 0.14965446, "memory(GiB)": 13.7, "step": 24080, "train_speed(iter/s)": 1.5321 }, { "acc": 0.98781242, "epoch": 11.288961799859386, "grad_norm": 0.5982149839401245, "learning_rate": 9.178407662460922e-06, "loss": 0.07096294, "memory(GiB)": 13.7, "step": 24085, "train_speed(iter/s)": 1.532099 }, { "acc": 0.96728706, "epoch": 11.29130536676822, "grad_norm": 4.254620552062988, "learning_rate": 9.177981890581048e-06, "loss": 0.19027783, "memory(GiB)": 13.7, "step": 24090, "train_speed(iter/s)": 1.532111 }, { "acc": 0.96927071, "epoch": 11.293648933677057, "grad_norm": 20.433940887451172, "learning_rate": 9.177556018288234e-06, "loss": 0.19294071, "memory(GiB)": 13.7, "step": 24095, "train_speed(iter/s)": 1.532129 }, { "acc": 0.975243, "epoch": 11.295992500585891, "grad_norm": 3.660457134246826, "learning_rate": 9.177130045592717e-06, "loss": 0.1031557, "memory(GiB)": 13.7, "step": 24100, "train_speed(iter/s)": 1.53214 }, { "acc": 0.97979164, "epoch": 11.298336067494727, "grad_norm": 5.693208694458008, "learning_rate": 9.176703972504732e-06, "loss": 0.11889004, "memory(GiB)": 13.7, "step": 24105, "train_speed(iter/s)": 1.532149 }, { "acc": 0.97172079, "epoch": 11.300679634403561, "grad_norm": 12.371550559997559, "learning_rate": 9.176277799034527e-06, "loss": 0.19934555, "memory(GiB)": 13.7, "step": 24110, "train_speed(iter/s)": 1.532154 }, { "acc": 0.98059521, "epoch": 11.303023201312397, "grad_norm": 2.422801971435547, "learning_rate": 9.175851525192339e-06, "loss": 0.09872436, "memory(GiB)": 13.7, "step": 24115, "train_speed(iter/s)": 1.532154 }, { "acc": 0.98700466, "epoch": 11.305366768221234, "grad_norm": 5.984368801116943, "learning_rate": 9.175425150988419e-06, "loss": 0.09525405, "memory(GiB)": 13.7, "step": 24120, "train_speed(iter/s)": 1.532162 }, { "acc": 0.96113091, "epoch": 11.307710335130068, "grad_norm": 7.481354236602783, "learning_rate": 9.174998676433013e-06, "loss": 0.11971664, "memory(GiB)": 13.7, "step": 24125, "train_speed(iter/s)": 1.53216 }, { "acc": 0.97071428, "epoch": 11.310053902038904, "grad_norm": 13.861517906188965, "learning_rate": 9.174572101536376e-06, "loss": 0.19272404, "memory(GiB)": 13.7, "step": 24130, "train_speed(iter/s)": 1.532173 }, { "acc": 0.9819313, "epoch": 11.312397468947738, "grad_norm": 6.870605945587158, "learning_rate": 9.174145426308756e-06, "loss": 0.1014842, "memory(GiB)": 13.7, "step": 24135, "train_speed(iter/s)": 1.53217 }, { "acc": 0.9777462, "epoch": 11.314741035856574, "grad_norm": 3.3250155448913574, "learning_rate": 9.173718650760413e-06, "loss": 0.11329092, "memory(GiB)": 13.7, "step": 24140, "train_speed(iter/s)": 1.532173 }, { "acc": 0.9822917, "epoch": 11.317084602765409, "grad_norm": 1.6280792951583862, "learning_rate": 9.173291774901601e-06, "loss": 0.11375321, "memory(GiB)": 13.7, "step": 24145, "train_speed(iter/s)": 1.532189 }, { "acc": 0.97485113, "epoch": 11.319428169674245, "grad_norm": 6.169077396392822, "learning_rate": 9.172864798742587e-06, "loss": 0.11101713, "memory(GiB)": 13.7, "step": 24150, "train_speed(iter/s)": 1.532205 }, { "acc": 0.97374144, "epoch": 11.321771736583079, "grad_norm": 3.0134291648864746, "learning_rate": 9.172437722293627e-06, "loss": 0.13371339, "memory(GiB)": 13.7, "step": 24155, "train_speed(iter/s)": 1.532214 }, { "acc": 0.97992268, "epoch": 11.324115303491915, "grad_norm": 5.090505123138428, "learning_rate": 9.172010545564992e-06, "loss": 0.11699576, "memory(GiB)": 13.7, "step": 24160, "train_speed(iter/s)": 1.532207 }, { "acc": 0.97769394, "epoch": 11.32645887040075, "grad_norm": 4.492522716522217, "learning_rate": 9.171583268566948e-06, "loss": 0.08283013, "memory(GiB)": 13.7, "step": 24165, "train_speed(iter/s)": 1.532206 }, { "acc": 0.95398808, "epoch": 11.328802437309585, "grad_norm": 14.798819541931152, "learning_rate": 9.171155891309765e-06, "loss": 0.18815042, "memory(GiB)": 13.7, "step": 24170, "train_speed(iter/s)": 1.532214 }, { "acc": 0.98687496, "epoch": 11.33114600421842, "grad_norm": 1.23271644115448, "learning_rate": 9.170728413803716e-06, "loss": 0.03032691, "memory(GiB)": 13.7, "step": 24175, "train_speed(iter/s)": 1.53222 }, { "acc": 0.97076387, "epoch": 11.333489571127256, "grad_norm": 5.8432488441467285, "learning_rate": 9.170300836059076e-06, "loss": 0.15489498, "memory(GiB)": 13.7, "step": 24180, "train_speed(iter/s)": 1.53223 }, { "acc": 0.96986618, "epoch": 11.33583313803609, "grad_norm": 2.266774892807007, "learning_rate": 9.169873158086124e-06, "loss": 0.10014237, "memory(GiB)": 13.7, "step": 24185, "train_speed(iter/s)": 1.532237 }, { "acc": 0.96818457, "epoch": 11.338176704944926, "grad_norm": 3.203186273574829, "learning_rate": 9.169445379895136e-06, "loss": 0.1308465, "memory(GiB)": 13.7, "step": 24190, "train_speed(iter/s)": 1.532251 }, { "acc": 0.95384111, "epoch": 11.340520271853762, "grad_norm": 41.325618743896484, "learning_rate": 9.1690175014964e-06, "loss": 0.23201323, "memory(GiB)": 13.7, "step": 24195, "train_speed(iter/s)": 1.532256 }, { "acc": 0.98713741, "epoch": 11.342863838762597, "grad_norm": 13.134443283081055, "learning_rate": 9.168589522900197e-06, "loss": 0.0784821, "memory(GiB)": 13.7, "step": 24200, "train_speed(iter/s)": 1.532257 }, { "acc": 0.96575003, "epoch": 11.345207405671433, "grad_norm": 6.617187023162842, "learning_rate": 9.168161444116814e-06, "loss": 0.18713719, "memory(GiB)": 13.7, "step": 24205, "train_speed(iter/s)": 1.532264 }, { "acc": 0.97668018, "epoch": 11.347550972580267, "grad_norm": 8.469860076904297, "learning_rate": 9.167733265156543e-06, "loss": 0.16272775, "memory(GiB)": 13.7, "step": 24210, "train_speed(iter/s)": 1.532273 }, { "acc": 0.97247486, "epoch": 11.349894539489103, "grad_norm": 10.030817985534668, "learning_rate": 9.167304986029675e-06, "loss": 0.14257584, "memory(GiB)": 13.7, "step": 24215, "train_speed(iter/s)": 1.532284 }, { "acc": 0.99415178, "epoch": 11.352238106397937, "grad_norm": 20.089202880859375, "learning_rate": 9.166876606746503e-06, "loss": 0.03268445, "memory(GiB)": 13.7, "step": 24220, "train_speed(iter/s)": 1.532291 }, { "acc": 0.98080359, "epoch": 11.354581673306773, "grad_norm": 1.735557198524475, "learning_rate": 9.166448127317326e-06, "loss": 0.11975871, "memory(GiB)": 13.7, "step": 24225, "train_speed(iter/s)": 1.532304 }, { "acc": 0.98156929, "epoch": 11.356925240215608, "grad_norm": 11.895735740661621, "learning_rate": 9.166019547752441e-06, "loss": 0.10635922, "memory(GiB)": 13.7, "step": 24230, "train_speed(iter/s)": 1.532303 }, { "acc": 0.97666664, "epoch": 11.359268807124444, "grad_norm": 5.3541693687438965, "learning_rate": 9.165590868062151e-06, "loss": 0.10933408, "memory(GiB)": 13.7, "step": 24235, "train_speed(iter/s)": 1.532296 }, { "acc": 0.97234373, "epoch": 11.361612374033278, "grad_norm": 1.2100940942764282, "learning_rate": 9.16516208825676e-06, "loss": 0.13439763, "memory(GiB)": 13.7, "step": 24240, "train_speed(iter/s)": 1.532311 }, { "acc": 0.97282391, "epoch": 11.363955940942114, "grad_norm": 8.821733474731445, "learning_rate": 9.164733208346575e-06, "loss": 0.15941683, "memory(GiB)": 13.7, "step": 24245, "train_speed(iter/s)": 1.532322 }, { "acc": 0.94934521, "epoch": 11.366299507850949, "grad_norm": 8.157992362976074, "learning_rate": 9.164304228341904e-06, "loss": 0.24226282, "memory(GiB)": 13.7, "step": 24250, "train_speed(iter/s)": 1.532329 }, { "acc": 0.98675594, "epoch": 11.368643074759785, "grad_norm": 2.211778402328491, "learning_rate": 9.16387514825306e-06, "loss": 0.05682152, "memory(GiB)": 13.7, "step": 24255, "train_speed(iter/s)": 1.53233 }, { "acc": 0.94975853, "epoch": 11.370986641668619, "grad_norm": 8.43785285949707, "learning_rate": 9.163445968090353e-06, "loss": 0.28877778, "memory(GiB)": 13.7, "step": 24260, "train_speed(iter/s)": 1.532332 }, { "acc": 0.96239586, "epoch": 11.373330208577455, "grad_norm": 5.102385520935059, "learning_rate": 9.163016687864102e-06, "loss": 0.17614172, "memory(GiB)": 13.7, "step": 24265, "train_speed(iter/s)": 1.532324 }, { "acc": 0.97741518, "epoch": 11.37567377548629, "grad_norm": 0.004876463674008846, "learning_rate": 9.162587307584625e-06, "loss": 0.0589564, "memory(GiB)": 13.7, "step": 24270, "train_speed(iter/s)": 1.532336 }, { "acc": 0.97275934, "epoch": 11.378017342395125, "grad_norm": 6.597569942474365, "learning_rate": 9.162157827262242e-06, "loss": 0.12486815, "memory(GiB)": 13.7, "step": 24275, "train_speed(iter/s)": 1.532331 }, { "acc": 0.9765399, "epoch": 11.38036090930396, "grad_norm": 6.75430965423584, "learning_rate": 9.161728246907277e-06, "loss": 0.06857994, "memory(GiB)": 13.7, "step": 24280, "train_speed(iter/s)": 1.532335 }, { "acc": 0.95828524, "epoch": 11.382704476212796, "grad_norm": 5.822977542877197, "learning_rate": 9.161298566530056e-06, "loss": 0.14298217, "memory(GiB)": 13.7, "step": 24285, "train_speed(iter/s)": 1.532355 }, { "acc": 0.97881126, "epoch": 11.385048043121632, "grad_norm": 6.217565536499023, "learning_rate": 9.160868786140907e-06, "loss": 0.08180593, "memory(GiB)": 13.7, "step": 24290, "train_speed(iter/s)": 1.532351 }, { "acc": 0.97287827, "epoch": 11.387391610030466, "grad_norm": 3.2095282077789307, "learning_rate": 9.16043890575016e-06, "loss": 0.11054074, "memory(GiB)": 13.7, "step": 24295, "train_speed(iter/s)": 1.532364 }, { "acc": 0.97738094, "epoch": 11.389735176939302, "grad_norm": 3.69130539894104, "learning_rate": 9.160008925368149e-06, "loss": 0.11273931, "memory(GiB)": 13.7, "step": 24300, "train_speed(iter/s)": 1.532384 }, { "acc": 0.98164244, "epoch": 11.392078743848137, "grad_norm": 3.6075401306152344, "learning_rate": 9.159578845005207e-06, "loss": 0.09626476, "memory(GiB)": 13.7, "step": 24305, "train_speed(iter/s)": 1.532404 }, { "acc": 0.98128471, "epoch": 11.394422310756973, "grad_norm": 3.823864459991455, "learning_rate": 9.159148664671673e-06, "loss": 0.08788412, "memory(GiB)": 13.7, "step": 24310, "train_speed(iter/s)": 1.532433 }, { "acc": 0.96416311, "epoch": 11.396765877665807, "grad_norm": 4.761067867279053, "learning_rate": 9.15871838437789e-06, "loss": 0.17569163, "memory(GiB)": 13.7, "step": 24315, "train_speed(iter/s)": 1.532443 }, { "acc": 0.97275753, "epoch": 11.399109444574643, "grad_norm": 11.772767066955566, "learning_rate": 9.158288004134196e-06, "loss": 0.11436422, "memory(GiB)": 13.7, "step": 24320, "train_speed(iter/s)": 1.532439 }, { "acc": 0.97330894, "epoch": 11.401453011483477, "grad_norm": 4.080338954925537, "learning_rate": 9.157857523950937e-06, "loss": 0.17962706, "memory(GiB)": 13.7, "step": 24325, "train_speed(iter/s)": 1.532459 }, { "acc": 0.9822484, "epoch": 11.403796578392313, "grad_norm": 4.660445213317871, "learning_rate": 9.157426943838462e-06, "loss": 0.1105177, "memory(GiB)": 13.7, "step": 24330, "train_speed(iter/s)": 1.532467 }, { "acc": 0.9786458, "epoch": 11.406140145301148, "grad_norm": 5.8226189613342285, "learning_rate": 9.156996263807118e-06, "loss": 0.08298561, "memory(GiB)": 13.7, "step": 24335, "train_speed(iter/s)": 1.532473 }, { "acc": 0.98187504, "epoch": 11.408483712209984, "grad_norm": 6.279607772827148, "learning_rate": 9.156565483867262e-06, "loss": 0.04231166, "memory(GiB)": 13.7, "step": 24340, "train_speed(iter/s)": 1.532469 }, { "acc": 0.97678032, "epoch": 11.410827279118818, "grad_norm": 3.5862538814544678, "learning_rate": 9.156134604029244e-06, "loss": 0.09196559, "memory(GiB)": 13.7, "step": 24345, "train_speed(iter/s)": 1.532488 }, { "acc": 0.97578564, "epoch": 11.413170846027654, "grad_norm": 10.466490745544434, "learning_rate": 9.155703624303422e-06, "loss": 0.16578453, "memory(GiB)": 13.7, "step": 24350, "train_speed(iter/s)": 1.532488 }, { "acc": 0.97673759, "epoch": 11.415514412936488, "grad_norm": 21.820947647094727, "learning_rate": 9.155272544700158e-06, "loss": 0.12660251, "memory(GiB)": 13.7, "step": 24355, "train_speed(iter/s)": 1.532489 }, { "acc": 0.97194939, "epoch": 11.417857979845325, "grad_norm": 3.7800192832946777, "learning_rate": 9.15484136522981e-06, "loss": 0.13430467, "memory(GiB)": 13.7, "step": 24360, "train_speed(iter/s)": 1.532508 }, { "acc": 0.97526045, "epoch": 11.42020154675416, "grad_norm": 0.9374815225601196, "learning_rate": 9.154410085902741e-06, "loss": 0.07451005, "memory(GiB)": 13.7, "step": 24365, "train_speed(iter/s)": 1.532526 }, { "acc": 0.9732336, "epoch": 11.422545113662995, "grad_norm": 6.624947547912598, "learning_rate": 9.153978706729325e-06, "loss": 0.16642567, "memory(GiB)": 13.7, "step": 24370, "train_speed(iter/s)": 1.532531 }, { "acc": 0.97891455, "epoch": 11.424888680571831, "grad_norm": 1.4645755290985107, "learning_rate": 9.153547227719924e-06, "loss": 0.09812636, "memory(GiB)": 13.7, "step": 24375, "train_speed(iter/s)": 1.532539 }, { "acc": 0.98142853, "epoch": 11.427232247480665, "grad_norm": 5.742001056671143, "learning_rate": 9.15311564888491e-06, "loss": 0.06948636, "memory(GiB)": 13.7, "step": 24380, "train_speed(iter/s)": 1.532544 }, { "acc": 0.98052082, "epoch": 11.429575814389501, "grad_norm": 5.6355767250061035, "learning_rate": 9.15268397023466e-06, "loss": 0.07670504, "memory(GiB)": 13.7, "step": 24385, "train_speed(iter/s)": 1.53257 }, { "acc": 0.9641325, "epoch": 11.431919381298336, "grad_norm": 37.43828582763672, "learning_rate": 9.152252191779548e-06, "loss": 0.18719488, "memory(GiB)": 13.7, "step": 24390, "train_speed(iter/s)": 1.532577 }, { "acc": 0.97549562, "epoch": 11.434262948207172, "grad_norm": 8.107681274414062, "learning_rate": 9.151820313529953e-06, "loss": 0.14265031, "memory(GiB)": 13.7, "step": 24395, "train_speed(iter/s)": 1.532579 }, { "acc": 0.98136768, "epoch": 11.436606515116006, "grad_norm": 6.049530029296875, "learning_rate": 9.151388335496256e-06, "loss": 0.07149962, "memory(GiB)": 13.7, "step": 24400, "train_speed(iter/s)": 1.532586 }, { "acc": 0.97747154, "epoch": 11.438950082024842, "grad_norm": 6.429887771606445, "learning_rate": 9.15095625768884e-06, "loss": 0.14900556, "memory(GiB)": 13.7, "step": 24405, "train_speed(iter/s)": 1.532596 }, { "acc": 0.97654667, "epoch": 11.441293648933677, "grad_norm": 2.925593376159668, "learning_rate": 9.150524080118089e-06, "loss": 0.12101376, "memory(GiB)": 13.7, "step": 24410, "train_speed(iter/s)": 1.53261 }, { "acc": 0.98029766, "epoch": 11.443637215842513, "grad_norm": 5.77847957611084, "learning_rate": 9.150091802794394e-06, "loss": 0.06741018, "memory(GiB)": 13.7, "step": 24415, "train_speed(iter/s)": 1.532609 }, { "acc": 0.97664757, "epoch": 11.445980782751347, "grad_norm": 7.995769500732422, "learning_rate": 9.149659425728145e-06, "loss": 0.20416813, "memory(GiB)": 13.7, "step": 24420, "train_speed(iter/s)": 1.53262 }, { "acc": 0.95975685, "epoch": 11.448324349660183, "grad_norm": 12.97987174987793, "learning_rate": 9.149226948929735e-06, "loss": 0.17880743, "memory(GiB)": 13.7, "step": 24425, "train_speed(iter/s)": 1.532633 }, { "acc": 0.97137041, "epoch": 11.450667916569017, "grad_norm": 5.9025797843933105, "learning_rate": 9.148794372409558e-06, "loss": 0.09199866, "memory(GiB)": 13.7, "step": 24430, "train_speed(iter/s)": 1.532627 }, { "acc": 0.97385912, "epoch": 11.453011483477853, "grad_norm": 5.606277942657471, "learning_rate": 9.148361696178014e-06, "loss": 0.10987816, "memory(GiB)": 13.7, "step": 24435, "train_speed(iter/s)": 1.532623 }, { "acc": 0.9639286, "epoch": 11.45535505038669, "grad_norm": 3.4889976978302, "learning_rate": 9.1479289202455e-06, "loss": 0.14884279, "memory(GiB)": 13.7, "step": 24440, "train_speed(iter/s)": 1.532635 }, { "acc": 0.97307701, "epoch": 11.457698617295524, "grad_norm": 4.1000590324401855, "learning_rate": 9.14749604462242e-06, "loss": 0.07545859, "memory(GiB)": 13.7, "step": 24445, "train_speed(iter/s)": 1.532638 }, { "acc": 0.98068829, "epoch": 11.46004218420436, "grad_norm": 2.882570743560791, "learning_rate": 9.14706306931918e-06, "loss": 0.06208882, "memory(GiB)": 13.7, "step": 24450, "train_speed(iter/s)": 1.532644 }, { "acc": 0.97676001, "epoch": 11.462385751113194, "grad_norm": 0.8003600835800171, "learning_rate": 9.146629994346186e-06, "loss": 0.10611722, "memory(GiB)": 13.7, "step": 24455, "train_speed(iter/s)": 1.53266 }, { "acc": 0.96686258, "epoch": 11.46472931802203, "grad_norm": 8.396685600280762, "learning_rate": 9.146196819713849e-06, "loss": 0.12142987, "memory(GiB)": 13.7, "step": 24460, "train_speed(iter/s)": 1.532685 }, { "acc": 0.97622023, "epoch": 11.467072884930865, "grad_norm": 1.8703808784484863, "learning_rate": 9.145763545432579e-06, "loss": 0.10270448, "memory(GiB)": 13.7, "step": 24465, "train_speed(iter/s)": 1.532677 }, { "acc": 0.978125, "epoch": 11.4694164518397, "grad_norm": 6.110227108001709, "learning_rate": 9.14533017151279e-06, "loss": 0.11025195, "memory(GiB)": 13.7, "step": 24470, "train_speed(iter/s)": 1.532699 }, { "acc": 0.99142361, "epoch": 11.471760018748535, "grad_norm": 0.010461246594786644, "learning_rate": 9.144896697964901e-06, "loss": 0.05081353, "memory(GiB)": 13.7, "step": 24475, "train_speed(iter/s)": 1.532708 }, { "acc": 0.97218142, "epoch": 11.474103585657371, "grad_norm": 6.1881184577941895, "learning_rate": 9.144463124799333e-06, "loss": 0.16547575, "memory(GiB)": 13.7, "step": 24480, "train_speed(iter/s)": 1.532725 }, { "acc": 0.97537813, "epoch": 11.476447152566205, "grad_norm": 4.334246635437012, "learning_rate": 9.144029452026505e-06, "loss": 0.13381182, "memory(GiB)": 13.7, "step": 24485, "train_speed(iter/s)": 1.532755 }, { "acc": 0.96149673, "epoch": 11.478790719475041, "grad_norm": 2.549433946609497, "learning_rate": 9.143595679656841e-06, "loss": 0.18548795, "memory(GiB)": 13.7, "step": 24490, "train_speed(iter/s)": 1.532766 }, { "acc": 0.97045593, "epoch": 11.481134286383876, "grad_norm": 6.5027337074279785, "learning_rate": 9.143161807700768e-06, "loss": 0.17181407, "memory(GiB)": 13.7, "step": 24495, "train_speed(iter/s)": 1.532763 }, { "acc": 0.97651787, "epoch": 11.483477853292712, "grad_norm": 3.4886415004730225, "learning_rate": 9.142727836168715e-06, "loss": 0.10367026, "memory(GiB)": 13.7, "step": 24500, "train_speed(iter/s)": 1.532771 }, { "acc": 0.97733011, "epoch": 11.485821420201546, "grad_norm": 0.4258478581905365, "learning_rate": 9.142293765071113e-06, "loss": 0.10749706, "memory(GiB)": 13.7, "step": 24505, "train_speed(iter/s)": 1.532764 }, { "acc": 0.97426586, "epoch": 11.488164987110382, "grad_norm": 4.053562164306641, "learning_rate": 9.141859594418396e-06, "loss": 0.09179016, "memory(GiB)": 13.7, "step": 24510, "train_speed(iter/s)": 1.532767 }, { "acc": 0.97617283, "epoch": 11.490508554019216, "grad_norm": 3.882094621658325, "learning_rate": 9.141425324220999e-06, "loss": 0.1421928, "memory(GiB)": 13.7, "step": 24515, "train_speed(iter/s)": 1.532789 }, { "acc": 0.9779438, "epoch": 11.492852120928053, "grad_norm": 5.239746570587158, "learning_rate": 9.14099095448936e-06, "loss": 0.08362379, "memory(GiB)": 13.7, "step": 24520, "train_speed(iter/s)": 1.532803 }, { "acc": 0.97325897, "epoch": 11.495195687836887, "grad_norm": 3.771299362182617, "learning_rate": 9.140556485233923e-06, "loss": 0.10870436, "memory(GiB)": 13.7, "step": 24525, "train_speed(iter/s)": 1.532795 }, { "acc": 0.98113098, "epoch": 11.497539254745723, "grad_norm": 1.9928945302963257, "learning_rate": 9.14012191646513e-06, "loss": 0.15174061, "memory(GiB)": 13.7, "step": 24530, "train_speed(iter/s)": 1.532812 }, { "acc": 0.98915758, "epoch": 11.499882821654559, "grad_norm": 17.22673988342285, "learning_rate": 9.139687248193424e-06, "loss": 0.09082253, "memory(GiB)": 13.7, "step": 24535, "train_speed(iter/s)": 1.53282 }, { "acc": 0.96438923, "epoch": 11.502226388563393, "grad_norm": 84.00553131103516, "learning_rate": 9.139252480429255e-06, "loss": 0.2026875, "memory(GiB)": 13.7, "step": 24540, "train_speed(iter/s)": 1.53282 }, { "acc": 0.98298607, "epoch": 11.50456995547223, "grad_norm": 5.405235290527344, "learning_rate": 9.138817613183073e-06, "loss": 0.05031776, "memory(GiB)": 13.7, "step": 24545, "train_speed(iter/s)": 1.532824 }, { "acc": 0.95202627, "epoch": 11.506913522381064, "grad_norm": 9.027006149291992, "learning_rate": 9.138382646465333e-06, "loss": 0.2224997, "memory(GiB)": 13.7, "step": 24550, "train_speed(iter/s)": 1.532833 }, { "acc": 0.97520828, "epoch": 11.5092570892899, "grad_norm": 4.621053218841553, "learning_rate": 9.137947580286486e-06, "loss": 0.14292369, "memory(GiB)": 13.7, "step": 24555, "train_speed(iter/s)": 1.53284 }, { "acc": 0.98669643, "epoch": 11.511600656198734, "grad_norm": 1.1662404537200928, "learning_rate": 9.137512414656993e-06, "loss": 0.0911243, "memory(GiB)": 13.7, "step": 24560, "train_speed(iter/s)": 1.532833 }, { "acc": 0.96236715, "epoch": 11.51394422310757, "grad_norm": 5.78315544128418, "learning_rate": 9.137077149587314e-06, "loss": 0.1296396, "memory(GiB)": 13.7, "step": 24565, "train_speed(iter/s)": 1.532827 }, { "acc": 0.96088581, "epoch": 11.516287790016404, "grad_norm": 4.373386859893799, "learning_rate": 9.136641785087907e-06, "loss": 0.23490374, "memory(GiB)": 13.7, "step": 24570, "train_speed(iter/s)": 1.532851 }, { "acc": 0.97982035, "epoch": 11.51863135692524, "grad_norm": 6.267180442810059, "learning_rate": 9.13620632116924e-06, "loss": 0.05990989, "memory(GiB)": 13.7, "step": 24575, "train_speed(iter/s)": 1.532863 }, { "acc": 0.97041664, "epoch": 11.520974923834075, "grad_norm": 4.819267272949219, "learning_rate": 9.135770757841782e-06, "loss": 0.16653128, "memory(GiB)": 13.7, "step": 24580, "train_speed(iter/s)": 1.532873 }, { "acc": 0.9836278, "epoch": 11.523318490742911, "grad_norm": 6.547035217285156, "learning_rate": 9.135335095115999e-06, "loss": 0.09048276, "memory(GiB)": 13.7, "step": 24585, "train_speed(iter/s)": 1.532878 }, { "acc": 0.98074408, "epoch": 11.525662057651745, "grad_norm": 2.6902241706848145, "learning_rate": 9.134899333002365e-06, "loss": 0.10174685, "memory(GiB)": 13.7, "step": 24590, "train_speed(iter/s)": 1.532889 }, { "acc": 0.98874998, "epoch": 11.528005624560581, "grad_norm": 3.68249249458313, "learning_rate": 9.134463471511353e-06, "loss": 0.05561212, "memory(GiB)": 13.7, "step": 24595, "train_speed(iter/s)": 1.532888 }, { "acc": 0.98058128, "epoch": 11.530349191469416, "grad_norm": 1.5829490423202515, "learning_rate": 9.134027510653441e-06, "loss": 0.1046968, "memory(GiB)": 13.7, "step": 24600, "train_speed(iter/s)": 1.532883 }, { "acc": 0.98031998, "epoch": 11.532692758378252, "grad_norm": 4.071916580200195, "learning_rate": 9.133591450439108e-06, "loss": 0.10030378, "memory(GiB)": 13.7, "step": 24605, "train_speed(iter/s)": 1.532909 }, { "acc": 0.98454819, "epoch": 11.535036325287088, "grad_norm": 4.967743396759033, "learning_rate": 9.133155290878832e-06, "loss": 0.06467596, "memory(GiB)": 13.7, "step": 24610, "train_speed(iter/s)": 1.532912 }, { "acc": 0.98284969, "epoch": 11.537379892195922, "grad_norm": 18.00370216369629, "learning_rate": 9.1327190319831e-06, "loss": 0.13941958, "memory(GiB)": 13.7, "step": 24615, "train_speed(iter/s)": 1.532926 }, { "acc": 0.96573772, "epoch": 11.539723459104758, "grad_norm": 4.007503032684326, "learning_rate": 9.132282673762402e-06, "loss": 0.11224476, "memory(GiB)": 13.7, "step": 24620, "train_speed(iter/s)": 1.532941 }, { "acc": 0.95534725, "epoch": 11.542067026013592, "grad_norm": 7.067577362060547, "learning_rate": 9.131846216227219e-06, "loss": 0.17688091, "memory(GiB)": 13.7, "step": 24625, "train_speed(iter/s)": 1.532947 }, { "acc": 0.97628212, "epoch": 11.544410592922429, "grad_norm": 1.0544060468673706, "learning_rate": 9.131409659388046e-06, "loss": 0.11781116, "memory(GiB)": 13.7, "step": 24630, "train_speed(iter/s)": 1.532965 }, { "acc": 0.97837124, "epoch": 11.546754159831263, "grad_norm": 58.604766845703125, "learning_rate": 9.130973003255375e-06, "loss": 0.11189966, "memory(GiB)": 13.7, "step": 24635, "train_speed(iter/s)": 1.532962 }, { "acc": 0.97421875, "epoch": 11.549097726740099, "grad_norm": 8.354744911193848, "learning_rate": 9.130536247839704e-06, "loss": 0.12643827, "memory(GiB)": 13.7, "step": 24640, "train_speed(iter/s)": 1.532953 }, { "acc": 0.96947918, "epoch": 11.551441293648933, "grad_norm": 17.070213317871094, "learning_rate": 9.13009939315153e-06, "loss": 0.14474106, "memory(GiB)": 13.7, "step": 24645, "train_speed(iter/s)": 1.532964 }, { "acc": 0.98028851, "epoch": 11.55378486055777, "grad_norm": 4.021169185638428, "learning_rate": 9.129662439201353e-06, "loss": 0.09792607, "memory(GiB)": 13.7, "step": 24650, "train_speed(iter/s)": 1.532984 }, { "acc": 0.98166122, "epoch": 11.556128427466604, "grad_norm": 4.977852821350098, "learning_rate": 9.129225385999676e-06, "loss": 0.08397447, "memory(GiB)": 13.7, "step": 24655, "train_speed(iter/s)": 1.532992 }, { "acc": 0.96872845, "epoch": 11.55847199437544, "grad_norm": 6.68197774887085, "learning_rate": 9.128788233557006e-06, "loss": 0.17684747, "memory(GiB)": 13.7, "step": 24660, "train_speed(iter/s)": 1.532991 }, { "acc": 0.97921629, "epoch": 11.560815561284274, "grad_norm": 4.7049360275268555, "learning_rate": 9.128350981883847e-06, "loss": 0.11249, "memory(GiB)": 13.7, "step": 24665, "train_speed(iter/s)": 1.533023 }, { "acc": 0.97466354, "epoch": 11.56315912819311, "grad_norm": 5.955930709838867, "learning_rate": 9.127913630990713e-06, "loss": 0.17225085, "memory(GiB)": 13.7, "step": 24670, "train_speed(iter/s)": 1.533038 }, { "acc": 0.96609097, "epoch": 11.565502695101944, "grad_norm": 5.4534101486206055, "learning_rate": 9.127476180888114e-06, "loss": 0.18609548, "memory(GiB)": 13.7, "step": 24675, "train_speed(iter/s)": 1.533052 }, { "acc": 0.97013788, "epoch": 11.56784626201078, "grad_norm": 9.002251625061035, "learning_rate": 9.127038631586566e-06, "loss": 0.17419441, "memory(GiB)": 13.7, "step": 24680, "train_speed(iter/s)": 1.533078 }, { "acc": 0.98478012, "epoch": 11.570189828919617, "grad_norm": 1.9560569524765015, "learning_rate": 9.126600983096586e-06, "loss": 0.12313491, "memory(GiB)": 13.7, "step": 24685, "train_speed(iter/s)": 1.533074 }, { "acc": 0.9723629, "epoch": 11.57253339582845, "grad_norm": 1.4063003063201904, "learning_rate": 9.126163235428694e-06, "loss": 0.12063109, "memory(GiB)": 13.7, "step": 24690, "train_speed(iter/s)": 1.533053 }, { "acc": 0.97645206, "epoch": 11.574876962737287, "grad_norm": 6.010679721832275, "learning_rate": 9.125725388593413e-06, "loss": 0.12357997, "memory(GiB)": 13.7, "step": 24695, "train_speed(iter/s)": 1.533073 }, { "acc": 0.97205362, "epoch": 11.577220529646121, "grad_norm": 6.4099507331848145, "learning_rate": 9.125287442601264e-06, "loss": 0.11639595, "memory(GiB)": 13.7, "step": 24700, "train_speed(iter/s)": 1.533079 }, { "acc": 0.9854166, "epoch": 11.579564096554957, "grad_norm": 2.4553990364074707, "learning_rate": 9.124849397462777e-06, "loss": 0.079318, "memory(GiB)": 13.7, "step": 24705, "train_speed(iter/s)": 1.53308 }, { "acc": 0.97013893, "epoch": 11.581907663463792, "grad_norm": 5.945916652679443, "learning_rate": 9.124411253188479e-06, "loss": 0.12517083, "memory(GiB)": 13.7, "step": 24710, "train_speed(iter/s)": 1.533092 }, { "acc": 0.97729168, "epoch": 11.584251230372628, "grad_norm": 5.060199737548828, "learning_rate": 9.123973009788903e-06, "loss": 0.13178741, "memory(GiB)": 13.7, "step": 24715, "train_speed(iter/s)": 1.533086 }, { "acc": 0.9813549, "epoch": 11.586594797281462, "grad_norm": 2.5897696018218994, "learning_rate": 9.123534667274583e-06, "loss": 0.08846158, "memory(GiB)": 13.7, "step": 24720, "train_speed(iter/s)": 1.533097 }, { "acc": 0.98398266, "epoch": 11.588938364190298, "grad_norm": 5.850444316864014, "learning_rate": 9.123096225656055e-06, "loss": 0.16086856, "memory(GiB)": 13.7, "step": 24725, "train_speed(iter/s)": 1.533089 }, { "acc": 0.97104168, "epoch": 11.591281931099132, "grad_norm": 7.9140238761901855, "learning_rate": 9.122657684943857e-06, "loss": 0.09599316, "memory(GiB)": 13.7, "step": 24730, "train_speed(iter/s)": 1.533077 }, { "acc": 0.98305836, "epoch": 11.593625498007968, "grad_norm": 4.662247657775879, "learning_rate": 9.12221904514853e-06, "loss": 0.09557903, "memory(GiB)": 13.7, "step": 24735, "train_speed(iter/s)": 1.533066 }, { "acc": 0.98519917, "epoch": 11.595969064916803, "grad_norm": 5.544482231140137, "learning_rate": 9.12178030628062e-06, "loss": 0.0856666, "memory(GiB)": 13.7, "step": 24740, "train_speed(iter/s)": 1.533083 }, { "acc": 0.97959146, "epoch": 11.598312631825639, "grad_norm": 12.966914176940918, "learning_rate": 9.121341468350669e-06, "loss": 0.07527769, "memory(GiB)": 13.7, "step": 24745, "train_speed(iter/s)": 1.533092 }, { "acc": 0.98031254, "epoch": 11.600656198734473, "grad_norm": 4.944980144500732, "learning_rate": 9.120902531369228e-06, "loss": 0.16865458, "memory(GiB)": 13.7, "step": 24750, "train_speed(iter/s)": 1.533098 }, { "acc": 0.9543601, "epoch": 11.60299976564331, "grad_norm": 6.252420902252197, "learning_rate": 9.120463495346847e-06, "loss": 0.17908524, "memory(GiB)": 13.7, "step": 24755, "train_speed(iter/s)": 1.533096 }, { "acc": 0.97365837, "epoch": 11.605343332552144, "grad_norm": 7.401638031005859, "learning_rate": 9.120024360294078e-06, "loss": 0.17056777, "memory(GiB)": 13.7, "step": 24760, "train_speed(iter/s)": 1.533122 }, { "acc": 0.98550587, "epoch": 11.60768689946098, "grad_norm": 4.914587020874023, "learning_rate": 9.119585126221476e-06, "loss": 0.09617892, "memory(GiB)": 13.7, "step": 24765, "train_speed(iter/s)": 1.533137 }, { "acc": 0.9802084, "epoch": 11.610030466369814, "grad_norm": 6.811304092407227, "learning_rate": 9.119145793139602e-06, "loss": 0.11457912, "memory(GiB)": 13.7, "step": 24770, "train_speed(iter/s)": 1.533131 }, { "acc": 0.97273979, "epoch": 11.61237403327865, "grad_norm": 5.864699840545654, "learning_rate": 9.118706361059013e-06, "loss": 0.1659005, "memory(GiB)": 13.7, "step": 24775, "train_speed(iter/s)": 1.533164 }, { "acc": 0.97331848, "epoch": 11.614717600187486, "grad_norm": 8.4959716796875, "learning_rate": 9.118266829990272e-06, "loss": 0.06406897, "memory(GiB)": 13.7, "step": 24780, "train_speed(iter/s)": 1.533161 }, { "acc": 0.98819447, "epoch": 11.61706116709632, "grad_norm": 0.8023852109909058, "learning_rate": 9.117827199943943e-06, "loss": 0.03742919, "memory(GiB)": 13.7, "step": 24785, "train_speed(iter/s)": 1.533159 }, { "acc": 0.95234137, "epoch": 11.619404734005156, "grad_norm": 8.489280700683594, "learning_rate": 9.117387470930598e-06, "loss": 0.23898675, "memory(GiB)": 13.7, "step": 24790, "train_speed(iter/s)": 1.533152 }, { "acc": 0.97348413, "epoch": 11.62174830091399, "grad_norm": 5.5379743576049805, "learning_rate": 9.1169476429608e-06, "loss": 0.10796468, "memory(GiB)": 13.7, "step": 24795, "train_speed(iter/s)": 1.533152 }, { "acc": 0.96874456, "epoch": 11.624091867822827, "grad_norm": 3.5191071033477783, "learning_rate": 9.116507716045127e-06, "loss": 0.13226972, "memory(GiB)": 13.7, "step": 24800, "train_speed(iter/s)": 1.533172 }, { "acc": 0.98291664, "epoch": 11.626435434731661, "grad_norm": 4.986231327056885, "learning_rate": 9.116067690194149e-06, "loss": 0.12372935, "memory(GiB)": 13.7, "step": 24805, "train_speed(iter/s)": 1.533195 }, { "acc": 0.982125, "epoch": 11.628779001640497, "grad_norm": 3.0017752647399902, "learning_rate": 9.115627565418445e-06, "loss": 0.14772383, "memory(GiB)": 13.7, "step": 24810, "train_speed(iter/s)": 1.533196 }, { "acc": 0.9739584, "epoch": 11.631122568549332, "grad_norm": 6.68189001083374, "learning_rate": 9.115187341728593e-06, "loss": 0.11436888, "memory(GiB)": 13.7, "step": 24815, "train_speed(iter/s)": 1.533214 }, { "acc": 0.96824894, "epoch": 11.633466135458168, "grad_norm": 11.10175609588623, "learning_rate": 9.114747019135175e-06, "loss": 0.13939738, "memory(GiB)": 13.7, "step": 24820, "train_speed(iter/s)": 1.533216 }, { "acc": 0.97591343, "epoch": 11.635809702367002, "grad_norm": 8.069066047668457, "learning_rate": 9.114306597648778e-06, "loss": 0.12404196, "memory(GiB)": 13.7, "step": 24825, "train_speed(iter/s)": 1.533204 }, { "acc": 0.97422085, "epoch": 11.638153269275838, "grad_norm": 5.557889938354492, "learning_rate": 9.11386607727998e-06, "loss": 0.17488415, "memory(GiB)": 13.7, "step": 24830, "train_speed(iter/s)": 1.533205 }, { "acc": 0.97299709, "epoch": 11.640496836184672, "grad_norm": 6.334068775177002, "learning_rate": 9.113425458039378e-06, "loss": 0.12495538, "memory(GiB)": 13.7, "step": 24835, "train_speed(iter/s)": 1.533198 }, { "acc": 0.97213745, "epoch": 11.642840403093508, "grad_norm": 4.936373233795166, "learning_rate": 9.112984739937561e-06, "loss": 0.20553703, "memory(GiB)": 13.7, "step": 24840, "train_speed(iter/s)": 1.533216 }, { "acc": 0.97249489, "epoch": 11.645183970002343, "grad_norm": 18.550683975219727, "learning_rate": 9.11254392298512e-06, "loss": 0.1207788, "memory(GiB)": 13.7, "step": 24845, "train_speed(iter/s)": 1.533233 }, { "acc": 0.97458334, "epoch": 11.647527536911179, "grad_norm": 5.215298652648926, "learning_rate": 9.112103007192652e-06, "loss": 0.10895569, "memory(GiB)": 13.7, "step": 24850, "train_speed(iter/s)": 1.533231 }, { "acc": 0.9688488, "epoch": 11.649871103820015, "grad_norm": 5.4199724197387695, "learning_rate": 9.111661992570754e-06, "loss": 0.1229887, "memory(GiB)": 13.7, "step": 24855, "train_speed(iter/s)": 1.533249 }, { "acc": 0.95738115, "epoch": 11.65221467072885, "grad_norm": 5.020448684692383, "learning_rate": 9.111220879130032e-06, "loss": 0.30667238, "memory(GiB)": 13.7, "step": 24860, "train_speed(iter/s)": 1.533251 }, { "acc": 0.96620541, "epoch": 11.654558237637685, "grad_norm": 8.584565162658691, "learning_rate": 9.110779666881081e-06, "loss": 0.15859927, "memory(GiB)": 13.7, "step": 24865, "train_speed(iter/s)": 1.533274 }, { "acc": 0.9777072, "epoch": 11.65690180454652, "grad_norm": 7.686856269836426, "learning_rate": 9.110338355834511e-06, "loss": 0.16169279, "memory(GiB)": 13.7, "step": 24870, "train_speed(iter/s)": 1.533288 }, { "acc": 0.97529755, "epoch": 11.659245371455356, "grad_norm": 7.364481449127197, "learning_rate": 9.10989694600093e-06, "loss": 0.13234154, "memory(GiB)": 13.7, "step": 24875, "train_speed(iter/s)": 1.533295 }, { "acc": 0.97133617, "epoch": 11.66158893836419, "grad_norm": 4.667370319366455, "learning_rate": 9.109455437390948e-06, "loss": 0.13514966, "memory(GiB)": 13.7, "step": 24880, "train_speed(iter/s)": 1.533294 }, { "acc": 0.97479763, "epoch": 11.663932505273026, "grad_norm": 6.55984354019165, "learning_rate": 9.109013830015174e-06, "loss": 0.1114086, "memory(GiB)": 13.7, "step": 24885, "train_speed(iter/s)": 1.533305 }, { "acc": 0.98498564, "epoch": 11.66627607218186, "grad_norm": 2.754615306854248, "learning_rate": 9.108572123884225e-06, "loss": 0.04240058, "memory(GiB)": 13.7, "step": 24890, "train_speed(iter/s)": 1.533312 }, { "acc": 0.9774519, "epoch": 11.668619639090696, "grad_norm": 2.5913655757904053, "learning_rate": 9.108130319008719e-06, "loss": 0.0874231, "memory(GiB)": 13.7, "step": 24895, "train_speed(iter/s)": 1.533339 }, { "acc": 0.98249998, "epoch": 11.67096320599953, "grad_norm": 4.651152610778809, "learning_rate": 9.107688415399274e-06, "loss": 0.08114161, "memory(GiB)": 13.7, "step": 24900, "train_speed(iter/s)": 1.53334 }, { "acc": 0.97738094, "epoch": 11.673306772908367, "grad_norm": 8.243142127990723, "learning_rate": 9.107246413066514e-06, "loss": 0.11467049, "memory(GiB)": 13.7, "step": 24905, "train_speed(iter/s)": 1.533359 }, { "acc": 0.98611107, "epoch": 11.675650339817201, "grad_norm": 5.769146919250488, "learning_rate": 9.10680431202106e-06, "loss": 0.0805271, "memory(GiB)": 13.7, "step": 24910, "train_speed(iter/s)": 1.53337 }, { "acc": 0.97514877, "epoch": 11.677993906726037, "grad_norm": 8.285089492797852, "learning_rate": 9.106362112273542e-06, "loss": 0.1262358, "memory(GiB)": 13.7, "step": 24915, "train_speed(iter/s)": 1.533394 }, { "acc": 0.95772724, "epoch": 11.680337473634872, "grad_norm": 6.713459014892578, "learning_rate": 9.105919813834588e-06, "loss": 0.18371069, "memory(GiB)": 13.7, "step": 24920, "train_speed(iter/s)": 1.533399 }, { "acc": 0.9802289, "epoch": 11.682681040543708, "grad_norm": 3.9709973335266113, "learning_rate": 9.105477416714828e-06, "loss": 0.0961216, "memory(GiB)": 13.7, "step": 24925, "train_speed(iter/s)": 1.533418 }, { "acc": 0.98430557, "epoch": 11.685024607452544, "grad_norm": 6.213527679443359, "learning_rate": 9.105034920924896e-06, "loss": 0.06952356, "memory(GiB)": 13.7, "step": 24930, "train_speed(iter/s)": 1.533424 }, { "acc": 0.98313446, "epoch": 11.687368174361378, "grad_norm": 3.218836545944214, "learning_rate": 9.104592326475431e-06, "loss": 0.12668629, "memory(GiB)": 13.7, "step": 24935, "train_speed(iter/s)": 1.533422 }, { "acc": 0.97949409, "epoch": 11.689711741270214, "grad_norm": 2.0026426315307617, "learning_rate": 9.10414963337707e-06, "loss": 0.08126942, "memory(GiB)": 13.7, "step": 24940, "train_speed(iter/s)": 1.533412 }, { "acc": 0.97173424, "epoch": 11.692055308179048, "grad_norm": 8.68415641784668, "learning_rate": 9.103706841640451e-06, "loss": 0.15884082, "memory(GiB)": 13.7, "step": 24945, "train_speed(iter/s)": 1.533423 }, { "acc": 0.96311016, "epoch": 11.694398875087884, "grad_norm": 18.578914642333984, "learning_rate": 9.10326395127622e-06, "loss": 0.26829815, "memory(GiB)": 13.7, "step": 24950, "train_speed(iter/s)": 1.533417 }, { "acc": 0.98469038, "epoch": 11.696742441996719, "grad_norm": 6.298919200897217, "learning_rate": 9.10282096229502e-06, "loss": 0.10281146, "memory(GiB)": 13.7, "step": 24955, "train_speed(iter/s)": 1.533418 }, { "acc": 0.96378784, "epoch": 11.699086008905555, "grad_norm": 14.17284107208252, "learning_rate": 9.102377874707506e-06, "loss": 0.18135129, "memory(GiB)": 13.7, "step": 24960, "train_speed(iter/s)": 1.533424 }, { "acc": 0.96960363, "epoch": 11.70142957581439, "grad_norm": 7.499939918518066, "learning_rate": 9.101934688524322e-06, "loss": 0.16326149, "memory(GiB)": 13.7, "step": 24965, "train_speed(iter/s)": 1.533426 }, { "acc": 0.98200455, "epoch": 11.703773142723225, "grad_norm": 0.04296122118830681, "learning_rate": 9.101491403756121e-06, "loss": 0.0805542, "memory(GiB)": 13.7, "step": 24970, "train_speed(iter/s)": 1.533446 }, { "acc": 0.98869371, "epoch": 11.70611670963206, "grad_norm": 5.498157024383545, "learning_rate": 9.10104802041356e-06, "loss": 0.06526576, "memory(GiB)": 13.7, "step": 24975, "train_speed(iter/s)": 1.533457 }, { "acc": 0.98145828, "epoch": 11.708460276540896, "grad_norm": 2.5971693992614746, "learning_rate": 9.100604538507296e-06, "loss": 0.08462796, "memory(GiB)": 13.7, "step": 24980, "train_speed(iter/s)": 1.533469 }, { "acc": 0.98990536, "epoch": 11.71080384344973, "grad_norm": 3.6452860832214355, "learning_rate": 9.10016095804799e-06, "loss": 0.04265598, "memory(GiB)": 13.7, "step": 24985, "train_speed(iter/s)": 1.533481 }, { "acc": 0.99145298, "epoch": 11.713147410358566, "grad_norm": 5.416808128356934, "learning_rate": 9.099717279046302e-06, "loss": 0.04459395, "memory(GiB)": 13.7, "step": 24990, "train_speed(iter/s)": 1.533494 }, { "acc": 0.97309036, "epoch": 11.7154909772674, "grad_norm": 2.6794395446777344, "learning_rate": 9.099273501512896e-06, "loss": 0.13906665, "memory(GiB)": 13.7, "step": 24995, "train_speed(iter/s)": 1.533509 }, { "acc": 0.98347588, "epoch": 11.717834544176236, "grad_norm": 7.753404140472412, "learning_rate": 9.098829625458443e-06, "loss": 0.08633875, "memory(GiB)": 13.7, "step": 25000, "train_speed(iter/s)": 1.533516 }, { "acc": 0.95967255, "epoch": 11.72017811108507, "grad_norm": 7.79307746887207, "learning_rate": 9.098385650893612e-06, "loss": 0.11822891, "memory(GiB)": 13.7, "step": 25005, "train_speed(iter/s)": 1.533529 }, { "acc": 0.97892323, "epoch": 11.722521677993907, "grad_norm": 2.9788293838500977, "learning_rate": 9.097941577829068e-06, "loss": 0.14706302, "memory(GiB)": 13.7, "step": 25010, "train_speed(iter/s)": 1.533521 }, { "acc": 0.97146645, "epoch": 11.724865244902741, "grad_norm": 14.339010238647461, "learning_rate": 9.097497406275491e-06, "loss": 0.16162181, "memory(GiB)": 13.7, "step": 25015, "train_speed(iter/s)": 1.533533 }, { "acc": 0.99473705, "epoch": 11.727208811811577, "grad_norm": 1.939380407333374, "learning_rate": 9.097053136243557e-06, "loss": 0.05143307, "memory(GiB)": 13.7, "step": 25020, "train_speed(iter/s)": 1.533537 }, { "acc": 0.97988091, "epoch": 11.729552378720413, "grad_norm": 0.1480841338634491, "learning_rate": 9.096608767743942e-06, "loss": 0.08116664, "memory(GiB)": 13.7, "step": 25025, "train_speed(iter/s)": 1.533544 }, { "acc": 0.97392197, "epoch": 11.731895945629248, "grad_norm": 4.039504528045654, "learning_rate": 9.09616430078733e-06, "loss": 0.13857999, "memory(GiB)": 13.7, "step": 25030, "train_speed(iter/s)": 1.53356 }, { "acc": 0.96758661, "epoch": 11.734239512538084, "grad_norm": 1.1185457706451416, "learning_rate": 9.095719735384403e-06, "loss": 0.1752784, "memory(GiB)": 13.7, "step": 25035, "train_speed(iter/s)": 1.533583 }, { "acc": 0.98030643, "epoch": 11.736583079446918, "grad_norm": 7.753818035125732, "learning_rate": 9.095275071545848e-06, "loss": 0.086993, "memory(GiB)": 13.7, "step": 25040, "train_speed(iter/s)": 1.533597 }, { "acc": 0.96598663, "epoch": 11.738926646355754, "grad_norm": 3.0862698554992676, "learning_rate": 9.094830309282353e-06, "loss": 0.15369053, "memory(GiB)": 13.7, "step": 25045, "train_speed(iter/s)": 1.533618 }, { "acc": 0.9749486, "epoch": 11.741270213264588, "grad_norm": 4.919770240783691, "learning_rate": 9.09438544860461e-06, "loss": 0.12987633, "memory(GiB)": 13.7, "step": 25050, "train_speed(iter/s)": 1.533614 }, { "acc": 0.98099098, "epoch": 11.743613780173424, "grad_norm": 10.191405296325684, "learning_rate": 9.093940489523306e-06, "loss": 0.08798079, "memory(GiB)": 13.7, "step": 25055, "train_speed(iter/s)": 1.533612 }, { "acc": 0.95747833, "epoch": 11.745957347082259, "grad_norm": 9.473947525024414, "learning_rate": 9.093495432049143e-06, "loss": 0.21764944, "memory(GiB)": 13.7, "step": 25060, "train_speed(iter/s)": 1.533623 }, { "acc": 0.97553024, "epoch": 11.748300913991095, "grad_norm": 3.6310856342315674, "learning_rate": 9.093050276192815e-06, "loss": 0.11927042, "memory(GiB)": 13.7, "step": 25065, "train_speed(iter/s)": 1.533642 }, { "acc": 0.97999458, "epoch": 11.750644480899929, "grad_norm": 7.952926158905029, "learning_rate": 9.092605021965027e-06, "loss": 0.08704523, "memory(GiB)": 13.7, "step": 25070, "train_speed(iter/s)": 1.533639 }, { "acc": 0.97605238, "epoch": 11.752988047808765, "grad_norm": 2.928619623184204, "learning_rate": 9.092159669376474e-06, "loss": 0.0906452, "memory(GiB)": 13.7, "step": 25075, "train_speed(iter/s)": 1.533638 }, { "acc": 0.98296137, "epoch": 11.7553316147176, "grad_norm": 7.083773612976074, "learning_rate": 9.091714218437865e-06, "loss": 0.12468284, "memory(GiB)": 13.7, "step": 25080, "train_speed(iter/s)": 1.533647 }, { "acc": 0.98420391, "epoch": 11.757675181626436, "grad_norm": 10.170042037963867, "learning_rate": 9.091268669159909e-06, "loss": 0.09746878, "memory(GiB)": 13.7, "step": 25085, "train_speed(iter/s)": 1.533661 }, { "acc": 0.98556547, "epoch": 11.76001874853527, "grad_norm": 7.13151216506958, "learning_rate": 9.090823021553311e-06, "loss": 0.05533783, "memory(GiB)": 13.7, "step": 25090, "train_speed(iter/s)": 1.533669 }, { "acc": 0.96913376, "epoch": 11.762362315444106, "grad_norm": 4.04970121383667, "learning_rate": 9.090377275628789e-06, "loss": 0.15642083, "memory(GiB)": 13.7, "step": 25095, "train_speed(iter/s)": 1.533677 }, { "acc": 0.98259192, "epoch": 11.764705882352942, "grad_norm": 6.10127592086792, "learning_rate": 9.08993143139705e-06, "loss": 0.12087591, "memory(GiB)": 13.7, "step": 25100, "train_speed(iter/s)": 1.533688 }, { "acc": 0.98562498, "epoch": 11.767049449261776, "grad_norm": 0.6267812252044678, "learning_rate": 9.089485488868815e-06, "loss": 0.06705425, "memory(GiB)": 13.7, "step": 25105, "train_speed(iter/s)": 1.533705 }, { "acc": 0.97620049, "epoch": 11.769393016170612, "grad_norm": 5.409492015838623, "learning_rate": 9.089039448054804e-06, "loss": 0.07144051, "memory(GiB)": 13.7, "step": 25110, "train_speed(iter/s)": 1.533709 }, { "acc": 0.96853619, "epoch": 11.771736583079447, "grad_norm": 9.274787902832031, "learning_rate": 9.088593308965736e-06, "loss": 0.17542729, "memory(GiB)": 13.7, "step": 25115, "train_speed(iter/s)": 1.533711 }, { "acc": 0.97039146, "epoch": 11.774080149988283, "grad_norm": 8.219557762145996, "learning_rate": 9.088147071612332e-06, "loss": 0.13012049, "memory(GiB)": 13.7, "step": 25120, "train_speed(iter/s)": 1.533715 }, { "acc": 0.97565479, "epoch": 11.776423716897117, "grad_norm": 6.126839637756348, "learning_rate": 9.087700736005327e-06, "loss": 0.0692101, "memory(GiB)": 13.7, "step": 25125, "train_speed(iter/s)": 1.533715 }, { "acc": 0.98670635, "epoch": 11.778767283805953, "grad_norm": 10.507952690124512, "learning_rate": 9.087254302155439e-06, "loss": 0.08304166, "memory(GiB)": 13.7, "step": 25130, "train_speed(iter/s)": 1.533712 }, { "acc": 0.9697917, "epoch": 11.781110850714787, "grad_norm": 8.213665962219238, "learning_rate": 9.086807770073406e-06, "loss": 0.11201921, "memory(GiB)": 13.7, "step": 25135, "train_speed(iter/s)": 1.533712 }, { "acc": 0.98627605, "epoch": 11.783454417623624, "grad_norm": 6.356650352478027, "learning_rate": 9.086361139769959e-06, "loss": 0.053675, "memory(GiB)": 13.7, "step": 25140, "train_speed(iter/s)": 1.533739 }, { "acc": 0.96784029, "epoch": 11.785797984532458, "grad_norm": 7.760589599609375, "learning_rate": 9.085914411255831e-06, "loss": 0.17647991, "memory(GiB)": 13.7, "step": 25145, "train_speed(iter/s)": 1.533735 }, { "acc": 0.9885416, "epoch": 11.788141551441294, "grad_norm": 6.613426208496094, "learning_rate": 9.085467584541765e-06, "loss": 0.04551045, "memory(GiB)": 13.7, "step": 25150, "train_speed(iter/s)": 1.533756 }, { "acc": 0.9764616, "epoch": 11.790485118350128, "grad_norm": 6.105714797973633, "learning_rate": 9.085020659638497e-06, "loss": 0.087592, "memory(GiB)": 13.7, "step": 25155, "train_speed(iter/s)": 1.533758 }, { "acc": 0.9837595, "epoch": 11.792828685258964, "grad_norm": 5.214649200439453, "learning_rate": 9.08457363655677e-06, "loss": 0.07575386, "memory(GiB)": 13.7, "step": 25160, "train_speed(iter/s)": 1.533773 }, { "acc": 0.98051319, "epoch": 11.795172252167799, "grad_norm": 1.5910131931304932, "learning_rate": 9.084126515307333e-06, "loss": 0.11275849, "memory(GiB)": 13.7, "step": 25165, "train_speed(iter/s)": 1.533789 }, { "acc": 0.97619267, "epoch": 11.797515819076635, "grad_norm": 8.116960525512695, "learning_rate": 9.083679295900926e-06, "loss": 0.08455794, "memory(GiB)": 13.7, "step": 25170, "train_speed(iter/s)": 1.53379 }, { "acc": 0.97332401, "epoch": 11.79985938598547, "grad_norm": 7.109637260437012, "learning_rate": 9.083231978348305e-06, "loss": 0.16154115, "memory(GiB)": 13.7, "step": 25175, "train_speed(iter/s)": 1.533778 }, { "acc": 0.96966343, "epoch": 11.802202952894305, "grad_norm": 5.740055084228516, "learning_rate": 9.082784562660223e-06, "loss": 0.1456182, "memory(GiB)": 13.7, "step": 25180, "train_speed(iter/s)": 1.533758 }, { "acc": 0.9822916, "epoch": 11.804546519803141, "grad_norm": 9.405651092529297, "learning_rate": 9.082337048847427e-06, "loss": 0.04413535, "memory(GiB)": 13.7, "step": 25185, "train_speed(iter/s)": 1.533772 }, { "acc": 0.97929926, "epoch": 11.806890086711975, "grad_norm": 4.405102729797363, "learning_rate": 9.08188943692068e-06, "loss": 0.11249175, "memory(GiB)": 13.7, "step": 25190, "train_speed(iter/s)": 1.533779 }, { "acc": 0.98173075, "epoch": 11.809233653620812, "grad_norm": 4.621903419494629, "learning_rate": 9.081441726890739e-06, "loss": 0.13460243, "memory(GiB)": 13.7, "step": 25195, "train_speed(iter/s)": 1.533778 }, { "acc": 0.9678319, "epoch": 11.811577220529646, "grad_norm": 6.4467363357543945, "learning_rate": 9.080993918768367e-06, "loss": 0.18447063, "memory(GiB)": 13.7, "step": 25200, "train_speed(iter/s)": 1.533793 }, { "acc": 0.96330929, "epoch": 11.813920787438482, "grad_norm": 6.997300624847412, "learning_rate": 9.080546012564326e-06, "loss": 0.16126411, "memory(GiB)": 13.7, "step": 25205, "train_speed(iter/s)": 1.533806 }, { "acc": 0.97153854, "epoch": 11.816264354347316, "grad_norm": 26.973478317260742, "learning_rate": 9.080098008289384e-06, "loss": 0.1781057, "memory(GiB)": 13.7, "step": 25210, "train_speed(iter/s)": 1.533799 }, { "acc": 0.97895832, "epoch": 11.818607921256152, "grad_norm": 50.24534225463867, "learning_rate": 9.079649905954307e-06, "loss": 0.09012542, "memory(GiB)": 13.7, "step": 25215, "train_speed(iter/s)": 1.533804 }, { "acc": 0.96847591, "epoch": 11.820951488164987, "grad_norm": 10.93319034576416, "learning_rate": 9.079201705569867e-06, "loss": 0.18804679, "memory(GiB)": 13.7, "step": 25220, "train_speed(iter/s)": 1.53381 }, { "acc": 0.98891945, "epoch": 11.823295055073823, "grad_norm": 0.25777575373649597, "learning_rate": 9.078753407146837e-06, "loss": 0.05848562, "memory(GiB)": 13.7, "step": 25225, "train_speed(iter/s)": 1.533817 }, { "acc": 0.98385353, "epoch": 11.825638621982657, "grad_norm": 6.205893516540527, "learning_rate": 9.078305010695994e-06, "loss": 0.06437702, "memory(GiB)": 13.7, "step": 25230, "train_speed(iter/s)": 1.533833 }, { "acc": 0.9872921, "epoch": 11.827982188891493, "grad_norm": 0.7848390340805054, "learning_rate": 9.077856516228115e-06, "loss": 0.12434142, "memory(GiB)": 13.7, "step": 25235, "train_speed(iter/s)": 1.533855 }, { "acc": 0.98612614, "epoch": 11.830325755800327, "grad_norm": 3.972076177597046, "learning_rate": 9.077407923753983e-06, "loss": 0.0666029, "memory(GiB)": 13.7, "step": 25240, "train_speed(iter/s)": 1.533874 }, { "acc": 0.94395828, "epoch": 11.832669322709163, "grad_norm": 11.517460823059082, "learning_rate": 9.076959233284376e-06, "loss": 0.34014561, "memory(GiB)": 13.7, "step": 25245, "train_speed(iter/s)": 1.533881 }, { "acc": 0.98420048, "epoch": 11.835012889617998, "grad_norm": 3.4025967121124268, "learning_rate": 9.07651044483008e-06, "loss": 0.07540556, "memory(GiB)": 13.7, "step": 25250, "train_speed(iter/s)": 1.533898 }, { "acc": 0.98133583, "epoch": 11.837356456526834, "grad_norm": 9.235127449035645, "learning_rate": 9.076061558401886e-06, "loss": 0.11137216, "memory(GiB)": 13.7, "step": 25255, "train_speed(iter/s)": 1.533903 }, { "acc": 0.97327328, "epoch": 11.839700023435668, "grad_norm": 7.935184001922607, "learning_rate": 9.075612574010579e-06, "loss": 0.11076946, "memory(GiB)": 13.7, "step": 25260, "train_speed(iter/s)": 1.533914 }, { "acc": 0.95558033, "epoch": 11.842043590344504, "grad_norm": 0.5686464309692383, "learning_rate": 9.075163491666957e-06, "loss": 0.20860519, "memory(GiB)": 13.7, "step": 25265, "train_speed(iter/s)": 1.533927 }, { "acc": 0.97051582, "epoch": 11.84438715725334, "grad_norm": 9.447507858276367, "learning_rate": 9.074714311381808e-06, "loss": 0.12699451, "memory(GiB)": 13.7, "step": 25270, "train_speed(iter/s)": 1.533928 }, { "acc": 0.97257643, "epoch": 11.846730724162175, "grad_norm": 13.778693199157715, "learning_rate": 9.074265033165934e-06, "loss": 0.21444037, "memory(GiB)": 13.7, "step": 25275, "train_speed(iter/s)": 1.533929 }, { "acc": 0.98417616, "epoch": 11.84907429107101, "grad_norm": 8.206382751464844, "learning_rate": 9.073815657030129e-06, "loss": 0.0776712, "memory(GiB)": 13.7, "step": 25280, "train_speed(iter/s)": 1.53395 }, { "acc": 0.98259926, "epoch": 11.851417857979845, "grad_norm": 6.073086738586426, "learning_rate": 9.073366182985199e-06, "loss": 0.0775292, "memory(GiB)": 13.7, "step": 25285, "train_speed(iter/s)": 1.533944 }, { "acc": 0.94250755, "epoch": 11.853761424888681, "grad_norm": 2.9251651763916016, "learning_rate": 9.072916611041946e-06, "loss": 0.23565454, "memory(GiB)": 13.7, "step": 25290, "train_speed(iter/s)": 1.533946 }, { "acc": 0.9682291, "epoch": 11.856104991797515, "grad_norm": 6.05254602432251, "learning_rate": 9.072466941211177e-06, "loss": 0.16518555, "memory(GiB)": 13.7, "step": 25295, "train_speed(iter/s)": 1.533943 }, { "acc": 0.97004719, "epoch": 11.858448558706352, "grad_norm": 6.019381999969482, "learning_rate": 9.0720171735037e-06, "loss": 0.16781877, "memory(GiB)": 13.7, "step": 25300, "train_speed(iter/s)": 1.533956 }, { "acc": 0.9614584, "epoch": 11.860792125615186, "grad_norm": 7.769115924835205, "learning_rate": 9.071567307930325e-06, "loss": 0.16483579, "memory(GiB)": 13.7, "step": 25305, "train_speed(iter/s)": 1.533964 }, { "acc": 0.97866573, "epoch": 11.863135692524022, "grad_norm": 6.167100429534912, "learning_rate": 9.07111734450187e-06, "loss": 0.11140201, "memory(GiB)": 13.7, "step": 25310, "train_speed(iter/s)": 1.533961 }, { "acc": 0.97392368, "epoch": 11.865479259432856, "grad_norm": 0.13798321783542633, "learning_rate": 9.070667283229145e-06, "loss": 0.11021709, "memory(GiB)": 13.7, "step": 25315, "train_speed(iter/s)": 1.533968 }, { "acc": 0.98321428, "epoch": 11.867822826341692, "grad_norm": 4.078976154327393, "learning_rate": 9.07021712412297e-06, "loss": 0.04988713, "memory(GiB)": 13.7, "step": 25320, "train_speed(iter/s)": 1.533958 }, { "acc": 0.97967262, "epoch": 11.870166393250527, "grad_norm": 6.098888397216797, "learning_rate": 9.069766867194166e-06, "loss": 0.13420153, "memory(GiB)": 13.7, "step": 25325, "train_speed(iter/s)": 1.53396 }, { "acc": 0.98170147, "epoch": 11.872509960159363, "grad_norm": 3.109738826751709, "learning_rate": 9.069316512453553e-06, "loss": 0.12137556, "memory(GiB)": 13.7, "step": 25330, "train_speed(iter/s)": 1.533979 }, { "acc": 0.99541664, "epoch": 11.874853527068197, "grad_norm": 0.9374977946281433, "learning_rate": 9.068866059911961e-06, "loss": 0.03242468, "memory(GiB)": 13.7, "step": 25335, "train_speed(iter/s)": 1.533987 }, { "acc": 0.98729496, "epoch": 11.877197093977033, "grad_norm": 1.5267231464385986, "learning_rate": 9.068415509580212e-06, "loss": 0.04565951, "memory(GiB)": 13.7, "step": 25340, "train_speed(iter/s)": 1.533986 }, { "acc": 0.97859631, "epoch": 11.87954066088587, "grad_norm": 5.428760528564453, "learning_rate": 9.067964861469138e-06, "loss": 0.12212188, "memory(GiB)": 13.7, "step": 25345, "train_speed(iter/s)": 1.533986 }, { "acc": 0.97593765, "epoch": 11.881884227794703, "grad_norm": 3.2133095264434814, "learning_rate": 9.067514115589573e-06, "loss": 0.08073206, "memory(GiB)": 13.7, "step": 25350, "train_speed(iter/s)": 1.533999 }, { "acc": 0.9796875, "epoch": 11.88422779470354, "grad_norm": 6.665071487426758, "learning_rate": 9.06706327195235e-06, "loss": 0.05880504, "memory(GiB)": 13.7, "step": 25355, "train_speed(iter/s)": 1.533999 }, { "acc": 0.97293024, "epoch": 11.886571361612374, "grad_norm": 1.2569539546966553, "learning_rate": 9.066612330568306e-06, "loss": 0.09241156, "memory(GiB)": 13.7, "step": 25360, "train_speed(iter/s)": 1.534015 }, { "acc": 0.98595276, "epoch": 11.88891492852121, "grad_norm": 4.831905841827393, "learning_rate": 9.066161291448279e-06, "loss": 0.0584707, "memory(GiB)": 13.7, "step": 25365, "train_speed(iter/s)": 1.534015 }, { "acc": 0.97357216, "epoch": 11.891258495430044, "grad_norm": 8.637060165405273, "learning_rate": 9.06571015460311e-06, "loss": 0.13832333, "memory(GiB)": 13.7, "step": 25370, "train_speed(iter/s)": 1.534026 }, { "acc": 0.97905169, "epoch": 11.89360206233888, "grad_norm": 4.96700382232666, "learning_rate": 9.065258920043646e-06, "loss": 0.09151782, "memory(GiB)": 13.7, "step": 25375, "train_speed(iter/s)": 1.534028 }, { "acc": 0.9760417, "epoch": 11.895945629247715, "grad_norm": 1.871992826461792, "learning_rate": 9.064807587780732e-06, "loss": 0.11909595, "memory(GiB)": 13.7, "step": 25380, "train_speed(iter/s)": 1.534025 }, { "acc": 0.97626152, "epoch": 11.89828919615655, "grad_norm": 6.35767126083374, "learning_rate": 9.064356157825213e-06, "loss": 0.14061583, "memory(GiB)": 13.7, "step": 25385, "train_speed(iter/s)": 1.534026 }, { "acc": 0.9895834, "epoch": 11.900632763065385, "grad_norm": 5.887215614318848, "learning_rate": 9.063904630187945e-06, "loss": 0.05740195, "memory(GiB)": 13.7, "step": 25390, "train_speed(iter/s)": 1.534047 }, { "acc": 0.99122028, "epoch": 11.902976329974221, "grad_norm": 0.09703551232814789, "learning_rate": 9.063453004879779e-06, "loss": 0.0690613, "memory(GiB)": 13.7, "step": 25395, "train_speed(iter/s)": 1.534064 }, { "acc": 0.97917233, "epoch": 11.905319896883055, "grad_norm": 3.2014670372009277, "learning_rate": 9.06300128191157e-06, "loss": 0.09239043, "memory(GiB)": 13.7, "step": 25400, "train_speed(iter/s)": 1.534062 }, { "acc": 0.96614323, "epoch": 11.907663463791891, "grad_norm": 9.731806755065918, "learning_rate": 9.062549461294176e-06, "loss": 0.16384404, "memory(GiB)": 13.7, "step": 25405, "train_speed(iter/s)": 1.53406 }, { "acc": 0.98017368, "epoch": 11.910007030700726, "grad_norm": 5.794807434082031, "learning_rate": 9.062097543038458e-06, "loss": 0.07454385, "memory(GiB)": 13.7, "step": 25410, "train_speed(iter/s)": 1.534071 }, { "acc": 0.9822917, "epoch": 11.912350597609562, "grad_norm": 3.147106170654297, "learning_rate": 9.061645527155278e-06, "loss": 0.0883711, "memory(GiB)": 13.7, "step": 25415, "train_speed(iter/s)": 1.53408 }, { "acc": 0.95371847, "epoch": 11.914694164518398, "grad_norm": 31.502077102661133, "learning_rate": 9.061193413655502e-06, "loss": 0.26038342, "memory(GiB)": 13.7, "step": 25420, "train_speed(iter/s)": 1.534091 }, { "acc": 0.9556181, "epoch": 11.917037731427232, "grad_norm": 16.605567932128906, "learning_rate": 9.060741202549998e-06, "loss": 0.2787394, "memory(GiB)": 13.7, "step": 25425, "train_speed(iter/s)": 1.53408 }, { "acc": 0.98644371, "epoch": 11.919381298336068, "grad_norm": 3.1319046020507812, "learning_rate": 9.060288893849633e-06, "loss": 0.03614456, "memory(GiB)": 13.7, "step": 25430, "train_speed(iter/s)": 1.534089 }, { "acc": 0.98041134, "epoch": 11.921724865244903, "grad_norm": 11.720925331115723, "learning_rate": 9.059836487565281e-06, "loss": 0.10248739, "memory(GiB)": 13.7, "step": 25435, "train_speed(iter/s)": 1.534106 }, { "acc": 0.98421087, "epoch": 11.924068432153739, "grad_norm": 5.068703651428223, "learning_rate": 9.059383983707816e-06, "loss": 0.09105053, "memory(GiB)": 13.7, "step": 25440, "train_speed(iter/s)": 1.53411 }, { "acc": 0.96022186, "epoch": 11.926411999062573, "grad_norm": 11.808943748474121, "learning_rate": 9.058931382288113e-06, "loss": 0.26412826, "memory(GiB)": 13.7, "step": 25445, "train_speed(iter/s)": 1.534117 }, { "acc": 0.96714287, "epoch": 11.928755565971409, "grad_norm": 8.551003456115723, "learning_rate": 9.058478683317055e-06, "loss": 0.1906669, "memory(GiB)": 13.7, "step": 25450, "train_speed(iter/s)": 1.534141 }, { "acc": 0.96572304, "epoch": 11.931099132880243, "grad_norm": 4.620264530181885, "learning_rate": 9.05802588680552e-06, "loss": 0.12651081, "memory(GiB)": 13.7, "step": 25455, "train_speed(iter/s)": 1.534147 }, { "acc": 0.97754087, "epoch": 11.93344269978908, "grad_norm": 5.397618293762207, "learning_rate": 9.057572992764391e-06, "loss": 0.10604548, "memory(GiB)": 13.7, "step": 25460, "train_speed(iter/s)": 1.534155 }, { "acc": 0.97279758, "epoch": 11.935786266697914, "grad_norm": 6.263644695281982, "learning_rate": 9.057120001204559e-06, "loss": 0.09373192, "memory(GiB)": 13.7, "step": 25465, "train_speed(iter/s)": 1.53416 }, { "acc": 0.97261486, "epoch": 11.93812983360675, "grad_norm": 3.515407085418701, "learning_rate": 9.056666912136907e-06, "loss": 0.12771893, "memory(GiB)": 13.7, "step": 25470, "train_speed(iter/s)": 1.534166 }, { "acc": 0.96672821, "epoch": 11.940473400515584, "grad_norm": 8.4296293258667, "learning_rate": 9.056213725572328e-06, "loss": 0.17594423, "memory(GiB)": 13.7, "step": 25475, "train_speed(iter/s)": 1.534166 }, { "acc": 0.98731155, "epoch": 11.94281696742442, "grad_norm": 0.420470654964447, "learning_rate": 9.055760441521715e-06, "loss": 0.08713928, "memory(GiB)": 13.7, "step": 25480, "train_speed(iter/s)": 1.53418 }, { "acc": 0.9802496, "epoch": 11.945160534333255, "grad_norm": 1.7395154237747192, "learning_rate": 9.055307059995964e-06, "loss": 0.0924035, "memory(GiB)": 13.7, "step": 25485, "train_speed(iter/s)": 1.534196 }, { "acc": 0.96933298, "epoch": 11.94750410124209, "grad_norm": 5.598074436187744, "learning_rate": 9.054853581005972e-06, "loss": 0.14287807, "memory(GiB)": 13.7, "step": 25490, "train_speed(iter/s)": 1.534196 }, { "acc": 0.97944937, "epoch": 11.949847668150925, "grad_norm": 3.474489450454712, "learning_rate": 9.054400004562639e-06, "loss": 0.15251329, "memory(GiB)": 13.7, "step": 25495, "train_speed(iter/s)": 1.534198 }, { "acc": 0.983922, "epoch": 11.952191235059761, "grad_norm": 6.415441036224365, "learning_rate": 9.053946330676869e-06, "loss": 0.09053289, "memory(GiB)": 13.7, "step": 25500, "train_speed(iter/s)": 1.534196 }, { "acc": 0.9797267, "epoch": 11.954534801968595, "grad_norm": 8.225078582763672, "learning_rate": 9.053492559359566e-06, "loss": 0.08549104, "memory(GiB)": 13.7, "step": 25505, "train_speed(iter/s)": 1.53421 }, { "acc": 0.98214283, "epoch": 11.956878368877431, "grad_norm": 0.05760885402560234, "learning_rate": 9.053038690621636e-06, "loss": 0.16563271, "memory(GiB)": 13.7, "step": 25510, "train_speed(iter/s)": 1.534204 }, { "acc": 0.978125, "epoch": 11.959221935786267, "grad_norm": 6.9405107498168945, "learning_rate": 9.05258472447399e-06, "loss": 0.11252291, "memory(GiB)": 13.7, "step": 25515, "train_speed(iter/s)": 1.534215 }, { "acc": 0.96680555, "epoch": 11.961565502695102, "grad_norm": 30.671537399291992, "learning_rate": 9.05213066092754e-06, "loss": 0.1526952, "memory(GiB)": 13.7, "step": 25520, "train_speed(iter/s)": 1.534222 }, { "acc": 0.97562504, "epoch": 11.963909069603938, "grad_norm": 11.22166919708252, "learning_rate": 9.051676499993199e-06, "loss": 0.09830759, "memory(GiB)": 13.7, "step": 25525, "train_speed(iter/s)": 1.534227 }, { "acc": 0.98380947, "epoch": 11.966252636512772, "grad_norm": 4.166868686676025, "learning_rate": 9.051222241681882e-06, "loss": 0.09806408, "memory(GiB)": 13.7, "step": 25530, "train_speed(iter/s)": 1.534251 }, { "acc": 0.98455353, "epoch": 11.968596203421608, "grad_norm": 12.216221809387207, "learning_rate": 9.050767886004513e-06, "loss": 0.05900486, "memory(GiB)": 13.7, "step": 25535, "train_speed(iter/s)": 1.534263 }, { "acc": 0.9645834, "epoch": 11.970939770330443, "grad_norm": 0.056748613715171814, "learning_rate": 9.050313432972008e-06, "loss": 0.0742552, "memory(GiB)": 13.7, "step": 25540, "train_speed(iter/s)": 1.534283 }, { "acc": 0.96551361, "epoch": 11.973283337239279, "grad_norm": 20.380220413208008, "learning_rate": 9.049858882595295e-06, "loss": 0.18706692, "memory(GiB)": 13.7, "step": 25545, "train_speed(iter/s)": 1.534287 }, { "acc": 0.98722477, "epoch": 11.975626904148113, "grad_norm": 0.09024346619844437, "learning_rate": 9.049404234885297e-06, "loss": 0.04456908, "memory(GiB)": 13.7, "step": 25550, "train_speed(iter/s)": 1.534283 }, { "acc": 0.98206844, "epoch": 11.977970471056949, "grad_norm": 3.944629669189453, "learning_rate": 9.048949489852941e-06, "loss": 0.12263968, "memory(GiB)": 13.7, "step": 25555, "train_speed(iter/s)": 1.534292 }, { "acc": 0.970854, "epoch": 11.980314037965783, "grad_norm": 10.93019962310791, "learning_rate": 9.048494647509163e-06, "loss": 0.13962625, "memory(GiB)": 13.7, "step": 25560, "train_speed(iter/s)": 1.534307 }, { "acc": 0.96710224, "epoch": 11.98265760487462, "grad_norm": 4.911665916442871, "learning_rate": 9.048039707864889e-06, "loss": 0.15061364, "memory(GiB)": 13.7, "step": 25565, "train_speed(iter/s)": 1.534305 }, { "acc": 0.96433983, "epoch": 11.985001171783454, "grad_norm": 7.464668273925781, "learning_rate": 9.04758467093106e-06, "loss": 0.16460333, "memory(GiB)": 13.7, "step": 25570, "train_speed(iter/s)": 1.534307 }, { "acc": 0.98386364, "epoch": 11.98734473869229, "grad_norm": 1.1647206544876099, "learning_rate": 9.04712953671861e-06, "loss": 0.06706952, "memory(GiB)": 13.7, "step": 25575, "train_speed(iter/s)": 1.534298 }, { "acc": 0.97988691, "epoch": 11.989688305601124, "grad_norm": 5.48682165145874, "learning_rate": 9.04667430523848e-06, "loss": 0.12933146, "memory(GiB)": 13.7, "step": 25580, "train_speed(iter/s)": 1.53432 }, { "acc": 0.98729172, "epoch": 11.99203187250996, "grad_norm": 4.744586944580078, "learning_rate": 9.046218976501612e-06, "loss": 0.06613306, "memory(GiB)": 13.7, "step": 25585, "train_speed(iter/s)": 1.534325 }, { "acc": 0.96386356, "epoch": 11.994375439418796, "grad_norm": 6.608508110046387, "learning_rate": 9.045763550518952e-06, "loss": 0.13578277, "memory(GiB)": 13.7, "step": 25590, "train_speed(iter/s)": 1.534337 }, { "acc": 0.96349316, "epoch": 11.99671900632763, "grad_norm": 7.2425079345703125, "learning_rate": 9.045308027301446e-06, "loss": 0.2052386, "memory(GiB)": 13.7, "step": 25595, "train_speed(iter/s)": 1.534336 }, { "acc": 0.96583328, "epoch": 11.999062573236467, "grad_norm": 7.2295732498168945, "learning_rate": 9.044852406860042e-06, "loss": 0.13181283, "memory(GiB)": 13.7, "step": 25600, "train_speed(iter/s)": 1.53434 }, { "acc": 0.99083338, "epoch": 12.001406140145301, "grad_norm": 1.4201359748840332, "learning_rate": 9.044396689205695e-06, "loss": 0.04124345, "memory(GiB)": 13.7, "step": 25605, "train_speed(iter/s)": 1.534299 }, { "acc": 0.97577333, "epoch": 12.003749707054137, "grad_norm": 3.556270122528076, "learning_rate": 9.043940874349354e-06, "loss": 0.1902581, "memory(GiB)": 13.7, "step": 25610, "train_speed(iter/s)": 1.534302 }, { "acc": 0.98486109, "epoch": 12.006093273962971, "grad_norm": 4.836005687713623, "learning_rate": 9.04348496230198e-06, "loss": 0.10617545, "memory(GiB)": 13.7, "step": 25615, "train_speed(iter/s)": 1.534289 }, { "acc": 0.9728323, "epoch": 12.008436840871807, "grad_norm": 3.002880096435547, "learning_rate": 9.043028953074531e-06, "loss": 0.16685886, "memory(GiB)": 13.7, "step": 25620, "train_speed(iter/s)": 1.534305 }, { "acc": 0.96304884, "epoch": 12.010780407780642, "grad_norm": 2.229436159133911, "learning_rate": 9.042572846677963e-06, "loss": 0.14507982, "memory(GiB)": 13.7, "step": 25625, "train_speed(iter/s)": 1.534313 }, { "acc": 0.98500004, "epoch": 12.013123974689478, "grad_norm": 6.483191013336182, "learning_rate": 9.042116643123245e-06, "loss": 0.11348224, "memory(GiB)": 13.7, "step": 25630, "train_speed(iter/s)": 1.534307 }, { "acc": 0.95479164, "epoch": 12.015467541598312, "grad_norm": 8.857966423034668, "learning_rate": 9.041660342421342e-06, "loss": 0.25082965, "memory(GiB)": 13.7, "step": 25635, "train_speed(iter/s)": 1.534318 }, { "acc": 0.97786465, "epoch": 12.017811108507148, "grad_norm": 7.880929946899414, "learning_rate": 9.04120394458322e-06, "loss": 0.16188304, "memory(GiB)": 13.7, "step": 25640, "train_speed(iter/s)": 1.534318 }, { "acc": 0.97742958, "epoch": 12.020154675415982, "grad_norm": 3.3460798263549805, "learning_rate": 9.040747449619849e-06, "loss": 0.07208354, "memory(GiB)": 13.7, "step": 25645, "train_speed(iter/s)": 1.534319 }, { "acc": 0.97566051, "epoch": 12.022498242324819, "grad_norm": 6.267284870147705, "learning_rate": 9.040290857542202e-06, "loss": 0.10553925, "memory(GiB)": 13.7, "step": 25650, "train_speed(iter/s)": 1.534318 }, { "acc": 0.97943459, "epoch": 12.024841809233653, "grad_norm": 3.6445600986480713, "learning_rate": 9.039834168361257e-06, "loss": 0.13206453, "memory(GiB)": 13.7, "step": 25655, "train_speed(iter/s)": 1.534343 }, { "acc": 0.9840148, "epoch": 12.027185376142489, "grad_norm": 2.378622531890869, "learning_rate": 9.039377382087986e-06, "loss": 0.08449039, "memory(GiB)": 13.7, "step": 25660, "train_speed(iter/s)": 1.53436 }, { "acc": 0.97860126, "epoch": 12.029528943051323, "grad_norm": 4.2469563484191895, "learning_rate": 9.038920498733373e-06, "loss": 0.12260571, "memory(GiB)": 13.7, "step": 25665, "train_speed(iter/s)": 1.534358 }, { "acc": 0.96937494, "epoch": 12.03187250996016, "grad_norm": 3.225856304168701, "learning_rate": 9.038463518308399e-06, "loss": 0.11175058, "memory(GiB)": 13.7, "step": 25670, "train_speed(iter/s)": 1.534354 }, { "acc": 0.97976189, "epoch": 12.034216076868995, "grad_norm": 58.51896667480469, "learning_rate": 9.038006440824046e-06, "loss": 0.11917222, "memory(GiB)": 13.7, "step": 25675, "train_speed(iter/s)": 1.53436 }, { "acc": 0.9792079, "epoch": 12.03655964377783, "grad_norm": 3.032122850418091, "learning_rate": 9.037549266291302e-06, "loss": 0.14490778, "memory(GiB)": 13.7, "step": 25680, "train_speed(iter/s)": 1.534348 }, { "acc": 0.9791667, "epoch": 12.038903210686666, "grad_norm": 4.112100124359131, "learning_rate": 9.037091994721157e-06, "loss": 0.09448979, "memory(GiB)": 13.7, "step": 25685, "train_speed(iter/s)": 1.53435 }, { "acc": 0.99162464, "epoch": 12.0412467775955, "grad_norm": 0.265238493680954, "learning_rate": 9.036634626124604e-06, "loss": 0.06984922, "memory(GiB)": 13.7, "step": 25690, "train_speed(iter/s)": 1.534347 }, { "acc": 0.97557545, "epoch": 12.043590344504336, "grad_norm": 2.8168704509735107, "learning_rate": 9.036177160512634e-06, "loss": 0.12288809, "memory(GiB)": 13.7, "step": 25695, "train_speed(iter/s)": 1.534353 }, { "acc": 0.98166084, "epoch": 12.04593391141317, "grad_norm": 4.701152801513672, "learning_rate": 9.03571959789624e-06, "loss": 0.15618479, "memory(GiB)": 13.7, "step": 25700, "train_speed(iter/s)": 1.534384 }, { "acc": 0.98279228, "epoch": 12.048277478322007, "grad_norm": 4.442796230316162, "learning_rate": 9.035261938286425e-06, "loss": 0.067646, "memory(GiB)": 13.7, "step": 25705, "train_speed(iter/s)": 1.534396 }, { "acc": 0.96913185, "epoch": 12.05062104523084, "grad_norm": 3.650491952896118, "learning_rate": 9.03480418169419e-06, "loss": 0.08843224, "memory(GiB)": 13.7, "step": 25710, "train_speed(iter/s)": 1.534403 }, { "acc": 0.98372021, "epoch": 12.052964612139677, "grad_norm": 2.3829376697540283, "learning_rate": 9.034346328130535e-06, "loss": 0.09256451, "memory(GiB)": 13.7, "step": 25715, "train_speed(iter/s)": 1.534407 }, { "acc": 0.97334824, "epoch": 12.055308179048511, "grad_norm": 6.140851974487305, "learning_rate": 9.033888377606465e-06, "loss": 0.1126138, "memory(GiB)": 13.7, "step": 25720, "train_speed(iter/s)": 1.534427 }, { "acc": 0.97166672, "epoch": 12.057651745957347, "grad_norm": 4.527583122253418, "learning_rate": 9.033430330132991e-06, "loss": 0.11236734, "memory(GiB)": 13.7, "step": 25725, "train_speed(iter/s)": 1.534436 }, { "acc": 0.98075085, "epoch": 12.059995312866182, "grad_norm": 5.409265995025635, "learning_rate": 9.03297218572112e-06, "loss": 0.11522857, "memory(GiB)": 13.7, "step": 25730, "train_speed(iter/s)": 1.534444 }, { "acc": 0.97236614, "epoch": 12.062338879775018, "grad_norm": 8.378701210021973, "learning_rate": 9.032513944381867e-06, "loss": 0.20885439, "memory(GiB)": 13.7, "step": 25735, "train_speed(iter/s)": 1.534459 }, { "acc": 0.98819447, "epoch": 12.064682446683852, "grad_norm": 0.08036492764949799, "learning_rate": 9.032055606126243e-06, "loss": 0.06842313, "memory(GiB)": 13.7, "step": 25740, "train_speed(iter/s)": 1.534461 }, { "acc": 0.9703125, "epoch": 12.067026013592688, "grad_norm": 0.05208996683359146, "learning_rate": 9.03159717096527e-06, "loss": 0.11066277, "memory(GiB)": 13.7, "step": 25745, "train_speed(iter/s)": 1.534458 }, { "acc": 0.98893385, "epoch": 12.069369580501524, "grad_norm": 1.4130737781524658, "learning_rate": 9.031138638909962e-06, "loss": 0.07497684, "memory(GiB)": 13.7, "step": 25750, "train_speed(iter/s)": 1.534471 }, { "acc": 0.96852684, "epoch": 12.071713147410359, "grad_norm": 5.510608196258545, "learning_rate": 9.030680009971341e-06, "loss": 0.13797129, "memory(GiB)": 13.7, "step": 25755, "train_speed(iter/s)": 1.534495 }, { "acc": 0.99300594, "epoch": 12.074056714319195, "grad_norm": 5.046352386474609, "learning_rate": 9.030221284160436e-06, "loss": 0.05344657, "memory(GiB)": 13.7, "step": 25760, "train_speed(iter/s)": 1.534526 }, { "acc": 0.97383776, "epoch": 12.076400281228029, "grad_norm": 1.5285487174987793, "learning_rate": 9.029762461488268e-06, "loss": 0.08171941, "memory(GiB)": 13.7, "step": 25765, "train_speed(iter/s)": 1.534538 }, { "acc": 0.975, "epoch": 12.078743848136865, "grad_norm": 0.18999724090099335, "learning_rate": 9.02930354196587e-06, "loss": 0.12542636, "memory(GiB)": 13.7, "step": 25770, "train_speed(iter/s)": 1.534548 }, { "acc": 0.96613102, "epoch": 12.0810874150457, "grad_norm": 3.4094316959381104, "learning_rate": 9.028844525604269e-06, "loss": 0.12388203, "memory(GiB)": 13.7, "step": 25775, "train_speed(iter/s)": 1.534544 }, { "acc": 0.99548607, "epoch": 12.083430981954535, "grad_norm": 0.05507354810833931, "learning_rate": 9.028385412414496e-06, "loss": 0.05684329, "memory(GiB)": 13.7, "step": 25780, "train_speed(iter/s)": 1.534546 }, { "acc": 0.98233833, "epoch": 12.08577454886337, "grad_norm": 5.221128463745117, "learning_rate": 9.027926202407596e-06, "loss": 0.06346596, "memory(GiB)": 13.7, "step": 25785, "train_speed(iter/s)": 1.534547 }, { "acc": 0.98556061, "epoch": 12.088118115772206, "grad_norm": 1.2288905382156372, "learning_rate": 9.0274668955946e-06, "loss": 0.08922757, "memory(GiB)": 13.7, "step": 25790, "train_speed(iter/s)": 1.534556 }, { "acc": 0.96102715, "epoch": 12.09046168268104, "grad_norm": 9.283556938171387, "learning_rate": 9.027007491986546e-06, "loss": 0.15377957, "memory(GiB)": 13.7, "step": 25795, "train_speed(iter/s)": 1.534576 }, { "acc": 0.96933537, "epoch": 12.092805249589876, "grad_norm": 17.06757164001465, "learning_rate": 9.026547991594482e-06, "loss": 0.20175931, "memory(GiB)": 13.7, "step": 25800, "train_speed(iter/s)": 1.534569 }, { "acc": 0.97086315, "epoch": 12.09514881649871, "grad_norm": 4.556125164031982, "learning_rate": 9.026088394429452e-06, "loss": 0.11811824, "memory(GiB)": 13.7, "step": 25805, "train_speed(iter/s)": 1.534568 }, { "acc": 0.96442032, "epoch": 12.097492383407547, "grad_norm": 5.386300563812256, "learning_rate": 9.0256287005025e-06, "loss": 0.24407778, "memory(GiB)": 13.7, "step": 25810, "train_speed(iter/s)": 1.534584 }, { "acc": 0.98316994, "epoch": 12.09983595031638, "grad_norm": 0.41137951612472534, "learning_rate": 9.025168909824679e-06, "loss": 0.07915763, "memory(GiB)": 13.7, "step": 25815, "train_speed(iter/s)": 1.534602 }, { "acc": 0.98270836, "epoch": 12.102179517225217, "grad_norm": 4.593562602996826, "learning_rate": 9.024709022407042e-06, "loss": 0.05774271, "memory(GiB)": 13.7, "step": 25820, "train_speed(iter/s)": 1.534596 }, { "acc": 0.98145838, "epoch": 12.104523084134051, "grad_norm": 3.1487181186676025, "learning_rate": 9.024249038260636e-06, "loss": 0.07603027, "memory(GiB)": 13.7, "step": 25825, "train_speed(iter/s)": 1.534605 }, { "acc": 0.97166662, "epoch": 12.106866651042887, "grad_norm": 2.9133405685424805, "learning_rate": 9.023788957396526e-06, "loss": 0.06901999, "memory(GiB)": 13.7, "step": 25830, "train_speed(iter/s)": 1.534616 }, { "acc": 0.97958336, "epoch": 12.109210217951723, "grad_norm": 3.292846202850342, "learning_rate": 9.023328779825764e-06, "loss": 0.08781015, "memory(GiB)": 13.7, "step": 25835, "train_speed(iter/s)": 1.53463 }, { "acc": 0.96395836, "epoch": 12.111553784860558, "grad_norm": 10.06739330291748, "learning_rate": 9.022868505559416e-06, "loss": 0.23438292, "memory(GiB)": 13.7, "step": 25840, "train_speed(iter/s)": 1.534653 }, { "acc": 1.0, "epoch": 12.113897351769394, "grad_norm": 0.0958700105547905, "learning_rate": 9.022408134608542e-06, "loss": 0.01481978, "memory(GiB)": 13.7, "step": 25845, "train_speed(iter/s)": 1.53465 }, { "acc": 0.98142853, "epoch": 12.116240918678228, "grad_norm": 6.195145130157471, "learning_rate": 9.021947666984211e-06, "loss": 0.05908296, "memory(GiB)": 13.7, "step": 25850, "train_speed(iter/s)": 1.534666 }, { "acc": 0.96859207, "epoch": 12.118584485587064, "grad_norm": 12.920597076416016, "learning_rate": 9.021487102697488e-06, "loss": 0.19124343, "memory(GiB)": 13.7, "step": 25855, "train_speed(iter/s)": 1.53467 }, { "acc": 0.98715286, "epoch": 12.120928052495898, "grad_norm": 3.659125566482544, "learning_rate": 9.021026441759447e-06, "loss": 0.05461308, "memory(GiB)": 13.7, "step": 25860, "train_speed(iter/s)": 1.534686 }, { "acc": 0.97766371, "epoch": 12.123271619404735, "grad_norm": 13.999773979187012, "learning_rate": 9.020565684181157e-06, "loss": 0.14634418, "memory(GiB)": 13.7, "step": 25865, "train_speed(iter/s)": 1.534706 }, { "acc": 0.9496726, "epoch": 12.125615186313569, "grad_norm": 7.005476951599121, "learning_rate": 9.020104829973696e-06, "loss": 0.22920656, "memory(GiB)": 13.7, "step": 25870, "train_speed(iter/s)": 1.534704 }, { "acc": 0.9718421, "epoch": 12.127958753222405, "grad_norm": 5.239405632019043, "learning_rate": 9.01964387914814e-06, "loss": 0.10610863, "memory(GiB)": 13.7, "step": 25875, "train_speed(iter/s)": 1.534726 }, { "acc": 0.97620754, "epoch": 12.13030232013124, "grad_norm": 9.33222484588623, "learning_rate": 9.019182831715568e-06, "loss": 0.08366182, "memory(GiB)": 13.7, "step": 25880, "train_speed(iter/s)": 1.534749 }, { "acc": 0.97787704, "epoch": 12.132645887040075, "grad_norm": 3.4111874103546143, "learning_rate": 9.018721687687066e-06, "loss": 0.12086433, "memory(GiB)": 13.7, "step": 25885, "train_speed(iter/s)": 1.534758 }, { "acc": 0.97349205, "epoch": 12.13498945394891, "grad_norm": 32.444122314453125, "learning_rate": 9.018260447073714e-06, "loss": 0.13022619, "memory(GiB)": 13.7, "step": 25890, "train_speed(iter/s)": 1.534778 }, { "acc": 0.97897034, "epoch": 12.137333020857746, "grad_norm": 17.397314071655273, "learning_rate": 9.0177991098866e-06, "loss": 0.20132029, "memory(GiB)": 13.7, "step": 25895, "train_speed(iter/s)": 1.534783 }, { "acc": 0.98718214, "epoch": 12.13967658776658, "grad_norm": 7.413900852203369, "learning_rate": 9.017337676136812e-06, "loss": 0.08282378, "memory(GiB)": 13.7, "step": 25900, "train_speed(iter/s)": 1.534789 }, { "acc": 0.97107143, "epoch": 12.142020154675416, "grad_norm": 4.419926166534424, "learning_rate": 9.016876145835444e-06, "loss": 0.06743416, "memory(GiB)": 13.7, "step": 25905, "train_speed(iter/s)": 1.534787 }, { "acc": 0.97706852, "epoch": 12.14436372158425, "grad_norm": 9.312357902526855, "learning_rate": 9.016414518993588e-06, "loss": 0.11756499, "memory(GiB)": 13.7, "step": 25910, "train_speed(iter/s)": 1.534796 }, { "acc": 0.96828709, "epoch": 12.146707288493086, "grad_norm": 6.480887413024902, "learning_rate": 9.015952795622342e-06, "loss": 0.10609262, "memory(GiB)": 13.7, "step": 25915, "train_speed(iter/s)": 1.534801 }, { "acc": 0.98109798, "epoch": 12.149050855401923, "grad_norm": 3.924381732940674, "learning_rate": 9.0154909757328e-06, "loss": 0.06804846, "memory(GiB)": 13.7, "step": 25920, "train_speed(iter/s)": 1.534801 }, { "acc": 0.98393431, "epoch": 12.151394422310757, "grad_norm": 11.05126953125, "learning_rate": 9.015029059336066e-06, "loss": 0.07750785, "memory(GiB)": 13.7, "step": 25925, "train_speed(iter/s)": 1.534805 }, { "acc": 0.96547623, "epoch": 12.153737989219593, "grad_norm": 41.32037353515625, "learning_rate": 9.014567046443241e-06, "loss": 0.18299339, "memory(GiB)": 13.7, "step": 25930, "train_speed(iter/s)": 1.534808 }, { "acc": 0.97416325, "epoch": 12.156081556128427, "grad_norm": 10.530937194824219, "learning_rate": 9.014104937065434e-06, "loss": 0.12218904, "memory(GiB)": 13.7, "step": 25935, "train_speed(iter/s)": 1.534807 }, { "acc": 0.99093752, "epoch": 12.158425123037263, "grad_norm": 2.9847865104675293, "learning_rate": 9.013642731213748e-06, "loss": 0.0394752, "memory(GiB)": 13.7, "step": 25940, "train_speed(iter/s)": 1.534809 }, { "acc": 0.978125, "epoch": 12.160768689946098, "grad_norm": 5.426822662353516, "learning_rate": 9.013180428899296e-06, "loss": 0.07962118, "memory(GiB)": 13.7, "step": 25945, "train_speed(iter/s)": 1.534811 }, { "acc": 0.97605162, "epoch": 12.163112256854934, "grad_norm": 7.9849958419799805, "learning_rate": 9.012718030133189e-06, "loss": 0.13557351, "memory(GiB)": 13.7, "step": 25950, "train_speed(iter/s)": 1.534813 }, { "acc": 0.9780529, "epoch": 12.165455823763768, "grad_norm": 3.015460252761841, "learning_rate": 9.012255534926541e-06, "loss": 0.10712255, "memory(GiB)": 13.7, "step": 25955, "train_speed(iter/s)": 1.534816 }, { "acc": 0.9828125, "epoch": 12.167799390672604, "grad_norm": 1.8928606510162354, "learning_rate": 9.01179294329047e-06, "loss": 0.10623331, "memory(GiB)": 13.7, "step": 25960, "train_speed(iter/s)": 1.534815 }, { "acc": 0.97399817, "epoch": 12.170142957581438, "grad_norm": 0.013402075506746769, "learning_rate": 9.011330255236094e-06, "loss": 0.20522599, "memory(GiB)": 13.7, "step": 25965, "train_speed(iter/s)": 1.534826 }, { "acc": 0.97178574, "epoch": 12.172486524490274, "grad_norm": 9.894536018371582, "learning_rate": 9.010867470774537e-06, "loss": 0.18160819, "memory(GiB)": 13.7, "step": 25970, "train_speed(iter/s)": 1.534825 }, { "acc": 0.98098221, "epoch": 12.174830091399109, "grad_norm": 0.21834152936935425, "learning_rate": 9.01040458991692e-06, "loss": 0.07202327, "memory(GiB)": 13.7, "step": 25975, "train_speed(iter/s)": 1.534829 }, { "acc": 0.97966223, "epoch": 12.177173658307945, "grad_norm": 6.218373775482178, "learning_rate": 9.009941612674371e-06, "loss": 0.16122429, "memory(GiB)": 13.7, "step": 25980, "train_speed(iter/s)": 1.534822 }, { "acc": 0.98923607, "epoch": 12.17951722521678, "grad_norm": 2.0610740184783936, "learning_rate": 9.009478539058016e-06, "loss": 0.11646019, "memory(GiB)": 13.7, "step": 25985, "train_speed(iter/s)": 1.534827 }, { "acc": 0.96360836, "epoch": 12.181860792125615, "grad_norm": 2.8373866081237793, "learning_rate": 9.00901536907899e-06, "loss": 0.18565775, "memory(GiB)": 13.7, "step": 25990, "train_speed(iter/s)": 1.534824 }, { "acc": 0.97199812, "epoch": 12.18420435903445, "grad_norm": 5.95367431640625, "learning_rate": 9.008552102748423e-06, "loss": 0.12930549, "memory(GiB)": 13.7, "step": 25995, "train_speed(iter/s)": 1.53482 }, { "acc": 0.9707386, "epoch": 12.186547925943286, "grad_norm": 5.92476224899292, "learning_rate": 9.008088740077448e-06, "loss": 0.13867346, "memory(GiB)": 13.7, "step": 26000, "train_speed(iter/s)": 1.534825 }, { "acc": 0.97246284, "epoch": 12.188891492852122, "grad_norm": 0.12578389048576355, "learning_rate": 9.007625281077208e-06, "loss": 0.10336928, "memory(GiB)": 13.7, "step": 26005, "train_speed(iter/s)": 1.534822 }, { "acc": 0.97904215, "epoch": 12.191235059760956, "grad_norm": 2.4088473320007324, "learning_rate": 9.007161725758841e-06, "loss": 0.08603562, "memory(GiB)": 13.7, "step": 26010, "train_speed(iter/s)": 1.53484 }, { "acc": 0.97748508, "epoch": 12.193578626669792, "grad_norm": 7.645264625549316, "learning_rate": 9.006698074133488e-06, "loss": 0.13560979, "memory(GiB)": 13.7, "step": 26015, "train_speed(iter/s)": 1.534837 }, { "acc": 0.96265659, "epoch": 12.195922193578626, "grad_norm": 8.158978462219238, "learning_rate": 9.006234326212295e-06, "loss": 0.17725477, "memory(GiB)": 13.7, "step": 26020, "train_speed(iter/s)": 1.534849 }, { "acc": 0.98913193, "epoch": 12.198265760487462, "grad_norm": 3.4946343898773193, "learning_rate": 9.005770482006408e-06, "loss": 0.04327111, "memory(GiB)": 13.7, "step": 26025, "train_speed(iter/s)": 1.534853 }, { "acc": 0.9652422, "epoch": 12.200609327396297, "grad_norm": 4.050529479980469, "learning_rate": 9.005306541526979e-06, "loss": 0.13363035, "memory(GiB)": 13.7, "step": 26030, "train_speed(iter/s)": 1.534872 }, { "acc": 0.98125, "epoch": 12.202952894305133, "grad_norm": 5.677677631378174, "learning_rate": 9.004842504785157e-06, "loss": 0.04376923, "memory(GiB)": 13.7, "step": 26035, "train_speed(iter/s)": 1.534881 }, { "acc": 0.97344017, "epoch": 12.205296461213967, "grad_norm": 5.989318370819092, "learning_rate": 9.004378371792095e-06, "loss": 0.09374058, "memory(GiB)": 13.7, "step": 26040, "train_speed(iter/s)": 1.534881 }, { "acc": 0.98199368, "epoch": 12.207640028122803, "grad_norm": 3.335298538208008, "learning_rate": 9.00391414255895e-06, "loss": 0.06457593, "memory(GiB)": 13.7, "step": 26045, "train_speed(iter/s)": 1.534896 }, { "acc": 0.9875, "epoch": 12.209983595031638, "grad_norm": 0.7788805365562439, "learning_rate": 9.003449817096882e-06, "loss": 0.0386545, "memory(GiB)": 13.7, "step": 26050, "train_speed(iter/s)": 1.534907 }, { "acc": 0.97854166, "epoch": 12.212327161940474, "grad_norm": 8.184125900268555, "learning_rate": 9.002985395417052e-06, "loss": 0.10191016, "memory(GiB)": 13.7, "step": 26055, "train_speed(iter/s)": 1.534917 }, { "acc": 0.98187504, "epoch": 12.214670728849308, "grad_norm": 4.934392929077148, "learning_rate": 9.002520877530623e-06, "loss": 0.13558792, "memory(GiB)": 13.7, "step": 26060, "train_speed(iter/s)": 1.534933 }, { "acc": 0.98910713, "epoch": 12.217014295758144, "grad_norm": 1.6467163562774658, "learning_rate": 9.002056263448759e-06, "loss": 0.0827719, "memory(GiB)": 13.7, "step": 26065, "train_speed(iter/s)": 1.534943 }, { "acc": 0.97976189, "epoch": 12.219357862666978, "grad_norm": 2.509999990463257, "learning_rate": 9.001591553182628e-06, "loss": 0.1173921, "memory(GiB)": 13.7, "step": 26070, "train_speed(iter/s)": 1.534939 }, { "acc": 0.96467266, "epoch": 12.221701429575814, "grad_norm": 4.81809663772583, "learning_rate": 9.0011267467434e-06, "loss": 0.20038557, "memory(GiB)": 13.7, "step": 26075, "train_speed(iter/s)": 1.534962 }, { "acc": 0.98849211, "epoch": 12.22404499648465, "grad_norm": 14.608469009399414, "learning_rate": 9.00066184414225e-06, "loss": 0.03545113, "memory(GiB)": 13.7, "step": 26080, "train_speed(iter/s)": 1.534973 }, { "acc": 0.98070517, "epoch": 12.226388563393485, "grad_norm": 1.7626322507858276, "learning_rate": 9.00019684539035e-06, "loss": 0.0874714, "memory(GiB)": 13.7, "step": 26085, "train_speed(iter/s)": 1.535003 }, { "acc": 0.97231064, "epoch": 12.22873213030232, "grad_norm": 6.584887504577637, "learning_rate": 8.999731750498878e-06, "loss": 0.14900477, "memory(GiB)": 13.7, "step": 26090, "train_speed(iter/s)": 1.535002 }, { "acc": 0.96907864, "epoch": 12.231075697211155, "grad_norm": 8.05148696899414, "learning_rate": 8.999266559479013e-06, "loss": 0.18286927, "memory(GiB)": 13.7, "step": 26095, "train_speed(iter/s)": 1.53499 }, { "acc": 0.98013964, "epoch": 12.233419264119991, "grad_norm": 4.677858352661133, "learning_rate": 8.998801272341937e-06, "loss": 0.1032258, "memory(GiB)": 13.7, "step": 26100, "train_speed(iter/s)": 1.534997 }, { "acc": 0.98476639, "epoch": 12.235762831028826, "grad_norm": 1.1763166189193726, "learning_rate": 8.998335889098833e-06, "loss": 0.09172174, "memory(GiB)": 13.7, "step": 26105, "train_speed(iter/s)": 1.534999 }, { "acc": 0.99256401, "epoch": 12.238106397937662, "grad_norm": 0.5738611221313477, "learning_rate": 8.997870409760888e-06, "loss": 0.0579741, "memory(GiB)": 13.7, "step": 26110, "train_speed(iter/s)": 1.535 }, { "acc": 0.97770834, "epoch": 12.240449964846496, "grad_norm": 6.170032501220703, "learning_rate": 8.997404834339293e-06, "loss": 0.0649523, "memory(GiB)": 13.7, "step": 26115, "train_speed(iter/s)": 1.535007 }, { "acc": 0.99068184, "epoch": 12.242793531755332, "grad_norm": 1.8088815212249756, "learning_rate": 8.996939162845234e-06, "loss": 0.02214862, "memory(GiB)": 13.7, "step": 26120, "train_speed(iter/s)": 1.535017 }, { "acc": 0.96494045, "epoch": 12.245137098664166, "grad_norm": 8.07553482055664, "learning_rate": 8.99647339528991e-06, "loss": 0.15634567, "memory(GiB)": 13.7, "step": 26125, "train_speed(iter/s)": 1.535029 }, { "acc": 0.97739887, "epoch": 12.247480665573002, "grad_norm": 5.246571063995361, "learning_rate": 8.99600753168451e-06, "loss": 0.17895176, "memory(GiB)": 13.7, "step": 26130, "train_speed(iter/s)": 1.53504 }, { "acc": 0.97124996, "epoch": 12.249824232481837, "grad_norm": 4.877968788146973, "learning_rate": 8.995541572040236e-06, "loss": 0.16634623, "memory(GiB)": 13.7, "step": 26135, "train_speed(iter/s)": 1.535051 }, { "acc": 0.97738094, "epoch": 12.252167799390673, "grad_norm": 3.676482677459717, "learning_rate": 8.99507551636829e-06, "loss": 0.082379, "memory(GiB)": 13.7, "step": 26140, "train_speed(iter/s)": 1.535087 }, { "acc": 0.98241482, "epoch": 12.254511366299507, "grad_norm": 3.7402303218841553, "learning_rate": 8.994609364679868e-06, "loss": 0.16702161, "memory(GiB)": 13.7, "step": 26145, "train_speed(iter/s)": 1.535101 }, { "acc": 0.95218258, "epoch": 12.256854933208343, "grad_norm": 11.545868873596191, "learning_rate": 8.99414311698618e-06, "loss": 0.33874154, "memory(GiB)": 13.7, "step": 26150, "train_speed(iter/s)": 1.535102 }, { "acc": 0.98072376, "epoch": 12.259198500117177, "grad_norm": 5.840695381164551, "learning_rate": 8.993676773298432e-06, "loss": 0.12575495, "memory(GiB)": 13.7, "step": 26155, "train_speed(iter/s)": 1.535104 }, { "acc": 0.98779755, "epoch": 12.261542067026014, "grad_norm": 12.04178237915039, "learning_rate": 8.993210333627833e-06, "loss": 0.0254685, "memory(GiB)": 13.7, "step": 26160, "train_speed(iter/s)": 1.535101 }, { "acc": 0.98822851, "epoch": 12.26388563393485, "grad_norm": 2.680171012878418, "learning_rate": 8.992743797985595e-06, "loss": 0.03643688, "memory(GiB)": 13.7, "step": 26165, "train_speed(iter/s)": 1.535103 }, { "acc": 0.98861609, "epoch": 12.266229200843684, "grad_norm": 2.7119429111480713, "learning_rate": 8.992277166382932e-06, "loss": 0.07966933, "memory(GiB)": 13.7, "step": 26170, "train_speed(iter/s)": 1.535099 }, { "acc": 0.97734203, "epoch": 12.26857276775252, "grad_norm": 5.262099742889404, "learning_rate": 8.991810438831056e-06, "loss": 0.12136803, "memory(GiB)": 13.7, "step": 26175, "train_speed(iter/s)": 1.535099 }, { "acc": 0.978125, "epoch": 12.270916334661354, "grad_norm": 7.642766952514648, "learning_rate": 8.991343615341195e-06, "loss": 0.10706161, "memory(GiB)": 13.7, "step": 26180, "train_speed(iter/s)": 1.535106 }, { "acc": 0.99292002, "epoch": 12.27325990157019, "grad_norm": 2.7274880409240723, "learning_rate": 8.990876695924561e-06, "loss": 0.03166052, "memory(GiB)": 13.7, "step": 26185, "train_speed(iter/s)": 1.535107 }, { "acc": 0.9705718, "epoch": 12.275603468479025, "grad_norm": 7.51829719543457, "learning_rate": 8.990409680592382e-06, "loss": 0.14344473, "memory(GiB)": 13.7, "step": 26190, "train_speed(iter/s)": 1.535122 }, { "acc": 0.95503969, "epoch": 12.27794703538786, "grad_norm": 11.014634132385254, "learning_rate": 8.989942569355882e-06, "loss": 0.2416398, "memory(GiB)": 13.7, "step": 26195, "train_speed(iter/s)": 1.535118 }, { "acc": 0.96049747, "epoch": 12.280290602296695, "grad_norm": 7.736076354980469, "learning_rate": 8.989475362226289e-06, "loss": 0.14818845, "memory(GiB)": 13.7, "step": 26200, "train_speed(iter/s)": 1.535123 }, { "acc": 0.97216339, "epoch": 12.282634169205531, "grad_norm": 21.045427322387695, "learning_rate": 8.989008059214832e-06, "loss": 0.16330608, "memory(GiB)": 13.7, "step": 26205, "train_speed(iter/s)": 1.535127 }, { "acc": 0.97404222, "epoch": 12.284977736114365, "grad_norm": 10.630866050720215, "learning_rate": 8.988540660332743e-06, "loss": 0.15701814, "memory(GiB)": 13.7, "step": 26210, "train_speed(iter/s)": 1.535127 }, { "acc": 0.97730293, "epoch": 12.287321303023202, "grad_norm": 3.2797677516937256, "learning_rate": 8.988073165591264e-06, "loss": 0.11047951, "memory(GiB)": 13.7, "step": 26215, "train_speed(iter/s)": 1.535127 }, { "acc": 0.99365025, "epoch": 12.289664869932036, "grad_norm": 6.203852653503418, "learning_rate": 8.987605575001622e-06, "loss": 0.04906285, "memory(GiB)": 13.7, "step": 26220, "train_speed(iter/s)": 1.535144 }, { "acc": 0.97540178, "epoch": 12.292008436840872, "grad_norm": 3.7409327030181885, "learning_rate": 8.987137888575062e-06, "loss": 0.12342255, "memory(GiB)": 13.7, "step": 26225, "train_speed(iter/s)": 1.535153 }, { "acc": 0.97445183, "epoch": 12.294352003749706, "grad_norm": 52.6985969543457, "learning_rate": 8.986670106322824e-06, "loss": 0.18757932, "memory(GiB)": 13.7, "step": 26230, "train_speed(iter/s)": 1.53516 }, { "acc": 0.96947918, "epoch": 12.296695570658542, "grad_norm": 4.017569065093994, "learning_rate": 8.986202228256151e-06, "loss": 0.10311117, "memory(GiB)": 13.7, "step": 26235, "train_speed(iter/s)": 1.535162 }, { "acc": 0.96205359, "epoch": 12.299039137567377, "grad_norm": 9.106222152709961, "learning_rate": 8.985734254386292e-06, "loss": 0.17239356, "memory(GiB)": 13.7, "step": 26240, "train_speed(iter/s)": 1.535178 }, { "acc": 0.98729162, "epoch": 12.301382704476213, "grad_norm": 7.117280006408691, "learning_rate": 8.985266184724493e-06, "loss": 0.07299343, "memory(GiB)": 13.7, "step": 26245, "train_speed(iter/s)": 1.535186 }, { "acc": 0.98395939, "epoch": 12.303726271385049, "grad_norm": 6.482752323150635, "learning_rate": 8.984798019282006e-06, "loss": 0.07425611, "memory(GiB)": 13.7, "step": 26250, "train_speed(iter/s)": 1.535198 }, { "acc": 0.98709679, "epoch": 12.306069838293883, "grad_norm": 2.6830248832702637, "learning_rate": 8.984329758070085e-06, "loss": 0.06725605, "memory(GiB)": 13.7, "step": 26255, "train_speed(iter/s)": 1.535196 }, { "acc": 0.9727747, "epoch": 12.30841340520272, "grad_norm": 25.04064178466797, "learning_rate": 8.983861401099984e-06, "loss": 0.14297776, "memory(GiB)": 13.7, "step": 26260, "train_speed(iter/s)": 1.535202 }, { "acc": 0.96591988, "epoch": 12.310756972111554, "grad_norm": 4.588369369506836, "learning_rate": 8.983392948382961e-06, "loss": 0.23885984, "memory(GiB)": 13.7, "step": 26265, "train_speed(iter/s)": 1.535224 }, { "acc": 0.9671875, "epoch": 12.31310053902039, "grad_norm": 6.28444242477417, "learning_rate": 8.982924399930278e-06, "loss": 0.20214212, "memory(GiB)": 13.7, "step": 26270, "train_speed(iter/s)": 1.535238 }, { "acc": 0.97354164, "epoch": 12.315444105929224, "grad_norm": 8.03614616394043, "learning_rate": 8.982455755753193e-06, "loss": 0.09204533, "memory(GiB)": 13.7, "step": 26275, "train_speed(iter/s)": 1.535252 }, { "acc": 0.9828125, "epoch": 12.31778767283806, "grad_norm": 3.0256285667419434, "learning_rate": 8.981987015862975e-06, "loss": 0.07435106, "memory(GiB)": 13.7, "step": 26280, "train_speed(iter/s)": 1.535283 }, { "acc": 0.96697922, "epoch": 12.320131239746894, "grad_norm": 11.85147762298584, "learning_rate": 8.98151818027089e-06, "loss": 0.18134341, "memory(GiB)": 13.7, "step": 26285, "train_speed(iter/s)": 1.535294 }, { "acc": 0.97554951, "epoch": 12.32247480665573, "grad_norm": 6.517391204833984, "learning_rate": 8.981049248988205e-06, "loss": 0.15120786, "memory(GiB)": 13.7, "step": 26290, "train_speed(iter/s)": 1.5353 }, { "acc": 0.9763195, "epoch": 12.324818373564565, "grad_norm": 2.0278875827789307, "learning_rate": 8.980580222026194e-06, "loss": 0.09467481, "memory(GiB)": 13.7, "step": 26295, "train_speed(iter/s)": 1.535315 }, { "acc": 0.96363087, "epoch": 12.3271619404734, "grad_norm": 6.8806471824646, "learning_rate": 8.980111099396129e-06, "loss": 0.222594, "memory(GiB)": 13.7, "step": 26300, "train_speed(iter/s)": 1.535318 }, { "acc": 0.96798611, "epoch": 12.329505507382235, "grad_norm": 6.361279487609863, "learning_rate": 8.97964188110929e-06, "loss": 0.15288107, "memory(GiB)": 13.7, "step": 26305, "train_speed(iter/s)": 1.535316 }, { "acc": 0.97842264, "epoch": 12.331849074291071, "grad_norm": 7.0582051277160645, "learning_rate": 8.97917256717695e-06, "loss": 0.13059825, "memory(GiB)": 13.7, "step": 26310, "train_speed(iter/s)": 1.535326 }, { "acc": 0.97510662, "epoch": 12.334192641199905, "grad_norm": 6.906073570251465, "learning_rate": 8.978703157610393e-06, "loss": 0.18279459, "memory(GiB)": 13.7, "step": 26315, "train_speed(iter/s)": 1.535327 }, { "acc": 0.98125, "epoch": 12.336536208108742, "grad_norm": 4.42567777633667, "learning_rate": 8.978233652420902e-06, "loss": 0.07006167, "memory(GiB)": 13.7, "step": 26320, "train_speed(iter/s)": 1.535321 }, { "acc": 0.98665209, "epoch": 12.338879775017578, "grad_norm": 1.7962018251419067, "learning_rate": 8.97776405161976e-06, "loss": 0.07131741, "memory(GiB)": 13.7, "step": 26325, "train_speed(iter/s)": 1.535335 }, { "acc": 0.98142853, "epoch": 12.341223341926412, "grad_norm": 5.628791332244873, "learning_rate": 8.977294355218259e-06, "loss": 0.10122002, "memory(GiB)": 13.7, "step": 26330, "train_speed(iter/s)": 1.53534 }, { "acc": 0.98145828, "epoch": 12.343566908835248, "grad_norm": 4.1011552810668945, "learning_rate": 8.976824563227685e-06, "loss": 0.12536592, "memory(GiB)": 13.7, "step": 26335, "train_speed(iter/s)": 1.535333 }, { "acc": 0.97175598, "epoch": 12.345910475744082, "grad_norm": 7.592284202575684, "learning_rate": 8.976354675659332e-06, "loss": 0.13646522, "memory(GiB)": 13.7, "step": 26340, "train_speed(iter/s)": 1.53534 }, { "acc": 0.97878189, "epoch": 12.348254042652918, "grad_norm": 7.246296405792236, "learning_rate": 8.975884692524496e-06, "loss": 0.09213679, "memory(GiB)": 13.7, "step": 26345, "train_speed(iter/s)": 1.535341 }, { "acc": 0.9932292, "epoch": 12.350597609561753, "grad_norm": 1.4927209615707397, "learning_rate": 8.97541461383447e-06, "loss": 0.05859648, "memory(GiB)": 13.7, "step": 26350, "train_speed(iter/s)": 1.535342 }, { "acc": 0.97056656, "epoch": 12.352941176470589, "grad_norm": 7.065094470977783, "learning_rate": 8.974944439600555e-06, "loss": 0.14927036, "memory(GiB)": 13.7, "step": 26355, "train_speed(iter/s)": 1.535347 }, { "acc": 0.98017044, "epoch": 12.355284743379423, "grad_norm": 6.233232021331787, "learning_rate": 8.974474169834054e-06, "loss": 0.10597153, "memory(GiB)": 13.7, "step": 26360, "train_speed(iter/s)": 1.535349 }, { "acc": 0.97205353, "epoch": 12.35762831028826, "grad_norm": 2.6621243953704834, "learning_rate": 8.97400380454627e-06, "loss": 0.09748083, "memory(GiB)": 13.7, "step": 26365, "train_speed(iter/s)": 1.535351 }, { "acc": 0.990625, "epoch": 12.359971877197093, "grad_norm": 0.41595005989074707, "learning_rate": 8.973533343748508e-06, "loss": 0.10456241, "memory(GiB)": 13.7, "step": 26370, "train_speed(iter/s)": 1.535368 }, { "acc": 0.9634531, "epoch": 12.36231544410593, "grad_norm": 3.1836178302764893, "learning_rate": 8.973062787452075e-06, "loss": 0.22961369, "memory(GiB)": 13.7, "step": 26375, "train_speed(iter/s)": 1.535388 }, { "acc": 0.97696438, "epoch": 12.364659011014764, "grad_norm": 4.31479549407959, "learning_rate": 8.972592135668286e-06, "loss": 0.05433769, "memory(GiB)": 13.7, "step": 26380, "train_speed(iter/s)": 1.5354 }, { "acc": 0.98054523, "epoch": 12.3670025779236, "grad_norm": 3.4948973655700684, "learning_rate": 8.97212138840845e-06, "loss": 0.10262408, "memory(GiB)": 13.7, "step": 26385, "train_speed(iter/s)": 1.535419 }, { "acc": 0.96101904, "epoch": 12.369346144832434, "grad_norm": 3.204841375350952, "learning_rate": 8.971650545683884e-06, "loss": 0.15056158, "memory(GiB)": 13.7, "step": 26390, "train_speed(iter/s)": 1.535433 }, { "acc": 0.97689762, "epoch": 12.37168971174127, "grad_norm": 9.318603515625, "learning_rate": 8.971179607505905e-06, "loss": 0.17031679, "memory(GiB)": 13.7, "step": 26395, "train_speed(iter/s)": 1.53544 }, { "acc": 0.96676464, "epoch": 12.374033278650105, "grad_norm": 4.136138916015625, "learning_rate": 8.970708573885834e-06, "loss": 0.16928192, "memory(GiB)": 13.7, "step": 26400, "train_speed(iter/s)": 1.535445 }, { "acc": 0.96211987, "epoch": 12.37637684555894, "grad_norm": 2.22536301612854, "learning_rate": 8.970237444834992e-06, "loss": 0.11527507, "memory(GiB)": 13.7, "step": 26405, "train_speed(iter/s)": 1.535452 }, { "acc": 0.97490253, "epoch": 12.378720412467777, "grad_norm": 10.621857643127441, "learning_rate": 8.969766220364702e-06, "loss": 0.07849753, "memory(GiB)": 13.7, "step": 26410, "train_speed(iter/s)": 1.535464 }, { "acc": 0.98130207, "epoch": 12.381063979376611, "grad_norm": 3.822216272354126, "learning_rate": 8.969294900486292e-06, "loss": 0.12747431, "memory(GiB)": 13.7, "step": 26415, "train_speed(iter/s)": 1.535483 }, { "acc": 0.98922081, "epoch": 12.383407546285447, "grad_norm": 4.170030117034912, "learning_rate": 8.968823485211092e-06, "loss": 0.06281035, "memory(GiB)": 13.7, "step": 26420, "train_speed(iter/s)": 1.535492 }, { "acc": 0.9846221, "epoch": 12.385751113194281, "grad_norm": 3.239780902862549, "learning_rate": 8.968351974550432e-06, "loss": 0.0681503, "memory(GiB)": 13.7, "step": 26425, "train_speed(iter/s)": 1.535506 }, { "acc": 0.97296429, "epoch": 12.388094680103118, "grad_norm": 2.032463312149048, "learning_rate": 8.967880368515647e-06, "loss": 0.20736418, "memory(GiB)": 13.7, "step": 26430, "train_speed(iter/s)": 1.53551 }, { "acc": 0.96572304, "epoch": 12.390438247011952, "grad_norm": 5.484834671020508, "learning_rate": 8.967408667118069e-06, "loss": 0.12133131, "memory(GiB)": 13.7, "step": 26435, "train_speed(iter/s)": 1.535506 }, { "acc": 0.98052492, "epoch": 12.392781813920788, "grad_norm": 6.42664909362793, "learning_rate": 8.966936870369041e-06, "loss": 0.13331311, "memory(GiB)": 13.7, "step": 26440, "train_speed(iter/s)": 1.535528 }, { "acc": 0.9827178, "epoch": 12.395125380829622, "grad_norm": 4.265266418457031, "learning_rate": 8.966464978279902e-06, "loss": 0.0821915, "memory(GiB)": 13.7, "step": 26445, "train_speed(iter/s)": 1.535539 }, { "acc": 0.97555561, "epoch": 12.397468947738458, "grad_norm": 10.527178764343262, "learning_rate": 8.965992990861993e-06, "loss": 0.143937, "memory(GiB)": 13.7, "step": 26450, "train_speed(iter/s)": 1.535544 }, { "acc": 0.95746527, "epoch": 12.399812514647293, "grad_norm": 8.212160110473633, "learning_rate": 8.965520908126661e-06, "loss": 0.17888306, "memory(GiB)": 13.7, "step": 26455, "train_speed(iter/s)": 1.535559 }, { "acc": 0.9838542, "epoch": 12.402156081556129, "grad_norm": 1.1155200004577637, "learning_rate": 8.965048730085252e-06, "loss": 0.06084362, "memory(GiB)": 13.7, "step": 26460, "train_speed(iter/s)": 1.535573 }, { "acc": 0.96931553, "epoch": 12.404499648464963, "grad_norm": 2.558159589767456, "learning_rate": 8.964576456749115e-06, "loss": 0.15904154, "memory(GiB)": 13.7, "step": 26465, "train_speed(iter/s)": 1.535601 }, { "acc": 0.97597218, "epoch": 12.406843215373799, "grad_norm": 1.1692326068878174, "learning_rate": 8.964104088129606e-06, "loss": 0.07727656, "memory(GiB)": 13.7, "step": 26470, "train_speed(iter/s)": 1.535603 }, { "acc": 0.97878838, "epoch": 12.409186782282633, "grad_norm": 3.492678642272949, "learning_rate": 8.963631624238074e-06, "loss": 0.05511051, "memory(GiB)": 13.7, "step": 26475, "train_speed(iter/s)": 1.535615 }, { "acc": 0.98280907, "epoch": 12.41153034919147, "grad_norm": 3.9405410289764404, "learning_rate": 8.963159065085876e-06, "loss": 0.10006906, "memory(GiB)": 13.7, "step": 26480, "train_speed(iter/s)": 1.535633 }, { "acc": 0.97233143, "epoch": 12.413873916100304, "grad_norm": 8.79374885559082, "learning_rate": 8.962686410684375e-06, "loss": 0.11446126, "memory(GiB)": 13.7, "step": 26485, "train_speed(iter/s)": 1.535646 }, { "acc": 0.99133015, "epoch": 12.41621748300914, "grad_norm": 3.371835947036743, "learning_rate": 8.962213661044928e-06, "loss": 0.05590425, "memory(GiB)": 13.7, "step": 26490, "train_speed(iter/s)": 1.535654 }, { "acc": 0.97363091, "epoch": 12.418561049917976, "grad_norm": 1.2841401100158691, "learning_rate": 8.961740816178902e-06, "loss": 0.12949649, "memory(GiB)": 13.7, "step": 26495, "train_speed(iter/s)": 1.535662 }, { "acc": 0.98542614, "epoch": 12.42090461682681, "grad_norm": 4.624750137329102, "learning_rate": 8.96126787609766e-06, "loss": 0.0730342, "memory(GiB)": 13.7, "step": 26500, "train_speed(iter/s)": 1.535673 }, { "acc": 0.98395834, "epoch": 12.423248183735646, "grad_norm": 2.416372537612915, "learning_rate": 8.960794840812569e-06, "loss": 0.07537395, "memory(GiB)": 13.7, "step": 26505, "train_speed(iter/s)": 1.53568 }, { "acc": 0.95645838, "epoch": 12.42559175064448, "grad_norm": 7.781129837036133, "learning_rate": 8.960321710335e-06, "loss": 0.13380558, "memory(GiB)": 13.7, "step": 26510, "train_speed(iter/s)": 1.535693 }, { "acc": 0.97312498, "epoch": 12.427935317553317, "grad_norm": 6.768186092376709, "learning_rate": 8.959848484676326e-06, "loss": 0.13858588, "memory(GiB)": 13.7, "step": 26515, "train_speed(iter/s)": 1.535693 }, { "acc": 0.97947121, "epoch": 12.430278884462151, "grad_norm": 10.422279357910156, "learning_rate": 8.959375163847925e-06, "loss": 0.14500384, "memory(GiB)": 13.7, "step": 26520, "train_speed(iter/s)": 1.535707 }, { "acc": 0.97157087, "epoch": 12.432622451370987, "grad_norm": 9.563225746154785, "learning_rate": 8.958901747861169e-06, "loss": 0.19301838, "memory(GiB)": 13.7, "step": 26525, "train_speed(iter/s)": 1.535713 }, { "acc": 0.98000002, "epoch": 12.434966018279821, "grad_norm": 8.413752555847168, "learning_rate": 8.958428236727439e-06, "loss": 0.08916527, "memory(GiB)": 13.7, "step": 26530, "train_speed(iter/s)": 1.535724 }, { "acc": 0.99383144, "epoch": 12.437309585188657, "grad_norm": 0.6252486109733582, "learning_rate": 8.957954630458117e-06, "loss": 0.03864608, "memory(GiB)": 13.7, "step": 26535, "train_speed(iter/s)": 1.535725 }, { "acc": 0.96662292, "epoch": 12.439653152097492, "grad_norm": 5.641930103302002, "learning_rate": 8.957480929064589e-06, "loss": 0.2176791, "memory(GiB)": 13.7, "step": 26540, "train_speed(iter/s)": 1.535726 }, { "acc": 0.97354164, "epoch": 12.441996719006328, "grad_norm": 6.282113075256348, "learning_rate": 8.957007132558235e-06, "loss": 0.1524478, "memory(GiB)": 13.7, "step": 26545, "train_speed(iter/s)": 1.535735 }, { "acc": 0.97508011, "epoch": 12.444340285915162, "grad_norm": 3.6074583530426025, "learning_rate": 8.956533240950451e-06, "loss": 0.09029593, "memory(GiB)": 13.7, "step": 26550, "train_speed(iter/s)": 1.53575 }, { "acc": 0.97171049, "epoch": 12.446683852823998, "grad_norm": 2.8713724613189697, "learning_rate": 8.956059254252625e-06, "loss": 0.17412409, "memory(GiB)": 13.7, "step": 26555, "train_speed(iter/s)": 1.535755 }, { "acc": 0.9649107, "epoch": 12.449027419732833, "grad_norm": 8.688629150390625, "learning_rate": 8.955585172476147e-06, "loss": 0.21541638, "memory(GiB)": 13.7, "step": 26560, "train_speed(iter/s)": 1.535765 }, { "acc": 0.9764286, "epoch": 12.451370986641669, "grad_norm": 5.544373035430908, "learning_rate": 8.955110995632415e-06, "loss": 0.13228097, "memory(GiB)": 13.7, "step": 26565, "train_speed(iter/s)": 1.535769 }, { "acc": 0.97525301, "epoch": 12.453714553550505, "grad_norm": 1.9921962022781372, "learning_rate": 8.954636723732828e-06, "loss": 0.10211976, "memory(GiB)": 13.7, "step": 26570, "train_speed(iter/s)": 1.535772 }, { "acc": 0.98448868, "epoch": 12.456058120459339, "grad_norm": 6.584528923034668, "learning_rate": 8.954162356788784e-06, "loss": 0.06016642, "memory(GiB)": 13.7, "step": 26575, "train_speed(iter/s)": 1.535784 }, { "acc": 0.9694643, "epoch": 12.458401687368175, "grad_norm": 9.1343355178833, "learning_rate": 8.953687894811685e-06, "loss": 0.20390821, "memory(GiB)": 13.7, "step": 26580, "train_speed(iter/s)": 1.535776 }, { "acc": 0.96654758, "epoch": 12.46074525427701, "grad_norm": 7.4080729484558105, "learning_rate": 8.953213337812938e-06, "loss": 0.1695435, "memory(GiB)": 13.7, "step": 26585, "train_speed(iter/s)": 1.535771 }, { "acc": 0.97145834, "epoch": 12.463088821185845, "grad_norm": 3.5063304901123047, "learning_rate": 8.952738685803946e-06, "loss": 0.14940393, "memory(GiB)": 13.7, "step": 26590, "train_speed(iter/s)": 1.535768 }, { "acc": 0.99562502, "epoch": 12.46543238809468, "grad_norm": 0.7252794504165649, "learning_rate": 8.952263938796124e-06, "loss": 0.05598127, "memory(GiB)": 13.7, "step": 26595, "train_speed(iter/s)": 1.535772 }, { "acc": 0.96849213, "epoch": 12.467775955003516, "grad_norm": 1.459702491760254, "learning_rate": 8.951789096800876e-06, "loss": 0.09830637, "memory(GiB)": 13.7, "step": 26600, "train_speed(iter/s)": 1.535764 }, { "acc": 0.97842264, "epoch": 12.47011952191235, "grad_norm": 2.86897873878479, "learning_rate": 8.951314159829618e-06, "loss": 0.10545813, "memory(GiB)": 13.7, "step": 26605, "train_speed(iter/s)": 1.535766 }, { "acc": 0.98104172, "epoch": 12.472463088821186, "grad_norm": 4.945206642150879, "learning_rate": 8.95083912789377e-06, "loss": 0.12259984, "memory(GiB)": 13.7, "step": 26610, "train_speed(iter/s)": 1.535766 }, { "acc": 0.97924109, "epoch": 12.47480665573002, "grad_norm": 4.817296028137207, "learning_rate": 8.950364001004745e-06, "loss": 0.11269186, "memory(GiB)": 13.7, "step": 26615, "train_speed(iter/s)": 1.535772 }, { "acc": 0.9895607, "epoch": 12.477150222638857, "grad_norm": 7.414314270019531, "learning_rate": 8.949888779173968e-06, "loss": 0.03878946, "memory(GiB)": 13.7, "step": 26620, "train_speed(iter/s)": 1.535785 }, { "acc": 0.98729172, "epoch": 12.479493789547691, "grad_norm": 2.2241082191467285, "learning_rate": 8.949413462412858e-06, "loss": 0.072832, "memory(GiB)": 13.7, "step": 26625, "train_speed(iter/s)": 1.535792 }, { "acc": 0.96521015, "epoch": 12.481837356456527, "grad_norm": 3.228393316268921, "learning_rate": 8.94893805073284e-06, "loss": 0.09672841, "memory(GiB)": 13.7, "step": 26630, "train_speed(iter/s)": 1.535791 }, { "acc": 0.98745193, "epoch": 12.484180923365361, "grad_norm": 8.472017288208008, "learning_rate": 8.948462544145346e-06, "loss": 0.03934436, "memory(GiB)": 13.7, "step": 26635, "train_speed(iter/s)": 1.535793 }, { "acc": 0.97687502, "epoch": 12.486524490274197, "grad_norm": 2.747164487838745, "learning_rate": 8.947986942661801e-06, "loss": 0.16228909, "memory(GiB)": 13.7, "step": 26640, "train_speed(iter/s)": 1.535789 }, { "acc": 0.97833328, "epoch": 12.488868057183032, "grad_norm": 2.849001407623291, "learning_rate": 8.947511246293639e-06, "loss": 0.07360842, "memory(GiB)": 13.7, "step": 26645, "train_speed(iter/s)": 1.535807 }, { "acc": 0.976791, "epoch": 12.491211624091868, "grad_norm": 0.5839525461196899, "learning_rate": 8.947035455052293e-06, "loss": 0.1067158, "memory(GiB)": 13.7, "step": 26650, "train_speed(iter/s)": 1.535812 }, { "acc": 0.97628536, "epoch": 12.493555191000704, "grad_norm": 4.244751453399658, "learning_rate": 8.9465595689492e-06, "loss": 0.10493921, "memory(GiB)": 13.7, "step": 26655, "train_speed(iter/s)": 1.535813 }, { "acc": 0.9729167, "epoch": 12.495898757909538, "grad_norm": 0.052755214273929596, "learning_rate": 8.9460835879958e-06, "loss": 0.0636031, "memory(GiB)": 13.7, "step": 26660, "train_speed(iter/s)": 1.535823 }, { "acc": 0.98103313, "epoch": 12.498242324818374, "grad_norm": 3.858903169631958, "learning_rate": 8.94560751220353e-06, "loss": 0.08676056, "memory(GiB)": 13.7, "step": 26665, "train_speed(iter/s)": 1.535831 }, { "acc": 0.98579445, "epoch": 12.500585891727209, "grad_norm": 6.739360809326172, "learning_rate": 8.945131341583839e-06, "loss": 0.09224511, "memory(GiB)": 13.7, "step": 26670, "train_speed(iter/s)": 1.535839 }, { "acc": 0.97397327, "epoch": 12.502929458636045, "grad_norm": 1.9173685312271118, "learning_rate": 8.944655076148168e-06, "loss": 0.07271543, "memory(GiB)": 13.7, "step": 26675, "train_speed(iter/s)": 1.535841 }, { "acc": 0.97411661, "epoch": 12.505273025544879, "grad_norm": 6.828699111938477, "learning_rate": 8.944178715907967e-06, "loss": 0.10775951, "memory(GiB)": 13.7, "step": 26680, "train_speed(iter/s)": 1.535857 }, { "acc": 0.97870426, "epoch": 12.507616592453715, "grad_norm": 0.041574012488126755, "learning_rate": 8.943702260874687e-06, "loss": 0.14949095, "memory(GiB)": 13.7, "step": 26685, "train_speed(iter/s)": 1.535856 }, { "acc": 0.96834221, "epoch": 12.50996015936255, "grad_norm": 1.5122973918914795, "learning_rate": 8.943225711059776e-06, "loss": 0.14559221, "memory(GiB)": 13.7, "step": 26690, "train_speed(iter/s)": 1.535856 }, { "acc": 0.98200169, "epoch": 12.512303726271385, "grad_norm": 4.667044162750244, "learning_rate": 8.942749066474694e-06, "loss": 0.125218, "memory(GiB)": 13.7, "step": 26695, "train_speed(iter/s)": 1.535861 }, { "acc": 0.97381945, "epoch": 12.51464729318022, "grad_norm": 6.16033935546875, "learning_rate": 8.942272327130896e-06, "loss": 0.08697503, "memory(GiB)": 13.7, "step": 26700, "train_speed(iter/s)": 1.535859 }, { "acc": 0.98249998, "epoch": 12.516990860089056, "grad_norm": 3.508580207824707, "learning_rate": 8.941795493039841e-06, "loss": 0.05374972, "memory(GiB)": 13.7, "step": 26705, "train_speed(iter/s)": 1.535872 }, { "acc": 0.97780485, "epoch": 12.51933442699789, "grad_norm": 5.426531791687012, "learning_rate": 8.94131856421299e-06, "loss": 0.07173148, "memory(GiB)": 13.7, "step": 26710, "train_speed(iter/s)": 1.535881 }, { "acc": 0.99377975, "epoch": 12.521677993906726, "grad_norm": 3.35446834564209, "learning_rate": 8.940841540661806e-06, "loss": 0.03529181, "memory(GiB)": 13.7, "step": 26715, "train_speed(iter/s)": 1.535884 }, { "acc": 0.98624992, "epoch": 12.52402156081556, "grad_norm": 1.092557430267334, "learning_rate": 8.940364422397756e-06, "loss": 0.05572281, "memory(GiB)": 13.7, "step": 26720, "train_speed(iter/s)": 1.535892 }, { "acc": 0.97508926, "epoch": 12.526365127724397, "grad_norm": 7.335222244262695, "learning_rate": 8.939887209432311e-06, "loss": 0.14231944, "memory(GiB)": 13.7, "step": 26725, "train_speed(iter/s)": 1.535904 }, { "acc": 0.9838542, "epoch": 12.528708694633231, "grad_norm": 4.409327030181885, "learning_rate": 8.93940990177694e-06, "loss": 0.07117126, "memory(GiB)": 13.7, "step": 26730, "train_speed(iter/s)": 1.535902 }, { "acc": 0.98286362, "epoch": 12.531052261542067, "grad_norm": 5.7897210121154785, "learning_rate": 8.938932499443114e-06, "loss": 0.10356579, "memory(GiB)": 13.7, "step": 26735, "train_speed(iter/s)": 1.535908 }, { "acc": 0.98952484, "epoch": 12.533395828450903, "grad_norm": 0.03819034993648529, "learning_rate": 8.938455002442307e-06, "loss": 0.05830886, "memory(GiB)": 13.7, "step": 26740, "train_speed(iter/s)": 1.535906 }, { "acc": 0.98702993, "epoch": 12.535739395359737, "grad_norm": 0.01814374513924122, "learning_rate": 8.937977410786004e-06, "loss": 0.07515259, "memory(GiB)": 13.7, "step": 26745, "train_speed(iter/s)": 1.535901 }, { "acc": 0.9895833, "epoch": 12.538082962268573, "grad_norm": 4.390223979949951, "learning_rate": 8.937499724485677e-06, "loss": 0.04692926, "memory(GiB)": 13.7, "step": 26750, "train_speed(iter/s)": 1.535914 }, { "acc": 0.95798397, "epoch": 12.540426529177408, "grad_norm": 10.669450759887695, "learning_rate": 8.937021943552809e-06, "loss": 0.20339723, "memory(GiB)": 13.7, "step": 26755, "train_speed(iter/s)": 1.535925 }, { "acc": 0.96680355, "epoch": 12.542770096086244, "grad_norm": 3.9308295249938965, "learning_rate": 8.936544067998889e-06, "loss": 0.14306757, "memory(GiB)": 13.7, "step": 26760, "train_speed(iter/s)": 1.535943 }, { "acc": 0.9678793, "epoch": 12.545113662995078, "grad_norm": 3.2120416164398193, "learning_rate": 8.936066097835398e-06, "loss": 0.1418196, "memory(GiB)": 13.7, "step": 26765, "train_speed(iter/s)": 1.53596 }, { "acc": 0.98614988, "epoch": 12.547457229903914, "grad_norm": 1.6463441848754883, "learning_rate": 8.935588033073828e-06, "loss": 0.11190192, "memory(GiB)": 13.7, "step": 26770, "train_speed(iter/s)": 1.53598 }, { "acc": 0.97718754, "epoch": 12.549800796812749, "grad_norm": 2.92891526222229, "learning_rate": 8.935109873725669e-06, "loss": 0.08159156, "memory(GiB)": 13.7, "step": 26775, "train_speed(iter/s)": 1.535983 }, { "acc": 0.97209072, "epoch": 12.552144363721585, "grad_norm": 3.873074769973755, "learning_rate": 8.934631619802416e-06, "loss": 0.12284067, "memory(GiB)": 13.7, "step": 26780, "train_speed(iter/s)": 1.535988 }, { "acc": 0.98374062, "epoch": 12.554487930630419, "grad_norm": 2.0251195430755615, "learning_rate": 8.934153271315566e-06, "loss": 0.09161115, "memory(GiB)": 13.7, "step": 26785, "train_speed(iter/s)": 1.536 }, { "acc": 0.98156242, "epoch": 12.556831497539255, "grad_norm": 4.290555477142334, "learning_rate": 8.93367482827661e-06, "loss": 0.07202982, "memory(GiB)": 13.7, "step": 26790, "train_speed(iter/s)": 1.535996 }, { "acc": 0.98120985, "epoch": 12.55917506444809, "grad_norm": 6.27388858795166, "learning_rate": 8.933196290697054e-06, "loss": 0.14148798, "memory(GiB)": 13.7, "step": 26795, "train_speed(iter/s)": 1.536001 }, { "acc": 0.9755621, "epoch": 12.561518631356925, "grad_norm": 16.7082462310791, "learning_rate": 8.9327176585884e-06, "loss": 0.11015675, "memory(GiB)": 13.7, "step": 26800, "train_speed(iter/s)": 1.536008 }, { "acc": 0.98913231, "epoch": 12.56386219826576, "grad_norm": 3.626209020614624, "learning_rate": 8.93223893196215e-06, "loss": 0.07356718, "memory(GiB)": 13.7, "step": 26805, "train_speed(iter/s)": 1.536013 }, { "acc": 0.98145294, "epoch": 12.566205765174596, "grad_norm": 9.413029670715332, "learning_rate": 8.931760110829815e-06, "loss": 0.08143353, "memory(GiB)": 13.7, "step": 26810, "train_speed(iter/s)": 1.536015 }, { "acc": 0.98713284, "epoch": 12.568549332083432, "grad_norm": 5.907914638519287, "learning_rate": 8.9312811952029e-06, "loss": 0.09545569, "memory(GiB)": 13.7, "step": 26815, "train_speed(iter/s)": 1.536011 }, { "acc": 0.98434849, "epoch": 12.570892898992266, "grad_norm": 3.649658441543579, "learning_rate": 8.930802185092921e-06, "loss": 0.071663, "memory(GiB)": 13.7, "step": 26820, "train_speed(iter/s)": 1.53601 }, { "acc": 0.96576633, "epoch": 12.573236465901102, "grad_norm": 23.829561233520508, "learning_rate": 8.930323080511387e-06, "loss": 0.15967305, "memory(GiB)": 13.7, "step": 26825, "train_speed(iter/s)": 1.536004 }, { "acc": 0.97460232, "epoch": 12.575580032809937, "grad_norm": 6.3500776290893555, "learning_rate": 8.929843881469819e-06, "loss": 0.09321968, "memory(GiB)": 13.7, "step": 26830, "train_speed(iter/s)": 1.536024 }, { "acc": 0.9801136, "epoch": 12.577923599718773, "grad_norm": 4.749141216278076, "learning_rate": 8.92936458797973e-06, "loss": 0.10761551, "memory(GiB)": 13.7, "step": 26835, "train_speed(iter/s)": 1.536041 }, { "acc": 0.96570511, "epoch": 12.580267166627607, "grad_norm": 18.42445945739746, "learning_rate": 8.928885200052647e-06, "loss": 0.16716172, "memory(GiB)": 13.7, "step": 26840, "train_speed(iter/s)": 1.536049 }, { "acc": 0.9715476, "epoch": 12.582610733536443, "grad_norm": 10.002853393554688, "learning_rate": 8.928405717700089e-06, "loss": 0.11721381, "memory(GiB)": 13.7, "step": 26845, "train_speed(iter/s)": 1.536082 }, { "acc": 0.97517853, "epoch": 12.584954300445277, "grad_norm": 2.8811042308807373, "learning_rate": 8.927926140933581e-06, "loss": 0.1112389, "memory(GiB)": 13.7, "step": 26850, "train_speed(iter/s)": 1.536086 }, { "acc": 0.98052082, "epoch": 12.587297867354113, "grad_norm": 2.0240142345428467, "learning_rate": 8.92744646976465e-06, "loss": 0.09670747, "memory(GiB)": 13.7, "step": 26855, "train_speed(iter/s)": 1.536078 }, { "acc": 0.96369047, "epoch": 12.589641434262948, "grad_norm": 7.079728603363037, "learning_rate": 8.926966704204827e-06, "loss": 0.16131573, "memory(GiB)": 13.7, "step": 26860, "train_speed(iter/s)": 1.536082 }, { "acc": 0.96531248, "epoch": 12.591985001171784, "grad_norm": 6.4480881690979, "learning_rate": 8.926486844265647e-06, "loss": 0.14373908, "memory(GiB)": 13.7, "step": 26865, "train_speed(iter/s)": 1.536088 }, { "acc": 0.96205349, "epoch": 12.594328568080618, "grad_norm": 53.562400817871094, "learning_rate": 8.926006889958638e-06, "loss": 0.26009297, "memory(GiB)": 13.7, "step": 26870, "train_speed(iter/s)": 1.536086 }, { "acc": 0.98389606, "epoch": 12.596672134989454, "grad_norm": 2.215029239654541, "learning_rate": 8.925526841295338e-06, "loss": 0.06889922, "memory(GiB)": 13.7, "step": 26875, "train_speed(iter/s)": 1.536086 }, { "acc": 0.99047623, "epoch": 12.599015701898288, "grad_norm": 4.382453918457031, "learning_rate": 8.925046698287291e-06, "loss": 0.0539876, "memory(GiB)": 13.7, "step": 26880, "train_speed(iter/s)": 1.536101 }, { "acc": 0.9793602, "epoch": 12.601359268807125, "grad_norm": 4.621502876281738, "learning_rate": 8.924566460946034e-06, "loss": 0.12577596, "memory(GiB)": 13.7, "step": 26885, "train_speed(iter/s)": 1.536097 }, { "acc": 0.97098217, "epoch": 12.60370283571596, "grad_norm": 6.4760918617248535, "learning_rate": 8.924086129283111e-06, "loss": 0.12104659, "memory(GiB)": 13.7, "step": 26890, "train_speed(iter/s)": 1.536119 }, { "acc": 0.96827383, "epoch": 12.606046402624795, "grad_norm": 2.8640267848968506, "learning_rate": 8.923605703310066e-06, "loss": 0.08305471, "memory(GiB)": 13.7, "step": 26895, "train_speed(iter/s)": 1.53613 }, { "acc": 0.97925596, "epoch": 12.608389969533631, "grad_norm": 6.8212714195251465, "learning_rate": 8.923125183038449e-06, "loss": 0.11500757, "memory(GiB)": 13.7, "step": 26900, "train_speed(iter/s)": 1.536141 }, { "acc": 0.97742062, "epoch": 12.610733536442465, "grad_norm": 10.17969036102295, "learning_rate": 8.922644568479808e-06, "loss": 0.12876312, "memory(GiB)": 13.7, "step": 26905, "train_speed(iter/s)": 1.536138 }, { "acc": 0.97820892, "epoch": 12.613077103351301, "grad_norm": 3.0531132221221924, "learning_rate": 8.9221638596457e-06, "loss": 0.13022256, "memory(GiB)": 13.7, "step": 26910, "train_speed(iter/s)": 1.536139 }, { "acc": 0.97937498, "epoch": 12.615420670260136, "grad_norm": 8.133569717407227, "learning_rate": 8.921683056547674e-06, "loss": 0.11533716, "memory(GiB)": 13.7, "step": 26915, "train_speed(iter/s)": 1.536144 }, { "acc": 0.96535091, "epoch": 12.617764237168972, "grad_norm": 10.458775520324707, "learning_rate": 8.92120215919729e-06, "loss": 0.12322888, "memory(GiB)": 13.7, "step": 26920, "train_speed(iter/s)": 1.536144 }, { "acc": 0.97279015, "epoch": 12.620107804077806, "grad_norm": 5.129916667938232, "learning_rate": 8.920721167606107e-06, "loss": 0.14928689, "memory(GiB)": 13.7, "step": 26925, "train_speed(iter/s)": 1.536149 }, { "acc": 0.97072649, "epoch": 12.622451370986642, "grad_norm": 5.218138217926025, "learning_rate": 8.920240081785685e-06, "loss": 0.18824545, "memory(GiB)": 13.7, "step": 26930, "train_speed(iter/s)": 1.536159 }, { "acc": 0.94933414, "epoch": 12.624794937895476, "grad_norm": 8.628820419311523, "learning_rate": 8.91975890174759e-06, "loss": 0.16907599, "memory(GiB)": 13.7, "step": 26935, "train_speed(iter/s)": 1.536145 }, { "acc": 0.9941761, "epoch": 12.627138504804313, "grad_norm": 0.6706676483154297, "learning_rate": 8.919277627503388e-06, "loss": 0.03694089, "memory(GiB)": 13.7, "step": 26940, "train_speed(iter/s)": 1.536143 }, { "acc": 0.9690526, "epoch": 12.629482071713147, "grad_norm": 7.589054584503174, "learning_rate": 8.918796259064643e-06, "loss": 0.13391128, "memory(GiB)": 13.7, "step": 26945, "train_speed(iter/s)": 1.536157 }, { "acc": 0.96293564, "epoch": 12.631825638621983, "grad_norm": 7.064904689788818, "learning_rate": 8.918314796442931e-06, "loss": 0.16368871, "memory(GiB)": 13.7, "step": 26950, "train_speed(iter/s)": 1.536157 }, { "acc": 0.96169109, "epoch": 12.634169205530817, "grad_norm": 6.901378631591797, "learning_rate": 8.91783323964982e-06, "loss": 0.11471986, "memory(GiB)": 13.7, "step": 26955, "train_speed(iter/s)": 1.536152 }, { "acc": 0.96613092, "epoch": 12.636512772439653, "grad_norm": 7.999946117401123, "learning_rate": 8.91735158869689e-06, "loss": 0.1923274, "memory(GiB)": 13.7, "step": 26960, "train_speed(iter/s)": 1.536164 }, { "acc": 0.9723959, "epoch": 12.638856339348488, "grad_norm": 4.55918025970459, "learning_rate": 8.916869843595712e-06, "loss": 0.14951062, "memory(GiB)": 13.7, "step": 26965, "train_speed(iter/s)": 1.536156 }, { "acc": 0.97221584, "epoch": 12.641199906257324, "grad_norm": 3.142866611480713, "learning_rate": 8.916388004357871e-06, "loss": 0.10751487, "memory(GiB)": 13.7, "step": 26970, "train_speed(iter/s)": 1.536147 }, { "acc": 0.97014341, "epoch": 12.643543473166158, "grad_norm": 6.703845024108887, "learning_rate": 8.91590607099495e-06, "loss": 0.1383435, "memory(GiB)": 13.7, "step": 26975, "train_speed(iter/s)": 1.536147 }, { "acc": 0.99319439, "epoch": 12.645887040074994, "grad_norm": 4.473565578460693, "learning_rate": 8.915424043518525e-06, "loss": 0.04418716, "memory(GiB)": 13.7, "step": 26980, "train_speed(iter/s)": 1.53616 }, { "acc": 0.98986111, "epoch": 12.64823060698383, "grad_norm": 0.9731713533401489, "learning_rate": 8.914941921940191e-06, "loss": 0.06184623, "memory(GiB)": 13.7, "step": 26985, "train_speed(iter/s)": 1.536158 }, { "acc": 0.97477684, "epoch": 12.650574173892664, "grad_norm": 4.94063663482666, "learning_rate": 8.914459706271532e-06, "loss": 0.12574728, "memory(GiB)": 13.7, "step": 26990, "train_speed(iter/s)": 1.536166 }, { "acc": 0.97838545, "epoch": 12.6529177408015, "grad_norm": 0.17805112898349762, "learning_rate": 8.913977396524141e-06, "loss": 0.09344062, "memory(GiB)": 13.7, "step": 26995, "train_speed(iter/s)": 1.536174 }, { "acc": 0.98361111, "epoch": 12.655261307710335, "grad_norm": 5.247017860412598, "learning_rate": 8.91349499270961e-06, "loss": 0.06500349, "memory(GiB)": 13.7, "step": 27000, "train_speed(iter/s)": 1.536183 }, { "acc": 0.97104816, "epoch": 12.657604874619171, "grad_norm": 6.560130596160889, "learning_rate": 8.913012494839535e-06, "loss": 0.12929595, "memory(GiB)": 13.7, "step": 27005, "train_speed(iter/s)": 1.536193 }, { "acc": 0.98500004, "epoch": 12.659948441528005, "grad_norm": 2.217663288116455, "learning_rate": 8.912529902925511e-06, "loss": 0.05944266, "memory(GiB)": 13.7, "step": 27010, "train_speed(iter/s)": 1.536204 }, { "acc": 0.97860889, "epoch": 12.662292008436841, "grad_norm": 3.118501901626587, "learning_rate": 8.912047216979144e-06, "loss": 0.11007129, "memory(GiB)": 13.7, "step": 27015, "train_speed(iter/s)": 1.536192 }, { "acc": 0.97188854, "epoch": 12.664635575345676, "grad_norm": 7.871745586395264, "learning_rate": 8.911564437012029e-06, "loss": 0.09992082, "memory(GiB)": 13.7, "step": 27020, "train_speed(iter/s)": 1.536203 }, { "acc": 0.97929926, "epoch": 12.666979142254512, "grad_norm": 4.06701135635376, "learning_rate": 8.911081563035773e-06, "loss": 0.10179914, "memory(GiB)": 13.7, "step": 27025, "train_speed(iter/s)": 1.53622 }, { "acc": 0.97665405, "epoch": 12.669322709163346, "grad_norm": 7.436315059661865, "learning_rate": 8.910598595061986e-06, "loss": 0.11438296, "memory(GiB)": 13.7, "step": 27030, "train_speed(iter/s)": 1.536234 }, { "acc": 0.97748919, "epoch": 12.671666276072182, "grad_norm": 3.3750460147857666, "learning_rate": 8.910115533102273e-06, "loss": 0.15644889, "memory(GiB)": 13.7, "step": 27035, "train_speed(iter/s)": 1.536228 }, { "acc": 0.97189388, "epoch": 12.674009842981016, "grad_norm": 10.806278228759766, "learning_rate": 8.909632377168246e-06, "loss": 0.13947875, "memory(GiB)": 13.7, "step": 27040, "train_speed(iter/s)": 1.53623 }, { "acc": 0.97718792, "epoch": 12.676353409889852, "grad_norm": 9.018636703491211, "learning_rate": 8.909149127271521e-06, "loss": 0.16157367, "memory(GiB)": 13.7, "step": 27045, "train_speed(iter/s)": 1.536225 }, { "acc": 0.97403135, "epoch": 12.678696976798687, "grad_norm": 6.897166728973389, "learning_rate": 8.908665783423711e-06, "loss": 0.10911806, "memory(GiB)": 13.7, "step": 27050, "train_speed(iter/s)": 1.536236 }, { "acc": 0.94606762, "epoch": 12.681040543707523, "grad_norm": 6.142474174499512, "learning_rate": 8.908182345636435e-06, "loss": 0.19724435, "memory(GiB)": 13.7, "step": 27055, "train_speed(iter/s)": 1.536238 }, { "acc": 0.9854044, "epoch": 12.683384110616359, "grad_norm": 4.480172157287598, "learning_rate": 8.907698813921312e-06, "loss": 0.09983015, "memory(GiB)": 13.7, "step": 27060, "train_speed(iter/s)": 1.536229 }, { "acc": 0.97342262, "epoch": 12.685727677525193, "grad_norm": 4.838273048400879, "learning_rate": 8.907215188289967e-06, "loss": 0.1434684, "memory(GiB)": 13.7, "step": 27065, "train_speed(iter/s)": 1.536225 }, { "acc": 0.97895298, "epoch": 12.68807124443403, "grad_norm": 4.813547611236572, "learning_rate": 8.906731468754025e-06, "loss": 0.11805787, "memory(GiB)": 13.7, "step": 27070, "train_speed(iter/s)": 1.536231 }, { "acc": 0.9785778, "epoch": 12.690414811342864, "grad_norm": 1.6178301572799683, "learning_rate": 8.90624765532511e-06, "loss": 0.07674242, "memory(GiB)": 13.7, "step": 27075, "train_speed(iter/s)": 1.536227 }, { "acc": 0.9685833, "epoch": 12.6927583782517, "grad_norm": 14.275853157043457, "learning_rate": 8.905763748014852e-06, "loss": 0.14780486, "memory(GiB)": 13.7, "step": 27080, "train_speed(iter/s)": 1.536225 }, { "acc": 0.96222715, "epoch": 12.695101945160534, "grad_norm": 6.5407023429870605, "learning_rate": 8.905279746834882e-06, "loss": 0.16367977, "memory(GiB)": 13.7, "step": 27085, "train_speed(iter/s)": 1.536225 }, { "acc": 0.9913393, "epoch": 12.69744551206937, "grad_norm": 4.007548809051514, "learning_rate": 8.904795651796838e-06, "loss": 0.06601822, "memory(GiB)": 13.7, "step": 27090, "train_speed(iter/s)": 1.53622 }, { "acc": 0.98206844, "epoch": 12.699789078978204, "grad_norm": 3.5093801021575928, "learning_rate": 8.904311462912353e-06, "loss": 0.06872655, "memory(GiB)": 13.7, "step": 27095, "train_speed(iter/s)": 1.536232 }, { "acc": 0.96807041, "epoch": 12.70213264588704, "grad_norm": 2.6826670169830322, "learning_rate": 8.903827180193064e-06, "loss": 0.16400584, "memory(GiB)": 13.7, "step": 27100, "train_speed(iter/s)": 1.53624 }, { "acc": 0.98451843, "epoch": 12.704476212795875, "grad_norm": 2.056257724761963, "learning_rate": 8.903342803650613e-06, "loss": 0.09177207, "memory(GiB)": 13.7, "step": 27105, "train_speed(iter/s)": 1.536259 }, { "acc": 0.97087755, "epoch": 12.70681977970471, "grad_norm": 6.3663835525512695, "learning_rate": 8.902858333296644e-06, "loss": 0.11013604, "memory(GiB)": 13.7, "step": 27110, "train_speed(iter/s)": 1.536284 }, { "acc": 0.9864584, "epoch": 12.709163346613545, "grad_norm": 13.084959030151367, "learning_rate": 8.9023737691428e-06, "loss": 0.05943868, "memory(GiB)": 13.7, "step": 27115, "train_speed(iter/s)": 1.536287 }, { "acc": 0.97644377, "epoch": 12.711506913522381, "grad_norm": 4.8265886306762695, "learning_rate": 8.901889111200728e-06, "loss": 0.10348405, "memory(GiB)": 13.7, "step": 27120, "train_speed(iter/s)": 1.536291 }, { "acc": 0.97482777, "epoch": 12.713850480431216, "grad_norm": 6.887488842010498, "learning_rate": 8.90140435948208e-06, "loss": 0.12972833, "memory(GiB)": 13.7, "step": 27125, "train_speed(iter/s)": 1.536307 }, { "acc": 0.99120941, "epoch": 12.716194047340052, "grad_norm": 0.03569021448493004, "learning_rate": 8.900919513998508e-06, "loss": 0.04647185, "memory(GiB)": 13.7, "step": 27130, "train_speed(iter/s)": 1.536304 }, { "acc": 0.96119041, "epoch": 12.718537614248888, "grad_norm": 6.840069770812988, "learning_rate": 8.900434574761664e-06, "loss": 0.16015856, "memory(GiB)": 13.7, "step": 27135, "train_speed(iter/s)": 1.536303 }, { "acc": 0.98154764, "epoch": 12.720881181157722, "grad_norm": 6.348769664764404, "learning_rate": 8.899949541783205e-06, "loss": 0.06851732, "memory(GiB)": 13.7, "step": 27140, "train_speed(iter/s)": 1.536316 }, { "acc": 0.99375, "epoch": 12.723224748066558, "grad_norm": 0.6477273106575012, "learning_rate": 8.89946441507479e-06, "loss": 0.07818861, "memory(GiB)": 13.7, "step": 27145, "train_speed(iter/s)": 1.536321 }, { "acc": 0.96710224, "epoch": 12.725568314975392, "grad_norm": 2.796802043914795, "learning_rate": 8.898979194648078e-06, "loss": 0.17522697, "memory(GiB)": 13.7, "step": 27150, "train_speed(iter/s)": 1.536336 }, { "acc": 0.98474941, "epoch": 12.727911881884229, "grad_norm": 3.239384651184082, "learning_rate": 8.898493880514735e-06, "loss": 0.04323211, "memory(GiB)": 13.7, "step": 27155, "train_speed(iter/s)": 1.536353 }, { "acc": 0.97687073, "epoch": 12.730255448793063, "grad_norm": 29.34745216369629, "learning_rate": 8.898008472686425e-06, "loss": 0.13798541, "memory(GiB)": 13.7, "step": 27160, "train_speed(iter/s)": 1.536366 }, { "acc": 0.95683489, "epoch": 12.732599015701899, "grad_norm": 4.30601167678833, "learning_rate": 8.897522971174815e-06, "loss": 0.19018109, "memory(GiB)": 13.7, "step": 27165, "train_speed(iter/s)": 1.536368 }, { "acc": 0.98238621, "epoch": 12.734942582610733, "grad_norm": 5.847790718078613, "learning_rate": 8.897037375991575e-06, "loss": 0.09984604, "memory(GiB)": 13.7, "step": 27170, "train_speed(iter/s)": 1.536372 }, { "acc": 0.97028275, "epoch": 12.73728614951957, "grad_norm": 58.56796646118164, "learning_rate": 8.896551687148378e-06, "loss": 0.1882948, "memory(GiB)": 13.7, "step": 27175, "train_speed(iter/s)": 1.536378 }, { "acc": 0.96739988, "epoch": 12.739629716428404, "grad_norm": 2.2222049236297607, "learning_rate": 8.8960659046569e-06, "loss": 0.2387924, "memory(GiB)": 13.7, "step": 27180, "train_speed(iter/s)": 1.536395 }, { "acc": 0.97854176, "epoch": 12.74197328333724, "grad_norm": 3.0398659706115723, "learning_rate": 8.895580028528814e-06, "loss": 0.11363291, "memory(GiB)": 13.7, "step": 27185, "train_speed(iter/s)": 1.536383 }, { "acc": 0.96050053, "epoch": 12.744316850246074, "grad_norm": 5.512601852416992, "learning_rate": 8.8950940587758e-06, "loss": 0.14946203, "memory(GiB)": 13.7, "step": 27190, "train_speed(iter/s)": 1.536396 }, { "acc": 0.96919022, "epoch": 12.74666041715491, "grad_norm": 31.137767791748047, "learning_rate": 8.89460799540954e-06, "loss": 0.15191181, "memory(GiB)": 13.7, "step": 27195, "train_speed(iter/s)": 1.536402 }, { "acc": 0.9819643, "epoch": 12.749003984063744, "grad_norm": 5.478453159332275, "learning_rate": 8.894121838441717e-06, "loss": 0.09035665, "memory(GiB)": 13.7, "step": 27200, "train_speed(iter/s)": 1.536403 }, { "acc": 0.98458328, "epoch": 12.75134755097258, "grad_norm": 3.3938510417938232, "learning_rate": 8.893635587884016e-06, "loss": 0.08416211, "memory(GiB)": 13.7, "step": 27205, "train_speed(iter/s)": 1.536425 }, { "acc": 0.983218, "epoch": 12.753691117881415, "grad_norm": 4.303081035614014, "learning_rate": 8.893149243748127e-06, "loss": 0.08038119, "memory(GiB)": 13.7, "step": 27210, "train_speed(iter/s)": 1.536437 }, { "acc": 0.96809521, "epoch": 12.75603468479025, "grad_norm": 4.5585036277771, "learning_rate": 8.892662806045738e-06, "loss": 0.1962652, "memory(GiB)": 13.7, "step": 27215, "train_speed(iter/s)": 1.536431 }, { "acc": 0.96821423, "epoch": 12.758378251699085, "grad_norm": 4.450860023498535, "learning_rate": 8.89217627478854e-06, "loss": 0.10098724, "memory(GiB)": 13.7, "step": 27220, "train_speed(iter/s)": 1.536418 }, { "acc": 0.98135414, "epoch": 12.760721818607921, "grad_norm": 8.385932922363281, "learning_rate": 8.891689649988232e-06, "loss": 0.09692832, "memory(GiB)": 13.7, "step": 27225, "train_speed(iter/s)": 1.536425 }, { "acc": 0.97969398, "epoch": 12.763065385516757, "grad_norm": 1.6987806558609009, "learning_rate": 8.891202931656505e-06, "loss": 0.07286424, "memory(GiB)": 13.7, "step": 27230, "train_speed(iter/s)": 1.536427 }, { "acc": 0.96930065, "epoch": 12.765408952425592, "grad_norm": 5.891445159912109, "learning_rate": 8.890716119805063e-06, "loss": 0.19218345, "memory(GiB)": 13.7, "step": 27235, "train_speed(iter/s)": 1.536418 }, { "acc": 0.98101768, "epoch": 12.767752519334428, "grad_norm": 3.1429927349090576, "learning_rate": 8.890229214445607e-06, "loss": 0.07232706, "memory(GiB)": 13.7, "step": 27240, "train_speed(iter/s)": 1.536413 }, { "acc": 0.9871397, "epoch": 12.770096086243262, "grad_norm": 0.5851978659629822, "learning_rate": 8.88974221558984e-06, "loss": 0.06020463, "memory(GiB)": 13.7, "step": 27245, "train_speed(iter/s)": 1.536413 }, { "acc": 0.97856064, "epoch": 12.772439653152098, "grad_norm": 9.96501636505127, "learning_rate": 8.889255123249467e-06, "loss": 0.09813019, "memory(GiB)": 13.7, "step": 27250, "train_speed(iter/s)": 1.536409 }, { "acc": 0.98105392, "epoch": 12.774783220060932, "grad_norm": 0.019683364778757095, "learning_rate": 8.888767937436194e-06, "loss": 0.09670836, "memory(GiB)": 13.7, "step": 27255, "train_speed(iter/s)": 1.536403 }, { "acc": 0.96832809, "epoch": 12.777126786969768, "grad_norm": 11.478201866149902, "learning_rate": 8.888280658161737e-06, "loss": 0.1366282, "memory(GiB)": 13.7, "step": 27260, "train_speed(iter/s)": 1.536391 }, { "acc": 0.97364578, "epoch": 12.779470353878603, "grad_norm": 7.968465328216553, "learning_rate": 8.887793285437802e-06, "loss": 0.11903507, "memory(GiB)": 13.7, "step": 27265, "train_speed(iter/s)": 1.53639 }, { "acc": 0.97854176, "epoch": 12.781813920787439, "grad_norm": 1.5570374727249146, "learning_rate": 8.887305819276109e-06, "loss": 0.12629315, "memory(GiB)": 13.7, "step": 27270, "train_speed(iter/s)": 1.536392 }, { "acc": 0.98445511, "epoch": 12.784157487696273, "grad_norm": 3.3296334743499756, "learning_rate": 8.886818259688374e-06, "loss": 0.11311926, "memory(GiB)": 13.7, "step": 27275, "train_speed(iter/s)": 1.536406 }, { "acc": 0.96245041, "epoch": 12.78650105460511, "grad_norm": 6.557122707366943, "learning_rate": 8.886330606686314e-06, "loss": 0.15938697, "memory(GiB)": 13.7, "step": 27280, "train_speed(iter/s)": 1.536414 }, { "acc": 0.99437504, "epoch": 12.788844621513944, "grad_norm": 0.027721991762518883, "learning_rate": 8.885842860281651e-06, "loss": 0.02963085, "memory(GiB)": 13.7, "step": 27285, "train_speed(iter/s)": 1.536435 }, { "acc": 0.98687496, "epoch": 12.79118818842278, "grad_norm": 3.823046922683716, "learning_rate": 8.885355020486113e-06, "loss": 0.07082672, "memory(GiB)": 13.7, "step": 27290, "train_speed(iter/s)": 1.536435 }, { "acc": 0.96106062, "epoch": 12.793531755331614, "grad_norm": 4.270023822784424, "learning_rate": 8.884867087311421e-06, "loss": 0.18318148, "memory(GiB)": 13.7, "step": 27295, "train_speed(iter/s)": 1.536443 }, { "acc": 0.97372475, "epoch": 12.79587532224045, "grad_norm": 18.648996353149414, "learning_rate": 8.884379060769307e-06, "loss": 0.14826517, "memory(GiB)": 13.7, "step": 27300, "train_speed(iter/s)": 1.536434 }, { "acc": 0.97989082, "epoch": 12.798218889149286, "grad_norm": 1.4352617263793945, "learning_rate": 8.8838909408715e-06, "loss": 0.08786352, "memory(GiB)": 13.7, "step": 27305, "train_speed(iter/s)": 1.53645 }, { "acc": 0.97520332, "epoch": 12.80056245605812, "grad_norm": 4.839436054229736, "learning_rate": 8.88340272762973e-06, "loss": 0.15568526, "memory(GiB)": 13.7, "step": 27310, "train_speed(iter/s)": 1.53646 }, { "acc": 0.99416666, "epoch": 12.802906022966956, "grad_norm": 4.288150787353516, "learning_rate": 8.882914421055737e-06, "loss": 0.01648731, "memory(GiB)": 13.7, "step": 27315, "train_speed(iter/s)": 1.536445 }, { "acc": 0.96991472, "epoch": 12.80524958987579, "grad_norm": 5.655358791351318, "learning_rate": 8.882426021161256e-06, "loss": 0.16099236, "memory(GiB)": 13.7, "step": 27320, "train_speed(iter/s)": 1.536463 }, { "acc": 0.98224907, "epoch": 12.807593156784627, "grad_norm": 4.805692672729492, "learning_rate": 8.881937527958026e-06, "loss": 0.07766891, "memory(GiB)": 13.7, "step": 27325, "train_speed(iter/s)": 1.53648 }, { "acc": 0.96524143, "epoch": 12.809936723693461, "grad_norm": 4.102777004241943, "learning_rate": 8.88144894145779e-06, "loss": 0.20212538, "memory(GiB)": 13.7, "step": 27330, "train_speed(iter/s)": 1.536489 }, { "acc": 0.97734375, "epoch": 12.812280290602297, "grad_norm": 1.79425847530365, "learning_rate": 8.88096026167229e-06, "loss": 0.13441702, "memory(GiB)": 13.7, "step": 27335, "train_speed(iter/s)": 1.536505 }, { "acc": 0.9864584, "epoch": 12.814623857511132, "grad_norm": 7.361049652099609, "learning_rate": 8.880471488613275e-06, "loss": 0.12735, "memory(GiB)": 13.7, "step": 27340, "train_speed(iter/s)": 1.536515 }, { "acc": 0.98208332, "epoch": 12.816967424419968, "grad_norm": 7.878287315368652, "learning_rate": 8.879982622292491e-06, "loss": 0.10993774, "memory(GiB)": 13.7, "step": 27345, "train_speed(iter/s)": 1.536539 }, { "acc": 0.97654762, "epoch": 12.819310991328802, "grad_norm": 5.325809955596924, "learning_rate": 8.879493662721693e-06, "loss": 0.13134506, "memory(GiB)": 13.7, "step": 27350, "train_speed(iter/s)": 1.536553 }, { "acc": 0.9678545, "epoch": 12.821654558237638, "grad_norm": 5.124143600463867, "learning_rate": 8.879004609912626e-06, "loss": 0.16661828, "memory(GiB)": 13.7, "step": 27355, "train_speed(iter/s)": 1.536563 }, { "acc": 0.97128696, "epoch": 12.823998125146472, "grad_norm": 4.5307207107543945, "learning_rate": 8.878515463877052e-06, "loss": 0.14373925, "memory(GiB)": 13.7, "step": 27360, "train_speed(iter/s)": 1.53657 }, { "acc": 0.9708333, "epoch": 12.826341692055308, "grad_norm": 3.6114847660064697, "learning_rate": 8.878026224626727e-06, "loss": 0.08172142, "memory(GiB)": 13.7, "step": 27365, "train_speed(iter/s)": 1.536572 }, { "acc": 0.98669643, "epoch": 12.828685258964143, "grad_norm": 5.838706970214844, "learning_rate": 8.87753689217341e-06, "loss": 0.11679218, "memory(GiB)": 13.7, "step": 27370, "train_speed(iter/s)": 1.536576 }, { "acc": 0.97927628, "epoch": 12.831028825872979, "grad_norm": 0.255183607339859, "learning_rate": 8.877047466528864e-06, "loss": 0.08319652, "memory(GiB)": 13.7, "step": 27375, "train_speed(iter/s)": 1.536587 }, { "acc": 0.9625, "epoch": 12.833372392781815, "grad_norm": 8.52576732635498, "learning_rate": 8.87655794770485e-06, "loss": 0.12213197, "memory(GiB)": 13.7, "step": 27380, "train_speed(iter/s)": 1.536603 }, { "acc": 0.99750004, "epoch": 12.83571595969065, "grad_norm": 3.8512539863586426, "learning_rate": 8.876068335713139e-06, "loss": 0.02707382, "memory(GiB)": 13.7, "step": 27385, "train_speed(iter/s)": 1.536599 }, { "acc": 0.97972136, "epoch": 12.838059526599485, "grad_norm": 1.752922534942627, "learning_rate": 8.875578630565497e-06, "loss": 0.10568912, "memory(GiB)": 13.7, "step": 27390, "train_speed(iter/s)": 1.536605 }, { "acc": 0.98419476, "epoch": 12.84040309350832, "grad_norm": 4.9177775382995605, "learning_rate": 8.875088832273694e-06, "loss": 0.08695886, "memory(GiB)": 13.7, "step": 27395, "train_speed(iter/s)": 1.536596 }, { "acc": 0.9739419, "epoch": 12.842746660417156, "grad_norm": 1.7273980379104614, "learning_rate": 8.874598940849506e-06, "loss": 0.07883265, "memory(GiB)": 13.7, "step": 27400, "train_speed(iter/s)": 1.536607 }, { "acc": 0.99392853, "epoch": 12.84509022732599, "grad_norm": 66.27556610107422, "learning_rate": 8.874108956304706e-06, "loss": 0.05280543, "memory(GiB)": 13.7, "step": 27405, "train_speed(iter/s)": 1.536612 }, { "acc": 0.97296705, "epoch": 12.847433794234826, "grad_norm": 5.337618350982666, "learning_rate": 8.873618878651073e-06, "loss": 0.1241082, "memory(GiB)": 13.7, "step": 27410, "train_speed(iter/s)": 1.536611 }, { "acc": 0.9604167, "epoch": 12.84977736114366, "grad_norm": 2.2372617721557617, "learning_rate": 8.873128707900387e-06, "loss": 0.15648307, "memory(GiB)": 13.7, "step": 27415, "train_speed(iter/s)": 1.536596 }, { "acc": 0.97654762, "epoch": 12.852120928052496, "grad_norm": 3.5299813747406006, "learning_rate": 8.872638444064428e-06, "loss": 0.06669234, "memory(GiB)": 13.7, "step": 27420, "train_speed(iter/s)": 1.536608 }, { "acc": 0.96873093, "epoch": 12.85446449496133, "grad_norm": 14.290912628173828, "learning_rate": 8.872148087154983e-06, "loss": 0.16879702, "memory(GiB)": 13.7, "step": 27425, "train_speed(iter/s)": 1.536617 }, { "acc": 0.97531242, "epoch": 12.856808061870167, "grad_norm": 6.579897880554199, "learning_rate": 8.871657637183837e-06, "loss": 0.08553826, "memory(GiB)": 13.7, "step": 27430, "train_speed(iter/s)": 1.536613 }, { "acc": 0.98154764, "epoch": 12.859151628779001, "grad_norm": 1.0298824310302734, "learning_rate": 8.87116709416278e-06, "loss": 0.10519191, "memory(GiB)": 13.7, "step": 27435, "train_speed(iter/s)": 1.536605 }, { "acc": 0.97249994, "epoch": 12.861495195687837, "grad_norm": 3.8888490200042725, "learning_rate": 8.870676458103601e-06, "loss": 0.10833095, "memory(GiB)": 13.7, "step": 27440, "train_speed(iter/s)": 1.536601 }, { "acc": 0.9856945, "epoch": 12.863838762596671, "grad_norm": 0.030420472845435143, "learning_rate": 8.870185729018096e-06, "loss": 0.04776284, "memory(GiB)": 13.7, "step": 27445, "train_speed(iter/s)": 1.536595 }, { "acc": 0.97290993, "epoch": 12.866182329505508, "grad_norm": 1.4812581539154053, "learning_rate": 8.869694906918056e-06, "loss": 0.10953914, "memory(GiB)": 13.7, "step": 27450, "train_speed(iter/s)": 1.536616 }, { "acc": 0.96743908, "epoch": 12.868525896414342, "grad_norm": 1.6982964277267456, "learning_rate": 8.869203991815282e-06, "loss": 0.18372135, "memory(GiB)": 13.7, "step": 27455, "train_speed(iter/s)": 1.536631 }, { "acc": 0.96227684, "epoch": 12.870869463323178, "grad_norm": 6.452993392944336, "learning_rate": 8.868712983721576e-06, "loss": 0.12844789, "memory(GiB)": 13.7, "step": 27460, "train_speed(iter/s)": 1.536634 }, { "acc": 0.97422981, "epoch": 12.873213030232012, "grad_norm": 1.2381138801574707, "learning_rate": 8.868221882648736e-06, "loss": 0.14803911, "memory(GiB)": 13.7, "step": 27465, "train_speed(iter/s)": 1.536653 }, { "acc": 0.97307549, "epoch": 12.875556597140848, "grad_norm": 7.754909515380859, "learning_rate": 8.86773068860857e-06, "loss": 0.14139705, "memory(GiB)": 13.7, "step": 27470, "train_speed(iter/s)": 1.536658 }, { "acc": 0.98270226, "epoch": 12.877900164049684, "grad_norm": 8.5753812789917, "learning_rate": 8.867239401612883e-06, "loss": 0.08497736, "memory(GiB)": 13.7, "step": 27475, "train_speed(iter/s)": 1.536656 }, { "acc": 0.98173618, "epoch": 12.880243730958519, "grad_norm": 6.143301963806152, "learning_rate": 8.866748021673483e-06, "loss": 0.09228926, "memory(GiB)": 13.7, "step": 27480, "train_speed(iter/s)": 1.536654 }, { "acc": 0.97837524, "epoch": 12.882587297867355, "grad_norm": 3.2367637157440186, "learning_rate": 8.866256548802182e-06, "loss": 0.11138057, "memory(GiB)": 13.7, "step": 27485, "train_speed(iter/s)": 1.536659 }, { "acc": 0.94955359, "epoch": 12.884930864776189, "grad_norm": 8.747042655944824, "learning_rate": 8.865764983010794e-06, "loss": 0.17423279, "memory(GiB)": 13.7, "step": 27490, "train_speed(iter/s)": 1.536673 }, { "acc": 0.96875, "epoch": 12.887274431685025, "grad_norm": 4.184844493865967, "learning_rate": 8.865273324311134e-06, "loss": 0.18676392, "memory(GiB)": 13.7, "step": 27495, "train_speed(iter/s)": 1.536678 }, { "acc": 0.97594242, "epoch": 12.88961799859386, "grad_norm": 2.9961941242218018, "learning_rate": 8.86478157271502e-06, "loss": 0.13573129, "memory(GiB)": 13.7, "step": 27500, "train_speed(iter/s)": 1.536689 }, { "acc": 0.96447926, "epoch": 12.891961565502696, "grad_norm": 7.582704544067383, "learning_rate": 8.864289728234275e-06, "loss": 0.23661699, "memory(GiB)": 13.7, "step": 27505, "train_speed(iter/s)": 1.536698 }, { "acc": 0.98978624, "epoch": 12.89430513241153, "grad_norm": 4.233827590942383, "learning_rate": 8.863797790880714e-06, "loss": 0.0280353, "memory(GiB)": 13.7, "step": 27510, "train_speed(iter/s)": 1.5367 }, { "acc": 0.98514881, "epoch": 12.896648699320366, "grad_norm": 7.84159517288208, "learning_rate": 8.863305760666169e-06, "loss": 0.08430457, "memory(GiB)": 13.7, "step": 27515, "train_speed(iter/s)": 1.536702 }, { "acc": 0.98000002, "epoch": 12.8989922662292, "grad_norm": 4.312167167663574, "learning_rate": 8.862813637602462e-06, "loss": 0.08677011, "memory(GiB)": 13.7, "step": 27520, "train_speed(iter/s)": 1.536735 }, { "acc": 0.96273594, "epoch": 12.901335833138036, "grad_norm": 7.759736061096191, "learning_rate": 8.862321421701425e-06, "loss": 0.16792865, "memory(GiB)": 13.7, "step": 27525, "train_speed(iter/s)": 1.536759 }, { "acc": 0.97225513, "epoch": 12.90367940004687, "grad_norm": 5.483731269836426, "learning_rate": 8.861829112974888e-06, "loss": 0.14505388, "memory(GiB)": 13.7, "step": 27530, "train_speed(iter/s)": 1.536763 }, { "acc": 0.98133926, "epoch": 12.906022966955707, "grad_norm": 9.06069278717041, "learning_rate": 8.861336711434683e-06, "loss": 0.09161668, "memory(GiB)": 13.7, "step": 27535, "train_speed(iter/s)": 1.53677 }, { "acc": 0.95827827, "epoch": 12.908366533864541, "grad_norm": 12.69376277923584, "learning_rate": 8.86084421709265e-06, "loss": 0.20091543, "memory(GiB)": 13.7, "step": 27540, "train_speed(iter/s)": 1.536783 }, { "acc": 0.95266018, "epoch": 12.910710100773377, "grad_norm": 11.666104316711426, "learning_rate": 8.860351629960622e-06, "loss": 0.16474625, "memory(GiB)": 13.7, "step": 27545, "train_speed(iter/s)": 1.536786 }, { "acc": 0.96890869, "epoch": 12.913053667682213, "grad_norm": 4.703061580657959, "learning_rate": 8.859858950050443e-06, "loss": 0.19234412, "memory(GiB)": 13.7, "step": 27550, "train_speed(iter/s)": 1.536808 }, { "acc": 0.95925598, "epoch": 12.915397234591047, "grad_norm": 0.9064183235168457, "learning_rate": 8.859366177373953e-06, "loss": 0.18781908, "memory(GiB)": 13.7, "step": 27555, "train_speed(iter/s)": 1.536815 }, { "acc": 0.97374458, "epoch": 12.917740801499884, "grad_norm": 6.800901889801025, "learning_rate": 8.858873311942999e-06, "loss": 0.08901492, "memory(GiB)": 13.7, "step": 27560, "train_speed(iter/s)": 1.536825 }, { "acc": 0.9800705, "epoch": 12.920084368408718, "grad_norm": 4.633141040802002, "learning_rate": 8.858380353769424e-06, "loss": 0.08905531, "memory(GiB)": 13.7, "step": 27565, "train_speed(iter/s)": 1.536829 }, { "acc": 0.95351191, "epoch": 12.922427935317554, "grad_norm": 2.3112289905548096, "learning_rate": 8.85788730286508e-06, "loss": 0.22641401, "memory(GiB)": 13.7, "step": 27570, "train_speed(iter/s)": 1.536825 }, { "acc": 0.9782795, "epoch": 12.924771502226388, "grad_norm": 5.611356258392334, "learning_rate": 8.857394159241818e-06, "loss": 0.06690947, "memory(GiB)": 13.7, "step": 27575, "train_speed(iter/s)": 1.536839 }, { "acc": 0.97003965, "epoch": 12.927115069135224, "grad_norm": 5.514248371124268, "learning_rate": 8.856900922911492e-06, "loss": 0.08755067, "memory(GiB)": 13.7, "step": 27580, "train_speed(iter/s)": 1.536836 }, { "acc": 0.97057886, "epoch": 12.929458636044059, "grad_norm": 3.9764599800109863, "learning_rate": 8.856407593885956e-06, "loss": 0.13255255, "memory(GiB)": 13.7, "step": 27585, "train_speed(iter/s)": 1.536858 }, { "acc": 0.97403278, "epoch": 12.931802202952895, "grad_norm": 5.367774963378906, "learning_rate": 8.855914172177071e-06, "loss": 0.11888769, "memory(GiB)": 13.7, "step": 27590, "train_speed(iter/s)": 1.53686 }, { "acc": 0.94880953, "epoch": 12.934145769861729, "grad_norm": 7.608604431152344, "learning_rate": 8.855420657796691e-06, "loss": 0.21124701, "memory(GiB)": 13.7, "step": 27595, "train_speed(iter/s)": 1.536874 }, { "acc": 0.98203373, "epoch": 12.936489336770565, "grad_norm": 2.3318920135498047, "learning_rate": 8.854927050756687e-06, "loss": 0.16104306, "memory(GiB)": 13.7, "step": 27600, "train_speed(iter/s)": 1.536876 }, { "acc": 0.97562504, "epoch": 12.9388329036794, "grad_norm": 6.50673770904541, "learning_rate": 8.854433351068919e-06, "loss": 0.09358838, "memory(GiB)": 13.7, "step": 27605, "train_speed(iter/s)": 1.536888 }, { "acc": 0.97344704, "epoch": 12.941176470588236, "grad_norm": 6.6571478843688965, "learning_rate": 8.853939558745253e-06, "loss": 0.14210339, "memory(GiB)": 13.7, "step": 27610, "train_speed(iter/s)": 1.536902 }, { "acc": 0.96981525, "epoch": 12.94352003749707, "grad_norm": 10.1102933883667, "learning_rate": 8.85344567379756e-06, "loss": 0.17642152, "memory(GiB)": 13.7, "step": 27615, "train_speed(iter/s)": 1.536916 }, { "acc": 0.98511906, "epoch": 12.945863604405906, "grad_norm": 17.4816837310791, "learning_rate": 8.85295169623771e-06, "loss": 0.08903796, "memory(GiB)": 13.7, "step": 27620, "train_speed(iter/s)": 1.536919 }, { "acc": 0.97678986, "epoch": 12.948207171314742, "grad_norm": 6.661534786224365, "learning_rate": 8.852457626077579e-06, "loss": 0.12393892, "memory(GiB)": 13.7, "step": 27625, "train_speed(iter/s)": 1.536934 }, { "acc": 0.98312159, "epoch": 12.950550738223576, "grad_norm": 4.16747522354126, "learning_rate": 8.85196346332904e-06, "loss": 0.13346196, "memory(GiB)": 13.7, "step": 27630, "train_speed(iter/s)": 1.536948 }, { "acc": 0.96123543, "epoch": 12.952894305132412, "grad_norm": 7.737534523010254, "learning_rate": 8.851469208003973e-06, "loss": 0.13909531, "memory(GiB)": 13.7, "step": 27635, "train_speed(iter/s)": 1.536953 }, { "acc": 0.98673325, "epoch": 12.955237872041247, "grad_norm": 0.6705635786056519, "learning_rate": 8.850974860114259e-06, "loss": 0.03665988, "memory(GiB)": 13.7, "step": 27640, "train_speed(iter/s)": 1.536956 }, { "acc": 0.99196434, "epoch": 12.957581438950083, "grad_norm": 0.041187599301338196, "learning_rate": 8.850480419671777e-06, "loss": 0.03669458, "memory(GiB)": 13.7, "step": 27645, "train_speed(iter/s)": 1.536963 }, { "acc": 0.9666666, "epoch": 12.959925005858917, "grad_norm": 6.431072235107422, "learning_rate": 8.849985886688415e-06, "loss": 0.13438228, "memory(GiB)": 13.7, "step": 27650, "train_speed(iter/s)": 1.536969 }, { "acc": 0.97394209, "epoch": 12.962268572767753, "grad_norm": 11.200033187866211, "learning_rate": 8.84949126117606e-06, "loss": 0.14005802, "memory(GiB)": 13.7, "step": 27655, "train_speed(iter/s)": 1.536969 }, { "acc": 0.97883396, "epoch": 12.964612139676587, "grad_norm": 1.2294539213180542, "learning_rate": 8.848996543146598e-06, "loss": 0.08483709, "memory(GiB)": 13.7, "step": 27660, "train_speed(iter/s)": 1.536964 }, { "acc": 0.96270828, "epoch": 12.966955706585424, "grad_norm": 7.4792985916137695, "learning_rate": 8.848501732611923e-06, "loss": 0.17466276, "memory(GiB)": 13.7, "step": 27665, "train_speed(iter/s)": 1.536957 }, { "acc": 0.97301474, "epoch": 12.969299273494258, "grad_norm": 7.380562782287598, "learning_rate": 8.848006829583927e-06, "loss": 0.10632336, "memory(GiB)": 13.7, "step": 27670, "train_speed(iter/s)": 1.536965 }, { "acc": 0.97900858, "epoch": 12.971642840403094, "grad_norm": 4.230636119842529, "learning_rate": 8.847511834074508e-06, "loss": 0.16297412, "memory(GiB)": 13.7, "step": 27675, "train_speed(iter/s)": 1.536969 }, { "acc": 0.98476191, "epoch": 12.973986407311928, "grad_norm": 5.033946990966797, "learning_rate": 8.847016746095562e-06, "loss": 0.07680629, "memory(GiB)": 13.7, "step": 27680, "train_speed(iter/s)": 1.536962 }, { "acc": 0.97679024, "epoch": 12.976329974220764, "grad_norm": 6.07417106628418, "learning_rate": 8.846521565658993e-06, "loss": 0.10913305, "memory(GiB)": 13.7, "step": 27685, "train_speed(iter/s)": 1.536966 }, { "acc": 0.97722902, "epoch": 12.978673541129599, "grad_norm": 5.093864440917969, "learning_rate": 8.846026292776698e-06, "loss": 0.07142633, "memory(GiB)": 13.7, "step": 27690, "train_speed(iter/s)": 1.53697 }, { "acc": 0.98729172, "epoch": 12.981017108038435, "grad_norm": 2.509108304977417, "learning_rate": 8.845530927460585e-06, "loss": 0.06710867, "memory(GiB)": 13.7, "step": 27695, "train_speed(iter/s)": 1.536979 }, { "acc": 0.97016373, "epoch": 12.983360674947269, "grad_norm": 11.052882194519043, "learning_rate": 8.845035469722561e-06, "loss": 0.12976211, "memory(GiB)": 13.7, "step": 27700, "train_speed(iter/s)": 1.53698 }, { "acc": 0.98807545, "epoch": 12.985704241856105, "grad_norm": 2.9198591709136963, "learning_rate": 8.844539919574534e-06, "loss": 0.06566538, "memory(GiB)": 13.7, "step": 27705, "train_speed(iter/s)": 1.536981 }, { "acc": 0.97469006, "epoch": 12.98804780876494, "grad_norm": 7.927314758300781, "learning_rate": 8.844044277028415e-06, "loss": 0.11950786, "memory(GiB)": 13.7, "step": 27710, "train_speed(iter/s)": 1.536998 }, { "acc": 0.98586311, "epoch": 12.990391375673775, "grad_norm": 4.59891414642334, "learning_rate": 8.843548542096122e-06, "loss": 0.05917482, "memory(GiB)": 13.7, "step": 27715, "train_speed(iter/s)": 1.537001 }, { "acc": 0.97572918, "epoch": 12.992734942582612, "grad_norm": 6.361262798309326, "learning_rate": 8.843052714789565e-06, "loss": 0.10332402, "memory(GiB)": 13.7, "step": 27720, "train_speed(iter/s)": 1.536988 }, { "acc": 0.97191591, "epoch": 12.995078509491446, "grad_norm": 1.889780879020691, "learning_rate": 8.842556795120663e-06, "loss": 0.08957827, "memory(GiB)": 13.7, "step": 27725, "train_speed(iter/s)": 1.537004 }, { "acc": 0.98966665, "epoch": 12.997422076400282, "grad_norm": 1.4213452339172363, "learning_rate": 8.84206078310134e-06, "loss": 0.05174065, "memory(GiB)": 13.7, "step": 27730, "train_speed(iter/s)": 1.53701 }, { "acc": 0.97208328, "epoch": 12.999765643309116, "grad_norm": 60.52702713012695, "learning_rate": 8.841564678743516e-06, "loss": 0.12133189, "memory(GiB)": 13.7, "step": 27735, "train_speed(iter/s)": 1.537011 }, { "acc": 0.97569447, "epoch": 13.002109210217952, "grad_norm": 7.59992790222168, "learning_rate": 8.841068482059117e-06, "loss": 0.11903892, "memory(GiB)": 13.7, "step": 27740, "train_speed(iter/s)": 1.536978 }, { "acc": 0.98241205, "epoch": 13.004452777126787, "grad_norm": 5.111220359802246, "learning_rate": 8.840572193060066e-06, "loss": 0.06939278, "memory(GiB)": 13.7, "step": 27745, "train_speed(iter/s)": 1.536968 }, { "acc": 0.9895442, "epoch": 13.006796344035623, "grad_norm": 3.603466510772705, "learning_rate": 8.840075811758298e-06, "loss": 0.09537997, "memory(GiB)": 13.7, "step": 27750, "train_speed(iter/s)": 1.536974 }, { "acc": 0.96730118, "epoch": 13.009139910944457, "grad_norm": 4.564226150512695, "learning_rate": 8.839579338165741e-06, "loss": 0.10864434, "memory(GiB)": 13.7, "step": 27755, "train_speed(iter/s)": 1.536982 }, { "acc": 0.97979164, "epoch": 13.011483477853293, "grad_norm": 1.1933343410491943, "learning_rate": 8.839082772294328e-06, "loss": 0.0669548, "memory(GiB)": 13.7, "step": 27760, "train_speed(iter/s)": 1.536977 }, { "acc": 0.970858, "epoch": 13.013827044762127, "grad_norm": 3.824033260345459, "learning_rate": 8.838586114155996e-06, "loss": 0.16701329, "memory(GiB)": 13.7, "step": 27765, "train_speed(iter/s)": 1.536995 }, { "acc": 0.97588539, "epoch": 13.016170611670963, "grad_norm": 3.2686233520507812, "learning_rate": 8.838089363762683e-06, "loss": 0.06084083, "memory(GiB)": 13.7, "step": 27770, "train_speed(iter/s)": 1.536991 }, { "acc": 0.97477398, "epoch": 13.018514178579798, "grad_norm": 42.344970703125, "learning_rate": 8.83759252112633e-06, "loss": 0.11451102, "memory(GiB)": 13.7, "step": 27775, "train_speed(iter/s)": 1.53699 }, { "acc": 0.97317533, "epoch": 13.020857745488634, "grad_norm": 8.29941177368164, "learning_rate": 8.837095586258876e-06, "loss": 0.0903824, "memory(GiB)": 13.7, "step": 27780, "train_speed(iter/s)": 1.537 }, { "acc": 0.97353401, "epoch": 13.023201312397468, "grad_norm": 10.209437370300293, "learning_rate": 8.83659855917227e-06, "loss": 0.13338289, "memory(GiB)": 13.7, "step": 27785, "train_speed(iter/s)": 1.537013 }, { "acc": 0.97407207, "epoch": 13.025544879306304, "grad_norm": 5.449995994567871, "learning_rate": 8.83610143987846e-06, "loss": 0.12224231, "memory(GiB)": 13.7, "step": 27790, "train_speed(iter/s)": 1.537015 }, { "acc": 0.97510414, "epoch": 13.02788844621514, "grad_norm": 3.2144250869750977, "learning_rate": 8.835604228389388e-06, "loss": 0.1266029, "memory(GiB)": 13.7, "step": 27795, "train_speed(iter/s)": 1.537015 }, { "acc": 0.96869125, "epoch": 13.030232013123975, "grad_norm": 5.317164421081543, "learning_rate": 8.835106924717011e-06, "loss": 0.09888794, "memory(GiB)": 13.7, "step": 27800, "train_speed(iter/s)": 1.537027 }, { "acc": 0.97454863, "epoch": 13.03257558003281, "grad_norm": 5.594313144683838, "learning_rate": 8.83460952887328e-06, "loss": 0.09522811, "memory(GiB)": 13.7, "step": 27805, "train_speed(iter/s)": 1.53703 }, { "acc": 0.98693457, "epoch": 13.034919146941645, "grad_norm": 2.8506991863250732, "learning_rate": 8.834112040870155e-06, "loss": 0.06404433, "memory(GiB)": 13.7, "step": 27810, "train_speed(iter/s)": 1.537028 }, { "acc": 0.99375, "epoch": 13.037262713850481, "grad_norm": 2.1694600582122803, "learning_rate": 8.83361446071959e-06, "loss": 0.05768848, "memory(GiB)": 13.7, "step": 27815, "train_speed(iter/s)": 1.537017 }, { "acc": 0.98247976, "epoch": 13.039606280759315, "grad_norm": 3.798433780670166, "learning_rate": 8.833116788433545e-06, "loss": 0.04689962, "memory(GiB)": 13.7, "step": 27820, "train_speed(iter/s)": 1.53704 }, { "acc": 0.96361609, "epoch": 13.041949847668151, "grad_norm": 4.263322353363037, "learning_rate": 8.832619024023985e-06, "loss": 0.21746464, "memory(GiB)": 13.7, "step": 27825, "train_speed(iter/s)": 1.537057 }, { "acc": 0.97279758, "epoch": 13.044293414576986, "grad_norm": 4.8569722175598145, "learning_rate": 8.832121167502872e-06, "loss": 0.15485177, "memory(GiB)": 13.7, "step": 27830, "train_speed(iter/s)": 1.537073 }, { "acc": 0.98924341, "epoch": 13.046636981485822, "grad_norm": 3.9772322177886963, "learning_rate": 8.831623218882175e-06, "loss": 0.05569682, "memory(GiB)": 13.7, "step": 27835, "train_speed(iter/s)": 1.537074 }, { "acc": 0.98240643, "epoch": 13.048980548394656, "grad_norm": 5.296977996826172, "learning_rate": 8.83112517817386e-06, "loss": 0.06904826, "memory(GiB)": 13.7, "step": 27840, "train_speed(iter/s)": 1.53708 }, { "acc": 0.97424278, "epoch": 13.051324115303492, "grad_norm": 12.861833572387695, "learning_rate": 8.830627045389902e-06, "loss": 0.14594524, "memory(GiB)": 13.7, "step": 27845, "train_speed(iter/s)": 1.537083 }, { "acc": 0.97225695, "epoch": 13.053667682212327, "grad_norm": 3.1245977878570557, "learning_rate": 8.830128820542272e-06, "loss": 0.1238848, "memory(GiB)": 13.7, "step": 27850, "train_speed(iter/s)": 1.537095 }, { "acc": 0.98487015, "epoch": 13.056011249121163, "grad_norm": 3.2420814037323, "learning_rate": 8.829630503642947e-06, "loss": 0.11116669, "memory(GiB)": 13.7, "step": 27855, "train_speed(iter/s)": 1.537095 }, { "acc": 0.98039742, "epoch": 13.058354816029997, "grad_norm": 56.97886657714844, "learning_rate": 8.829132094703903e-06, "loss": 0.12962725, "memory(GiB)": 13.7, "step": 27860, "train_speed(iter/s)": 1.537087 }, { "acc": 0.98841343, "epoch": 13.060698382938833, "grad_norm": 3.2165944576263428, "learning_rate": 8.828633593737123e-06, "loss": 0.03745931, "memory(GiB)": 13.7, "step": 27865, "train_speed(iter/s)": 1.53711 }, { "acc": 0.97075539, "epoch": 13.063041949847667, "grad_norm": 7.19523286819458, "learning_rate": 8.828135000754588e-06, "loss": 0.15530221, "memory(GiB)": 13.7, "step": 27870, "train_speed(iter/s)": 1.537119 }, { "acc": 0.97836571, "epoch": 13.065385516756503, "grad_norm": 5.807712554931641, "learning_rate": 8.827636315768282e-06, "loss": 0.11060644, "memory(GiB)": 13.7, "step": 27875, "train_speed(iter/s)": 1.537137 }, { "acc": 0.97736111, "epoch": 13.06772908366534, "grad_norm": 5.664491653442383, "learning_rate": 8.827137538790192e-06, "loss": 0.08247455, "memory(GiB)": 13.7, "step": 27880, "train_speed(iter/s)": 1.537149 }, { "acc": 0.9833334, "epoch": 13.070072650574174, "grad_norm": 7.292339324951172, "learning_rate": 8.826638669832306e-06, "loss": 0.14528919, "memory(GiB)": 13.7, "step": 27885, "train_speed(iter/s)": 1.537157 }, { "acc": 0.9543601, "epoch": 13.07241621748301, "grad_norm": 36.09888458251953, "learning_rate": 8.826139708906619e-06, "loss": 0.21690934, "memory(GiB)": 13.7, "step": 27890, "train_speed(iter/s)": 1.53715 }, { "acc": 0.97152777, "epoch": 13.074759784391844, "grad_norm": 8.104416847229004, "learning_rate": 8.82564065602512e-06, "loss": 0.09036682, "memory(GiB)": 13.7, "step": 27895, "train_speed(iter/s)": 1.537143 }, { "acc": 0.96479168, "epoch": 13.07710335130068, "grad_norm": 77.72545623779297, "learning_rate": 8.825141511199805e-06, "loss": 0.0901499, "memory(GiB)": 13.7, "step": 27900, "train_speed(iter/s)": 1.537158 }, { "acc": 0.97755203, "epoch": 13.079446918209515, "grad_norm": 3.4270830154418945, "learning_rate": 8.824642274442673e-06, "loss": 0.07191982, "memory(GiB)": 13.7, "step": 27905, "train_speed(iter/s)": 1.537163 }, { "acc": 0.99291668, "epoch": 13.08179048511835, "grad_norm": 1.9625012874603271, "learning_rate": 8.824142945765726e-06, "loss": 0.06256685, "memory(GiB)": 13.7, "step": 27910, "train_speed(iter/s)": 1.537159 }, { "acc": 0.97261372, "epoch": 13.084134052027185, "grad_norm": 3.3867270946502686, "learning_rate": 8.823643525180963e-06, "loss": 0.18596268, "memory(GiB)": 13.7, "step": 27915, "train_speed(iter/s)": 1.537163 }, { "acc": 0.99486113, "epoch": 13.086477618936021, "grad_norm": 0.1379278600215912, "learning_rate": 8.823144012700389e-06, "loss": 0.0255446, "memory(GiB)": 13.7, "step": 27920, "train_speed(iter/s)": 1.537177 }, { "acc": 0.98946505, "epoch": 13.088821185844855, "grad_norm": 4.927330493927002, "learning_rate": 8.822644408336013e-06, "loss": 0.03866891, "memory(GiB)": 13.7, "step": 27925, "train_speed(iter/s)": 1.537182 }, { "acc": 0.97794189, "epoch": 13.091164752753691, "grad_norm": 2.8834667205810547, "learning_rate": 8.82214471209984e-06, "loss": 0.11614341, "memory(GiB)": 13.7, "step": 27930, "train_speed(iter/s)": 1.53718 }, { "acc": 0.98113098, "epoch": 13.093508319662526, "grad_norm": 6.031506538391113, "learning_rate": 8.821644924003886e-06, "loss": 0.08010417, "memory(GiB)": 13.7, "step": 27935, "train_speed(iter/s)": 1.537172 }, { "acc": 0.97017546, "epoch": 13.095851886571362, "grad_norm": 4.415254592895508, "learning_rate": 8.82114504406016e-06, "loss": 0.07458498, "memory(GiB)": 13.7, "step": 27940, "train_speed(iter/s)": 1.537183 }, { "acc": 0.96312494, "epoch": 13.098195453480196, "grad_norm": 7.133242607116699, "learning_rate": 8.820645072280677e-06, "loss": 0.11622752, "memory(GiB)": 13.7, "step": 27945, "train_speed(iter/s)": 1.537198 }, { "acc": 0.99377975, "epoch": 13.100539020389032, "grad_norm": 3.8718695640563965, "learning_rate": 8.82014500867746e-06, "loss": 0.04275254, "memory(GiB)": 13.7, "step": 27950, "train_speed(iter/s)": 1.537204 }, { "acc": 0.9880209, "epoch": 13.102882587297866, "grad_norm": 0.4334529638290405, "learning_rate": 8.819644853262522e-06, "loss": 0.05166779, "memory(GiB)": 13.7, "step": 27955, "train_speed(iter/s)": 1.537205 }, { "acc": 0.98480949, "epoch": 13.105226154206703, "grad_norm": 11.117020606994629, "learning_rate": 8.819144606047893e-06, "loss": 0.10518441, "memory(GiB)": 13.7, "step": 27960, "train_speed(iter/s)": 1.537222 }, { "acc": 0.9819643, "epoch": 13.107569721115539, "grad_norm": 4.746418476104736, "learning_rate": 8.81864426704559e-06, "loss": 0.06606056, "memory(GiB)": 13.7, "step": 27965, "train_speed(iter/s)": 1.537211 }, { "acc": 0.98189812, "epoch": 13.109913288024373, "grad_norm": 4.113626956939697, "learning_rate": 8.81814383626764e-06, "loss": 0.11250198, "memory(GiB)": 13.7, "step": 27970, "train_speed(iter/s)": 1.537222 }, { "acc": 0.96821117, "epoch": 13.112256854933209, "grad_norm": 23.85956382751465, "learning_rate": 8.817643313726078e-06, "loss": 0.13973556, "memory(GiB)": 13.7, "step": 27975, "train_speed(iter/s)": 1.537215 }, { "acc": 0.96613159, "epoch": 13.114600421842043, "grad_norm": 5.2129340171813965, "learning_rate": 8.817142699432932e-06, "loss": 0.17626286, "memory(GiB)": 13.7, "step": 27980, "train_speed(iter/s)": 1.537213 }, { "acc": 0.95984039, "epoch": 13.11694398875088, "grad_norm": 6.964299201965332, "learning_rate": 8.81664199340023e-06, "loss": 0.11564549, "memory(GiB)": 13.7, "step": 27985, "train_speed(iter/s)": 1.537222 }, { "acc": 0.97345428, "epoch": 13.119287555659714, "grad_norm": 8.935334205627441, "learning_rate": 8.816141195640014e-06, "loss": 0.10070529, "memory(GiB)": 13.7, "step": 27990, "train_speed(iter/s)": 1.537242 }, { "acc": 0.96716728, "epoch": 13.12163112256855, "grad_norm": 9.473487854003906, "learning_rate": 8.81564030616432e-06, "loss": 0.1584355, "memory(GiB)": 13.7, "step": 27995, "train_speed(iter/s)": 1.537249 }, { "acc": 0.96953049, "epoch": 13.123974689477384, "grad_norm": 6.7555251121521, "learning_rate": 8.815139324985184e-06, "loss": 0.15002842, "memory(GiB)": 13.7, "step": 28000, "train_speed(iter/s)": 1.537253 }, { "acc": 0.99008923, "epoch": 13.12631825638622, "grad_norm": 4.670015811920166, "learning_rate": 8.814638252114652e-06, "loss": 0.07025076, "memory(GiB)": 13.7, "step": 28005, "train_speed(iter/s)": 1.537282 }, { "acc": 0.97870035, "epoch": 13.128661823295054, "grad_norm": 293.1585388183594, "learning_rate": 8.814137087564767e-06, "loss": 0.08667626, "memory(GiB)": 13.7, "step": 28010, "train_speed(iter/s)": 1.537278 }, { "acc": 0.96661711, "epoch": 13.13100539020389, "grad_norm": 14.861385345458984, "learning_rate": 8.813635831347576e-06, "loss": 0.15781325, "memory(GiB)": 13.7, "step": 28015, "train_speed(iter/s)": 1.53728 }, { "acc": 0.97231064, "epoch": 13.133348957112725, "grad_norm": 1.0647382736206055, "learning_rate": 8.813134483475126e-06, "loss": 0.1280203, "memory(GiB)": 13.7, "step": 28020, "train_speed(iter/s)": 1.53729 }, { "acc": 0.98165874, "epoch": 13.135692524021561, "grad_norm": 5.343130588531494, "learning_rate": 8.812633043959468e-06, "loss": 0.09669266, "memory(GiB)": 13.7, "step": 28025, "train_speed(iter/s)": 1.537305 }, { "acc": 0.96343756, "epoch": 13.138036090930395, "grad_norm": 7.442403316497803, "learning_rate": 8.812131512812655e-06, "loss": 0.16589372, "memory(GiB)": 13.7, "step": 28030, "train_speed(iter/s)": 1.537307 }, { "acc": 0.95604992, "epoch": 13.140379657839231, "grad_norm": 10.119285583496094, "learning_rate": 8.811629890046744e-06, "loss": 0.17034843, "memory(GiB)": 13.7, "step": 28035, "train_speed(iter/s)": 1.537326 }, { "acc": 0.95446434, "epoch": 13.142723224748067, "grad_norm": 3.887835741043091, "learning_rate": 8.81112817567379e-06, "loss": 0.13158526, "memory(GiB)": 13.7, "step": 28040, "train_speed(iter/s)": 1.537341 }, { "acc": 0.97421207, "epoch": 13.145066791656902, "grad_norm": 2.4667391777038574, "learning_rate": 8.810626369705854e-06, "loss": 0.07133258, "memory(GiB)": 13.7, "step": 28045, "train_speed(iter/s)": 1.537357 }, { "acc": 0.98093748, "epoch": 13.147410358565738, "grad_norm": 3.449230670928955, "learning_rate": 8.810124472154997e-06, "loss": 0.08392878, "memory(GiB)": 13.7, "step": 28050, "train_speed(iter/s)": 1.537358 }, { "acc": 0.9770834, "epoch": 13.149753925474572, "grad_norm": 3.5727744102478027, "learning_rate": 8.809622483033284e-06, "loss": 0.11283623, "memory(GiB)": 13.7, "step": 28055, "train_speed(iter/s)": 1.53736 }, { "acc": 0.97373524, "epoch": 13.152097492383408, "grad_norm": 0.3329572379589081, "learning_rate": 8.80912040235278e-06, "loss": 0.06539575, "memory(GiB)": 13.7, "step": 28060, "train_speed(iter/s)": 1.537365 }, { "acc": 0.98348961, "epoch": 13.154441059292242, "grad_norm": 4.398420333862305, "learning_rate": 8.808618230125553e-06, "loss": 0.0404221, "memory(GiB)": 13.7, "step": 28065, "train_speed(iter/s)": 1.537368 }, { "acc": 0.98416672, "epoch": 13.156784626201079, "grad_norm": 5.853534698486328, "learning_rate": 8.808115966363675e-06, "loss": 0.08089569, "memory(GiB)": 13.7, "step": 28070, "train_speed(iter/s)": 1.53739 }, { "acc": 0.98480158, "epoch": 13.159128193109913, "grad_norm": 5.697215557098389, "learning_rate": 8.807613611079218e-06, "loss": 0.0747906, "memory(GiB)": 13.7, "step": 28075, "train_speed(iter/s)": 1.537395 }, { "acc": 0.9833333, "epoch": 13.161471760018749, "grad_norm": 11.296794891357422, "learning_rate": 8.807111164284257e-06, "loss": 0.13978951, "memory(GiB)": 13.7, "step": 28080, "train_speed(iter/s)": 1.537396 }, { "acc": 0.98663197, "epoch": 13.163815326927583, "grad_norm": 4.8971147537231445, "learning_rate": 8.806608625990872e-06, "loss": 0.0792667, "memory(GiB)": 13.7, "step": 28085, "train_speed(iter/s)": 1.5374 }, { "acc": 0.97510414, "epoch": 13.16615889383642, "grad_norm": 8.255985260009766, "learning_rate": 8.806105996211137e-06, "loss": 0.09626985, "memory(GiB)": 13.7, "step": 28090, "train_speed(iter/s)": 1.537398 }, { "acc": 0.98619757, "epoch": 13.168502460745254, "grad_norm": 5.021232604980469, "learning_rate": 8.805603274957134e-06, "loss": 0.08999963, "memory(GiB)": 13.7, "step": 28095, "train_speed(iter/s)": 1.537407 }, { "acc": 0.97493057, "epoch": 13.17084602765409, "grad_norm": 39.52009201049805, "learning_rate": 8.805100462240953e-06, "loss": 0.1025138, "memory(GiB)": 13.7, "step": 28100, "train_speed(iter/s)": 1.537415 }, { "acc": 0.972822, "epoch": 13.173189594562924, "grad_norm": 10.667841911315918, "learning_rate": 8.804597558074675e-06, "loss": 0.13860695, "memory(GiB)": 13.7, "step": 28105, "train_speed(iter/s)": 1.537425 }, { "acc": 0.97680798, "epoch": 13.17553316147176, "grad_norm": 1.8968377113342285, "learning_rate": 8.804094562470388e-06, "loss": 0.10937983, "memory(GiB)": 13.7, "step": 28110, "train_speed(iter/s)": 1.537432 }, { "acc": 0.97629356, "epoch": 13.177876728380594, "grad_norm": 3.7507967948913574, "learning_rate": 8.803591475440185e-06, "loss": 0.10743902, "memory(GiB)": 13.7, "step": 28115, "train_speed(iter/s)": 1.537442 }, { "acc": 0.98239002, "epoch": 13.18022029528943, "grad_norm": 5.745625019073486, "learning_rate": 8.803088296996154e-06, "loss": 0.11159772, "memory(GiB)": 13.7, "step": 28120, "train_speed(iter/s)": 1.537452 }, { "acc": 0.98143425, "epoch": 13.182563862198267, "grad_norm": 4.470946788787842, "learning_rate": 8.802585027150398e-06, "loss": 0.1135235, "memory(GiB)": 13.7, "step": 28125, "train_speed(iter/s)": 1.537457 }, { "acc": 0.984375, "epoch": 13.184907429107101, "grad_norm": 0.003200054634362459, "learning_rate": 8.802081665915004e-06, "loss": 0.04221747, "memory(GiB)": 13.7, "step": 28130, "train_speed(iter/s)": 1.537459 }, { "acc": 0.97312412, "epoch": 13.187250996015937, "grad_norm": 0.7744287252426147, "learning_rate": 8.801578213302078e-06, "loss": 0.09121331, "memory(GiB)": 13.7, "step": 28135, "train_speed(iter/s)": 1.537448 }, { "acc": 0.97112427, "epoch": 13.189594562924771, "grad_norm": 6.47655725479126, "learning_rate": 8.801074669323719e-06, "loss": 0.15030845, "memory(GiB)": 13.7, "step": 28140, "train_speed(iter/s)": 1.537456 }, { "acc": 0.96083336, "epoch": 13.191938129833607, "grad_norm": 6.8307366371154785, "learning_rate": 8.800571033992031e-06, "loss": 0.10761758, "memory(GiB)": 13.7, "step": 28145, "train_speed(iter/s)": 1.537462 }, { "acc": 0.9817708, "epoch": 13.194281696742442, "grad_norm": 10.477767944335938, "learning_rate": 8.80006730731912e-06, "loss": 0.07137095, "memory(GiB)": 13.7, "step": 28150, "train_speed(iter/s)": 1.537471 }, { "acc": 0.986973, "epoch": 13.196625263651278, "grad_norm": 1.6378587484359741, "learning_rate": 8.799563489317091e-06, "loss": 0.08078592, "memory(GiB)": 13.7, "step": 28155, "train_speed(iter/s)": 1.537464 }, { "acc": 0.97062387, "epoch": 13.198968830560112, "grad_norm": 2.999133825302124, "learning_rate": 8.79905957999806e-06, "loss": 0.12134364, "memory(GiB)": 13.7, "step": 28160, "train_speed(iter/s)": 1.537472 }, { "acc": 0.97006397, "epoch": 13.201312397468948, "grad_norm": 2.0498125553131104, "learning_rate": 8.798555579374133e-06, "loss": 0.11559486, "memory(GiB)": 13.7, "step": 28165, "train_speed(iter/s)": 1.537496 }, { "acc": 0.98194447, "epoch": 13.203655964377782, "grad_norm": 4.358509540557861, "learning_rate": 8.79805148745743e-06, "loss": 0.09855447, "memory(GiB)": 13.7, "step": 28170, "train_speed(iter/s)": 1.537492 }, { "acc": 0.98781252, "epoch": 13.205999531286619, "grad_norm": 1.3106893301010132, "learning_rate": 8.797547304260064e-06, "loss": 0.04438402, "memory(GiB)": 13.7, "step": 28175, "train_speed(iter/s)": 1.537488 }, { "acc": 0.97091722, "epoch": 13.208343098195453, "grad_norm": 5.5251922607421875, "learning_rate": 8.797043029794155e-06, "loss": 0.1860978, "memory(GiB)": 13.7, "step": 28180, "train_speed(iter/s)": 1.537503 }, { "acc": 0.96791668, "epoch": 13.210686665104289, "grad_norm": 4.451453685760498, "learning_rate": 8.796538664071825e-06, "loss": 0.20257754, "memory(GiB)": 13.7, "step": 28185, "train_speed(iter/s)": 1.537521 }, { "acc": 0.97264652, "epoch": 13.213030232013123, "grad_norm": 4.89556884765625, "learning_rate": 8.796034207105193e-06, "loss": 0.1412643, "memory(GiB)": 13.7, "step": 28190, "train_speed(iter/s)": 1.53752 }, { "acc": 0.97767811, "epoch": 13.21537379892196, "grad_norm": 0.7372888326644897, "learning_rate": 8.795529658906393e-06, "loss": 0.14448349, "memory(GiB)": 13.7, "step": 28195, "train_speed(iter/s)": 1.537522 }, { "acc": 0.98275681, "epoch": 13.217717365830794, "grad_norm": 11.295888900756836, "learning_rate": 8.795025019487546e-06, "loss": 0.08174307, "memory(GiB)": 13.7, "step": 28200, "train_speed(iter/s)": 1.537525 }, { "acc": 0.97791662, "epoch": 13.22006093273963, "grad_norm": 6.0934600830078125, "learning_rate": 8.794520288860781e-06, "loss": 0.08471563, "memory(GiB)": 13.7, "step": 28205, "train_speed(iter/s)": 1.537531 }, { "acc": 0.9807292, "epoch": 13.222404499648466, "grad_norm": 11.112381935119629, "learning_rate": 8.794015467038236e-06, "loss": 0.09803185, "memory(GiB)": 13.7, "step": 28210, "train_speed(iter/s)": 1.537529 }, { "acc": 0.9791667, "epoch": 13.2247480665573, "grad_norm": 7.1545515060424805, "learning_rate": 8.793510554032039e-06, "loss": 0.15589215, "memory(GiB)": 13.7, "step": 28215, "train_speed(iter/s)": 1.537541 }, { "acc": 0.992663, "epoch": 13.227091633466136, "grad_norm": 4.440087795257568, "learning_rate": 8.79300554985433e-06, "loss": 0.0396288, "memory(GiB)": 13.7, "step": 28220, "train_speed(iter/s)": 1.537546 }, { "acc": 0.97875996, "epoch": 13.22943520037497, "grad_norm": 5.12668514251709, "learning_rate": 8.792500454517247e-06, "loss": 0.08729523, "memory(GiB)": 13.7, "step": 28225, "train_speed(iter/s)": 1.537554 }, { "acc": 0.98383923, "epoch": 13.231778767283807, "grad_norm": 4.78843355178833, "learning_rate": 8.791995268032932e-06, "loss": 0.07141135, "memory(GiB)": 13.7, "step": 28230, "train_speed(iter/s)": 1.537562 }, { "acc": 0.97511368, "epoch": 13.23412233419264, "grad_norm": 3.060637950897217, "learning_rate": 8.791489990413526e-06, "loss": 0.13731298, "memory(GiB)": 13.7, "step": 28235, "train_speed(iter/s)": 1.537557 }, { "acc": 0.9792634, "epoch": 13.236465901101477, "grad_norm": 10.273519515991211, "learning_rate": 8.790984621671176e-06, "loss": 0.10389309, "memory(GiB)": 13.7, "step": 28240, "train_speed(iter/s)": 1.537572 }, { "acc": 0.98075886, "epoch": 13.238809468010311, "grad_norm": 2.678678274154663, "learning_rate": 8.790479161818026e-06, "loss": 0.06859939, "memory(GiB)": 13.7, "step": 28245, "train_speed(iter/s)": 1.537589 }, { "acc": 0.9760416, "epoch": 13.241153034919147, "grad_norm": 0.5776465535163879, "learning_rate": 8.78997361086623e-06, "loss": 0.11530203, "memory(GiB)": 13.7, "step": 28250, "train_speed(iter/s)": 1.537594 }, { "acc": 0.99051476, "epoch": 13.243496601827982, "grad_norm": 1.1871070861816406, "learning_rate": 8.789467968827938e-06, "loss": 0.03986599, "memory(GiB)": 13.7, "step": 28255, "train_speed(iter/s)": 1.537605 }, { "acc": 0.98738422, "epoch": 13.245840168736818, "grad_norm": 1.6383490562438965, "learning_rate": 8.788962235715303e-06, "loss": 0.0664268, "memory(GiB)": 13.7, "step": 28260, "train_speed(iter/s)": 1.537622 }, { "acc": 0.9890625, "epoch": 13.248183735645652, "grad_norm": 0.10199781507253647, "learning_rate": 8.78845641154048e-06, "loss": 0.03467699, "memory(GiB)": 13.7, "step": 28265, "train_speed(iter/s)": 1.537627 }, { "acc": 0.97120991, "epoch": 13.250527302554488, "grad_norm": 6.817938804626465, "learning_rate": 8.787950496315631e-06, "loss": 0.14388262, "memory(GiB)": 13.7, "step": 28270, "train_speed(iter/s)": 1.537634 }, { "acc": 0.96266365, "epoch": 13.252870869463322, "grad_norm": 4.993860244750977, "learning_rate": 8.787444490052916e-06, "loss": 0.12150259, "memory(GiB)": 13.7, "step": 28275, "train_speed(iter/s)": 1.537654 }, { "acc": 0.97850275, "epoch": 13.255214436372158, "grad_norm": 1.560866117477417, "learning_rate": 8.786938392764496e-06, "loss": 0.12791134, "memory(GiB)": 13.7, "step": 28280, "train_speed(iter/s)": 1.537659 }, { "acc": 0.98173609, "epoch": 13.257558003280995, "grad_norm": 3.65085768699646, "learning_rate": 8.786432204462536e-06, "loss": 0.14259112, "memory(GiB)": 13.7, "step": 28285, "train_speed(iter/s)": 1.53767 }, { "acc": 0.98373508, "epoch": 13.259901570189829, "grad_norm": 4.828186511993408, "learning_rate": 8.785925925159202e-06, "loss": 0.06803716, "memory(GiB)": 13.7, "step": 28290, "train_speed(iter/s)": 1.537677 }, { "acc": 0.98271713, "epoch": 13.262245137098665, "grad_norm": 7.42336368560791, "learning_rate": 8.785419554866668e-06, "loss": 0.06798553, "memory(GiB)": 13.7, "step": 28295, "train_speed(iter/s)": 1.537677 }, { "acc": 0.96835318, "epoch": 13.2645887040075, "grad_norm": 24.858104705810547, "learning_rate": 8.7849130935971e-06, "loss": 0.14834468, "memory(GiB)": 13.7, "step": 28300, "train_speed(iter/s)": 1.537678 }, { "acc": 0.98042116, "epoch": 13.266932270916335, "grad_norm": 4.832225799560547, "learning_rate": 8.784406541362675e-06, "loss": 0.05394979, "memory(GiB)": 13.7, "step": 28305, "train_speed(iter/s)": 1.537699 }, { "acc": 0.97990532, "epoch": 13.26927583782517, "grad_norm": 5.169865131378174, "learning_rate": 8.783899898175568e-06, "loss": 0.07600873, "memory(GiB)": 13.7, "step": 28310, "train_speed(iter/s)": 1.53771 }, { "acc": 0.96420994, "epoch": 13.271619404734006, "grad_norm": 6.949633598327637, "learning_rate": 8.783393164047955e-06, "loss": 0.1274415, "memory(GiB)": 13.7, "step": 28315, "train_speed(iter/s)": 1.537725 }, { "acc": 0.99020834, "epoch": 13.27396297164284, "grad_norm": 6.770183086395264, "learning_rate": 8.78288633899202e-06, "loss": 0.03975141, "memory(GiB)": 13.7, "step": 28320, "train_speed(iter/s)": 1.537723 }, { "acc": 0.97081909, "epoch": 13.276306538551676, "grad_norm": 6.720524787902832, "learning_rate": 8.782379423019943e-06, "loss": 0.17290168, "memory(GiB)": 13.7, "step": 28325, "train_speed(iter/s)": 1.53773 }, { "acc": 0.97495832, "epoch": 13.27865010546051, "grad_norm": 3.030022621154785, "learning_rate": 8.781872416143909e-06, "loss": 0.15353357, "memory(GiB)": 13.7, "step": 28330, "train_speed(iter/s)": 1.537729 }, { "acc": 0.97558041, "epoch": 13.280993672369346, "grad_norm": 1.9848815202713013, "learning_rate": 8.781365318376105e-06, "loss": 0.12527618, "memory(GiB)": 13.7, "step": 28335, "train_speed(iter/s)": 1.537739 }, { "acc": 0.98857136, "epoch": 13.28333723927818, "grad_norm": 1.2386974096298218, "learning_rate": 8.78085812972872e-06, "loss": 0.04472063, "memory(GiB)": 13.7, "step": 28340, "train_speed(iter/s)": 1.537739 }, { "acc": 0.98697262, "epoch": 13.285680806187017, "grad_norm": 4.758920192718506, "learning_rate": 8.780350850213945e-06, "loss": 0.07459409, "memory(GiB)": 13.7, "step": 28345, "train_speed(iter/s)": 1.537758 }, { "acc": 0.97145824, "epoch": 13.288024373095851, "grad_norm": 16.995174407958984, "learning_rate": 8.779843479843973e-06, "loss": 0.09727951, "memory(GiB)": 13.7, "step": 28350, "train_speed(iter/s)": 1.53778 }, { "acc": 0.98808613, "epoch": 13.290367940004687, "grad_norm": 3.503434658050537, "learning_rate": 8.779336018630999e-06, "loss": 0.06127136, "memory(GiB)": 13.7, "step": 28355, "train_speed(iter/s)": 1.537787 }, { "acc": 0.9833334, "epoch": 13.292711506913522, "grad_norm": 0.47708961367607117, "learning_rate": 8.778828466587224e-06, "loss": 0.10933192, "memory(GiB)": 13.7, "step": 28360, "train_speed(iter/s)": 1.537799 }, { "acc": 0.9901042, "epoch": 13.295055073822358, "grad_norm": 3.8469009399414062, "learning_rate": 8.778320823724843e-06, "loss": 0.05109027, "memory(GiB)": 13.7, "step": 28365, "train_speed(iter/s)": 1.537797 }, { "acc": 0.97848959, "epoch": 13.297398640731194, "grad_norm": 2.21197772026062, "learning_rate": 8.777813090056062e-06, "loss": 0.12700835, "memory(GiB)": 13.7, "step": 28370, "train_speed(iter/s)": 1.5378 }, { "acc": 0.99020834, "epoch": 13.299742207640028, "grad_norm": 3.9629580974578857, "learning_rate": 8.777305265593083e-06, "loss": 0.04768681, "memory(GiB)": 13.7, "step": 28375, "train_speed(iter/s)": 1.537794 }, { "acc": 0.95857143, "epoch": 13.302085774548864, "grad_norm": 6.282837390899658, "learning_rate": 8.776797350348113e-06, "loss": 0.12871816, "memory(GiB)": 13.7, "step": 28380, "train_speed(iter/s)": 1.537804 }, { "acc": 0.965625, "epoch": 13.304429341457698, "grad_norm": 10.794049263000488, "learning_rate": 8.776289344333361e-06, "loss": 0.20838277, "memory(GiB)": 13.7, "step": 28385, "train_speed(iter/s)": 1.537798 }, { "acc": 0.98292408, "epoch": 13.306772908366534, "grad_norm": 2.5032315254211426, "learning_rate": 8.775781247561038e-06, "loss": 0.06896064, "memory(GiB)": 13.7, "step": 28390, "train_speed(iter/s)": 1.537802 }, { "acc": 0.9801609, "epoch": 13.309116475275369, "grad_norm": 48.156097412109375, "learning_rate": 8.775273060043355e-06, "loss": 0.14901202, "memory(GiB)": 13.7, "step": 28395, "train_speed(iter/s)": 1.537822 }, { "acc": 0.98458338, "epoch": 13.311460042184205, "grad_norm": 5.329888820648193, "learning_rate": 8.774764781792533e-06, "loss": 0.06812648, "memory(GiB)": 13.7, "step": 28400, "train_speed(iter/s)": 1.537844 }, { "acc": 0.9671875, "epoch": 13.31380360909304, "grad_norm": 1.160386323928833, "learning_rate": 8.774256412820782e-06, "loss": 0.10052335, "memory(GiB)": 13.7, "step": 28405, "train_speed(iter/s)": 1.537851 }, { "acc": 0.97501059, "epoch": 13.316147176001875, "grad_norm": 10.566581726074219, "learning_rate": 8.773747953140323e-06, "loss": 0.09340651, "memory(GiB)": 13.7, "step": 28410, "train_speed(iter/s)": 1.537852 }, { "acc": 0.96062498, "epoch": 13.31849074291071, "grad_norm": 8.028095245361328, "learning_rate": 8.773239402763381e-06, "loss": 0.13981946, "memory(GiB)": 13.7, "step": 28415, "train_speed(iter/s)": 1.537856 }, { "acc": 0.98208618, "epoch": 13.320834309819546, "grad_norm": 6.313863277435303, "learning_rate": 8.77273076170218e-06, "loss": 0.07481328, "memory(GiB)": 13.7, "step": 28420, "train_speed(iter/s)": 1.537875 }, { "acc": 0.98298607, "epoch": 13.32317787672838, "grad_norm": 2.6609272956848145, "learning_rate": 8.772222029968941e-06, "loss": 0.0578709, "memory(GiB)": 13.7, "step": 28425, "train_speed(iter/s)": 1.537874 }, { "acc": 0.9822917, "epoch": 13.325521443637216, "grad_norm": 1.9234685897827148, "learning_rate": 8.771713207575898e-06, "loss": 0.1031791, "memory(GiB)": 13.7, "step": 28430, "train_speed(iter/s)": 1.53787 }, { "acc": 0.98136444, "epoch": 13.32786501054605, "grad_norm": 6.997575759887695, "learning_rate": 8.771204294535278e-06, "loss": 0.08960345, "memory(GiB)": 13.7, "step": 28435, "train_speed(iter/s)": 1.537872 }, { "acc": 0.98162985, "epoch": 13.330208577454886, "grad_norm": 10.645719528198242, "learning_rate": 8.770695290859313e-06, "loss": 0.09152087, "memory(GiB)": 13.7, "step": 28440, "train_speed(iter/s)": 1.537874 }, { "acc": 0.97705669, "epoch": 13.33255214436372, "grad_norm": 3.826244831085205, "learning_rate": 8.770186196560241e-06, "loss": 0.15336698, "memory(GiB)": 13.7, "step": 28445, "train_speed(iter/s)": 1.537883 }, { "acc": 0.97455359, "epoch": 13.334895711272557, "grad_norm": 7.051507472991943, "learning_rate": 8.769677011650297e-06, "loss": 0.12565823, "memory(GiB)": 13.7, "step": 28450, "train_speed(iter/s)": 1.537883 }, { "acc": 0.95738096, "epoch": 13.337239278181393, "grad_norm": 10.226882934570312, "learning_rate": 8.76916773614172e-06, "loss": 0.256213, "memory(GiB)": 13.7, "step": 28455, "train_speed(iter/s)": 1.537888 }, { "acc": 0.9798151, "epoch": 13.339582845090227, "grad_norm": 3.1948039531707764, "learning_rate": 8.768658370046751e-06, "loss": 0.05501112, "memory(GiB)": 13.7, "step": 28460, "train_speed(iter/s)": 1.537893 }, { "acc": 0.99147587, "epoch": 13.341926411999063, "grad_norm": 1.965813159942627, "learning_rate": 8.768148913377636e-06, "loss": 0.06844675, "memory(GiB)": 13.7, "step": 28465, "train_speed(iter/s)": 1.537908 }, { "acc": 0.97517862, "epoch": 13.344269978907898, "grad_norm": 4.565713882446289, "learning_rate": 8.767639366146618e-06, "loss": 0.10380124, "memory(GiB)": 13.7, "step": 28470, "train_speed(iter/s)": 1.537903 }, { "acc": 0.95323868, "epoch": 13.346613545816734, "grad_norm": 6.602987766265869, "learning_rate": 8.767129728365944e-06, "loss": 0.16699201, "memory(GiB)": 13.7, "step": 28475, "train_speed(iter/s)": 1.537902 }, { "acc": 0.98147831, "epoch": 13.348957112725568, "grad_norm": 5.704309463500977, "learning_rate": 8.766620000047867e-06, "loss": 0.08032165, "memory(GiB)": 13.7, "step": 28480, "train_speed(iter/s)": 1.537928 }, { "acc": 0.96748285, "epoch": 13.351300679634404, "grad_norm": 6.2907938957214355, "learning_rate": 8.766110181204638e-06, "loss": 0.20476837, "memory(GiB)": 13.7, "step": 28485, "train_speed(iter/s)": 1.537939 }, { "acc": 0.96579056, "epoch": 13.353644246543238, "grad_norm": 32.266082763671875, "learning_rate": 8.765600271848512e-06, "loss": 0.16389079, "memory(GiB)": 13.7, "step": 28490, "train_speed(iter/s)": 1.537951 }, { "acc": 0.97096758, "epoch": 13.355987813452074, "grad_norm": 10.542696952819824, "learning_rate": 8.765090271991744e-06, "loss": 0.15324697, "memory(GiB)": 13.7, "step": 28495, "train_speed(iter/s)": 1.537955 }, { "acc": 0.990382, "epoch": 13.358331380360909, "grad_norm": 0.27736327052116394, "learning_rate": 8.764580181646593e-06, "loss": 0.03972906, "memory(GiB)": 13.7, "step": 28500, "train_speed(iter/s)": 1.53795 }, { "acc": 0.95660715, "epoch": 13.360674947269745, "grad_norm": 5.326908111572266, "learning_rate": 8.764070000825321e-06, "loss": 0.13675892, "memory(GiB)": 13.7, "step": 28505, "train_speed(iter/s)": 1.537955 }, { "acc": 0.98265629, "epoch": 13.363018514178579, "grad_norm": 2.910541534423828, "learning_rate": 8.76355972954019e-06, "loss": 0.04008013, "memory(GiB)": 13.7, "step": 28510, "train_speed(iter/s)": 1.537967 }, { "acc": 0.98478088, "epoch": 13.365362081087415, "grad_norm": 1.7421226501464844, "learning_rate": 8.763049367803467e-06, "loss": 0.07769591, "memory(GiB)": 13.7, "step": 28515, "train_speed(iter/s)": 1.537978 }, { "acc": 0.97673607, "epoch": 13.36770564799625, "grad_norm": 6.503571510314941, "learning_rate": 8.762538915627417e-06, "loss": 0.11448295, "memory(GiB)": 13.7, "step": 28520, "train_speed(iter/s)": 1.537982 }, { "acc": 0.96788425, "epoch": 13.370049214905086, "grad_norm": 3.107727527618408, "learning_rate": 8.762028373024311e-06, "loss": 0.17586976, "memory(GiB)": 13.7, "step": 28525, "train_speed(iter/s)": 1.537985 }, { "acc": 0.97411995, "epoch": 13.372392781813922, "grad_norm": 4.257567882537842, "learning_rate": 8.76151774000642e-06, "loss": 0.08720176, "memory(GiB)": 13.7, "step": 28530, "train_speed(iter/s)": 1.538006 }, { "acc": 0.9802084, "epoch": 13.374736348722756, "grad_norm": 0.6607383489608765, "learning_rate": 8.761007016586017e-06, "loss": 0.09443694, "memory(GiB)": 13.7, "step": 28535, "train_speed(iter/s)": 1.538016 }, { "acc": 0.98300591, "epoch": 13.377079915631592, "grad_norm": 0.4660875201225281, "learning_rate": 8.760496202775382e-06, "loss": 0.0543474, "memory(GiB)": 13.7, "step": 28540, "train_speed(iter/s)": 1.538015 }, { "acc": 0.98270836, "epoch": 13.379423482540426, "grad_norm": 12.736743927001953, "learning_rate": 8.759985298586791e-06, "loss": 0.11588596, "memory(GiB)": 13.7, "step": 28545, "train_speed(iter/s)": 1.538031 }, { "acc": 0.98579073, "epoch": 13.381767049449262, "grad_norm": 1.2724815607070923, "learning_rate": 8.759474304032524e-06, "loss": 0.1097331, "memory(GiB)": 13.7, "step": 28550, "train_speed(iter/s)": 1.538037 }, { "acc": 0.96342268, "epoch": 13.384110616358097, "grad_norm": 4.899837017059326, "learning_rate": 8.758963219124863e-06, "loss": 0.17483704, "memory(GiB)": 13.7, "step": 28555, "train_speed(iter/s)": 1.538056 }, { "acc": 0.97253981, "epoch": 13.386454183266933, "grad_norm": 4.048274040222168, "learning_rate": 8.758452043876093e-06, "loss": 0.11371839, "memory(GiB)": 13.7, "step": 28560, "train_speed(iter/s)": 1.538073 }, { "acc": 0.98094254, "epoch": 13.388797750175767, "grad_norm": 0.689968466758728, "learning_rate": 8.757940778298503e-06, "loss": 0.07798611, "memory(GiB)": 13.7, "step": 28565, "train_speed(iter/s)": 1.538094 }, { "acc": 0.97380457, "epoch": 13.391141317084603, "grad_norm": 6.060851573944092, "learning_rate": 8.757429422404382e-06, "loss": 0.11957139, "memory(GiB)": 13.7, "step": 28570, "train_speed(iter/s)": 1.538093 }, { "acc": 0.97197876, "epoch": 13.393484883993438, "grad_norm": 8.117674827575684, "learning_rate": 8.756917976206018e-06, "loss": 0.07839465, "memory(GiB)": 13.7, "step": 28575, "train_speed(iter/s)": 1.538093 }, { "acc": 0.96899071, "epoch": 13.395828450902274, "grad_norm": 2.83542537689209, "learning_rate": 8.756406439715707e-06, "loss": 0.15750417, "memory(GiB)": 13.7, "step": 28580, "train_speed(iter/s)": 1.538104 }, { "acc": 0.97732944, "epoch": 13.398172017811108, "grad_norm": 1.5022873878479004, "learning_rate": 8.755894812945745e-06, "loss": 0.12653033, "memory(GiB)": 13.7, "step": 28585, "train_speed(iter/s)": 1.538109 }, { "acc": 0.98395824, "epoch": 13.400515584719944, "grad_norm": 3.563667058944702, "learning_rate": 8.755383095908429e-06, "loss": 0.06978095, "memory(GiB)": 13.7, "step": 28590, "train_speed(iter/s)": 1.538111 }, { "acc": 0.97482376, "epoch": 13.402859151628778, "grad_norm": 3.306114912033081, "learning_rate": 8.754871288616059e-06, "loss": 0.10683092, "memory(GiB)": 13.7, "step": 28595, "train_speed(iter/s)": 1.538108 }, { "acc": 0.97328634, "epoch": 13.405202718537614, "grad_norm": 3.5840821266174316, "learning_rate": 8.754359391080936e-06, "loss": 0.19591253, "memory(GiB)": 13.7, "step": 28600, "train_speed(iter/s)": 1.53811 }, { "acc": 0.97976761, "epoch": 13.407546285446449, "grad_norm": 9.333733558654785, "learning_rate": 8.753847403315369e-06, "loss": 0.13973137, "memory(GiB)": 13.7, "step": 28605, "train_speed(iter/s)": 1.538108 }, { "acc": 0.98914146, "epoch": 13.409889852355285, "grad_norm": 3.2988157272338867, "learning_rate": 8.753335325331657e-06, "loss": 0.06465786, "memory(GiB)": 13.7, "step": 28610, "train_speed(iter/s)": 1.538106 }, { "acc": 0.97572918, "epoch": 13.41223341926412, "grad_norm": 7.244616985321045, "learning_rate": 8.752823157142115e-06, "loss": 0.13348577, "memory(GiB)": 13.7, "step": 28615, "train_speed(iter/s)": 1.538095 }, { "acc": 0.9885088, "epoch": 13.414576986172955, "grad_norm": 3.1007983684539795, "learning_rate": 8.752310898759052e-06, "loss": 0.05441989, "memory(GiB)": 13.7, "step": 28620, "train_speed(iter/s)": 1.538089 }, { "acc": 0.96469698, "epoch": 13.416920553081791, "grad_norm": 6.669349193572998, "learning_rate": 8.75179855019478e-06, "loss": 0.12986966, "memory(GiB)": 13.7, "step": 28625, "train_speed(iter/s)": 1.538096 }, { "acc": 0.98359776, "epoch": 13.419264119990626, "grad_norm": 1.4451420307159424, "learning_rate": 8.751286111461615e-06, "loss": 0.10441515, "memory(GiB)": 13.7, "step": 28630, "train_speed(iter/s)": 1.538102 }, { "acc": 0.97337456, "epoch": 13.421607686899462, "grad_norm": 0.08685072511434555, "learning_rate": 8.750773582571874e-06, "loss": 0.18649254, "memory(GiB)": 13.7, "step": 28635, "train_speed(iter/s)": 1.538109 }, { "acc": 0.98156986, "epoch": 13.423951253808296, "grad_norm": 1.9811303615570068, "learning_rate": 8.750260963537876e-06, "loss": 0.06987796, "memory(GiB)": 13.7, "step": 28640, "train_speed(iter/s)": 1.538116 }, { "acc": 0.97987738, "epoch": 13.426294820717132, "grad_norm": 5.461951732635498, "learning_rate": 8.749748254371944e-06, "loss": 0.12480364, "memory(GiB)": 13.7, "step": 28645, "train_speed(iter/s)": 1.538124 }, { "acc": 0.98083334, "epoch": 13.428638387625966, "grad_norm": 1.3016204833984375, "learning_rate": 8.749235455086403e-06, "loss": 0.08357239, "memory(GiB)": 13.7, "step": 28650, "train_speed(iter/s)": 1.538106 }, { "acc": 0.98880205, "epoch": 13.430981954534802, "grad_norm": 10.393675804138184, "learning_rate": 8.748722565693574e-06, "loss": 0.05741079, "memory(GiB)": 13.7, "step": 28655, "train_speed(iter/s)": 1.538125 }, { "acc": 0.97493925, "epoch": 13.433325521443637, "grad_norm": 5.978443145751953, "learning_rate": 8.74820958620579e-06, "loss": 0.11879002, "memory(GiB)": 13.7, "step": 28660, "train_speed(iter/s)": 1.538131 }, { "acc": 0.9708334, "epoch": 13.435669088352473, "grad_norm": 5.5688252449035645, "learning_rate": 8.74769651663538e-06, "loss": 0.10889375, "memory(GiB)": 13.7, "step": 28665, "train_speed(iter/s)": 1.538116 }, { "acc": 0.97555809, "epoch": 13.438012655261307, "grad_norm": 7.498377323150635, "learning_rate": 8.747183356994677e-06, "loss": 0.14647316, "memory(GiB)": 13.7, "step": 28670, "train_speed(iter/s)": 1.538128 }, { "acc": 0.97651043, "epoch": 13.440356222170143, "grad_norm": 4.5559282302856445, "learning_rate": 8.746670107296013e-06, "loss": 0.14365777, "memory(GiB)": 13.7, "step": 28675, "train_speed(iter/s)": 1.538132 }, { "acc": 0.97989588, "epoch": 13.442699789078977, "grad_norm": 1.9213324785232544, "learning_rate": 8.746156767551728e-06, "loss": 0.11025406, "memory(GiB)": 13.7, "step": 28680, "train_speed(iter/s)": 1.538136 }, { "acc": 0.96895838, "epoch": 13.445043355987814, "grad_norm": 13.860116958618164, "learning_rate": 8.745643337774158e-06, "loss": 0.13151516, "memory(GiB)": 13.7, "step": 28685, "train_speed(iter/s)": 1.538142 }, { "acc": 0.96664352, "epoch": 13.447386922896648, "grad_norm": 4.098511695861816, "learning_rate": 8.745129817975648e-06, "loss": 0.15567389, "memory(GiB)": 13.7, "step": 28690, "train_speed(iter/s)": 1.538152 }, { "acc": 0.98163195, "epoch": 13.449730489805484, "grad_norm": 5.482205390930176, "learning_rate": 8.744616208168538e-06, "loss": 0.08909754, "memory(GiB)": 13.7, "step": 28695, "train_speed(iter/s)": 1.538155 }, { "acc": 0.97216721, "epoch": 13.45207405671432, "grad_norm": 10.70077133178711, "learning_rate": 8.744102508365175e-06, "loss": 0.21205697, "memory(GiB)": 13.7, "step": 28700, "train_speed(iter/s)": 1.538167 }, { "acc": 0.98080359, "epoch": 13.454417623623154, "grad_norm": 4.995474338531494, "learning_rate": 8.743588718577906e-06, "loss": 0.08197134, "memory(GiB)": 13.7, "step": 28705, "train_speed(iter/s)": 1.53816 }, { "acc": 0.95962801, "epoch": 13.45676119053199, "grad_norm": 9.568117141723633, "learning_rate": 8.743074838819082e-06, "loss": 0.18180033, "memory(GiB)": 13.7, "step": 28710, "train_speed(iter/s)": 1.538175 }, { "acc": 0.96629457, "epoch": 13.459104757440825, "grad_norm": 4.939856052398682, "learning_rate": 8.742560869101056e-06, "loss": 0.12924428, "memory(GiB)": 13.7, "step": 28715, "train_speed(iter/s)": 1.538178 }, { "acc": 0.98812504, "epoch": 13.46144832434966, "grad_norm": 1.803364872932434, "learning_rate": 8.742046809436178e-06, "loss": 0.08847299, "memory(GiB)": 13.7, "step": 28720, "train_speed(iter/s)": 1.53818 }, { "acc": 0.98801537, "epoch": 13.463791891258495, "grad_norm": 0.6406047940254211, "learning_rate": 8.741532659836806e-06, "loss": 0.08230817, "memory(GiB)": 13.7, "step": 28725, "train_speed(iter/s)": 1.538191 }, { "acc": 0.985322, "epoch": 13.466135458167331, "grad_norm": 2.4969117641448975, "learning_rate": 8.7410184203153e-06, "loss": 0.10508497, "memory(GiB)": 13.7, "step": 28730, "train_speed(iter/s)": 1.538205 }, { "acc": 0.97153816, "epoch": 13.468479025076165, "grad_norm": 12.03193473815918, "learning_rate": 8.74050409088402e-06, "loss": 0.12721779, "memory(GiB)": 13.7, "step": 28735, "train_speed(iter/s)": 1.538212 }, { "acc": 0.98111115, "epoch": 13.470822591985002, "grad_norm": 6.003736972808838, "learning_rate": 8.739989671555326e-06, "loss": 0.08807356, "memory(GiB)": 13.7, "step": 28740, "train_speed(iter/s)": 1.538216 }, { "acc": 0.99086533, "epoch": 13.473166158893836, "grad_norm": 0.3848893344402313, "learning_rate": 8.73947516234159e-06, "loss": 0.07900747, "memory(GiB)": 13.7, "step": 28745, "train_speed(iter/s)": 1.53822 }, { "acc": 0.97156582, "epoch": 13.475509725802672, "grad_norm": 6.111039638519287, "learning_rate": 8.738960563255173e-06, "loss": 0.13640341, "memory(GiB)": 13.7, "step": 28750, "train_speed(iter/s)": 1.538225 }, { "acc": 0.96488094, "epoch": 13.477853292711506, "grad_norm": 0.0419965460896492, "learning_rate": 8.738445874308444e-06, "loss": 0.10669222, "memory(GiB)": 13.7, "step": 28755, "train_speed(iter/s)": 1.538218 }, { "acc": 0.96547356, "epoch": 13.480196859620342, "grad_norm": 9.091774940490723, "learning_rate": 8.737931095513778e-06, "loss": 0.19567555, "memory(GiB)": 13.7, "step": 28760, "train_speed(iter/s)": 1.53822 }, { "acc": 0.98883018, "epoch": 13.482540426529177, "grad_norm": 4.200401306152344, "learning_rate": 8.737416226883546e-06, "loss": 0.09807326, "memory(GiB)": 13.7, "step": 28765, "train_speed(iter/s)": 1.538223 }, { "acc": 0.98896513, "epoch": 13.484883993438013, "grad_norm": 4.762889862060547, "learning_rate": 8.736901268430125e-06, "loss": 0.04785215, "memory(GiB)": 13.7, "step": 28770, "train_speed(iter/s)": 1.538222 }, { "acc": 0.9919445, "epoch": 13.487227560346849, "grad_norm": 1.2980856895446777, "learning_rate": 8.736386220165893e-06, "loss": 0.01678052, "memory(GiB)": 13.7, "step": 28775, "train_speed(iter/s)": 1.538235 }, { "acc": 0.96844692, "epoch": 13.489571127255683, "grad_norm": 0.9284713864326477, "learning_rate": 8.735871082103228e-06, "loss": 0.14278271, "memory(GiB)": 13.7, "step": 28780, "train_speed(iter/s)": 1.538241 }, { "acc": 0.96965866, "epoch": 13.49191469416452, "grad_norm": 16.847126007080078, "learning_rate": 8.735355854254514e-06, "loss": 0.16075344, "memory(GiB)": 13.7, "step": 28785, "train_speed(iter/s)": 1.538239 }, { "acc": 0.97394352, "epoch": 13.494258261073353, "grad_norm": 2.2557690143585205, "learning_rate": 8.734840536632138e-06, "loss": 0.07949547, "memory(GiB)": 13.7, "step": 28790, "train_speed(iter/s)": 1.538243 }, { "acc": 0.96436272, "epoch": 13.49660182798219, "grad_norm": 8.568172454833984, "learning_rate": 8.734325129248482e-06, "loss": 0.22229052, "memory(GiB)": 13.7, "step": 28795, "train_speed(iter/s)": 1.538249 }, { "acc": 0.96119041, "epoch": 13.498945394891024, "grad_norm": 4.262536525726318, "learning_rate": 8.733809632115935e-06, "loss": 0.21763649, "memory(GiB)": 13.7, "step": 28800, "train_speed(iter/s)": 1.538245 }, { "acc": 0.9753706, "epoch": 13.50128896179986, "grad_norm": 3.4671003818511963, "learning_rate": 8.733294045246893e-06, "loss": 0.07876804, "memory(GiB)": 13.7, "step": 28805, "train_speed(iter/s)": 1.538244 }, { "acc": 0.97605114, "epoch": 13.503632528708694, "grad_norm": 10.185394287109375, "learning_rate": 8.732778368653742e-06, "loss": 0.11798716, "memory(GiB)": 13.7, "step": 28810, "train_speed(iter/s)": 1.538254 }, { "acc": 0.97865534, "epoch": 13.50597609561753, "grad_norm": 2.585279941558838, "learning_rate": 8.732262602348882e-06, "loss": 0.06245687, "memory(GiB)": 13.7, "step": 28815, "train_speed(iter/s)": 1.538256 }, { "acc": 0.9817708, "epoch": 13.508319662526365, "grad_norm": 0.16308481991291046, "learning_rate": 8.731746746344707e-06, "loss": 0.09502063, "memory(GiB)": 13.7, "step": 28820, "train_speed(iter/s)": 1.538264 }, { "acc": 0.97589016, "epoch": 13.5106632294352, "grad_norm": 7.185672760009766, "learning_rate": 8.73123080065362e-06, "loss": 0.11144555, "memory(GiB)": 13.7, "step": 28825, "train_speed(iter/s)": 1.538273 }, { "acc": 0.96941423, "epoch": 13.513006796344035, "grad_norm": 5.204785346984863, "learning_rate": 8.73071476528802e-06, "loss": 0.15963842, "memory(GiB)": 13.7, "step": 28830, "train_speed(iter/s)": 1.538285 }, { "acc": 0.98524799, "epoch": 13.515350363252871, "grad_norm": 3.1861956119537354, "learning_rate": 8.730198640260314e-06, "loss": 0.06511161, "memory(GiB)": 13.7, "step": 28835, "train_speed(iter/s)": 1.53829 }, { "acc": 0.97786713, "epoch": 13.517693930161705, "grad_norm": 1.6464245319366455, "learning_rate": 8.729682425582903e-06, "loss": 0.06822491, "memory(GiB)": 13.7, "step": 28840, "train_speed(iter/s)": 1.538304 }, { "acc": 0.98459148, "epoch": 13.520037497070541, "grad_norm": 3.603502035140991, "learning_rate": 8.729166121268199e-06, "loss": 0.10272133, "memory(GiB)": 13.7, "step": 28845, "train_speed(iter/s)": 1.538316 }, { "acc": 0.97853622, "epoch": 13.522381063979376, "grad_norm": 2.287525177001953, "learning_rate": 8.72864972732861e-06, "loss": 0.14021257, "memory(GiB)": 13.7, "step": 28850, "train_speed(iter/s)": 1.538342 }, { "acc": 0.97757444, "epoch": 13.524724630888212, "grad_norm": 3.887035369873047, "learning_rate": 8.72813324377655e-06, "loss": 0.10194263, "memory(GiB)": 13.7, "step": 28855, "train_speed(iter/s)": 1.538348 }, { "acc": 0.99750004, "epoch": 13.527068197797046, "grad_norm": 2.366821765899658, "learning_rate": 8.727616670624434e-06, "loss": 0.02757448, "memory(GiB)": 13.7, "step": 28860, "train_speed(iter/s)": 1.538353 }, { "acc": 0.97210999, "epoch": 13.529411764705882, "grad_norm": 5.046533107757568, "learning_rate": 8.727100007884677e-06, "loss": 0.16483639, "memory(GiB)": 13.7, "step": 28865, "train_speed(iter/s)": 1.538355 }, { "acc": 0.98185472, "epoch": 13.531755331614718, "grad_norm": 5.131319046020508, "learning_rate": 8.7265832555697e-06, "loss": 0.0991765, "memory(GiB)": 13.7, "step": 28870, "train_speed(iter/s)": 1.538355 }, { "acc": 0.97153273, "epoch": 13.534098898523553, "grad_norm": 5.427308559417725, "learning_rate": 8.726066413691921e-06, "loss": 0.12822965, "memory(GiB)": 13.7, "step": 28875, "train_speed(iter/s)": 1.538362 }, { "acc": 0.99125004, "epoch": 13.536442465432389, "grad_norm": 4.313710689544678, "learning_rate": 8.725549482263765e-06, "loss": 0.02825531, "memory(GiB)": 13.7, "step": 28880, "train_speed(iter/s)": 1.538356 }, { "acc": 0.97970915, "epoch": 13.538786032341223, "grad_norm": 2.433971643447876, "learning_rate": 8.725032461297658e-06, "loss": 0.10753415, "memory(GiB)": 13.7, "step": 28885, "train_speed(iter/s)": 1.53837 }, { "acc": 0.98395834, "epoch": 13.541129599250059, "grad_norm": 4.545557022094727, "learning_rate": 8.724515350806027e-06, "loss": 0.0477883, "memory(GiB)": 13.7, "step": 28890, "train_speed(iter/s)": 1.538374 }, { "acc": 0.97729168, "epoch": 13.543473166158893, "grad_norm": 1.2483078241348267, "learning_rate": 8.7239981508013e-06, "loss": 0.09408339, "memory(GiB)": 13.7, "step": 28895, "train_speed(iter/s)": 1.53837 }, { "acc": 0.96948767, "epoch": 13.54581673306773, "grad_norm": 4.255577087402344, "learning_rate": 8.72348086129591e-06, "loss": 0.18363123, "memory(GiB)": 13.7, "step": 28900, "train_speed(iter/s)": 1.538366 }, { "acc": 0.99359379, "epoch": 13.548160299976564, "grad_norm": 4.182739734649658, "learning_rate": 8.722963482302293e-06, "loss": 0.03982607, "memory(GiB)": 13.7, "step": 28905, "train_speed(iter/s)": 1.538373 }, { "acc": 0.98306541, "epoch": 13.5505038668854, "grad_norm": 1.360076665878296, "learning_rate": 8.72244601383288e-06, "loss": 0.09351732, "memory(GiB)": 13.7, "step": 28910, "train_speed(iter/s)": 1.538391 }, { "acc": 0.96969404, "epoch": 13.552847433794234, "grad_norm": 1.0924071073532104, "learning_rate": 8.721928455900116e-06, "loss": 0.17528129, "memory(GiB)": 13.7, "step": 28915, "train_speed(iter/s)": 1.538397 }, { "acc": 0.97130203, "epoch": 13.55519100070307, "grad_norm": 3.204444169998169, "learning_rate": 8.721410808516435e-06, "loss": 0.11793406, "memory(GiB)": 13.7, "step": 28920, "train_speed(iter/s)": 1.538415 }, { "acc": 0.97968254, "epoch": 13.557534567611905, "grad_norm": 7.281612396240234, "learning_rate": 8.720893071694284e-06, "loss": 0.10448049, "memory(GiB)": 13.7, "step": 28925, "train_speed(iter/s)": 1.538414 }, { "acc": 0.96705952, "epoch": 13.55987813452074, "grad_norm": 9.594561576843262, "learning_rate": 8.720375245446103e-06, "loss": 0.19001735, "memory(GiB)": 13.7, "step": 28930, "train_speed(iter/s)": 1.538428 }, { "acc": 0.98200312, "epoch": 13.562221701429575, "grad_norm": 5.896312236785889, "learning_rate": 8.719857329784346e-06, "loss": 0.09300157, "memory(GiB)": 13.7, "step": 28935, "train_speed(iter/s)": 1.53843 }, { "acc": 0.98118935, "epoch": 13.564565268338411, "grad_norm": 14.648712158203125, "learning_rate": 8.719339324721456e-06, "loss": 0.09002485, "memory(GiB)": 13.7, "step": 28940, "train_speed(iter/s)": 1.538429 }, { "acc": 0.96156807, "epoch": 13.566908835247247, "grad_norm": 4.536891937255859, "learning_rate": 8.718821230269885e-06, "loss": 0.22158136, "memory(GiB)": 13.7, "step": 28945, "train_speed(iter/s)": 1.538443 }, { "acc": 0.97897825, "epoch": 13.569252402156081, "grad_norm": 0.937437891960144, "learning_rate": 8.718303046442089e-06, "loss": 0.12718076, "memory(GiB)": 13.7, "step": 28950, "train_speed(iter/s)": 1.538456 }, { "acc": 0.97904758, "epoch": 13.571595969064917, "grad_norm": 4.703335285186768, "learning_rate": 8.71778477325052e-06, "loss": 0.08081154, "memory(GiB)": 13.7, "step": 28955, "train_speed(iter/s)": 1.538455 }, { "acc": 0.95570068, "epoch": 13.573939535973752, "grad_norm": 25.828184127807617, "learning_rate": 8.717266410707639e-06, "loss": 0.25627089, "memory(GiB)": 13.7, "step": 28960, "train_speed(iter/s)": 1.538452 }, { "acc": 0.96478081, "epoch": 13.576283102882588, "grad_norm": 5.427547931671143, "learning_rate": 8.716747958825903e-06, "loss": 0.15929466, "memory(GiB)": 13.7, "step": 28965, "train_speed(iter/s)": 1.538457 }, { "acc": 0.97448864, "epoch": 13.578626669791422, "grad_norm": 1.6061620712280273, "learning_rate": 8.716229417617776e-06, "loss": 0.06209217, "memory(GiB)": 13.7, "step": 28970, "train_speed(iter/s)": 1.538472 }, { "acc": 0.96122026, "epoch": 13.580970236700258, "grad_norm": 4.662846088409424, "learning_rate": 8.715710787095718e-06, "loss": 0.13178613, "memory(GiB)": 13.7, "step": 28975, "train_speed(iter/s)": 1.538488 }, { "acc": 0.97121525, "epoch": 13.583313803609093, "grad_norm": 1.131221890449524, "learning_rate": 8.715192067272201e-06, "loss": 0.14990718, "memory(GiB)": 13.7, "step": 28980, "train_speed(iter/s)": 1.538494 }, { "acc": 0.9625, "epoch": 13.585657370517929, "grad_norm": 14.415934562683105, "learning_rate": 8.714673258159689e-06, "loss": 0.09143729, "memory(GiB)": 13.7, "step": 28985, "train_speed(iter/s)": 1.538495 }, { "acc": 0.97666664, "epoch": 13.588000937426763, "grad_norm": 15.01541805267334, "learning_rate": 8.714154359770654e-06, "loss": 0.10259444, "memory(GiB)": 13.7, "step": 28990, "train_speed(iter/s)": 1.538501 }, { "acc": 0.96885414, "epoch": 13.590344504335599, "grad_norm": 7.863274574279785, "learning_rate": 8.713635372117569e-06, "loss": 0.1073999, "memory(GiB)": 13.7, "step": 28995, "train_speed(iter/s)": 1.538514 }, { "acc": 0.97780752, "epoch": 13.592688071244433, "grad_norm": 5.890805244445801, "learning_rate": 8.713116295212908e-06, "loss": 0.11266056, "memory(GiB)": 13.7, "step": 29000, "train_speed(iter/s)": 1.538524 }, { "acc": 0.97852182, "epoch": 13.59503163815327, "grad_norm": 3.9894447326660156, "learning_rate": 8.712597129069148e-06, "loss": 0.13137217, "memory(GiB)": 13.7, "step": 29005, "train_speed(iter/s)": 1.538522 }, { "acc": 0.96506834, "epoch": 13.597375205062104, "grad_norm": 11.71630859375, "learning_rate": 8.712077873698769e-06, "loss": 0.1638938, "memory(GiB)": 13.7, "step": 29010, "train_speed(iter/s)": 1.538531 }, { "acc": 0.97048607, "epoch": 13.59971877197094, "grad_norm": 1.5624334812164307, "learning_rate": 8.71155852911425e-06, "loss": 0.19344269, "memory(GiB)": 13.7, "step": 29015, "train_speed(iter/s)": 1.538537 }, { "acc": 0.987257, "epoch": 13.602062338879776, "grad_norm": 2.133578300476074, "learning_rate": 8.711039095328076e-06, "loss": 0.05971451, "memory(GiB)": 13.7, "step": 29020, "train_speed(iter/s)": 1.538553 }, { "acc": 0.97929602, "epoch": 13.60440590578861, "grad_norm": 4.3490214347839355, "learning_rate": 8.710519572352733e-06, "loss": 0.07725087, "memory(GiB)": 13.7, "step": 29025, "train_speed(iter/s)": 1.538547 }, { "acc": 0.9788393, "epoch": 13.606749472697446, "grad_norm": 2.9092071056365967, "learning_rate": 8.709999960200708e-06, "loss": 0.07608449, "memory(GiB)": 13.7, "step": 29030, "train_speed(iter/s)": 1.538549 }, { "acc": 0.98038197, "epoch": 13.60909303960628, "grad_norm": 0.622032880783081, "learning_rate": 8.709480258884489e-06, "loss": 0.12588692, "memory(GiB)": 13.7, "step": 29035, "train_speed(iter/s)": 1.538555 }, { "acc": 0.96383152, "epoch": 13.611436606515117, "grad_norm": 8.906383514404297, "learning_rate": 8.708960468416572e-06, "loss": 0.11558722, "memory(GiB)": 13.7, "step": 29040, "train_speed(iter/s)": 1.538569 }, { "acc": 0.97832489, "epoch": 13.613780173423951, "grad_norm": 6.358185291290283, "learning_rate": 8.708440588809448e-06, "loss": 0.12716475, "memory(GiB)": 13.7, "step": 29045, "train_speed(iter/s)": 1.538578 }, { "acc": 0.97529755, "epoch": 13.616123740332787, "grad_norm": 3.5851714611053467, "learning_rate": 8.707920620075612e-06, "loss": 0.14440303, "memory(GiB)": 13.7, "step": 29050, "train_speed(iter/s)": 1.538577 }, { "acc": 0.97562494, "epoch": 13.618467307241621, "grad_norm": 5.140822887420654, "learning_rate": 8.707400562227566e-06, "loss": 0.1566345, "memory(GiB)": 13.7, "step": 29055, "train_speed(iter/s)": 1.53858 }, { "acc": 0.97736149, "epoch": 13.620810874150457, "grad_norm": 5.29824686050415, "learning_rate": 8.70688041527781e-06, "loss": 0.10803812, "memory(GiB)": 13.7, "step": 29060, "train_speed(iter/s)": 1.538586 }, { "acc": 0.96505947, "epoch": 13.623154441059292, "grad_norm": 8.569652557373047, "learning_rate": 8.70636017923884e-06, "loss": 0.13239216, "memory(GiB)": 13.7, "step": 29065, "train_speed(iter/s)": 1.538593 }, { "acc": 0.97299118, "epoch": 13.625498007968128, "grad_norm": 2.130188226699829, "learning_rate": 8.70583985412317e-06, "loss": 0.10447505, "memory(GiB)": 13.7, "step": 29070, "train_speed(iter/s)": 1.5386 }, { "acc": 0.98317537, "epoch": 13.627841574876962, "grad_norm": 5.276419162750244, "learning_rate": 8.705319439943302e-06, "loss": 0.0700937, "memory(GiB)": 13.7, "step": 29075, "train_speed(iter/s)": 1.538604 }, { "acc": 0.98505211, "epoch": 13.630185141785798, "grad_norm": 5.086972713470459, "learning_rate": 8.704798936711745e-06, "loss": 0.08548561, "memory(GiB)": 13.7, "step": 29080, "train_speed(iter/s)": 1.538631 }, { "acc": 0.9841856, "epoch": 13.632528708694633, "grad_norm": 5.949578285217285, "learning_rate": 8.704278344441012e-06, "loss": 0.07243187, "memory(GiB)": 13.7, "step": 29085, "train_speed(iter/s)": 1.538625 }, { "acc": 0.9641819, "epoch": 13.634872275603469, "grad_norm": 3.581298589706421, "learning_rate": 8.703757663143616e-06, "loss": 0.16410897, "memory(GiB)": 13.7, "step": 29090, "train_speed(iter/s)": 1.53863 }, { "acc": 0.97330933, "epoch": 13.637215842512303, "grad_norm": 3.4729318618774414, "learning_rate": 8.70323689283207e-06, "loss": 0.14957874, "memory(GiB)": 13.7, "step": 29095, "train_speed(iter/s)": 1.538637 }, { "acc": 0.97007465, "epoch": 13.639559409421139, "grad_norm": 35.96818923950195, "learning_rate": 8.702716033518896e-06, "loss": 0.10815575, "memory(GiB)": 13.7, "step": 29100, "train_speed(iter/s)": 1.538636 }, { "acc": 0.97842264, "epoch": 13.641902976329973, "grad_norm": 3.087454080581665, "learning_rate": 8.70219508521661e-06, "loss": 0.0778222, "memory(GiB)": 13.7, "step": 29105, "train_speed(iter/s)": 1.538644 }, { "acc": 0.9601099, "epoch": 13.64424654323881, "grad_norm": 5.033041477203369, "learning_rate": 8.701674047937734e-06, "loss": 0.12212374, "memory(GiB)": 13.7, "step": 29110, "train_speed(iter/s)": 1.53866 }, { "acc": 0.98656254, "epoch": 13.646590110147645, "grad_norm": 1.6183803081512451, "learning_rate": 8.701152921694796e-06, "loss": 0.04452927, "memory(GiB)": 13.7, "step": 29115, "train_speed(iter/s)": 1.538653 }, { "acc": 0.97913685, "epoch": 13.64893367705648, "grad_norm": 6.113470077514648, "learning_rate": 8.700631706500318e-06, "loss": 0.07992738, "memory(GiB)": 13.7, "step": 29120, "train_speed(iter/s)": 1.538669 }, { "acc": 0.97741518, "epoch": 13.651277243965316, "grad_norm": 6.339080810546875, "learning_rate": 8.700110402366829e-06, "loss": 0.09807137, "memory(GiB)": 13.7, "step": 29125, "train_speed(iter/s)": 1.53868 }, { "acc": 0.98132534, "epoch": 13.65362081087415, "grad_norm": 2.5719950199127197, "learning_rate": 8.69958900930686e-06, "loss": 0.09098694, "memory(GiB)": 13.7, "step": 29130, "train_speed(iter/s)": 1.538689 }, { "acc": 0.98369656, "epoch": 13.655964377782986, "grad_norm": 5.345005989074707, "learning_rate": 8.699067527332946e-06, "loss": 0.0910369, "memory(GiB)": 13.7, "step": 29135, "train_speed(iter/s)": 1.538696 }, { "acc": 0.97456303, "epoch": 13.65830794469182, "grad_norm": 8.349913597106934, "learning_rate": 8.698545956457616e-06, "loss": 0.13760488, "memory(GiB)": 13.7, "step": 29140, "train_speed(iter/s)": 1.538701 }, { "acc": 0.98892517, "epoch": 13.660651511600657, "grad_norm": 0.956061065196991, "learning_rate": 8.698024296693411e-06, "loss": 0.07988454, "memory(GiB)": 13.7, "step": 29145, "train_speed(iter/s)": 1.538708 }, { "acc": 0.96912003, "epoch": 13.662995078509491, "grad_norm": 4.359223365783691, "learning_rate": 8.69750254805287e-06, "loss": 0.17165358, "memory(GiB)": 13.7, "step": 29150, "train_speed(iter/s)": 1.538722 }, { "acc": 0.98660717, "epoch": 13.665338645418327, "grad_norm": 9.33434772491455, "learning_rate": 8.696980710548535e-06, "loss": 0.04817131, "memory(GiB)": 13.7, "step": 29155, "train_speed(iter/s)": 1.538717 }, { "acc": 0.97540436, "epoch": 13.667682212327161, "grad_norm": 3.765753984451294, "learning_rate": 8.696458784192946e-06, "loss": 0.05446486, "memory(GiB)": 13.7, "step": 29160, "train_speed(iter/s)": 1.538724 }, { "acc": 0.95904675, "epoch": 13.670025779235997, "grad_norm": 5.233644008636475, "learning_rate": 8.695936768998649e-06, "loss": 0.22704763, "memory(GiB)": 13.7, "step": 29165, "train_speed(iter/s)": 1.538724 }, { "acc": 0.98592262, "epoch": 13.672369346144832, "grad_norm": 3.5678741931915283, "learning_rate": 8.695414664978194e-06, "loss": 0.05031626, "memory(GiB)": 13.7, "step": 29170, "train_speed(iter/s)": 1.538739 }, { "acc": 0.98414297, "epoch": 13.674712913053668, "grad_norm": 0.10482700914144516, "learning_rate": 8.694892472144129e-06, "loss": 0.10763321, "memory(GiB)": 13.7, "step": 29175, "train_speed(iter/s)": 1.538745 }, { "acc": 0.9885416, "epoch": 13.677056479962502, "grad_norm": 4.723526954650879, "learning_rate": 8.694370190509005e-06, "loss": 0.04015271, "memory(GiB)": 13.7, "step": 29180, "train_speed(iter/s)": 1.538757 }, { "acc": 0.98029766, "epoch": 13.679400046871338, "grad_norm": 4.865091800689697, "learning_rate": 8.693847820085378e-06, "loss": 0.11191758, "memory(GiB)": 13.7, "step": 29185, "train_speed(iter/s)": 1.538767 }, { "acc": 0.97089014, "epoch": 13.681743613780174, "grad_norm": 9.53468132019043, "learning_rate": 8.693325360885802e-06, "loss": 0.0978248, "memory(GiB)": 13.7, "step": 29190, "train_speed(iter/s)": 1.538769 }, { "acc": 0.98359375, "epoch": 13.684087180689009, "grad_norm": 6.562524795532227, "learning_rate": 8.692802812922837e-06, "loss": 0.05884481, "memory(GiB)": 13.7, "step": 29195, "train_speed(iter/s)": 1.538774 }, { "acc": 0.96980658, "epoch": 13.686430747597845, "grad_norm": 5.55266809463501, "learning_rate": 8.692280176209043e-06, "loss": 0.13669429, "memory(GiB)": 13.7, "step": 29200, "train_speed(iter/s)": 1.53877 }, { "acc": 0.96882524, "epoch": 13.688774314506679, "grad_norm": 5.070350646972656, "learning_rate": 8.69175745075698e-06, "loss": 0.12726078, "memory(GiB)": 13.7, "step": 29205, "train_speed(iter/s)": 1.538777 }, { "acc": 0.97479172, "epoch": 13.691117881415515, "grad_norm": 4.3896074295043945, "learning_rate": 8.691234636579218e-06, "loss": 0.08519053, "memory(GiB)": 13.7, "step": 29210, "train_speed(iter/s)": 1.538793 }, { "acc": 0.99375, "epoch": 13.69346144832435, "grad_norm": 2.771493911743164, "learning_rate": 8.690711733688316e-06, "loss": 0.06157231, "memory(GiB)": 13.7, "step": 29215, "train_speed(iter/s)": 1.5388 }, { "acc": 0.96553888, "epoch": 13.695805015233185, "grad_norm": 38.031105041503906, "learning_rate": 8.690188742096852e-06, "loss": 0.14293426, "memory(GiB)": 13.7, "step": 29220, "train_speed(iter/s)": 1.538807 }, { "acc": 0.98238096, "epoch": 13.69814858214202, "grad_norm": 2.1141793727874756, "learning_rate": 8.689665661817387e-06, "loss": 0.05842085, "memory(GiB)": 13.7, "step": 29225, "train_speed(iter/s)": 1.538803 }, { "acc": 0.99020824, "epoch": 13.700492149050856, "grad_norm": 6.3940300941467285, "learning_rate": 8.689142492862503e-06, "loss": 0.0583303, "memory(GiB)": 13.7, "step": 29230, "train_speed(iter/s)": 1.538802 }, { "acc": 0.98264608, "epoch": 13.70283571595969, "grad_norm": 3.0255565643310547, "learning_rate": 8.68861923524477e-06, "loss": 0.08437052, "memory(GiB)": 13.7, "step": 29235, "train_speed(iter/s)": 1.538804 }, { "acc": 0.9895833, "epoch": 13.705179282868526, "grad_norm": 8.327021598815918, "learning_rate": 8.688095888976769e-06, "loss": 0.04267039, "memory(GiB)": 13.7, "step": 29240, "train_speed(iter/s)": 1.538811 }, { "acc": 0.97315483, "epoch": 13.70752284977736, "grad_norm": 11.465071678161621, "learning_rate": 8.687572454071076e-06, "loss": 0.13771026, "memory(GiB)": 13.7, "step": 29245, "train_speed(iter/s)": 1.538826 }, { "acc": 0.95251894, "epoch": 13.709866416686197, "grad_norm": 3.3070662021636963, "learning_rate": 8.687048930540273e-06, "loss": 0.29291735, "memory(GiB)": 13.7, "step": 29250, "train_speed(iter/s)": 1.538836 }, { "acc": 0.97049141, "epoch": 13.71220998359503, "grad_norm": 5.767425537109375, "learning_rate": 8.686525318396948e-06, "loss": 0.12636597, "memory(GiB)": 13.7, "step": 29255, "train_speed(iter/s)": 1.538829 }, { "acc": 0.97513885, "epoch": 13.714553550503867, "grad_norm": 4.978947162628174, "learning_rate": 8.686001617653681e-06, "loss": 0.09561977, "memory(GiB)": 13.7, "step": 29260, "train_speed(iter/s)": 1.538823 }, { "acc": 0.98052406, "epoch": 13.716897117412703, "grad_norm": 7.525969982147217, "learning_rate": 8.685477828323066e-06, "loss": 0.05434366, "memory(GiB)": 13.7, "step": 29265, "train_speed(iter/s)": 1.53883 }, { "acc": 0.97631054, "epoch": 13.719240684321537, "grad_norm": 5.607249736785889, "learning_rate": 8.684953950417688e-06, "loss": 0.13161045, "memory(GiB)": 13.7, "step": 29270, "train_speed(iter/s)": 1.53883 }, { "acc": 0.97861614, "epoch": 13.721584251230373, "grad_norm": 19.034578323364258, "learning_rate": 8.68442998395014e-06, "loss": 0.06963069, "memory(GiB)": 13.7, "step": 29275, "train_speed(iter/s)": 1.538829 }, { "acc": 0.97806625, "epoch": 13.723927818139208, "grad_norm": 3.7334160804748535, "learning_rate": 8.68390592893302e-06, "loss": 0.0693625, "memory(GiB)": 13.7, "step": 29280, "train_speed(iter/s)": 1.538835 }, { "acc": 0.9842804, "epoch": 13.726271385048044, "grad_norm": 10.608458518981934, "learning_rate": 8.683381785378922e-06, "loss": 0.07456629, "memory(GiB)": 13.7, "step": 29285, "train_speed(iter/s)": 1.538845 }, { "acc": 0.97719698, "epoch": 13.728614951956878, "grad_norm": 1.8884373903274536, "learning_rate": 8.682857553300444e-06, "loss": 0.16543964, "memory(GiB)": 13.7, "step": 29290, "train_speed(iter/s)": 1.538847 }, { "acc": 0.9760416, "epoch": 13.730958518865714, "grad_norm": 0.6890425682067871, "learning_rate": 8.68233323271019e-06, "loss": 0.05967574, "memory(GiB)": 13.7, "step": 29295, "train_speed(iter/s)": 1.538856 }, { "acc": 0.97008934, "epoch": 13.733302085774548, "grad_norm": 4.749297618865967, "learning_rate": 8.681808823620759e-06, "loss": 0.1516994, "memory(GiB)": 13.7, "step": 29300, "train_speed(iter/s)": 1.538846 }, { "acc": 0.98311596, "epoch": 13.735645652683385, "grad_norm": 7.320939064025879, "learning_rate": 8.681284326044758e-06, "loss": 0.07193012, "memory(GiB)": 13.7, "step": 29305, "train_speed(iter/s)": 1.538839 }, { "acc": 0.97867641, "epoch": 13.737989219592219, "grad_norm": 5.692868709564209, "learning_rate": 8.680759739994796e-06, "loss": 0.08183251, "memory(GiB)": 13.7, "step": 29310, "train_speed(iter/s)": 1.538856 }, { "acc": 0.97989578, "epoch": 13.740332786501055, "grad_norm": 12.569019317626953, "learning_rate": 8.68023506548348e-06, "loss": 0.07869006, "memory(GiB)": 13.7, "step": 29315, "train_speed(iter/s)": 1.538867 }, { "acc": 0.97842264, "epoch": 13.74267635340989, "grad_norm": 7.64638090133667, "learning_rate": 8.67971030252342e-06, "loss": 0.0700538, "memory(GiB)": 13.7, "step": 29320, "train_speed(iter/s)": 1.538887 }, { "acc": 0.971875, "epoch": 13.745019920318725, "grad_norm": 3.024376153945923, "learning_rate": 8.679185451127233e-06, "loss": 0.08571701, "memory(GiB)": 13.7, "step": 29325, "train_speed(iter/s)": 1.538902 }, { "acc": 0.97242517, "epoch": 13.74736348722756, "grad_norm": 9.367846488952637, "learning_rate": 8.678660511307531e-06, "loss": 0.11182855, "memory(GiB)": 13.7, "step": 29330, "train_speed(iter/s)": 1.538908 }, { "acc": 0.97089891, "epoch": 13.749707054136396, "grad_norm": 15.61771011352539, "learning_rate": 8.678135483076935e-06, "loss": 0.08284056, "memory(GiB)": 13.7, "step": 29335, "train_speed(iter/s)": 1.538933 }, { "acc": 0.98462296, "epoch": 13.75205062104523, "grad_norm": 3.6582891941070557, "learning_rate": 8.677610366448066e-06, "loss": 0.08958994, "memory(GiB)": 13.7, "step": 29340, "train_speed(iter/s)": 1.538935 }, { "acc": 0.9685606, "epoch": 13.754394187954066, "grad_norm": 6.767402172088623, "learning_rate": 8.677085161433542e-06, "loss": 0.13869308, "memory(GiB)": 13.7, "step": 29345, "train_speed(iter/s)": 1.538946 }, { "acc": 0.97791672, "epoch": 13.7567377548629, "grad_norm": 6.661435604095459, "learning_rate": 8.67655986804599e-06, "loss": 0.10013888, "memory(GiB)": 13.7, "step": 29350, "train_speed(iter/s)": 1.538956 }, { "acc": 0.96869049, "epoch": 13.759081321771736, "grad_norm": 1.210621953010559, "learning_rate": 8.676034486298037e-06, "loss": 0.13837235, "memory(GiB)": 13.7, "step": 29355, "train_speed(iter/s)": 1.538962 }, { "acc": 0.98687878, "epoch": 13.761424888680573, "grad_norm": 12.946283340454102, "learning_rate": 8.675509016202307e-06, "loss": 0.08327788, "memory(GiB)": 13.7, "step": 29360, "train_speed(iter/s)": 1.538972 }, { "acc": 0.98812504, "epoch": 13.763768455589407, "grad_norm": 2.4033985137939453, "learning_rate": 8.674983457771436e-06, "loss": 0.04938509, "memory(GiB)": 13.7, "step": 29365, "train_speed(iter/s)": 1.53898 }, { "acc": 0.97815475, "epoch": 13.766112022498243, "grad_norm": 0.6509975790977478, "learning_rate": 8.674457811018054e-06, "loss": 0.10687528, "memory(GiB)": 13.7, "step": 29370, "train_speed(iter/s)": 1.538981 }, { "acc": 0.99416666, "epoch": 13.768455589407077, "grad_norm": 4.859678745269775, "learning_rate": 8.673932075954794e-06, "loss": 0.06544968, "memory(GiB)": 13.7, "step": 29375, "train_speed(iter/s)": 1.538981 }, { "acc": 0.98624992, "epoch": 13.770799156315913, "grad_norm": 0.8702548146247864, "learning_rate": 8.673406252594297e-06, "loss": 0.07007701, "memory(GiB)": 13.7, "step": 29380, "train_speed(iter/s)": 1.53899 }, { "acc": 0.98504467, "epoch": 13.773142723224748, "grad_norm": 0.9441311359405518, "learning_rate": 8.6728803409492e-06, "loss": 0.10928744, "memory(GiB)": 13.7, "step": 29385, "train_speed(iter/s)": 1.538997 }, { "acc": 0.97517862, "epoch": 13.775486290133584, "grad_norm": 5.592580795288086, "learning_rate": 8.672354341032144e-06, "loss": 0.13248124, "memory(GiB)": 13.7, "step": 29390, "train_speed(iter/s)": 1.539 }, { "acc": 0.990625, "epoch": 13.777829857042418, "grad_norm": 3.842991590499878, "learning_rate": 8.671828252855771e-06, "loss": 0.07665215, "memory(GiB)": 13.7, "step": 29395, "train_speed(iter/s)": 1.539018 }, { "acc": 0.97743053, "epoch": 13.780173423951254, "grad_norm": 0.9135969281196594, "learning_rate": 8.671302076432731e-06, "loss": 0.08580967, "memory(GiB)": 13.7, "step": 29400, "train_speed(iter/s)": 1.539031 }, { "acc": 0.97729168, "epoch": 13.782516990860088, "grad_norm": 1.3660131692886353, "learning_rate": 8.670775811775668e-06, "loss": 0.10240736, "memory(GiB)": 13.7, "step": 29405, "train_speed(iter/s)": 1.53903 }, { "acc": 0.97416668, "epoch": 13.784860557768924, "grad_norm": 4.912899494171143, "learning_rate": 8.67024945889723e-06, "loss": 0.19275281, "memory(GiB)": 13.7, "step": 29410, "train_speed(iter/s)": 1.53906 }, { "acc": 0.98208332, "epoch": 13.787204124677759, "grad_norm": 6.418884754180908, "learning_rate": 8.669723017810073e-06, "loss": 0.09439423, "memory(GiB)": 13.7, "step": 29415, "train_speed(iter/s)": 1.539057 }, { "acc": 0.97243748, "epoch": 13.789547691586595, "grad_norm": 6.991671562194824, "learning_rate": 8.66919648852685e-06, "loss": 0.19804753, "memory(GiB)": 13.7, "step": 29420, "train_speed(iter/s)": 1.539052 }, { "acc": 0.96986065, "epoch": 13.79189125849543, "grad_norm": 4.62174129486084, "learning_rate": 8.668669871060215e-06, "loss": 0.10872734, "memory(GiB)": 13.7, "step": 29425, "train_speed(iter/s)": 1.539046 }, { "acc": 0.97789736, "epoch": 13.794234825404265, "grad_norm": 4.806837558746338, "learning_rate": 8.668143165422826e-06, "loss": 0.09525766, "memory(GiB)": 13.7, "step": 29430, "train_speed(iter/s)": 1.539057 }, { "acc": 0.96372681, "epoch": 13.796578392313101, "grad_norm": 10.274767875671387, "learning_rate": 8.667616371627346e-06, "loss": 0.18041785, "memory(GiB)": 13.7, "step": 29435, "train_speed(iter/s)": 1.539062 }, { "acc": 0.98326387, "epoch": 13.798921959221936, "grad_norm": 2.2853808403015137, "learning_rate": 8.667089489686434e-06, "loss": 0.08579487, "memory(GiB)": 13.7, "step": 29440, "train_speed(iter/s)": 1.539066 }, { "acc": 0.98760414, "epoch": 13.801265526130772, "grad_norm": 2.1558380126953125, "learning_rate": 8.666562519612757e-06, "loss": 0.07038309, "memory(GiB)": 13.7, "step": 29445, "train_speed(iter/s)": 1.539077 }, { "acc": 0.97956848, "epoch": 13.803609093039606, "grad_norm": 5.956718921661377, "learning_rate": 8.666035461418983e-06, "loss": 0.0405689, "memory(GiB)": 13.7, "step": 29450, "train_speed(iter/s)": 1.53909 }, { "acc": 0.98643856, "epoch": 13.805952659948442, "grad_norm": 0.35839977860450745, "learning_rate": 8.665508315117777e-06, "loss": 0.07146619, "memory(GiB)": 13.7, "step": 29455, "train_speed(iter/s)": 1.539103 }, { "acc": 0.98270416, "epoch": 13.808296226857276, "grad_norm": 2.564870595932007, "learning_rate": 8.66498108072181e-06, "loss": 0.10511215, "memory(GiB)": 13.7, "step": 29460, "train_speed(iter/s)": 1.539116 }, { "acc": 0.96864243, "epoch": 13.810639793766113, "grad_norm": 5.1674675941467285, "learning_rate": 8.664453758243758e-06, "loss": 0.10083491, "memory(GiB)": 13.7, "step": 29465, "train_speed(iter/s)": 1.539123 }, { "acc": 0.97334824, "epoch": 13.812983360674947, "grad_norm": 3.151928663253784, "learning_rate": 8.663926347696296e-06, "loss": 0.10597548, "memory(GiB)": 13.7, "step": 29470, "train_speed(iter/s)": 1.539129 }, { "acc": 0.97551823, "epoch": 13.815326927583783, "grad_norm": 7.925487518310547, "learning_rate": 8.663398849092097e-06, "loss": 0.11434667, "memory(GiB)": 13.7, "step": 29475, "train_speed(iter/s)": 1.539127 }, { "acc": 0.96613979, "epoch": 13.817670494492617, "grad_norm": 6.483867168426514, "learning_rate": 8.662871262443847e-06, "loss": 0.13753341, "memory(GiB)": 13.7, "step": 29480, "train_speed(iter/s)": 1.539136 }, { "acc": 0.94972534, "epoch": 13.820014061401453, "grad_norm": 14.312456130981445, "learning_rate": 8.662343587764222e-06, "loss": 0.25772772, "memory(GiB)": 13.7, "step": 29485, "train_speed(iter/s)": 1.539142 }, { "acc": 0.98073864, "epoch": 13.822357628310288, "grad_norm": 6.047175407409668, "learning_rate": 8.661815825065907e-06, "loss": 0.11164308, "memory(GiB)": 13.7, "step": 29490, "train_speed(iter/s)": 1.53915 }, { "acc": 0.97036705, "epoch": 13.824701195219124, "grad_norm": 10.801470756530762, "learning_rate": 8.661287974361587e-06, "loss": 0.09879488, "memory(GiB)": 13.7, "step": 29495, "train_speed(iter/s)": 1.539142 }, { "acc": 0.98035374, "epoch": 13.827044762127958, "grad_norm": 0.7371237277984619, "learning_rate": 8.66076003566395e-06, "loss": 0.08244436, "memory(GiB)": 13.7, "step": 29500, "train_speed(iter/s)": 1.53913 }, { "acc": 0.98696423, "epoch": 13.829388329036794, "grad_norm": 0.6291621327400208, "learning_rate": 8.66023200898569e-06, "loss": 0.0828202, "memory(GiB)": 13.7, "step": 29505, "train_speed(iter/s)": 1.539127 }, { "acc": 0.96796608, "epoch": 13.83173189594563, "grad_norm": 3.1601240634918213, "learning_rate": 8.659703894339494e-06, "loss": 0.17308164, "memory(GiB)": 13.7, "step": 29510, "train_speed(iter/s)": 1.53915 }, { "acc": 0.97448864, "epoch": 13.834075462854464, "grad_norm": 9.731688499450684, "learning_rate": 8.659175691738059e-06, "loss": 0.13160663, "memory(GiB)": 13.7, "step": 29515, "train_speed(iter/s)": 1.539148 }, { "acc": 0.97275696, "epoch": 13.8364190297633, "grad_norm": 5.488943099975586, "learning_rate": 8.658647401194078e-06, "loss": 0.09142585, "memory(GiB)": 13.7, "step": 29520, "train_speed(iter/s)": 1.539155 }, { "acc": 0.97445431, "epoch": 13.838762596672135, "grad_norm": 1.5924476385116577, "learning_rate": 8.658119022720252e-06, "loss": 0.12973197, "memory(GiB)": 13.7, "step": 29525, "train_speed(iter/s)": 1.53915 }, { "acc": 0.98059521, "epoch": 13.841106163580971, "grad_norm": 9.577400207519531, "learning_rate": 8.65759055632928e-06, "loss": 0.13657776, "memory(GiB)": 13.7, "step": 29530, "train_speed(iter/s)": 1.539159 }, { "acc": 0.96416664, "epoch": 13.843449730489805, "grad_norm": 6.058807849884033, "learning_rate": 8.657062002033867e-06, "loss": 0.22894053, "memory(GiB)": 13.7, "step": 29535, "train_speed(iter/s)": 1.539162 }, { "acc": 0.98772583, "epoch": 13.845793297398641, "grad_norm": 4.865257263183594, "learning_rate": 8.656533359846716e-06, "loss": 0.08944839, "memory(GiB)": 13.7, "step": 29540, "train_speed(iter/s)": 1.539176 }, { "acc": 0.97826385, "epoch": 13.848136864307476, "grad_norm": 2.558671474456787, "learning_rate": 8.656004629780534e-06, "loss": 0.14189841, "memory(GiB)": 13.7, "step": 29545, "train_speed(iter/s)": 1.539176 }, { "acc": 0.989394, "epoch": 13.850480431216312, "grad_norm": 8.875761032104492, "learning_rate": 8.655475811848029e-06, "loss": 0.07264249, "memory(GiB)": 13.7, "step": 29550, "train_speed(iter/s)": 1.539187 }, { "acc": 0.98884802, "epoch": 13.852823998125146, "grad_norm": 1.1413248777389526, "learning_rate": 8.654946906061914e-06, "loss": 0.05534327, "memory(GiB)": 13.7, "step": 29555, "train_speed(iter/s)": 1.539196 }, { "acc": 0.97813997, "epoch": 13.855167565033982, "grad_norm": 5.727439880371094, "learning_rate": 8.654417912434902e-06, "loss": 0.05557589, "memory(GiB)": 13.7, "step": 29560, "train_speed(iter/s)": 1.539199 }, { "acc": 0.98571434, "epoch": 13.857511131942816, "grad_norm": 4.311883926391602, "learning_rate": 8.653888830979705e-06, "loss": 0.06683959, "memory(GiB)": 13.7, "step": 29565, "train_speed(iter/s)": 1.539201 }, { "acc": 0.98587122, "epoch": 13.859854698851652, "grad_norm": 1.4326536655426025, "learning_rate": 8.653359661709044e-06, "loss": 0.09463867, "memory(GiB)": 13.7, "step": 29570, "train_speed(iter/s)": 1.539189 }, { "acc": 0.97468758, "epoch": 13.862198265760487, "grad_norm": 60.16899871826172, "learning_rate": 8.652830404635638e-06, "loss": 0.147821, "memory(GiB)": 13.7, "step": 29575, "train_speed(iter/s)": 1.539198 }, { "acc": 0.97919979, "epoch": 13.864541832669323, "grad_norm": 9.965910911560059, "learning_rate": 8.652301059772208e-06, "loss": 0.09757401, "memory(GiB)": 13.7, "step": 29580, "train_speed(iter/s)": 1.539188 }, { "acc": 0.97747593, "epoch": 13.866885399578157, "grad_norm": 1.0279489755630493, "learning_rate": 8.651771627131476e-06, "loss": 0.0937727, "memory(GiB)": 13.7, "step": 29585, "train_speed(iter/s)": 1.539196 }, { "acc": 0.97135534, "epoch": 13.869228966486993, "grad_norm": 2.2274861335754395, "learning_rate": 8.651242106726171e-06, "loss": 0.13703275, "memory(GiB)": 13.7, "step": 29590, "train_speed(iter/s)": 1.539195 }, { "acc": 0.98222218, "epoch": 13.871572533395828, "grad_norm": 6.942517280578613, "learning_rate": 8.65071249856902e-06, "loss": 0.04526744, "memory(GiB)": 13.7, "step": 29595, "train_speed(iter/s)": 1.539206 }, { "acc": 0.9953125, "epoch": 13.873916100304664, "grad_norm": 0.22761498391628265, "learning_rate": 8.650182802672749e-06, "loss": 0.04503882, "memory(GiB)": 13.7, "step": 29600, "train_speed(iter/s)": 1.539198 }, { "acc": 0.98330317, "epoch": 13.8762596672135, "grad_norm": 2.786959648132324, "learning_rate": 8.649653019050096e-06, "loss": 0.10379082, "memory(GiB)": 13.7, "step": 29605, "train_speed(iter/s)": 1.539191 }, { "acc": 0.97349205, "epoch": 13.878603234122334, "grad_norm": 4.553548812866211, "learning_rate": 8.649123147713792e-06, "loss": 0.08780736, "memory(GiB)": 13.7, "step": 29610, "train_speed(iter/s)": 1.539186 }, { "acc": 0.98190479, "epoch": 13.88094680103117, "grad_norm": 3.3579487800598145, "learning_rate": 8.648593188676577e-06, "loss": 0.04578935, "memory(GiB)": 13.7, "step": 29615, "train_speed(iter/s)": 1.539179 }, { "acc": 0.98954811, "epoch": 13.883290367940004, "grad_norm": 4.706632137298584, "learning_rate": 8.648063141951184e-06, "loss": 0.0617612, "memory(GiB)": 13.7, "step": 29620, "train_speed(iter/s)": 1.539169 }, { "acc": 0.97628975, "epoch": 13.88563393484884, "grad_norm": 6.447393417358398, "learning_rate": 8.647533007550359e-06, "loss": 0.10782044, "memory(GiB)": 13.7, "step": 29625, "train_speed(iter/s)": 1.539163 }, { "acc": 0.97707787, "epoch": 13.887977501757675, "grad_norm": 8.696611404418945, "learning_rate": 8.647002785486838e-06, "loss": 0.06848023, "memory(GiB)": 13.7, "step": 29630, "train_speed(iter/s)": 1.539162 }, { "acc": 0.98395834, "epoch": 13.89032106866651, "grad_norm": 1.7666133642196655, "learning_rate": 8.646472475773371e-06, "loss": 0.10502913, "memory(GiB)": 13.7, "step": 29635, "train_speed(iter/s)": 1.539173 }, { "acc": 0.97384806, "epoch": 13.892664635575345, "grad_norm": 6.913606643676758, "learning_rate": 8.645942078422704e-06, "loss": 0.09197429, "memory(GiB)": 13.7, "step": 29640, "train_speed(iter/s)": 1.539172 }, { "acc": 0.97717257, "epoch": 13.895008202484181, "grad_norm": 5.799701690673828, "learning_rate": 8.645411593447585e-06, "loss": 0.08663658, "memory(GiB)": 13.7, "step": 29645, "train_speed(iter/s)": 1.539171 }, { "acc": 0.99375, "epoch": 13.897351769393016, "grad_norm": 1.0360673666000366, "learning_rate": 8.644881020860765e-06, "loss": 0.03201255, "memory(GiB)": 13.7, "step": 29650, "train_speed(iter/s)": 1.539174 }, { "acc": 0.98761368, "epoch": 13.899695336301852, "grad_norm": 2.2950799465179443, "learning_rate": 8.644350360675e-06, "loss": 0.06768389, "memory(GiB)": 13.7, "step": 29655, "train_speed(iter/s)": 1.539173 }, { "acc": 0.97400503, "epoch": 13.902038903210686, "grad_norm": 2.264218330383301, "learning_rate": 8.643819612903044e-06, "loss": 0.09466529, "memory(GiB)": 13.7, "step": 29660, "train_speed(iter/s)": 1.539179 }, { "acc": 0.9791667, "epoch": 13.904382470119522, "grad_norm": 5.186247825622559, "learning_rate": 8.643288777557653e-06, "loss": 0.11181667, "memory(GiB)": 13.7, "step": 29665, "train_speed(iter/s)": 1.539183 }, { "acc": 0.98088284, "epoch": 13.906726037028356, "grad_norm": 6.563838481903076, "learning_rate": 8.642757854651587e-06, "loss": 0.12782257, "memory(GiB)": 13.7, "step": 29670, "train_speed(iter/s)": 1.539194 }, { "acc": 0.97078714, "epoch": 13.909069603937192, "grad_norm": 6.445052146911621, "learning_rate": 8.642226844197607e-06, "loss": 0.12060168, "memory(GiB)": 13.7, "step": 29675, "train_speed(iter/s)": 1.539193 }, { "acc": 0.98254852, "epoch": 13.911413170846028, "grad_norm": 11.219196319580078, "learning_rate": 8.641695746208478e-06, "loss": 0.08201133, "memory(GiB)": 13.7, "step": 29680, "train_speed(iter/s)": 1.539205 }, { "acc": 0.95670052, "epoch": 13.913756737754863, "grad_norm": 8.219365119934082, "learning_rate": 8.641164560696966e-06, "loss": 0.24930749, "memory(GiB)": 13.7, "step": 29685, "train_speed(iter/s)": 1.53921 }, { "acc": 0.96859264, "epoch": 13.916100304663699, "grad_norm": 6.773554801940918, "learning_rate": 8.640633287675838e-06, "loss": 0.19031796, "memory(GiB)": 13.7, "step": 29690, "train_speed(iter/s)": 1.539201 }, { "acc": 0.97066469, "epoch": 13.918443871572533, "grad_norm": 4.696674346923828, "learning_rate": 8.640101927157864e-06, "loss": 0.13622396, "memory(GiB)": 13.7, "step": 29695, "train_speed(iter/s)": 1.539194 }, { "acc": 0.97458591, "epoch": 13.92078743848137, "grad_norm": 8.054862022399902, "learning_rate": 8.639570479155819e-06, "loss": 0.11439613, "memory(GiB)": 13.7, "step": 29700, "train_speed(iter/s)": 1.539195 }, { "acc": 0.97922354, "epoch": 13.923131005390204, "grad_norm": 7.4929070472717285, "learning_rate": 8.639038943682473e-06, "loss": 0.05243955, "memory(GiB)": 13.7, "step": 29705, "train_speed(iter/s)": 1.539207 }, { "acc": 0.97867565, "epoch": 13.92547457229904, "grad_norm": 9.456664085388184, "learning_rate": 8.638507320750606e-06, "loss": 0.13694289, "memory(GiB)": 13.7, "step": 29710, "train_speed(iter/s)": 1.5392 }, { "acc": 0.97979574, "epoch": 13.927818139207874, "grad_norm": 4.040122032165527, "learning_rate": 8.637975610372993e-06, "loss": 0.14358587, "memory(GiB)": 13.7, "step": 29715, "train_speed(iter/s)": 1.539194 }, { "acc": 0.98215885, "epoch": 13.93016170611671, "grad_norm": 2.2459537982940674, "learning_rate": 8.637443812562417e-06, "loss": 0.09633392, "memory(GiB)": 13.7, "step": 29720, "train_speed(iter/s)": 1.539192 }, { "acc": 0.96492062, "epoch": 13.932505273025544, "grad_norm": 5.301722526550293, "learning_rate": 8.63691192733166e-06, "loss": 0.19540946, "memory(GiB)": 13.7, "step": 29725, "train_speed(iter/s)": 1.539203 }, { "acc": 0.97870693, "epoch": 13.93484883993438, "grad_norm": 4.330524921417236, "learning_rate": 8.636379954693506e-06, "loss": 0.09475251, "memory(GiB)": 13.7, "step": 29730, "train_speed(iter/s)": 1.539212 }, { "acc": 0.96766033, "epoch": 13.937192406843215, "grad_norm": 6.115721702575684, "learning_rate": 8.635847894660742e-06, "loss": 0.13083863, "memory(GiB)": 13.7, "step": 29735, "train_speed(iter/s)": 1.539226 }, { "acc": 0.98416672, "epoch": 13.93953597375205, "grad_norm": 5.841412544250488, "learning_rate": 8.635315747246162e-06, "loss": 0.08717837, "memory(GiB)": 13.7, "step": 29740, "train_speed(iter/s)": 1.539226 }, { "acc": 0.97458324, "epoch": 13.941879540660885, "grad_norm": 13.193205833435059, "learning_rate": 8.634783512462549e-06, "loss": 0.09648516, "memory(GiB)": 13.7, "step": 29745, "train_speed(iter/s)": 1.539234 }, { "acc": 0.9815815, "epoch": 13.944223107569721, "grad_norm": 2.249950647354126, "learning_rate": 8.634251190322702e-06, "loss": 0.06723086, "memory(GiB)": 13.7, "step": 29750, "train_speed(iter/s)": 1.539235 }, { "acc": 0.97860575, "epoch": 13.946566674478557, "grad_norm": 7.883222579956055, "learning_rate": 8.633718780839412e-06, "loss": 0.11302861, "memory(GiB)": 13.7, "step": 29755, "train_speed(iter/s)": 1.539248 }, { "acc": 0.97383928, "epoch": 13.948910241387392, "grad_norm": 2.2948074340820312, "learning_rate": 8.63318628402548e-06, "loss": 0.10846068, "memory(GiB)": 13.7, "step": 29760, "train_speed(iter/s)": 1.539246 }, { "acc": 0.98833332, "epoch": 13.951253808296228, "grad_norm": 14.370094299316406, "learning_rate": 8.632653699893703e-06, "loss": 0.08153045, "memory(GiB)": 13.7, "step": 29765, "train_speed(iter/s)": 1.539252 }, { "acc": 0.98601761, "epoch": 13.953597375205062, "grad_norm": 2.2411022186279297, "learning_rate": 8.632121028456884e-06, "loss": 0.05244609, "memory(GiB)": 13.7, "step": 29770, "train_speed(iter/s)": 1.539243 }, { "acc": 0.97654762, "epoch": 13.955940942113898, "grad_norm": 2.1445956230163574, "learning_rate": 8.631588269727826e-06, "loss": 0.04830015, "memory(GiB)": 13.7, "step": 29775, "train_speed(iter/s)": 1.539242 }, { "acc": 0.9760417, "epoch": 13.958284509022732, "grad_norm": 5.535449028015137, "learning_rate": 8.631055423719334e-06, "loss": 0.09432521, "memory(GiB)": 13.7, "step": 29780, "train_speed(iter/s)": 1.539246 }, { "acc": 0.97110834, "epoch": 13.960628075931568, "grad_norm": 4.665432453155518, "learning_rate": 8.630522490444221e-06, "loss": 0.07566231, "memory(GiB)": 13.7, "step": 29785, "train_speed(iter/s)": 1.539261 }, { "acc": 0.96394176, "epoch": 13.962971642840403, "grad_norm": 8.479015350341797, "learning_rate": 8.62998946991529e-06, "loss": 0.20955863, "memory(GiB)": 13.7, "step": 29790, "train_speed(iter/s)": 1.539253 }, { "acc": 0.96196384, "epoch": 13.965315209749239, "grad_norm": 0.08790309727191925, "learning_rate": 8.629456362145356e-06, "loss": 0.20917847, "memory(GiB)": 13.7, "step": 29795, "train_speed(iter/s)": 1.539271 }, { "acc": 0.97250004, "epoch": 13.967658776658073, "grad_norm": 16.116073608398438, "learning_rate": 8.628923167147233e-06, "loss": 0.09463322, "memory(GiB)": 13.7, "step": 29800, "train_speed(iter/s)": 1.539277 }, { "acc": 0.973631, "epoch": 13.97000234356691, "grad_norm": 4.190852165222168, "learning_rate": 8.628389884933738e-06, "loss": 0.08357963, "memory(GiB)": 13.7, "step": 29805, "train_speed(iter/s)": 1.539299 }, { "acc": 0.98486118, "epoch": 13.972345910475743, "grad_norm": 4.481003284454346, "learning_rate": 8.627856515517688e-06, "loss": 0.06045508, "memory(GiB)": 13.7, "step": 29810, "train_speed(iter/s)": 1.539307 }, { "acc": 0.98315706, "epoch": 13.97468947738458, "grad_norm": 3.153775453567505, "learning_rate": 8.627323058911905e-06, "loss": 0.08145775, "memory(GiB)": 13.7, "step": 29815, "train_speed(iter/s)": 1.539321 }, { "acc": 0.98195515, "epoch": 13.977033044293414, "grad_norm": 7.165027618408203, "learning_rate": 8.62678951512921e-06, "loss": 0.07983867, "memory(GiB)": 13.7, "step": 29820, "train_speed(iter/s)": 1.539334 }, { "acc": 0.97862558, "epoch": 13.97937661120225, "grad_norm": 0.7674307823181152, "learning_rate": 8.62625588418243e-06, "loss": 0.12667418, "memory(GiB)": 13.7, "step": 29825, "train_speed(iter/s)": 1.53933 }, { "acc": 0.98149405, "epoch": 13.981720178111084, "grad_norm": 0.11025650054216385, "learning_rate": 8.625722166084387e-06, "loss": 0.1466597, "memory(GiB)": 13.7, "step": 29830, "train_speed(iter/s)": 1.539341 }, { "acc": 0.98438034, "epoch": 13.98406374501992, "grad_norm": 2.6535587310791016, "learning_rate": 8.625188360847915e-06, "loss": 0.10223545, "memory(GiB)": 13.7, "step": 29835, "train_speed(iter/s)": 1.539341 }, { "acc": 0.97999992, "epoch": 13.986407311928755, "grad_norm": 3.837576389312744, "learning_rate": 8.62465446848584e-06, "loss": 0.05559326, "memory(GiB)": 13.7, "step": 29840, "train_speed(iter/s)": 1.539335 }, { "acc": 0.97152662, "epoch": 13.98875087883759, "grad_norm": 9.234074592590332, "learning_rate": 8.624120489011002e-06, "loss": 0.14565238, "memory(GiB)": 13.7, "step": 29845, "train_speed(iter/s)": 1.539342 }, { "acc": 0.9676136, "epoch": 13.991094445746427, "grad_norm": 5.952748775482178, "learning_rate": 8.62358642243623e-06, "loss": 0.10172976, "memory(GiB)": 13.7, "step": 29850, "train_speed(iter/s)": 1.539342 }, { "acc": 0.96968212, "epoch": 13.993438012655261, "grad_norm": 6.578710556030273, "learning_rate": 8.623052268774363e-06, "loss": 0.12275822, "memory(GiB)": 13.7, "step": 29855, "train_speed(iter/s)": 1.539353 }, { "acc": 0.98428822, "epoch": 13.995781579564097, "grad_norm": 0.2815636694431305, "learning_rate": 8.62251802803824e-06, "loss": 0.04949852, "memory(GiB)": 13.7, "step": 29860, "train_speed(iter/s)": 1.539365 }, { "acc": 0.98148689, "epoch": 13.998125146472931, "grad_norm": 4.210649490356445, "learning_rate": 8.621983700240704e-06, "loss": 0.09010123, "memory(GiB)": 13.7, "step": 29865, "train_speed(iter/s)": 1.539374 }, { "acc": 0.97055197, "epoch": 14.000468713381768, "grad_norm": 4.089526653289795, "learning_rate": 8.621449285394598e-06, "loss": 0.12015636, "memory(GiB)": 13.7, "step": 29870, "train_speed(iter/s)": 1.539348 }, { "acc": 0.97592258, "epoch": 14.002812280290602, "grad_norm": 3.5218095779418945, "learning_rate": 8.620914783512765e-06, "loss": 0.09830165, "memory(GiB)": 13.7, "step": 29875, "train_speed(iter/s)": 1.539337 }, { "acc": 0.97180872, "epoch": 14.005155847199438, "grad_norm": 9.367171287536621, "learning_rate": 8.620380194608057e-06, "loss": 0.15365381, "memory(GiB)": 13.7, "step": 29880, "train_speed(iter/s)": 1.539342 }, { "acc": 0.98311882, "epoch": 14.007499414108272, "grad_norm": 3.7301976680755615, "learning_rate": 8.619845518693319e-06, "loss": 0.07486918, "memory(GiB)": 13.7, "step": 29885, "train_speed(iter/s)": 1.539341 }, { "acc": 0.99437504, "epoch": 14.009842981017108, "grad_norm": 5.00352668762207, "learning_rate": 8.619310755781406e-06, "loss": 0.02395268, "memory(GiB)": 13.7, "step": 29890, "train_speed(iter/s)": 1.539346 }, { "acc": 0.97674675, "epoch": 14.012186547925943, "grad_norm": 5.407556056976318, "learning_rate": 8.61877590588517e-06, "loss": 0.12706356, "memory(GiB)": 13.7, "step": 29895, "train_speed(iter/s)": 1.539356 }, { "acc": 0.98167667, "epoch": 14.014530114834779, "grad_norm": 2.4068031311035156, "learning_rate": 8.618240969017472e-06, "loss": 0.09297432, "memory(GiB)": 13.7, "step": 29900, "train_speed(iter/s)": 1.539369 }, { "acc": 0.99031658, "epoch": 14.016873681743613, "grad_norm": 2.1116788387298584, "learning_rate": 8.617705945191161e-06, "loss": 0.05576315, "memory(GiB)": 13.7, "step": 29905, "train_speed(iter/s)": 1.539374 }, { "acc": 0.98576393, "epoch": 14.01921724865245, "grad_norm": 5.951136589050293, "learning_rate": 8.617170834419106e-06, "loss": 0.09166851, "memory(GiB)": 13.7, "step": 29910, "train_speed(iter/s)": 1.539376 }, { "acc": 0.9719965, "epoch": 14.021560815561283, "grad_norm": 5.310774326324463, "learning_rate": 8.616635636714167e-06, "loss": 0.11752095, "memory(GiB)": 13.7, "step": 29915, "train_speed(iter/s)": 1.539377 }, { "acc": 0.99075003, "epoch": 14.02390438247012, "grad_norm": 0.6227103471755981, "learning_rate": 8.616100352089204e-06, "loss": 0.09206719, "memory(GiB)": 13.7, "step": 29920, "train_speed(iter/s)": 1.539385 }, { "acc": 0.97652569, "epoch": 14.026247949378956, "grad_norm": 6.193179130554199, "learning_rate": 8.615564980557089e-06, "loss": 0.114314, "memory(GiB)": 13.7, "step": 29925, "train_speed(iter/s)": 1.539392 }, { "acc": 0.98830881, "epoch": 14.02859151628779, "grad_norm": 3.243882179260254, "learning_rate": 8.615029522130686e-06, "loss": 0.06265405, "memory(GiB)": 13.7, "step": 29930, "train_speed(iter/s)": 1.539395 }, { "acc": 0.98321438, "epoch": 14.030935083196626, "grad_norm": 6.608026504516602, "learning_rate": 8.61449397682287e-06, "loss": 0.10086052, "memory(GiB)": 13.7, "step": 29935, "train_speed(iter/s)": 1.539397 }, { "acc": 0.97993631, "epoch": 14.03327865010546, "grad_norm": 2.169529438018799, "learning_rate": 8.61395834464651e-06, "loss": 0.14437006, "memory(GiB)": 13.7, "step": 29940, "train_speed(iter/s)": 1.5394 }, { "acc": 0.97901783, "epoch": 14.035622217014296, "grad_norm": 4.882206439971924, "learning_rate": 8.613422625614484e-06, "loss": 0.12092035, "memory(GiB)": 13.7, "step": 29945, "train_speed(iter/s)": 1.539392 }, { "acc": 0.97986107, "epoch": 14.03796578392313, "grad_norm": 5.547509670257568, "learning_rate": 8.612886819739667e-06, "loss": 0.06878585, "memory(GiB)": 13.7, "step": 29950, "train_speed(iter/s)": 1.539395 }, { "acc": 0.9871726, "epoch": 14.040309350831967, "grad_norm": 4.0600504875183105, "learning_rate": 8.612350927034939e-06, "loss": 0.0514564, "memory(GiB)": 13.7, "step": 29955, "train_speed(iter/s)": 1.539403 }, { "acc": 0.984375, "epoch": 14.042652917740801, "grad_norm": 4.132783889770508, "learning_rate": 8.61181494751318e-06, "loss": 0.08742077, "memory(GiB)": 13.7, "step": 29960, "train_speed(iter/s)": 1.539405 }, { "acc": 0.98650303, "epoch": 14.044996484649637, "grad_norm": 4.339230060577393, "learning_rate": 8.611278881187272e-06, "loss": 0.06235611, "memory(GiB)": 13.7, "step": 29965, "train_speed(iter/s)": 1.53942 }, { "acc": 0.97133932, "epoch": 14.047340051558471, "grad_norm": 5.728674411773682, "learning_rate": 8.610742728070104e-06, "loss": 0.1628166, "memory(GiB)": 13.7, "step": 29970, "train_speed(iter/s)": 1.539428 }, { "acc": 0.97876987, "epoch": 14.049683618467308, "grad_norm": 7.270175933837891, "learning_rate": 8.610206488174563e-06, "loss": 0.11548946, "memory(GiB)": 13.7, "step": 29975, "train_speed(iter/s)": 1.539432 }, { "acc": 0.98053036, "epoch": 14.052027185376142, "grad_norm": 5.385258197784424, "learning_rate": 8.609670161513534e-06, "loss": 0.11374794, "memory(GiB)": 13.7, "step": 29980, "train_speed(iter/s)": 1.539429 }, { "acc": 0.98601103, "epoch": 14.054370752284978, "grad_norm": 0.05811160430312157, "learning_rate": 8.609133748099911e-06, "loss": 0.05946231, "memory(GiB)": 13.7, "step": 29985, "train_speed(iter/s)": 1.539431 }, { "acc": 0.9796814, "epoch": 14.056714319193812, "grad_norm": 3.0676090717315674, "learning_rate": 8.608597247946589e-06, "loss": 0.0776779, "memory(GiB)": 13.7, "step": 29990, "train_speed(iter/s)": 1.539434 }, { "acc": 0.99020834, "epoch": 14.059057886102648, "grad_norm": 1.4464137554168701, "learning_rate": 8.608060661066464e-06, "loss": 0.01738407, "memory(GiB)": 13.7, "step": 29995, "train_speed(iter/s)": 1.539451 }, { "acc": 0.98313494, "epoch": 14.061401453011484, "grad_norm": 1.1610829830169678, "learning_rate": 8.607523987472431e-06, "loss": 0.18903986, "memory(GiB)": 13.7, "step": 30000, "train_speed(iter/s)": 1.539453 }, { "epoch": 14.061401453011484, "eval_acc": 0.7725433331085182, "eval_loss": 1.0825445652008057, "eval_runtime": 144.1571, "eval_samples_per_second": 55.967, "eval_steps_per_second": 6.999, "step": 30000 }, { "acc": 0.97495947, "epoch": 14.063745019920319, "grad_norm": 3.49326229095459, "learning_rate": 8.606987227177393e-06, "loss": 0.10526201, "memory(GiB)": 13.7, "step": 30005, "train_speed(iter/s)": 1.525846 }, { "acc": 0.96277695, "epoch": 14.066088586829155, "grad_norm": 9.5040864944458, "learning_rate": 8.606450380194248e-06, "loss": 0.16843188, "memory(GiB)": 13.7, "step": 30010, "train_speed(iter/s)": 1.525845 }, { "acc": 0.99508934, "epoch": 14.068432153737989, "grad_norm": 3.1324386596679688, "learning_rate": 8.605913446535904e-06, "loss": 0.04685047, "memory(GiB)": 13.7, "step": 30015, "train_speed(iter/s)": 1.525865 }, { "acc": 0.98359585, "epoch": 14.070775720646825, "grad_norm": 1.7260805368423462, "learning_rate": 8.605376426215265e-06, "loss": 0.11899743, "memory(GiB)": 13.7, "step": 30020, "train_speed(iter/s)": 1.525878 }, { "acc": 0.98036861, "epoch": 14.07311928755566, "grad_norm": 8.387788772583008, "learning_rate": 8.604839319245237e-06, "loss": 0.09088486, "memory(GiB)": 13.7, "step": 30025, "train_speed(iter/s)": 1.525889 }, { "acc": 0.98465824, "epoch": 14.075462854464496, "grad_norm": 5.341947078704834, "learning_rate": 8.604302125638737e-06, "loss": 0.06033329, "memory(GiB)": 13.7, "step": 30030, "train_speed(iter/s)": 1.525917 }, { "acc": 0.96643429, "epoch": 14.07780642137333, "grad_norm": 3.262033224105835, "learning_rate": 8.603764845408672e-06, "loss": 0.15434701, "memory(GiB)": 13.7, "step": 30035, "train_speed(iter/s)": 1.525911 }, { "acc": 0.97459326, "epoch": 14.080149988282166, "grad_norm": 5.733255863189697, "learning_rate": 8.603227478567957e-06, "loss": 0.15041919, "memory(GiB)": 13.7, "step": 30040, "train_speed(iter/s)": 1.525918 }, { "acc": 0.98089018, "epoch": 14.082493555191, "grad_norm": 0.9467210173606873, "learning_rate": 8.602690025129508e-06, "loss": 0.1482301, "memory(GiB)": 13.7, "step": 30045, "train_speed(iter/s)": 1.525922 }, { "acc": 0.98050156, "epoch": 14.084837122099836, "grad_norm": 18.810382843017578, "learning_rate": 8.602152485106247e-06, "loss": 0.07316871, "memory(GiB)": 13.7, "step": 30050, "train_speed(iter/s)": 1.525941 }, { "acc": 0.97782192, "epoch": 14.08718068900867, "grad_norm": 5.804651737213135, "learning_rate": 8.601614858511093e-06, "loss": 0.14149078, "memory(GiB)": 13.7, "step": 30055, "train_speed(iter/s)": 1.525945 }, { "acc": 0.97694397, "epoch": 14.089524255917507, "grad_norm": 12.226297378540039, "learning_rate": 8.601077145356968e-06, "loss": 0.15023677, "memory(GiB)": 13.7, "step": 30060, "train_speed(iter/s)": 1.525964 }, { "acc": 0.97553444, "epoch": 14.091867822826341, "grad_norm": 4.026096343994141, "learning_rate": 8.600539345656797e-06, "loss": 0.17151954, "memory(GiB)": 13.7, "step": 30065, "train_speed(iter/s)": 1.525957 }, { "acc": 0.98454857, "epoch": 14.094211389735177, "grad_norm": 0.012592007406055927, "learning_rate": 8.600001459423506e-06, "loss": 0.08997734, "memory(GiB)": 13.7, "step": 30070, "train_speed(iter/s)": 1.52597 }, { "acc": 0.97420635, "epoch": 14.096554956644011, "grad_norm": 4.5580315589904785, "learning_rate": 8.599463486670027e-06, "loss": 0.13604356, "memory(GiB)": 13.7, "step": 30075, "train_speed(iter/s)": 1.525971 }, { "acc": 0.98354168, "epoch": 14.098898523552847, "grad_norm": 6.11962366104126, "learning_rate": 8.59892542740929e-06, "loss": 0.05870872, "memory(GiB)": 13.7, "step": 30080, "train_speed(iter/s)": 1.525972 }, { "acc": 0.97225647, "epoch": 14.101242090461684, "grad_norm": 2.7230215072631836, "learning_rate": 8.598387281654225e-06, "loss": 0.14319628, "memory(GiB)": 13.7, "step": 30085, "train_speed(iter/s)": 1.525972 }, { "acc": 0.9640625, "epoch": 14.103585657370518, "grad_norm": 6.989505767822266, "learning_rate": 8.597849049417769e-06, "loss": 0.16834249, "memory(GiB)": 13.7, "step": 30090, "train_speed(iter/s)": 1.525972 }, { "acc": 0.98084679, "epoch": 14.105929224279354, "grad_norm": 3.3468470573425293, "learning_rate": 8.597310730712864e-06, "loss": 0.12053282, "memory(GiB)": 13.7, "step": 30095, "train_speed(iter/s)": 1.525974 }, { "acc": 0.98154755, "epoch": 14.108272791188188, "grad_norm": 2.0790092945098877, "learning_rate": 8.596772325552443e-06, "loss": 0.06928418, "memory(GiB)": 13.7, "step": 30100, "train_speed(iter/s)": 1.525979 }, { "acc": 0.97560091, "epoch": 14.110616358097024, "grad_norm": 2.761077404022217, "learning_rate": 8.596233833949449e-06, "loss": 0.14312372, "memory(GiB)": 13.7, "step": 30105, "train_speed(iter/s)": 1.525984 }, { "acc": 0.97217255, "epoch": 14.112959925005859, "grad_norm": 7.574004650115967, "learning_rate": 8.595695255916828e-06, "loss": 0.19418154, "memory(GiB)": 13.7, "step": 30110, "train_speed(iter/s)": 1.525973 }, { "acc": 0.97902784, "epoch": 14.115303491914695, "grad_norm": 3.2928617000579834, "learning_rate": 8.595156591467523e-06, "loss": 0.1322392, "memory(GiB)": 13.7, "step": 30115, "train_speed(iter/s)": 1.525983 }, { "acc": 0.97047005, "epoch": 14.117647058823529, "grad_norm": 5.262578964233398, "learning_rate": 8.594617840614486e-06, "loss": 0.12626536, "memory(GiB)": 13.7, "step": 30120, "train_speed(iter/s)": 1.525978 }, { "acc": 0.96601639, "epoch": 14.119990625732365, "grad_norm": 5.580711364746094, "learning_rate": 8.594079003370661e-06, "loss": 0.09858355, "memory(GiB)": 13.7, "step": 30125, "train_speed(iter/s)": 1.52598 }, { "acc": 0.9916667, "epoch": 14.1223341926412, "grad_norm": 1.2048428058624268, "learning_rate": 8.593540079749003e-06, "loss": 0.03538131, "memory(GiB)": 13.7, "step": 30130, "train_speed(iter/s)": 1.525984 }, { "acc": 0.98231688, "epoch": 14.124677759550035, "grad_norm": 8.700007438659668, "learning_rate": 8.593001069762465e-06, "loss": 0.10399891, "memory(GiB)": 13.7, "step": 30135, "train_speed(iter/s)": 1.525989 }, { "acc": 0.98811045, "epoch": 14.12702132645887, "grad_norm": 4.554961681365967, "learning_rate": 8.592461973424005e-06, "loss": 0.04310665, "memory(GiB)": 13.7, "step": 30140, "train_speed(iter/s)": 1.526006 }, { "acc": 0.98553028, "epoch": 14.129364893367706, "grad_norm": 1.9916709661483765, "learning_rate": 8.591922790746578e-06, "loss": 0.05272673, "memory(GiB)": 13.7, "step": 30145, "train_speed(iter/s)": 1.52601 }, { "acc": 0.98027782, "epoch": 14.13170846027654, "grad_norm": 16.773096084594727, "learning_rate": 8.591383521743148e-06, "loss": 0.14467887, "memory(GiB)": 13.7, "step": 30150, "train_speed(iter/s)": 1.526004 }, { "acc": 0.97521172, "epoch": 14.134052027185376, "grad_norm": 5.832727909088135, "learning_rate": 8.590844166426673e-06, "loss": 0.12841206, "memory(GiB)": 13.7, "step": 30155, "train_speed(iter/s)": 1.526013 }, { "acc": 0.97755184, "epoch": 14.13639559409421, "grad_norm": 6.525993824005127, "learning_rate": 8.59030472481012e-06, "loss": 0.08772384, "memory(GiB)": 13.7, "step": 30160, "train_speed(iter/s)": 1.52602 }, { "acc": 0.96364574, "epoch": 14.138739161003047, "grad_norm": 6.542240142822266, "learning_rate": 8.589765196906456e-06, "loss": 0.16290153, "memory(GiB)": 13.7, "step": 30165, "train_speed(iter/s)": 1.526036 }, { "acc": 0.96697922, "epoch": 14.141082727911883, "grad_norm": 6.693478584289551, "learning_rate": 8.589225582728646e-06, "loss": 0.12946899, "memory(GiB)": 13.7, "step": 30170, "train_speed(iter/s)": 1.526034 }, { "acc": 0.97488098, "epoch": 14.143426294820717, "grad_norm": 4.550048828125, "learning_rate": 8.588685882289664e-06, "loss": 0.10035454, "memory(GiB)": 13.7, "step": 30175, "train_speed(iter/s)": 1.526056 }, { "acc": 0.9780921, "epoch": 14.145769861729553, "grad_norm": 3.3893678188323975, "learning_rate": 8.588146095602484e-06, "loss": 0.09045217, "memory(GiB)": 13.7, "step": 30180, "train_speed(iter/s)": 1.526067 }, { "acc": 0.98760023, "epoch": 14.148113428638387, "grad_norm": 2.0851831436157227, "learning_rate": 8.587606222680075e-06, "loss": 0.05730698, "memory(GiB)": 13.7, "step": 30185, "train_speed(iter/s)": 1.526081 }, { "acc": 0.96431541, "epoch": 14.150456995547223, "grad_norm": 11.226059913635254, "learning_rate": 8.587066263535418e-06, "loss": 0.16740808, "memory(GiB)": 13.7, "step": 30190, "train_speed(iter/s)": 1.526102 }, { "acc": 0.9684227, "epoch": 14.152800562456058, "grad_norm": 16.21340560913086, "learning_rate": 8.58652621818149e-06, "loss": 0.13633609, "memory(GiB)": 13.7, "step": 30195, "train_speed(iter/s)": 1.526106 }, { "acc": 0.98856087, "epoch": 14.155144129364894, "grad_norm": 2.0096890926361084, "learning_rate": 8.585986086631274e-06, "loss": 0.10108886, "memory(GiB)": 13.7, "step": 30200, "train_speed(iter/s)": 1.526108 }, { "acc": 0.97321434, "epoch": 14.157487696273728, "grad_norm": 12.337242126464844, "learning_rate": 8.585445868897752e-06, "loss": 0.14493909, "memory(GiB)": 13.7, "step": 30205, "train_speed(iter/s)": 1.526109 }, { "acc": 0.98363094, "epoch": 14.159831263182564, "grad_norm": 4.148352146148682, "learning_rate": 8.584905564993907e-06, "loss": 0.06539447, "memory(GiB)": 13.7, "step": 30210, "train_speed(iter/s)": 1.526114 }, { "acc": 0.98921728, "epoch": 14.162174830091399, "grad_norm": 2.7574357986450195, "learning_rate": 8.584365174932729e-06, "loss": 0.11373085, "memory(GiB)": 13.7, "step": 30215, "train_speed(iter/s)": 1.526124 }, { "acc": 0.98619652, "epoch": 14.164518397000235, "grad_norm": 2.7083611488342285, "learning_rate": 8.583824698727205e-06, "loss": 0.13598583, "memory(GiB)": 13.7, "step": 30220, "train_speed(iter/s)": 1.526134 }, { "acc": 0.99425507, "epoch": 14.166861963909069, "grad_norm": 1.050795316696167, "learning_rate": 8.58328413639033e-06, "loss": 0.02418084, "memory(GiB)": 13.7, "step": 30225, "train_speed(iter/s)": 1.526139 }, { "acc": 0.98931551, "epoch": 14.169205530817905, "grad_norm": 5.203993320465088, "learning_rate": 8.582743487935092e-06, "loss": 0.03896849, "memory(GiB)": 13.7, "step": 30230, "train_speed(iter/s)": 1.526143 }, { "acc": 0.97738008, "epoch": 14.17154909772674, "grad_norm": 4.96525764465332, "learning_rate": 8.58220275337449e-06, "loss": 0.12856007, "memory(GiB)": 13.7, "step": 30235, "train_speed(iter/s)": 1.526146 }, { "acc": 0.98344383, "epoch": 14.173892664635575, "grad_norm": 10.300013542175293, "learning_rate": 8.581661932721522e-06, "loss": 0.08821651, "memory(GiB)": 13.7, "step": 30240, "train_speed(iter/s)": 1.526155 }, { "acc": 0.98411713, "epoch": 14.176236231544411, "grad_norm": 4.127486228942871, "learning_rate": 8.581121025989186e-06, "loss": 0.13913429, "memory(GiB)": 13.7, "step": 30245, "train_speed(iter/s)": 1.526153 }, { "acc": 0.97361116, "epoch": 14.178579798453246, "grad_norm": 4.5722150802612305, "learning_rate": 8.58058003319048e-06, "loss": 0.11065813, "memory(GiB)": 13.7, "step": 30250, "train_speed(iter/s)": 1.526163 }, { "acc": 0.9791667, "epoch": 14.180923365362082, "grad_norm": 0.9127721190452576, "learning_rate": 8.580038954338416e-06, "loss": 0.09568827, "memory(GiB)": 13.7, "step": 30255, "train_speed(iter/s)": 1.526176 }, { "acc": 0.97654762, "epoch": 14.183266932270916, "grad_norm": 6.695627689361572, "learning_rate": 8.579497789445993e-06, "loss": 0.08849066, "memory(GiB)": 13.7, "step": 30260, "train_speed(iter/s)": 1.526173 }, { "acc": 0.99196434, "epoch": 14.185610499179752, "grad_norm": 15.239420890808105, "learning_rate": 8.578956538526223e-06, "loss": 0.04059785, "memory(GiB)": 13.7, "step": 30265, "train_speed(iter/s)": 1.526179 }, { "acc": 0.98005638, "epoch": 14.187954066088587, "grad_norm": 4.913684368133545, "learning_rate": 8.578415201592112e-06, "loss": 0.06815232, "memory(GiB)": 13.7, "step": 30270, "train_speed(iter/s)": 1.526176 }, { "acc": 0.9819643, "epoch": 14.190297632997423, "grad_norm": 4.387862682342529, "learning_rate": 8.577873778656674e-06, "loss": 0.04733481, "memory(GiB)": 13.7, "step": 30275, "train_speed(iter/s)": 1.526178 }, { "acc": 0.98467264, "epoch": 14.192641199906257, "grad_norm": 4.446757793426514, "learning_rate": 8.577332269732922e-06, "loss": 0.05152177, "memory(GiB)": 13.7, "step": 30280, "train_speed(iter/s)": 1.526187 }, { "acc": 0.9789773, "epoch": 14.194984766815093, "grad_norm": 7.53400182723999, "learning_rate": 8.576790674833876e-06, "loss": 0.13543869, "memory(GiB)": 13.7, "step": 30285, "train_speed(iter/s)": 1.526194 }, { "acc": 0.9824522, "epoch": 14.197328333723927, "grad_norm": 6.0969648361206055, "learning_rate": 8.576248993972549e-06, "loss": 0.08872364, "memory(GiB)": 13.7, "step": 30290, "train_speed(iter/s)": 1.52621 }, { "acc": 0.98458328, "epoch": 14.199671900632763, "grad_norm": 7.462759494781494, "learning_rate": 8.575707227161964e-06, "loss": 0.05728731, "memory(GiB)": 13.7, "step": 30295, "train_speed(iter/s)": 1.526218 }, { "acc": 0.97193451, "epoch": 14.202015467541598, "grad_norm": 4.848907470703125, "learning_rate": 8.575165374415142e-06, "loss": 0.16730345, "memory(GiB)": 13.7, "step": 30300, "train_speed(iter/s)": 1.526224 }, { "acc": 0.9789175, "epoch": 14.204359034450434, "grad_norm": 6.39552116394043, "learning_rate": 8.574623435745109e-06, "loss": 0.07452651, "memory(GiB)": 13.7, "step": 30305, "train_speed(iter/s)": 1.526231 }, { "acc": 0.9764946, "epoch": 14.206702601359268, "grad_norm": 6.325210094451904, "learning_rate": 8.574081411164891e-06, "loss": 0.1547925, "memory(GiB)": 13.7, "step": 30310, "train_speed(iter/s)": 1.526246 }, { "acc": 0.96416187, "epoch": 14.209046168268104, "grad_norm": 7.209048271179199, "learning_rate": 8.573539300687515e-06, "loss": 0.14818994, "memory(GiB)": 13.7, "step": 30315, "train_speed(iter/s)": 1.526257 }, { "acc": 0.97895222, "epoch": 14.211389735176938, "grad_norm": 4.834353923797607, "learning_rate": 8.572997104326013e-06, "loss": 0.08827738, "memory(GiB)": 13.7, "step": 30320, "train_speed(iter/s)": 1.526261 }, { "acc": 0.98299675, "epoch": 14.213733302085775, "grad_norm": 1.0546069145202637, "learning_rate": 8.572454822093416e-06, "loss": 0.08115303, "memory(GiB)": 13.7, "step": 30325, "train_speed(iter/s)": 1.526277 }, { "acc": 0.97992563, "epoch": 14.21607686899461, "grad_norm": 6.785747528076172, "learning_rate": 8.571912454002763e-06, "loss": 0.07011846, "memory(GiB)": 13.7, "step": 30330, "train_speed(iter/s)": 1.526287 }, { "acc": 0.97988091, "epoch": 14.218420435903445, "grad_norm": 5.18417501449585, "learning_rate": 8.571370000067085e-06, "loss": 0.09125263, "memory(GiB)": 13.7, "step": 30335, "train_speed(iter/s)": 1.526288 }, { "acc": 0.9744791, "epoch": 14.220764002812281, "grad_norm": 10.60047721862793, "learning_rate": 8.570827460299423e-06, "loss": 0.11945685, "memory(GiB)": 13.7, "step": 30340, "train_speed(iter/s)": 1.5263 }, { "acc": 0.9776041, "epoch": 14.223107569721115, "grad_norm": 5.686521530151367, "learning_rate": 8.57028483471282e-06, "loss": 0.14514089, "memory(GiB)": 13.7, "step": 30345, "train_speed(iter/s)": 1.526307 }, { "acc": 0.97018433, "epoch": 14.225451136629951, "grad_norm": 3.2993178367614746, "learning_rate": 8.569742123320316e-06, "loss": 0.15511934, "memory(GiB)": 13.7, "step": 30350, "train_speed(iter/s)": 1.52631 }, { "acc": 0.97079449, "epoch": 14.227794703538786, "grad_norm": 6.307396411895752, "learning_rate": 8.569199326134959e-06, "loss": 0.12063447, "memory(GiB)": 13.7, "step": 30355, "train_speed(iter/s)": 1.526324 }, { "acc": 0.97753925, "epoch": 14.230138270447622, "grad_norm": 8.926579475402832, "learning_rate": 8.568656443169792e-06, "loss": 0.17577403, "memory(GiB)": 13.7, "step": 30360, "train_speed(iter/s)": 1.526335 }, { "acc": 0.97140627, "epoch": 14.232481837356456, "grad_norm": 10.534551620483398, "learning_rate": 8.568113474437867e-06, "loss": 0.15708991, "memory(GiB)": 13.7, "step": 30365, "train_speed(iter/s)": 1.52634 }, { "acc": 0.97525253, "epoch": 14.234825404265292, "grad_norm": 2.4019317626953125, "learning_rate": 8.567570419952236e-06, "loss": 0.1359373, "memory(GiB)": 13.7, "step": 30370, "train_speed(iter/s)": 1.52635 }, { "acc": 0.98968754, "epoch": 14.237168971174126, "grad_norm": 3.3188090324401855, "learning_rate": 8.56702727972595e-06, "loss": 0.0974383, "memory(GiB)": 13.7, "step": 30375, "train_speed(iter/s)": 1.526347 }, { "acc": 0.97362175, "epoch": 14.239512538082963, "grad_norm": 1.8642724752426147, "learning_rate": 8.566484053772063e-06, "loss": 0.08009183, "memory(GiB)": 13.7, "step": 30380, "train_speed(iter/s)": 1.526358 }, { "acc": 0.97281742, "epoch": 14.241856104991797, "grad_norm": 39.170894622802734, "learning_rate": 8.565940742103637e-06, "loss": 0.16756959, "memory(GiB)": 13.7, "step": 30385, "train_speed(iter/s)": 1.526379 }, { "acc": 0.974125, "epoch": 14.244199671900633, "grad_norm": 4.827662944793701, "learning_rate": 8.56539734473373e-06, "loss": 0.11769898, "memory(GiB)": 13.7, "step": 30390, "train_speed(iter/s)": 1.526396 }, { "acc": 0.97038689, "epoch": 14.246543238809467, "grad_norm": 5.691500186920166, "learning_rate": 8.564853861675402e-06, "loss": 0.09912255, "memory(GiB)": 13.7, "step": 30395, "train_speed(iter/s)": 1.52639 }, { "acc": 0.97305717, "epoch": 14.248886805718303, "grad_norm": 1.796858549118042, "learning_rate": 8.564310292941714e-06, "loss": 0.10032556, "memory(GiB)": 13.7, "step": 30400, "train_speed(iter/s)": 1.526389 }, { "acc": 0.97899532, "epoch": 14.251230372627138, "grad_norm": 2.814812660217285, "learning_rate": 8.563766638545736e-06, "loss": 0.06597189, "memory(GiB)": 13.7, "step": 30405, "train_speed(iter/s)": 1.526398 }, { "acc": 0.9871727, "epoch": 14.253573939535974, "grad_norm": 4.026272773742676, "learning_rate": 8.563222898500536e-06, "loss": 0.0564403, "memory(GiB)": 13.7, "step": 30410, "train_speed(iter/s)": 1.52642 }, { "acc": 0.98536711, "epoch": 14.25591750644481, "grad_norm": 9.150726318359375, "learning_rate": 8.56267907281918e-06, "loss": 0.06070884, "memory(GiB)": 13.7, "step": 30415, "train_speed(iter/s)": 1.526422 }, { "acc": 0.98823872, "epoch": 14.258261073353644, "grad_norm": 0.4423527419567108, "learning_rate": 8.562135161514742e-06, "loss": 0.03180596, "memory(GiB)": 13.7, "step": 30420, "train_speed(iter/s)": 1.526431 }, { "acc": 0.9735363, "epoch": 14.26060464026248, "grad_norm": 8.606762886047363, "learning_rate": 8.561591164600297e-06, "loss": 0.0928108, "memory(GiB)": 13.7, "step": 30425, "train_speed(iter/s)": 1.526439 }, { "acc": 0.98315973, "epoch": 14.262948207171315, "grad_norm": 4.341729640960693, "learning_rate": 8.561047082088917e-06, "loss": 0.06692121, "memory(GiB)": 13.7, "step": 30430, "train_speed(iter/s)": 1.52644 }, { "acc": 0.97986298, "epoch": 14.26529177408015, "grad_norm": 5.109224319458008, "learning_rate": 8.560502913993686e-06, "loss": 0.07554933, "memory(GiB)": 13.7, "step": 30435, "train_speed(iter/s)": 1.526453 }, { "acc": 0.9826705, "epoch": 14.267635340988985, "grad_norm": 2.656696319580078, "learning_rate": 8.559958660327678e-06, "loss": 0.11448663, "memory(GiB)": 13.7, "step": 30440, "train_speed(iter/s)": 1.526454 }, { "acc": 0.97371254, "epoch": 14.269978907897821, "grad_norm": 3.6040148735046387, "learning_rate": 8.559414321103978e-06, "loss": 0.24168255, "memory(GiB)": 13.7, "step": 30445, "train_speed(iter/s)": 1.52646 }, { "acc": 0.9850297, "epoch": 14.272322474806655, "grad_norm": 2.974128484725952, "learning_rate": 8.558869896335668e-06, "loss": 0.07713205, "memory(GiB)": 13.7, "step": 30450, "train_speed(iter/s)": 1.526468 }, { "acc": 0.9822916, "epoch": 14.274666041715491, "grad_norm": 0.010599725879728794, "learning_rate": 8.558325386035839e-06, "loss": 0.05503147, "memory(GiB)": 13.7, "step": 30455, "train_speed(iter/s)": 1.526483 }, { "acc": 0.97786865, "epoch": 14.277009608624326, "grad_norm": 6.80911111831665, "learning_rate": 8.557780790217574e-06, "loss": 0.0721627, "memory(GiB)": 13.7, "step": 30460, "train_speed(iter/s)": 1.52649 }, { "acc": 0.98126888, "epoch": 14.279353175533162, "grad_norm": 3.0680487155914307, "learning_rate": 8.557236108893966e-06, "loss": 0.06091611, "memory(GiB)": 13.7, "step": 30465, "train_speed(iter/s)": 1.526496 }, { "acc": 0.98291664, "epoch": 14.281696742441996, "grad_norm": 3.9410738945007324, "learning_rate": 8.556691342078106e-06, "loss": 0.10573845, "memory(GiB)": 13.7, "step": 30470, "train_speed(iter/s)": 1.526507 }, { "acc": 0.9556385, "epoch": 14.284040309350832, "grad_norm": 6.1206254959106445, "learning_rate": 8.556146489783092e-06, "loss": 0.23886037, "memory(GiB)": 13.7, "step": 30475, "train_speed(iter/s)": 1.526519 }, { "acc": 0.98567543, "epoch": 14.286383876259666, "grad_norm": 26.669431686401367, "learning_rate": 8.555601552022015e-06, "loss": 0.07835006, "memory(GiB)": 13.7, "step": 30480, "train_speed(iter/s)": 1.526516 }, { "acc": 0.98208332, "epoch": 14.288727443168503, "grad_norm": 4.779819488525391, "learning_rate": 8.555056528807975e-06, "loss": 0.07027203, "memory(GiB)": 13.7, "step": 30485, "train_speed(iter/s)": 1.526519 }, { "acc": 0.98604164, "epoch": 14.291071010077339, "grad_norm": 3.7482991218566895, "learning_rate": 8.554511420154076e-06, "loss": 0.08774364, "memory(GiB)": 13.7, "step": 30490, "train_speed(iter/s)": 1.526539 }, { "acc": 0.98008928, "epoch": 14.293414576986173, "grad_norm": 4.528597354888916, "learning_rate": 8.553966226073417e-06, "loss": 0.1132638, "memory(GiB)": 13.7, "step": 30495, "train_speed(iter/s)": 1.526542 }, { "acc": 0.96785679, "epoch": 14.295758143895009, "grad_norm": 4.925398349761963, "learning_rate": 8.553420946579105e-06, "loss": 0.13368378, "memory(GiB)": 13.7, "step": 30500, "train_speed(iter/s)": 1.526558 }, { "acc": 0.96160927, "epoch": 14.298101710803843, "grad_norm": 3.7674779891967773, "learning_rate": 8.552875581684244e-06, "loss": 0.12124559, "memory(GiB)": 13.7, "step": 30505, "train_speed(iter/s)": 1.526565 }, { "acc": 0.97547626, "epoch": 14.30044527771268, "grad_norm": 4.685840606689453, "learning_rate": 8.552330131401945e-06, "loss": 0.15191815, "memory(GiB)": 13.7, "step": 30510, "train_speed(iter/s)": 1.526567 }, { "acc": 0.97273808, "epoch": 14.302788844621514, "grad_norm": 6.331048488616943, "learning_rate": 8.551784595745319e-06, "loss": 0.12356538, "memory(GiB)": 13.7, "step": 30515, "train_speed(iter/s)": 1.526591 }, { "acc": 0.98615761, "epoch": 14.30513241153035, "grad_norm": 9.345054626464844, "learning_rate": 8.55123897472748e-06, "loss": 0.06248586, "memory(GiB)": 13.7, "step": 30520, "train_speed(iter/s)": 1.526597 }, { "acc": 0.97928276, "epoch": 14.307475978439184, "grad_norm": 0.26951807737350464, "learning_rate": 8.550693268361537e-06, "loss": 0.10538719, "memory(GiB)": 13.7, "step": 30525, "train_speed(iter/s)": 1.526602 }, { "acc": 0.97997055, "epoch": 14.30981954534802, "grad_norm": 6.404473304748535, "learning_rate": 8.550147476660615e-06, "loss": 0.05948344, "memory(GiB)": 13.7, "step": 30530, "train_speed(iter/s)": 1.526605 }, { "acc": 0.96798611, "epoch": 14.312163112256854, "grad_norm": 5.42724084854126, "learning_rate": 8.549601599637827e-06, "loss": 0.0913546, "memory(GiB)": 13.7, "step": 30535, "train_speed(iter/s)": 1.526617 }, { "acc": 0.97677078, "epoch": 14.31450667916569, "grad_norm": 5.175582408905029, "learning_rate": 8.549055637306296e-06, "loss": 0.09127383, "memory(GiB)": 13.7, "step": 30540, "train_speed(iter/s)": 1.526632 }, { "acc": 0.97804298, "epoch": 14.316850246074525, "grad_norm": 5.183173179626465, "learning_rate": 8.548509589679146e-06, "loss": 0.1199955, "memory(GiB)": 13.7, "step": 30545, "train_speed(iter/s)": 1.526634 }, { "acc": 0.97925596, "epoch": 14.319193812983361, "grad_norm": 4.167261600494385, "learning_rate": 8.547963456769502e-06, "loss": 0.09214321, "memory(GiB)": 13.7, "step": 30550, "train_speed(iter/s)": 1.526636 }, { "acc": 0.96338501, "epoch": 14.321537379892195, "grad_norm": 6.2469000816345215, "learning_rate": 8.547417238590491e-06, "loss": 0.15772903, "memory(GiB)": 13.7, "step": 30555, "train_speed(iter/s)": 1.526649 }, { "acc": 0.97576618, "epoch": 14.323880946801031, "grad_norm": 3.402784585952759, "learning_rate": 8.546870935155242e-06, "loss": 0.11328323, "memory(GiB)": 13.7, "step": 30560, "train_speed(iter/s)": 1.526668 }, { "acc": 0.98828726, "epoch": 14.326224513709866, "grad_norm": 7.797134876251221, "learning_rate": 8.546324546476884e-06, "loss": 0.08344387, "memory(GiB)": 13.7, "step": 30565, "train_speed(iter/s)": 1.526696 }, { "acc": 0.96672125, "epoch": 14.328568080618702, "grad_norm": 3.7065868377685547, "learning_rate": 8.545778072568557e-06, "loss": 0.14995092, "memory(GiB)": 13.7, "step": 30570, "train_speed(iter/s)": 1.526693 }, { "acc": 0.98078375, "epoch": 14.330911647527538, "grad_norm": 8.157565116882324, "learning_rate": 8.545231513443389e-06, "loss": 0.08272294, "memory(GiB)": 13.7, "step": 30575, "train_speed(iter/s)": 1.526686 }, { "acc": 0.98090277, "epoch": 14.333255214436372, "grad_norm": 2.2951033115386963, "learning_rate": 8.544684869114523e-06, "loss": 0.11251591, "memory(GiB)": 13.7, "step": 30580, "train_speed(iter/s)": 1.5267 }, { "acc": 0.98198862, "epoch": 14.335598781345208, "grad_norm": 4.554335594177246, "learning_rate": 8.544138139595094e-06, "loss": 0.05916932, "memory(GiB)": 13.7, "step": 30585, "train_speed(iter/s)": 1.526717 }, { "acc": 0.98180561, "epoch": 14.337942348254042, "grad_norm": 6.926784515380859, "learning_rate": 8.543591324898248e-06, "loss": 0.09078355, "memory(GiB)": 13.7, "step": 30590, "train_speed(iter/s)": 1.526712 }, { "acc": 0.978125, "epoch": 14.340285915162879, "grad_norm": 1.5107617378234863, "learning_rate": 8.543044425037126e-06, "loss": 0.11415722, "memory(GiB)": 13.7, "step": 30595, "train_speed(iter/s)": 1.526727 }, { "acc": 0.97424822, "epoch": 14.342629482071713, "grad_norm": 1.6790790557861328, "learning_rate": 8.542497440024873e-06, "loss": 0.0931924, "memory(GiB)": 13.7, "step": 30600, "train_speed(iter/s)": 1.526738 }, { "acc": 0.97592258, "epoch": 14.344973048980549, "grad_norm": 5.976473331451416, "learning_rate": 8.541950369874638e-06, "loss": 0.13697692, "memory(GiB)": 13.7, "step": 30605, "train_speed(iter/s)": 1.526743 }, { "acc": 0.98159723, "epoch": 14.347316615889383, "grad_norm": 0.0787249282002449, "learning_rate": 8.541403214599572e-06, "loss": 0.1055837, "memory(GiB)": 13.7, "step": 30610, "train_speed(iter/s)": 1.526761 }, { "acc": 0.97981606, "epoch": 14.34966018279822, "grad_norm": 0.7911257743835449, "learning_rate": 8.540855974212825e-06, "loss": 0.11465242, "memory(GiB)": 13.7, "step": 30615, "train_speed(iter/s)": 1.526777 }, { "acc": 0.98562508, "epoch": 14.352003749707054, "grad_norm": 1.0898536443710327, "learning_rate": 8.540308648727552e-06, "loss": 0.06196635, "memory(GiB)": 13.7, "step": 30620, "train_speed(iter/s)": 1.52678 }, { "acc": 0.98249998, "epoch": 14.35434731661589, "grad_norm": 4.52290153503418, "learning_rate": 8.539761238156907e-06, "loss": 0.08987561, "memory(GiB)": 13.7, "step": 30625, "train_speed(iter/s)": 1.526778 }, { "acc": 0.98125, "epoch": 14.356690883524724, "grad_norm": 5.246073246002197, "learning_rate": 8.53921374251405e-06, "loss": 0.06899672, "memory(GiB)": 13.7, "step": 30630, "train_speed(iter/s)": 1.526787 }, { "acc": 0.97136364, "epoch": 14.35903445043356, "grad_norm": 8.728056907653809, "learning_rate": 8.53866616181214e-06, "loss": 0.13084886, "memory(GiB)": 13.7, "step": 30635, "train_speed(iter/s)": 1.526789 }, { "acc": 0.97614584, "epoch": 14.361378017342394, "grad_norm": 18.80765151977539, "learning_rate": 8.538118496064339e-06, "loss": 0.10048662, "memory(GiB)": 13.7, "step": 30640, "train_speed(iter/s)": 1.526793 }, { "acc": 0.97344694, "epoch": 14.36372158425123, "grad_norm": 1.3656554222106934, "learning_rate": 8.537570745283811e-06, "loss": 0.07387864, "memory(GiB)": 13.7, "step": 30645, "train_speed(iter/s)": 1.526795 }, { "acc": 0.96453371, "epoch": 14.366065151160065, "grad_norm": 12.30596923828125, "learning_rate": 8.537022909483722e-06, "loss": 0.15685892, "memory(GiB)": 13.7, "step": 30650, "train_speed(iter/s)": 1.526809 }, { "acc": 0.98445511, "epoch": 14.3684087180689, "grad_norm": 3.248353958129883, "learning_rate": 8.536474988677243e-06, "loss": 0.07274922, "memory(GiB)": 13.7, "step": 30655, "train_speed(iter/s)": 1.526808 }, { "acc": 0.97634811, "epoch": 14.370752284977737, "grad_norm": 4.949233531951904, "learning_rate": 8.535926982877539e-06, "loss": 0.10521742, "memory(GiB)": 13.7, "step": 30660, "train_speed(iter/s)": 1.526808 }, { "acc": 0.98625002, "epoch": 14.373095851886571, "grad_norm": 3.3684747219085693, "learning_rate": 8.535378892097788e-06, "loss": 0.04738005, "memory(GiB)": 13.7, "step": 30665, "train_speed(iter/s)": 1.526819 }, { "acc": 0.98155556, "epoch": 14.375439418795407, "grad_norm": 6.736538410186768, "learning_rate": 8.53483071635116e-06, "loss": 0.07600733, "memory(GiB)": 13.7, "step": 30670, "train_speed(iter/s)": 1.526828 }, { "acc": 0.97494125, "epoch": 14.377782985704242, "grad_norm": 8.911809921264648, "learning_rate": 8.534282455650833e-06, "loss": 0.12162852, "memory(GiB)": 13.7, "step": 30675, "train_speed(iter/s)": 1.526835 }, { "acc": 0.98450718, "epoch": 14.380126552613078, "grad_norm": 1.4421544075012207, "learning_rate": 8.533734110009985e-06, "loss": 0.06118798, "memory(GiB)": 13.7, "step": 30680, "train_speed(iter/s)": 1.526839 }, { "acc": 0.98086386, "epoch": 14.382470119521912, "grad_norm": 7.9776387214660645, "learning_rate": 8.533185679441796e-06, "loss": 0.09701492, "memory(GiB)": 13.7, "step": 30685, "train_speed(iter/s)": 1.526831 }, { "acc": 0.97729168, "epoch": 14.384813686430748, "grad_norm": 6.996975421905518, "learning_rate": 8.532637163959449e-06, "loss": 0.09552809, "memory(GiB)": 13.7, "step": 30690, "train_speed(iter/s)": 1.526835 }, { "acc": 0.97258835, "epoch": 14.387157253339582, "grad_norm": 4.613712787628174, "learning_rate": 8.532088563576129e-06, "loss": 0.08974171, "memory(GiB)": 13.7, "step": 30695, "train_speed(iter/s)": 1.526841 }, { "acc": 0.97540255, "epoch": 14.389500820248418, "grad_norm": 3.7102556228637695, "learning_rate": 8.531539878305024e-06, "loss": 0.09701629, "memory(GiB)": 13.7, "step": 30700, "train_speed(iter/s)": 1.526854 }, { "acc": 0.9757143, "epoch": 14.391844387157253, "grad_norm": 2.4024949073791504, "learning_rate": 8.530991108159318e-06, "loss": 0.07338387, "memory(GiB)": 13.7, "step": 30705, "train_speed(iter/s)": 1.526864 }, { "acc": 0.98058567, "epoch": 14.394187954066089, "grad_norm": 5.722536087036133, "learning_rate": 8.530442253152205e-06, "loss": 0.07061034, "memory(GiB)": 13.7, "step": 30710, "train_speed(iter/s)": 1.526862 }, { "acc": 0.96659431, "epoch": 14.396531520974923, "grad_norm": 3.158332347869873, "learning_rate": 8.529893313296876e-06, "loss": 0.11920712, "memory(GiB)": 13.7, "step": 30715, "train_speed(iter/s)": 1.526865 }, { "acc": 0.96299114, "epoch": 14.39887508788376, "grad_norm": 4.473976135253906, "learning_rate": 8.529344288606528e-06, "loss": 0.16765692, "memory(GiB)": 13.7, "step": 30720, "train_speed(iter/s)": 1.526863 }, { "acc": 0.97306404, "epoch": 14.401218654792594, "grad_norm": 10.499009132385254, "learning_rate": 8.528795179094358e-06, "loss": 0.1415431, "memory(GiB)": 13.7, "step": 30725, "train_speed(iter/s)": 1.526857 }, { "acc": 0.98674145, "epoch": 14.40356222170143, "grad_norm": 5.114302635192871, "learning_rate": 8.528245984773561e-06, "loss": 0.08511001, "memory(GiB)": 13.7, "step": 30730, "train_speed(iter/s)": 1.526839 }, { "acc": 0.98916664, "epoch": 14.405905788610266, "grad_norm": 0.20443259179592133, "learning_rate": 8.52769670565734e-06, "loss": 0.03801123, "memory(GiB)": 13.7, "step": 30735, "train_speed(iter/s)": 1.526835 }, { "acc": 0.97729168, "epoch": 14.4082493555191, "grad_norm": 2.160590887069702, "learning_rate": 8.527147341758899e-06, "loss": 0.04442267, "memory(GiB)": 13.7, "step": 30740, "train_speed(iter/s)": 1.526841 }, { "acc": 0.97458334, "epoch": 14.410592922427936, "grad_norm": 6.997976303100586, "learning_rate": 8.52659789309144e-06, "loss": 0.14410543, "memory(GiB)": 13.7, "step": 30745, "train_speed(iter/s)": 1.526858 }, { "acc": 0.99226189, "epoch": 14.41293648933677, "grad_norm": 3.1824347972869873, "learning_rate": 8.526048359668174e-06, "loss": 0.04323252, "memory(GiB)": 13.7, "step": 30750, "train_speed(iter/s)": 1.526865 }, { "acc": 0.98879147, "epoch": 14.415280056245606, "grad_norm": 2.4765825271606445, "learning_rate": 8.525498741502306e-06, "loss": 0.05788741, "memory(GiB)": 13.7, "step": 30755, "train_speed(iter/s)": 1.526879 }, { "acc": 0.99341345, "epoch": 14.41762362315444, "grad_norm": 1.4381505250930786, "learning_rate": 8.524949038607051e-06, "loss": 0.05083712, "memory(GiB)": 13.7, "step": 30760, "train_speed(iter/s)": 1.526879 }, { "acc": 0.98457794, "epoch": 14.419967190063277, "grad_norm": 2.9213974475860596, "learning_rate": 8.524399250995618e-06, "loss": 0.08030767, "memory(GiB)": 13.7, "step": 30765, "train_speed(iter/s)": 1.526888 }, { "acc": 0.97753468, "epoch": 14.422310756972111, "grad_norm": 0.1459670066833496, "learning_rate": 8.523849378681224e-06, "loss": 0.17158449, "memory(GiB)": 13.7, "step": 30770, "train_speed(iter/s)": 1.526892 }, { "acc": 0.9630209, "epoch": 14.424654323880947, "grad_norm": 33.54732894897461, "learning_rate": 8.523299421677086e-06, "loss": 0.1472044, "memory(GiB)": 13.7, "step": 30775, "train_speed(iter/s)": 1.526902 }, { "acc": 0.98756666, "epoch": 14.426997890789782, "grad_norm": 3.2358763217926025, "learning_rate": 8.522749379996425e-06, "loss": 0.08215433, "memory(GiB)": 13.7, "step": 30780, "train_speed(iter/s)": 1.52691 }, { "acc": 0.97927666, "epoch": 14.429341457698618, "grad_norm": 4.598866939544678, "learning_rate": 8.52219925365246e-06, "loss": 0.15482678, "memory(GiB)": 13.7, "step": 30785, "train_speed(iter/s)": 1.526911 }, { "acc": 0.96375561, "epoch": 14.431685024607452, "grad_norm": 3.1206257343292236, "learning_rate": 8.521649042658413e-06, "loss": 0.16887112, "memory(GiB)": 13.7, "step": 30790, "train_speed(iter/s)": 1.526907 }, { "acc": 0.96677084, "epoch": 14.434028591516288, "grad_norm": 4.174458980560303, "learning_rate": 8.521098747027513e-06, "loss": 0.15014534, "memory(GiB)": 13.7, "step": 30795, "train_speed(iter/s)": 1.526916 }, { "acc": 0.96776247, "epoch": 14.436372158425122, "grad_norm": 0.42193591594696045, "learning_rate": 8.520548366772983e-06, "loss": 0.19946924, "memory(GiB)": 13.7, "step": 30800, "train_speed(iter/s)": 1.526925 }, { "acc": 0.98394938, "epoch": 14.438715725333958, "grad_norm": 5.073917388916016, "learning_rate": 8.519997901908053e-06, "loss": 0.14383039, "memory(GiB)": 13.7, "step": 30805, "train_speed(iter/s)": 1.526915 }, { "acc": 0.98020144, "epoch": 14.441059292242793, "grad_norm": 11.176013946533203, "learning_rate": 8.51944735244596e-06, "loss": 0.11976509, "memory(GiB)": 13.7, "step": 30810, "train_speed(iter/s)": 1.526926 }, { "acc": 0.98482141, "epoch": 14.443402859151629, "grad_norm": 5.2263617515563965, "learning_rate": 8.518896718399932e-06, "loss": 0.07774544, "memory(GiB)": 13.7, "step": 30815, "train_speed(iter/s)": 1.52693 }, { "acc": 0.98154764, "epoch": 14.445746426060465, "grad_norm": 7.968142509460449, "learning_rate": 8.518345999783204e-06, "loss": 0.09802282, "memory(GiB)": 13.7, "step": 30820, "train_speed(iter/s)": 1.526945 }, { "acc": 0.97361107, "epoch": 14.4480899929693, "grad_norm": 1.0036331415176392, "learning_rate": 8.517795196609015e-06, "loss": 0.14236867, "memory(GiB)": 13.7, "step": 30825, "train_speed(iter/s)": 1.526948 }, { "acc": 0.98354168, "epoch": 14.450433559878135, "grad_norm": 7.6566853523254395, "learning_rate": 8.517244308890607e-06, "loss": 0.06100989, "memory(GiB)": 13.7, "step": 30830, "train_speed(iter/s)": 1.526961 }, { "acc": 0.98571424, "epoch": 14.45277712678697, "grad_norm": 4.983668327331543, "learning_rate": 8.516693336641219e-06, "loss": 0.0679706, "memory(GiB)": 13.7, "step": 30835, "train_speed(iter/s)": 1.526958 }, { "acc": 0.96990261, "epoch": 14.455120693695806, "grad_norm": 6.459051132202148, "learning_rate": 8.516142279874093e-06, "loss": 0.11804525, "memory(GiB)": 13.7, "step": 30840, "train_speed(iter/s)": 1.526959 }, { "acc": 0.98698864, "epoch": 14.45746426060464, "grad_norm": 3.634310007095337, "learning_rate": 8.515591138602479e-06, "loss": 0.06977159, "memory(GiB)": 13.7, "step": 30845, "train_speed(iter/s)": 1.52697 }, { "acc": 0.98340282, "epoch": 14.459807827513476, "grad_norm": 6.448336601257324, "learning_rate": 8.515039912839619e-06, "loss": 0.06828551, "memory(GiB)": 13.7, "step": 30850, "train_speed(iter/s)": 1.526976 }, { "acc": 0.97267857, "epoch": 14.46215139442231, "grad_norm": 1.9216383695602417, "learning_rate": 8.514488602598768e-06, "loss": 0.18286229, "memory(GiB)": 13.7, "step": 30855, "train_speed(iter/s)": 1.52698 }, { "acc": 0.9679203, "epoch": 14.464494961331146, "grad_norm": 4.627481937408447, "learning_rate": 8.513937207893174e-06, "loss": 0.12112014, "memory(GiB)": 13.7, "step": 30860, "train_speed(iter/s)": 1.526984 }, { "acc": 0.98133392, "epoch": 14.46683852823998, "grad_norm": 6.795907497406006, "learning_rate": 8.513385728736092e-06, "loss": 0.16041855, "memory(GiB)": 13.7, "step": 30865, "train_speed(iter/s)": 1.526989 }, { "acc": 0.96394119, "epoch": 14.469182095148817, "grad_norm": 5.158381462097168, "learning_rate": 8.512834165140778e-06, "loss": 0.21166835, "memory(GiB)": 13.7, "step": 30870, "train_speed(iter/s)": 1.527002 }, { "acc": 0.96821423, "epoch": 14.471525662057651, "grad_norm": 6.943767070770264, "learning_rate": 8.512282517120491e-06, "loss": 0.08984763, "memory(GiB)": 13.7, "step": 30875, "train_speed(iter/s)": 1.527021 }, { "acc": 0.984375, "epoch": 14.473869228966487, "grad_norm": 10.898199081420898, "learning_rate": 8.511730784688487e-06, "loss": 0.10746837, "memory(GiB)": 13.7, "step": 30880, "train_speed(iter/s)": 1.527036 }, { "acc": 0.97029762, "epoch": 14.476212795875322, "grad_norm": 6.494176864624023, "learning_rate": 8.511178967858032e-06, "loss": 0.13441596, "memory(GiB)": 13.7, "step": 30885, "train_speed(iter/s)": 1.527039 }, { "acc": 0.97129726, "epoch": 14.478556362784158, "grad_norm": 115.39297485351562, "learning_rate": 8.510627066642387e-06, "loss": 0.12185822, "memory(GiB)": 13.7, "step": 30890, "train_speed(iter/s)": 1.527061 }, { "acc": 0.99106064, "epoch": 14.480899929692992, "grad_norm": 0.253042995929718, "learning_rate": 8.51007508105482e-06, "loss": 0.03973496, "memory(GiB)": 13.7, "step": 30895, "train_speed(iter/s)": 1.527059 }, { "acc": 0.9664938, "epoch": 14.483243496601828, "grad_norm": 4.260870933532715, "learning_rate": 8.509523011108597e-06, "loss": 0.19881492, "memory(GiB)": 13.7, "step": 30900, "train_speed(iter/s)": 1.527076 }, { "acc": 0.98238592, "epoch": 14.485587063510664, "grad_norm": 4.809532165527344, "learning_rate": 8.50897085681699e-06, "loss": 0.0878302, "memory(GiB)": 13.7, "step": 30905, "train_speed(iter/s)": 1.527089 }, { "acc": 0.975, "epoch": 14.487930630419498, "grad_norm": 121.33404541015625, "learning_rate": 8.508418618193268e-06, "loss": 0.0916411, "memory(GiB)": 13.7, "step": 30910, "train_speed(iter/s)": 1.527102 }, { "acc": 0.990625, "epoch": 14.490274197328334, "grad_norm": 0.01913662627339363, "learning_rate": 8.50786629525071e-06, "loss": 0.0711627, "memory(GiB)": 13.7, "step": 30915, "train_speed(iter/s)": 1.527095 }, { "acc": 0.98488092, "epoch": 14.492617764237169, "grad_norm": 0.011524236761033535, "learning_rate": 8.507313888002585e-06, "loss": 0.10110745, "memory(GiB)": 13.7, "step": 30920, "train_speed(iter/s)": 1.527098 }, { "acc": 0.9796875, "epoch": 14.494961331146005, "grad_norm": 4.762354850769043, "learning_rate": 8.506761396462178e-06, "loss": 0.10052347, "memory(GiB)": 13.7, "step": 30925, "train_speed(iter/s)": 1.527095 }, { "acc": 0.98841038, "epoch": 14.49730489805484, "grad_norm": 1.5731927156448364, "learning_rate": 8.506208820642764e-06, "loss": 0.06224147, "memory(GiB)": 13.7, "step": 30930, "train_speed(iter/s)": 1.527095 }, { "acc": 0.97274342, "epoch": 14.499648464963675, "grad_norm": 8.487987518310547, "learning_rate": 8.505656160557626e-06, "loss": 0.1178574, "memory(GiB)": 13.7, "step": 30935, "train_speed(iter/s)": 1.527102 }, { "acc": 0.98500004, "epoch": 14.50199203187251, "grad_norm": 5.201525688171387, "learning_rate": 8.505103416220051e-06, "loss": 0.08700318, "memory(GiB)": 13.7, "step": 30940, "train_speed(iter/s)": 1.527118 }, { "acc": 0.96460581, "epoch": 14.504335598781346, "grad_norm": 6.263192653656006, "learning_rate": 8.504550587643325e-06, "loss": 0.1320976, "memory(GiB)": 13.7, "step": 30945, "train_speed(iter/s)": 1.527117 }, { "acc": 0.96863098, "epoch": 14.50667916569018, "grad_norm": 8.935518264770508, "learning_rate": 8.503997674840733e-06, "loss": 0.11038544, "memory(GiB)": 13.7, "step": 30950, "train_speed(iter/s)": 1.527117 }, { "acc": 0.98739586, "epoch": 14.509022732599016, "grad_norm": 1.4005794525146484, "learning_rate": 8.503444677825567e-06, "loss": 0.06420155, "memory(GiB)": 13.7, "step": 30955, "train_speed(iter/s)": 1.527119 }, { "acc": 0.98680553, "epoch": 14.51136629950785, "grad_norm": 3.7310760021209717, "learning_rate": 8.502891596611119e-06, "loss": 0.0920343, "memory(GiB)": 13.7, "step": 30960, "train_speed(iter/s)": 1.527128 }, { "acc": 0.9854166, "epoch": 14.513709866416686, "grad_norm": 9.622313499450684, "learning_rate": 8.502338431210683e-06, "loss": 0.05717413, "memory(GiB)": 13.7, "step": 30965, "train_speed(iter/s)": 1.52713 }, { "acc": 0.98412142, "epoch": 14.51605343332552, "grad_norm": 4.149718761444092, "learning_rate": 8.501785181637555e-06, "loss": 0.06999985, "memory(GiB)": 13.7, "step": 30970, "train_speed(iter/s)": 1.527134 }, { "acc": 0.96380205, "epoch": 14.518397000234357, "grad_norm": 3.580209732055664, "learning_rate": 8.501231847905034e-06, "loss": 0.05668342, "memory(GiB)": 13.7, "step": 30975, "train_speed(iter/s)": 1.527152 }, { "acc": 0.9765625, "epoch": 14.520740567143193, "grad_norm": 2.6416945457458496, "learning_rate": 8.500678430026421e-06, "loss": 0.09024333, "memory(GiB)": 13.7, "step": 30980, "train_speed(iter/s)": 1.527153 }, { "acc": 0.97777777, "epoch": 14.523084134052027, "grad_norm": 0.004767024423927069, "learning_rate": 8.500124928015017e-06, "loss": 0.06932917, "memory(GiB)": 13.7, "step": 30985, "train_speed(iter/s)": 1.527157 }, { "acc": 0.95642357, "epoch": 14.525427700960863, "grad_norm": 4.9391350746154785, "learning_rate": 8.499571341884127e-06, "loss": 0.16354359, "memory(GiB)": 13.7, "step": 30990, "train_speed(iter/s)": 1.527175 }, { "acc": 0.98822918, "epoch": 14.527771267869698, "grad_norm": 4.917670249938965, "learning_rate": 8.499017671647057e-06, "loss": 0.06008803, "memory(GiB)": 13.7, "step": 30995, "train_speed(iter/s)": 1.527187 }, { "acc": 0.9880208, "epoch": 14.530114834778534, "grad_norm": 2.4635581970214844, "learning_rate": 8.498463917317118e-06, "loss": 0.04998261, "memory(GiB)": 13.7, "step": 31000, "train_speed(iter/s)": 1.527193 }, { "acc": 0.98097725, "epoch": 14.532458401687368, "grad_norm": 3.31740665435791, "learning_rate": 8.497910078907614e-06, "loss": 0.06898792, "memory(GiB)": 13.7, "step": 31005, "train_speed(iter/s)": 1.527195 }, { "acc": 0.97698784, "epoch": 14.534801968596204, "grad_norm": 1.1213538646697998, "learning_rate": 8.497356156431865e-06, "loss": 0.0612713, "memory(GiB)": 13.7, "step": 31010, "train_speed(iter/s)": 1.527191 }, { "acc": 0.98392563, "epoch": 14.537145535505038, "grad_norm": 2.681732177734375, "learning_rate": 8.49680214990318e-06, "loss": 0.07009876, "memory(GiB)": 13.7, "step": 31015, "train_speed(iter/s)": 1.527201 }, { "acc": 0.97520828, "epoch": 14.539489102413874, "grad_norm": 5.610818386077881, "learning_rate": 8.496248059334878e-06, "loss": 0.15627029, "memory(GiB)": 13.7, "step": 31020, "train_speed(iter/s)": 1.527211 }, { "acc": 0.97162771, "epoch": 14.541832669322709, "grad_norm": 9.341143608093262, "learning_rate": 8.495693884740279e-06, "loss": 0.09744728, "memory(GiB)": 13.7, "step": 31025, "train_speed(iter/s)": 1.527225 }, { "acc": 0.97865524, "epoch": 14.544176236231545, "grad_norm": 2.826266288757324, "learning_rate": 8.495139626132698e-06, "loss": 0.07610116, "memory(GiB)": 13.7, "step": 31030, "train_speed(iter/s)": 1.527235 }, { "acc": 0.97763329, "epoch": 14.546519803140379, "grad_norm": 6.098600387573242, "learning_rate": 8.494585283525464e-06, "loss": 0.12485427, "memory(GiB)": 13.7, "step": 31035, "train_speed(iter/s)": 1.527253 }, { "acc": 0.97984304, "epoch": 14.548863370049215, "grad_norm": 10.822596549987793, "learning_rate": 8.494030856931898e-06, "loss": 0.14879634, "memory(GiB)": 13.7, "step": 31040, "train_speed(iter/s)": 1.527263 }, { "acc": 0.9763628, "epoch": 14.55120693695805, "grad_norm": 23.190189361572266, "learning_rate": 8.493476346365327e-06, "loss": 0.12212042, "memory(GiB)": 13.7, "step": 31045, "train_speed(iter/s)": 1.52727 }, { "acc": 0.97874718, "epoch": 14.553550503866886, "grad_norm": 0.16491921246051788, "learning_rate": 8.49292175183908e-06, "loss": 0.10567117, "memory(GiB)": 13.7, "step": 31050, "train_speed(iter/s)": 1.527272 }, { "acc": 0.98907194, "epoch": 14.55589407077572, "grad_norm": 1.0932201147079468, "learning_rate": 8.492367073366487e-06, "loss": 0.05497288, "memory(GiB)": 13.7, "step": 31055, "train_speed(iter/s)": 1.527273 }, { "acc": 0.96844616, "epoch": 14.558237637684556, "grad_norm": 13.051787376403809, "learning_rate": 8.491812310960884e-06, "loss": 0.1729113, "memory(GiB)": 13.7, "step": 31060, "train_speed(iter/s)": 1.527274 }, { "acc": 0.96798611, "epoch": 14.56058120459339, "grad_norm": 3.887878179550171, "learning_rate": 8.4912574646356e-06, "loss": 0.12139655, "memory(GiB)": 13.7, "step": 31065, "train_speed(iter/s)": 1.527279 }, { "acc": 0.96885824, "epoch": 14.562924771502226, "grad_norm": 6.572742462158203, "learning_rate": 8.490702534403976e-06, "loss": 0.18990066, "memory(GiB)": 13.7, "step": 31070, "train_speed(iter/s)": 1.527283 }, { "acc": 0.9894886, "epoch": 14.565268338411062, "grad_norm": 1.32431960105896, "learning_rate": 8.490147520279348e-06, "loss": 0.06049664, "memory(GiB)": 13.7, "step": 31075, "train_speed(iter/s)": 1.527282 }, { "acc": 0.98373508, "epoch": 14.567611905319897, "grad_norm": 5.554272174835205, "learning_rate": 8.489592422275058e-06, "loss": 0.04966554, "memory(GiB)": 13.7, "step": 31080, "train_speed(iter/s)": 1.527273 }, { "acc": 0.96441469, "epoch": 14.569955472228733, "grad_norm": 2.200554609298706, "learning_rate": 8.489037240404451e-06, "loss": 0.14127296, "memory(GiB)": 13.7, "step": 31085, "train_speed(iter/s)": 1.527281 }, { "acc": 0.97718754, "epoch": 14.572299039137567, "grad_norm": 2.600820302963257, "learning_rate": 8.488481974680868e-06, "loss": 0.15615802, "memory(GiB)": 13.7, "step": 31090, "train_speed(iter/s)": 1.527282 }, { "acc": 0.98287773, "epoch": 14.574642606046403, "grad_norm": 2.2665696144104004, "learning_rate": 8.487926625117657e-06, "loss": 0.09627742, "memory(GiB)": 13.7, "step": 31095, "train_speed(iter/s)": 1.527288 }, { "acc": 0.98809214, "epoch": 14.576986172955237, "grad_norm": 2.248349666595459, "learning_rate": 8.487371191728166e-06, "loss": 0.06435413, "memory(GiB)": 13.7, "step": 31100, "train_speed(iter/s)": 1.527287 }, { "acc": 0.9863636, "epoch": 14.579329739864074, "grad_norm": 5.0030741691589355, "learning_rate": 8.486815674525749e-06, "loss": 0.06687651, "memory(GiB)": 13.7, "step": 31105, "train_speed(iter/s)": 1.527291 }, { "acc": 0.97569447, "epoch": 14.581673306772908, "grad_norm": 5.4903130531311035, "learning_rate": 8.486260073523755e-06, "loss": 0.11613364, "memory(GiB)": 13.7, "step": 31110, "train_speed(iter/s)": 1.527291 }, { "acc": 0.9879261, "epoch": 14.584016873681744, "grad_norm": 3.002089023590088, "learning_rate": 8.485704388735542e-06, "loss": 0.05413564, "memory(GiB)": 13.7, "step": 31115, "train_speed(iter/s)": 1.527288 }, { "acc": 0.98812504, "epoch": 14.586360440590578, "grad_norm": 1.463674545288086, "learning_rate": 8.485148620174465e-06, "loss": 0.04288748, "memory(GiB)": 13.7, "step": 31120, "train_speed(iter/s)": 1.527288 }, { "acc": 0.97829552, "epoch": 14.588704007499414, "grad_norm": 0.007640212774276733, "learning_rate": 8.484592767853881e-06, "loss": 0.10564969, "memory(GiB)": 13.7, "step": 31125, "train_speed(iter/s)": 1.527292 }, { "acc": 0.98314486, "epoch": 14.591047574408249, "grad_norm": 5.237585544586182, "learning_rate": 8.484036831787156e-06, "loss": 0.12095957, "memory(GiB)": 13.7, "step": 31130, "train_speed(iter/s)": 1.5273 }, { "acc": 0.98288193, "epoch": 14.593391141317085, "grad_norm": 0.0031582284718751907, "learning_rate": 8.483480811987646e-06, "loss": 0.09407915, "memory(GiB)": 13.7, "step": 31135, "train_speed(iter/s)": 1.527307 }, { "acc": 0.97283726, "epoch": 14.595734708225919, "grad_norm": 3.5376181602478027, "learning_rate": 8.482924708468723e-06, "loss": 0.15771313, "memory(GiB)": 13.7, "step": 31140, "train_speed(iter/s)": 1.527303 }, { "acc": 0.97925596, "epoch": 14.598078275134755, "grad_norm": 4.884405136108398, "learning_rate": 8.48236852124375e-06, "loss": 0.09274576, "memory(GiB)": 13.7, "step": 31145, "train_speed(iter/s)": 1.527308 }, { "acc": 0.97093935, "epoch": 14.600421842043591, "grad_norm": 6.049701690673828, "learning_rate": 8.481812250326097e-06, "loss": 0.13535182, "memory(GiB)": 13.7, "step": 31150, "train_speed(iter/s)": 1.527324 }, { "acc": 0.97550602, "epoch": 14.602765408952425, "grad_norm": 4.974609375, "learning_rate": 8.481255895729134e-06, "loss": 0.05376358, "memory(GiB)": 13.7, "step": 31155, "train_speed(iter/s)": 1.527321 }, { "acc": 0.98280449, "epoch": 14.605108975861262, "grad_norm": 0.0204494446516037, "learning_rate": 8.480699457466234e-06, "loss": 0.07168652, "memory(GiB)": 13.7, "step": 31160, "train_speed(iter/s)": 1.527323 }, { "acc": 0.96594486, "epoch": 14.607452542770096, "grad_norm": 5.677867889404297, "learning_rate": 8.480142935550774e-06, "loss": 0.13419495, "memory(GiB)": 13.7, "step": 31165, "train_speed(iter/s)": 1.527324 }, { "acc": 0.98244047, "epoch": 14.609796109678932, "grad_norm": 4.4532551765441895, "learning_rate": 8.479586329996126e-06, "loss": 0.04255165, "memory(GiB)": 13.7, "step": 31170, "train_speed(iter/s)": 1.527339 }, { "acc": 0.97192326, "epoch": 14.612139676587766, "grad_norm": 3.7072031497955322, "learning_rate": 8.479029640815675e-06, "loss": 0.14033144, "memory(GiB)": 13.7, "step": 31175, "train_speed(iter/s)": 1.527347 }, { "acc": 0.97927494, "epoch": 14.614483243496602, "grad_norm": 6.081404209136963, "learning_rate": 8.478472868022798e-06, "loss": 0.09976836, "memory(GiB)": 13.7, "step": 31180, "train_speed(iter/s)": 1.527352 }, { "acc": 0.96255951, "epoch": 14.616826810405437, "grad_norm": 7.904823303222656, "learning_rate": 8.47791601163088e-06, "loss": 0.19385577, "memory(GiB)": 13.7, "step": 31185, "train_speed(iter/s)": 1.527364 }, { "acc": 0.97828369, "epoch": 14.619170377314273, "grad_norm": 7.494081020355225, "learning_rate": 8.477359071653306e-06, "loss": 0.10180665, "memory(GiB)": 13.7, "step": 31190, "train_speed(iter/s)": 1.52737 }, { "acc": 0.97042313, "epoch": 14.621513944223107, "grad_norm": 8.44749641418457, "learning_rate": 8.476802048103462e-06, "loss": 0.13335797, "memory(GiB)": 13.7, "step": 31195, "train_speed(iter/s)": 1.527374 }, { "acc": 0.975951, "epoch": 14.623857511131943, "grad_norm": 2.1054885387420654, "learning_rate": 8.476244940994736e-06, "loss": 0.10416307, "memory(GiB)": 13.7, "step": 31200, "train_speed(iter/s)": 1.527387 }, { "acc": 0.96222725, "epoch": 14.626201078040777, "grad_norm": 7.939594745635986, "learning_rate": 8.475687750340521e-06, "loss": 0.19010439, "memory(GiB)": 13.7, "step": 31205, "train_speed(iter/s)": 1.527388 }, { "acc": 0.97494621, "epoch": 14.628544644949613, "grad_norm": 7.264781951904297, "learning_rate": 8.47513047615421e-06, "loss": 0.1273443, "memory(GiB)": 13.7, "step": 31210, "train_speed(iter/s)": 1.527386 }, { "acc": 0.97979164, "epoch": 14.630888211858448, "grad_norm": 4.258131504058838, "learning_rate": 8.474573118449197e-06, "loss": 0.06107312, "memory(GiB)": 13.7, "step": 31215, "train_speed(iter/s)": 1.5274 }, { "acc": 0.97296944, "epoch": 14.633231778767284, "grad_norm": 3.2419111728668213, "learning_rate": 8.474015677238879e-06, "loss": 0.11818954, "memory(GiB)": 13.7, "step": 31220, "train_speed(iter/s)": 1.527401 }, { "acc": 0.99333334, "epoch": 14.63557534567612, "grad_norm": 2.324683666229248, "learning_rate": 8.473458152536656e-06, "loss": 0.04684275, "memory(GiB)": 13.7, "step": 31225, "train_speed(iter/s)": 1.527406 }, { "acc": 0.98183422, "epoch": 14.637918912584954, "grad_norm": 1.3108142614364624, "learning_rate": 8.472900544355928e-06, "loss": 0.07555628, "memory(GiB)": 13.7, "step": 31230, "train_speed(iter/s)": 1.527409 }, { "acc": 0.97860107, "epoch": 14.64026247949379, "grad_norm": 3.176501512527466, "learning_rate": 8.472342852710098e-06, "loss": 0.10471228, "memory(GiB)": 13.7, "step": 31235, "train_speed(iter/s)": 1.527423 }, { "acc": 0.98184528, "epoch": 14.642606046402625, "grad_norm": 3.4619057178497314, "learning_rate": 8.471785077612575e-06, "loss": 0.11894317, "memory(GiB)": 13.7, "step": 31240, "train_speed(iter/s)": 1.527431 }, { "acc": 0.97605648, "epoch": 14.64494961331146, "grad_norm": 6.1752166748046875, "learning_rate": 8.471227219076759e-06, "loss": 0.09421979, "memory(GiB)": 13.7, "step": 31245, "train_speed(iter/s)": 1.527435 }, { "acc": 0.98224201, "epoch": 14.647293180220295, "grad_norm": 3.6647369861602783, "learning_rate": 8.470669277116063e-06, "loss": 0.0590961, "memory(GiB)": 13.7, "step": 31250, "train_speed(iter/s)": 1.52743 }, { "acc": 0.98860111, "epoch": 14.649636747129131, "grad_norm": 5.505162715911865, "learning_rate": 8.470111251743901e-06, "loss": 0.06338697, "memory(GiB)": 13.7, "step": 31255, "train_speed(iter/s)": 1.527427 }, { "acc": 0.99196434, "epoch": 14.651980314037965, "grad_norm": 0.7346532344818115, "learning_rate": 8.469553142973682e-06, "loss": 0.07056546, "memory(GiB)": 13.7, "step": 31260, "train_speed(iter/s)": 1.527428 }, { "acc": 0.96447916, "epoch": 14.654323880946801, "grad_norm": 2.470601797103882, "learning_rate": 8.468994950818822e-06, "loss": 0.15479193, "memory(GiB)": 13.7, "step": 31265, "train_speed(iter/s)": 1.527424 }, { "acc": 0.97675724, "epoch": 14.656667447855636, "grad_norm": 5.333702087402344, "learning_rate": 8.468436675292738e-06, "loss": 0.08979768, "memory(GiB)": 13.7, "step": 31270, "train_speed(iter/s)": 1.527431 }, { "acc": 0.9729166, "epoch": 14.659011014764472, "grad_norm": 5.106043815612793, "learning_rate": 8.467878316408848e-06, "loss": 0.1137887, "memory(GiB)": 13.7, "step": 31275, "train_speed(iter/s)": 1.527442 }, { "acc": 0.98862181, "epoch": 14.661354581673306, "grad_norm": 4.7626566886901855, "learning_rate": 8.467319874180575e-06, "loss": 0.05529631, "memory(GiB)": 13.7, "step": 31280, "train_speed(iter/s)": 1.527445 }, { "acc": 0.98104172, "epoch": 14.663698148582142, "grad_norm": 11.391153335571289, "learning_rate": 8.466761348621344e-06, "loss": 0.08160038, "memory(GiB)": 13.7, "step": 31285, "train_speed(iter/s)": 1.527467 }, { "acc": 0.99333334, "epoch": 14.666041715490977, "grad_norm": 0.8335505127906799, "learning_rate": 8.466202739744575e-06, "loss": 0.03655673, "memory(GiB)": 13.7, "step": 31290, "train_speed(iter/s)": 1.527474 }, { "acc": 0.95451441, "epoch": 14.668385282399813, "grad_norm": 3.566716194152832, "learning_rate": 8.4656440475637e-06, "loss": 0.11394672, "memory(GiB)": 13.7, "step": 31295, "train_speed(iter/s)": 1.527495 }, { "acc": 0.9664938, "epoch": 14.670728849308647, "grad_norm": 3.264586925506592, "learning_rate": 8.465085272092143e-06, "loss": 0.13302119, "memory(GiB)": 13.7, "step": 31300, "train_speed(iter/s)": 1.527506 }, { "acc": 0.96541672, "epoch": 14.673072416217483, "grad_norm": 5.91048002243042, "learning_rate": 8.464526413343341e-06, "loss": 0.13225572, "memory(GiB)": 13.7, "step": 31305, "train_speed(iter/s)": 1.527516 }, { "acc": 0.96800594, "epoch": 14.675415983126317, "grad_norm": 9.832465171813965, "learning_rate": 8.463967471330723e-06, "loss": 0.09397717, "memory(GiB)": 13.7, "step": 31310, "train_speed(iter/s)": 1.527524 }, { "acc": 0.96370039, "epoch": 14.677759550035153, "grad_norm": 19.350278854370117, "learning_rate": 8.463408446067726e-06, "loss": 0.11714034, "memory(GiB)": 13.7, "step": 31315, "train_speed(iter/s)": 1.527527 }, { "acc": 0.96496706, "epoch": 14.68010311694399, "grad_norm": 7.374380111694336, "learning_rate": 8.462849337567787e-06, "loss": 0.11406873, "memory(GiB)": 13.7, "step": 31320, "train_speed(iter/s)": 1.527523 }, { "acc": 0.98168564, "epoch": 14.682446683852824, "grad_norm": 4.2310075759887695, "learning_rate": 8.462290145844343e-06, "loss": 0.05124884, "memory(GiB)": 13.7, "step": 31325, "train_speed(iter/s)": 1.527534 }, { "acc": 0.95666666, "epoch": 14.68479025076166, "grad_norm": 8.69548225402832, "learning_rate": 8.461730870910838e-06, "loss": 0.20703597, "memory(GiB)": 13.7, "step": 31330, "train_speed(iter/s)": 1.527548 }, { "acc": 0.96251888, "epoch": 14.687133817670494, "grad_norm": 7.845378875732422, "learning_rate": 8.461171512780712e-06, "loss": 0.11003281, "memory(GiB)": 13.7, "step": 31335, "train_speed(iter/s)": 1.527552 }, { "acc": 0.97883072, "epoch": 14.68947738457933, "grad_norm": 0.15993618965148926, "learning_rate": 8.460612071467415e-06, "loss": 0.05736732, "memory(GiB)": 13.7, "step": 31340, "train_speed(iter/s)": 1.527564 }, { "acc": 0.97450428, "epoch": 14.691820951488165, "grad_norm": 4.684916019439697, "learning_rate": 8.460052546984388e-06, "loss": 0.20145586, "memory(GiB)": 13.7, "step": 31345, "train_speed(iter/s)": 1.527576 }, { "acc": 0.98323202, "epoch": 14.694164518397, "grad_norm": 8.74963092803955, "learning_rate": 8.459492939345084e-06, "loss": 0.10499027, "memory(GiB)": 13.7, "step": 31350, "train_speed(iter/s)": 1.527568 }, { "acc": 0.97758923, "epoch": 14.696508085305835, "grad_norm": 68.13995361328125, "learning_rate": 8.458933248562955e-06, "loss": 0.09394418, "memory(GiB)": 13.7, "step": 31355, "train_speed(iter/s)": 1.527563 }, { "acc": 0.98452072, "epoch": 14.698851652214671, "grad_norm": 7.988948345184326, "learning_rate": 8.458373474651453e-06, "loss": 0.080474, "memory(GiB)": 13.7, "step": 31360, "train_speed(iter/s)": 1.527572 }, { "acc": 0.98963747, "epoch": 14.701195219123505, "grad_norm": 7.891855716705322, "learning_rate": 8.45781361762403e-06, "loss": 0.05394347, "memory(GiB)": 13.7, "step": 31365, "train_speed(iter/s)": 1.527583 }, { "acc": 0.9880209, "epoch": 14.703538786032341, "grad_norm": 5.981151580810547, "learning_rate": 8.457253677494149e-06, "loss": 0.07781774, "memory(GiB)": 13.7, "step": 31370, "train_speed(iter/s)": 1.527584 }, { "acc": 0.96660423, "epoch": 14.705882352941176, "grad_norm": 6.980074405670166, "learning_rate": 8.456693654275263e-06, "loss": 0.11021519, "memory(GiB)": 13.7, "step": 31375, "train_speed(iter/s)": 1.527587 }, { "acc": 0.98458328, "epoch": 14.708225919850012, "grad_norm": 0.5531933903694153, "learning_rate": 8.456133547980839e-06, "loss": 0.03145074, "memory(GiB)": 13.7, "step": 31380, "train_speed(iter/s)": 1.527589 }, { "acc": 0.97765875, "epoch": 14.710569486758846, "grad_norm": 2.7920918464660645, "learning_rate": 8.455573358624335e-06, "loss": 0.08002971, "memory(GiB)": 13.7, "step": 31385, "train_speed(iter/s)": 1.527587 }, { "acc": 0.9821969, "epoch": 14.712913053667682, "grad_norm": 4.5041823387146, "learning_rate": 8.455013086219218e-06, "loss": 0.09889483, "memory(GiB)": 13.7, "step": 31390, "train_speed(iter/s)": 1.527592 }, { "acc": 0.9790699, "epoch": 14.715256620576518, "grad_norm": 9.173791885375977, "learning_rate": 8.454452730778953e-06, "loss": 0.09043024, "memory(GiB)": 13.7, "step": 31395, "train_speed(iter/s)": 1.527593 }, { "acc": 0.98116884, "epoch": 14.717600187485353, "grad_norm": 8.229790687561035, "learning_rate": 8.453892292317018e-06, "loss": 0.06437888, "memory(GiB)": 13.7, "step": 31400, "train_speed(iter/s)": 1.527603 }, { "acc": 0.99173613, "epoch": 14.719943754394189, "grad_norm": 2.8846940994262695, "learning_rate": 8.453331770846873e-06, "loss": 0.05717683, "memory(GiB)": 13.7, "step": 31405, "train_speed(iter/s)": 1.5276 }, { "acc": 0.98013897, "epoch": 14.722287321303023, "grad_norm": 6.0060577392578125, "learning_rate": 8.452771166381996e-06, "loss": 0.10119748, "memory(GiB)": 13.7, "step": 31410, "train_speed(iter/s)": 1.527608 }, { "acc": 0.97732353, "epoch": 14.724630888211859, "grad_norm": 43.027034759521484, "learning_rate": 8.452210478935862e-06, "loss": 0.13389077, "memory(GiB)": 13.7, "step": 31415, "train_speed(iter/s)": 1.527624 }, { "acc": 0.96844692, "epoch": 14.726974455120693, "grad_norm": 0.4307207763195038, "learning_rate": 8.451649708521948e-06, "loss": 0.13907955, "memory(GiB)": 13.7, "step": 31420, "train_speed(iter/s)": 1.52764 }, { "acc": 0.9901041, "epoch": 14.72931802202953, "grad_norm": 2.450716257095337, "learning_rate": 8.451088855153733e-06, "loss": 0.07026841, "memory(GiB)": 13.7, "step": 31425, "train_speed(iter/s)": 1.527642 }, { "acc": 0.97979164, "epoch": 14.731661588938364, "grad_norm": 5.231774806976318, "learning_rate": 8.450527918844699e-06, "loss": 0.10359387, "memory(GiB)": 13.7, "step": 31430, "train_speed(iter/s)": 1.527641 }, { "acc": 0.98658924, "epoch": 14.7340051558472, "grad_norm": 0.3357875645160675, "learning_rate": 8.449966899608325e-06, "loss": 0.04834459, "memory(GiB)": 13.7, "step": 31435, "train_speed(iter/s)": 1.52766 }, { "acc": 0.9864584, "epoch": 14.736348722756034, "grad_norm": 8.482441902160645, "learning_rate": 8.449405797458104e-06, "loss": 0.05317352, "memory(GiB)": 13.7, "step": 31440, "train_speed(iter/s)": 1.527656 }, { "acc": 0.98996105, "epoch": 14.73869228966487, "grad_norm": 4.433126449584961, "learning_rate": 8.448844612407514e-06, "loss": 0.05964345, "memory(GiB)": 13.7, "step": 31445, "train_speed(iter/s)": 1.527653 }, { "acc": 0.98611107, "epoch": 14.741035856573705, "grad_norm": 5.162208557128906, "learning_rate": 8.44828334447005e-06, "loss": 0.04204592, "memory(GiB)": 13.7, "step": 31450, "train_speed(iter/s)": 1.527665 }, { "acc": 0.99092255, "epoch": 14.74337942348254, "grad_norm": 1.340944766998291, "learning_rate": 8.447721993659202e-06, "loss": 0.05151187, "memory(GiB)": 13.7, "step": 31455, "train_speed(iter/s)": 1.527676 }, { "acc": 0.97900133, "epoch": 14.745722990391375, "grad_norm": 3.1024680137634277, "learning_rate": 8.44716055998846e-06, "loss": 0.11340014, "memory(GiB)": 13.7, "step": 31460, "train_speed(iter/s)": 1.527692 }, { "acc": 0.99330807, "epoch": 14.748066557300211, "grad_norm": 5.0043864250183105, "learning_rate": 8.446599043471324e-06, "loss": 0.04995087, "memory(GiB)": 13.7, "step": 31465, "train_speed(iter/s)": 1.527693 }, { "acc": 0.98005209, "epoch": 14.750410124209047, "grad_norm": 3.4570095539093018, "learning_rate": 8.446037444121285e-06, "loss": 0.0936707, "memory(GiB)": 13.7, "step": 31470, "train_speed(iter/s)": 1.527691 }, { "acc": 0.99028845, "epoch": 14.752753691117881, "grad_norm": 1.929150104522705, "learning_rate": 8.445475761951846e-06, "loss": 0.02994805, "memory(GiB)": 13.7, "step": 31475, "train_speed(iter/s)": 1.527696 }, { "acc": 0.97280636, "epoch": 14.755097258026717, "grad_norm": 6.015148639678955, "learning_rate": 8.44491399697651e-06, "loss": 0.09535429, "memory(GiB)": 13.7, "step": 31480, "train_speed(iter/s)": 1.52771 }, { "acc": 0.99505205, "epoch": 14.757440824935552, "grad_norm": 5.035694599151611, "learning_rate": 8.444352149208776e-06, "loss": 0.04614921, "memory(GiB)": 13.7, "step": 31485, "train_speed(iter/s)": 1.527722 }, { "acc": 0.98166666, "epoch": 14.759784391844388, "grad_norm": 2.656353712081909, "learning_rate": 8.44379021866215e-06, "loss": 0.05909895, "memory(GiB)": 13.7, "step": 31490, "train_speed(iter/s)": 1.52773 }, { "acc": 0.9760417, "epoch": 14.762127958753222, "grad_norm": 4.5598249435424805, "learning_rate": 8.443228205350138e-06, "loss": 0.13242631, "memory(GiB)": 13.7, "step": 31495, "train_speed(iter/s)": 1.527749 }, { "acc": 0.96776047, "epoch": 14.764471525662058, "grad_norm": 6.889523983001709, "learning_rate": 8.442666109286253e-06, "loss": 0.10498517, "memory(GiB)": 13.7, "step": 31500, "train_speed(iter/s)": 1.52776 }, { "acc": 0.97146168, "epoch": 14.766815092570893, "grad_norm": 1.89560866355896, "learning_rate": 8.442103930484002e-06, "loss": 0.13339797, "memory(GiB)": 13.7, "step": 31505, "train_speed(iter/s)": 1.527774 }, { "acc": 0.96872025, "epoch": 14.769158659479729, "grad_norm": 4.5066609382629395, "learning_rate": 8.441541668956897e-06, "loss": 0.11800457, "memory(GiB)": 13.7, "step": 31510, "train_speed(iter/s)": 1.527779 }, { "acc": 0.96984625, "epoch": 14.771502226388563, "grad_norm": 6.8760504722595215, "learning_rate": 8.440979324718456e-06, "loss": 0.13179183, "memory(GiB)": 13.7, "step": 31515, "train_speed(iter/s)": 1.527794 }, { "acc": 0.97151241, "epoch": 14.773845793297399, "grad_norm": 1.2989223003387451, "learning_rate": 8.440416897782196e-06, "loss": 0.09004009, "memory(GiB)": 13.7, "step": 31520, "train_speed(iter/s)": 1.527809 }, { "acc": 0.98520298, "epoch": 14.776189360206233, "grad_norm": 1.9100819826126099, "learning_rate": 8.439854388161635e-06, "loss": 0.06527718, "memory(GiB)": 13.7, "step": 31525, "train_speed(iter/s)": 1.527816 }, { "acc": 0.98395834, "epoch": 14.77853292711507, "grad_norm": 4.622058868408203, "learning_rate": 8.439291795870293e-06, "loss": 0.05337389, "memory(GiB)": 13.7, "step": 31530, "train_speed(iter/s)": 1.52782 }, { "acc": 0.97208338, "epoch": 14.780876494023904, "grad_norm": 3.5527849197387695, "learning_rate": 8.438729120921692e-06, "loss": 0.11396548, "memory(GiB)": 13.7, "step": 31535, "train_speed(iter/s)": 1.527828 }, { "acc": 0.98497028, "epoch": 14.78322006093274, "grad_norm": 8.613381385803223, "learning_rate": 8.43816636332936e-06, "loss": 0.06334528, "memory(GiB)": 13.7, "step": 31540, "train_speed(iter/s)": 1.527832 }, { "acc": 0.98543653, "epoch": 14.785563627841574, "grad_norm": 0.03413558378815651, "learning_rate": 8.437603523106822e-06, "loss": 0.08280597, "memory(GiB)": 13.7, "step": 31545, "train_speed(iter/s)": 1.527839 }, { "acc": 0.96767921, "epoch": 14.78790719475041, "grad_norm": 5.55296516418457, "learning_rate": 8.437040600267606e-06, "loss": 0.14594963, "memory(GiB)": 13.7, "step": 31550, "train_speed(iter/s)": 1.527839 }, { "acc": 0.96831875, "epoch": 14.790250761659244, "grad_norm": 5.05643892288208, "learning_rate": 8.436477594825247e-06, "loss": 0.07753421, "memory(GiB)": 13.7, "step": 31555, "train_speed(iter/s)": 1.527848 }, { "acc": 0.99354162, "epoch": 14.79259432856808, "grad_norm": 1.0606595277786255, "learning_rate": 8.435914506793272e-06, "loss": 0.04202849, "memory(GiB)": 13.7, "step": 31560, "train_speed(iter/s)": 1.527845 }, { "acc": 0.98000002, "epoch": 14.794937895476917, "grad_norm": 1.4391392469406128, "learning_rate": 8.435351336185218e-06, "loss": 0.10929662, "memory(GiB)": 13.7, "step": 31565, "train_speed(iter/s)": 1.527831 }, { "acc": 0.98490534, "epoch": 14.797281462385751, "grad_norm": 4.214491367340088, "learning_rate": 8.434788083014624e-06, "loss": 0.06541587, "memory(GiB)": 13.7, "step": 31570, "train_speed(iter/s)": 1.527834 }, { "acc": 0.97703371, "epoch": 14.799625029294587, "grad_norm": 4.7012505531311035, "learning_rate": 8.434224747295026e-06, "loss": 0.10043077, "memory(GiB)": 13.7, "step": 31575, "train_speed(iter/s)": 1.52784 }, { "acc": 0.9865797, "epoch": 14.801968596203421, "grad_norm": 7.680296421051025, "learning_rate": 8.433661329039967e-06, "loss": 0.05953482, "memory(GiB)": 13.7, "step": 31580, "train_speed(iter/s)": 1.527839 }, { "acc": 0.98113098, "epoch": 14.804312163112257, "grad_norm": 13.440555572509766, "learning_rate": 8.433097828262988e-06, "loss": 0.06949687, "memory(GiB)": 13.7, "step": 31585, "train_speed(iter/s)": 1.527833 }, { "acc": 0.97437363, "epoch": 14.806655730021092, "grad_norm": 1.3702247142791748, "learning_rate": 8.432534244977633e-06, "loss": 0.08972139, "memory(GiB)": 13.7, "step": 31590, "train_speed(iter/s)": 1.527834 }, { "acc": 0.96703377, "epoch": 14.808999296929928, "grad_norm": 63.27933120727539, "learning_rate": 8.431970579197451e-06, "loss": 0.18304036, "memory(GiB)": 13.7, "step": 31595, "train_speed(iter/s)": 1.527846 }, { "acc": 0.9770833, "epoch": 14.811342863838762, "grad_norm": 3.632863998413086, "learning_rate": 8.431406830935989e-06, "loss": 0.04773448, "memory(GiB)": 13.7, "step": 31600, "train_speed(iter/s)": 1.527861 }, { "acc": 0.99224205, "epoch": 14.813686430747598, "grad_norm": 1.9596176147460938, "learning_rate": 8.430843000206798e-06, "loss": 0.06885927, "memory(GiB)": 13.7, "step": 31605, "train_speed(iter/s)": 1.527864 }, { "acc": 0.94808064, "epoch": 14.816029997656432, "grad_norm": 6.689362049102783, "learning_rate": 8.43027908702343e-06, "loss": 0.17287035, "memory(GiB)": 13.7, "step": 31610, "train_speed(iter/s)": 1.527871 }, { "acc": 0.98253841, "epoch": 14.818373564565269, "grad_norm": 4.531929969787598, "learning_rate": 8.429715091399442e-06, "loss": 0.0915831, "memory(GiB)": 13.7, "step": 31615, "train_speed(iter/s)": 1.527883 }, { "acc": 0.98983288, "epoch": 14.820717131474103, "grad_norm": 5.8036932945251465, "learning_rate": 8.429151013348389e-06, "loss": 0.08012562, "memory(GiB)": 13.7, "step": 31620, "train_speed(iter/s)": 1.527885 }, { "acc": 0.97818899, "epoch": 14.823060698382939, "grad_norm": 2.834155559539795, "learning_rate": 8.42858685288383e-06, "loss": 0.13563468, "memory(GiB)": 13.7, "step": 31625, "train_speed(iter/s)": 1.527895 }, { "acc": 0.98520832, "epoch": 14.825404265291773, "grad_norm": 3.429347038269043, "learning_rate": 8.428022610019325e-06, "loss": 0.07864807, "memory(GiB)": 13.7, "step": 31630, "train_speed(iter/s)": 1.527889 }, { "acc": 0.96372023, "epoch": 14.82774783220061, "grad_norm": 3.7521116733551025, "learning_rate": 8.427458284768435e-06, "loss": 0.26144066, "memory(GiB)": 13.7, "step": 31635, "train_speed(iter/s)": 1.527899 }, { "acc": 0.9572588, "epoch": 14.830091399109445, "grad_norm": 10.25771427154541, "learning_rate": 8.426893877144728e-06, "loss": 0.13457698, "memory(GiB)": 13.7, "step": 31640, "train_speed(iter/s)": 1.527909 }, { "acc": 0.98321428, "epoch": 14.83243496601828, "grad_norm": 3.0614328384399414, "learning_rate": 8.426329387161767e-06, "loss": 0.07609345, "memory(GiB)": 13.7, "step": 31645, "train_speed(iter/s)": 1.527917 }, { "acc": 0.97854156, "epoch": 14.834778532927116, "grad_norm": 7.088370323181152, "learning_rate": 8.425764814833123e-06, "loss": 0.05777428, "memory(GiB)": 13.7, "step": 31650, "train_speed(iter/s)": 1.52793 }, { "acc": 0.96977673, "epoch": 14.83712209983595, "grad_norm": 6.408076286315918, "learning_rate": 8.425200160172367e-06, "loss": 0.132768, "memory(GiB)": 13.7, "step": 31655, "train_speed(iter/s)": 1.527937 }, { "acc": 0.96825972, "epoch": 14.839465666744786, "grad_norm": 4.525312423706055, "learning_rate": 8.42463542319307e-06, "loss": 0.12769598, "memory(GiB)": 13.7, "step": 31660, "train_speed(iter/s)": 1.527948 }, { "acc": 0.96756554, "epoch": 14.84180923365362, "grad_norm": 2.0884978771209717, "learning_rate": 8.424070603908807e-06, "loss": 0.14899921, "memory(GiB)": 13.7, "step": 31665, "train_speed(iter/s)": 1.527953 }, { "acc": 0.97703943, "epoch": 14.844152800562457, "grad_norm": 4.04380989074707, "learning_rate": 8.423505702333152e-06, "loss": 0.10796076, "memory(GiB)": 13.7, "step": 31670, "train_speed(iter/s)": 1.527957 }, { "acc": 0.983144, "epoch": 14.84649636747129, "grad_norm": 2.918952226638794, "learning_rate": 8.422940718479687e-06, "loss": 0.08472606, "memory(GiB)": 13.7, "step": 31675, "train_speed(iter/s)": 1.527964 }, { "acc": 0.9666667, "epoch": 14.848839934380127, "grad_norm": 6.849269866943359, "learning_rate": 8.422375652361988e-06, "loss": 0.13597119, "memory(GiB)": 13.7, "step": 31680, "train_speed(iter/s)": 1.527974 }, { "acc": 0.98171129, "epoch": 14.851183501288961, "grad_norm": 0.17994503676891327, "learning_rate": 8.421810503993643e-06, "loss": 0.06034931, "memory(GiB)": 13.7, "step": 31685, "train_speed(iter/s)": 1.52799 }, { "acc": 0.95965281, "epoch": 14.853527068197797, "grad_norm": 17.66054916381836, "learning_rate": 8.421245273388232e-06, "loss": 0.15671871, "memory(GiB)": 13.7, "step": 31690, "train_speed(iter/s)": 1.527991 }, { "acc": 0.97697306, "epoch": 14.855870635106632, "grad_norm": 1.4617962837219238, "learning_rate": 8.420679960559343e-06, "loss": 0.07478204, "memory(GiB)": 13.7, "step": 31695, "train_speed(iter/s)": 1.527996 }, { "acc": 0.97690697, "epoch": 14.858214202015468, "grad_norm": 1.5175154209136963, "learning_rate": 8.420114565520562e-06, "loss": 0.09774055, "memory(GiB)": 13.7, "step": 31700, "train_speed(iter/s)": 1.528004 }, { "acc": 0.97166672, "epoch": 14.860557768924302, "grad_norm": 7.573202133178711, "learning_rate": 8.419549088285485e-06, "loss": 0.12683579, "memory(GiB)": 13.7, "step": 31705, "train_speed(iter/s)": 1.528007 }, { "acc": 0.96750002, "epoch": 14.862901335833138, "grad_norm": 3.0449986457824707, "learning_rate": 8.4189835288677e-06, "loss": 0.12880784, "memory(GiB)": 13.7, "step": 31710, "train_speed(iter/s)": 1.528003 }, { "acc": 0.97693148, "epoch": 14.865244902741974, "grad_norm": 0.031327761709690094, "learning_rate": 8.418417887280799e-06, "loss": 0.08113586, "memory(GiB)": 13.7, "step": 31715, "train_speed(iter/s)": 1.528008 }, { "acc": 0.98060923, "epoch": 14.867588469650808, "grad_norm": 5.617767333984375, "learning_rate": 8.41785216353838e-06, "loss": 0.09196309, "memory(GiB)": 13.7, "step": 31720, "train_speed(iter/s)": 1.528016 }, { "acc": 0.98482647, "epoch": 14.869932036559645, "grad_norm": 3.3094799518585205, "learning_rate": 8.417286357654046e-06, "loss": 0.06391795, "memory(GiB)": 13.7, "step": 31725, "train_speed(iter/s)": 1.528025 }, { "acc": 0.96363592, "epoch": 14.872275603468479, "grad_norm": 8.6933012008667, "learning_rate": 8.41672046964139e-06, "loss": 0.21610105, "memory(GiB)": 13.7, "step": 31730, "train_speed(iter/s)": 1.528039 }, { "acc": 0.98453379, "epoch": 14.874619170377315, "grad_norm": 7.8174662590026855, "learning_rate": 8.416154499514018e-06, "loss": 0.07107317, "memory(GiB)": 13.7, "step": 31735, "train_speed(iter/s)": 1.528036 }, { "acc": 0.9854167, "epoch": 14.87696273728615, "grad_norm": 3.421483039855957, "learning_rate": 8.415588447285533e-06, "loss": 0.03745215, "memory(GiB)": 13.7, "step": 31740, "train_speed(iter/s)": 1.528044 }, { "acc": 0.96737175, "epoch": 14.879306304194985, "grad_norm": 10.05374526977539, "learning_rate": 8.415022312969541e-06, "loss": 0.14120806, "memory(GiB)": 13.7, "step": 31745, "train_speed(iter/s)": 1.528062 }, { "acc": 0.98656254, "epoch": 14.88164987110382, "grad_norm": 0.243593230843544, "learning_rate": 8.414456096579649e-06, "loss": 0.05235925, "memory(GiB)": 13.7, "step": 31750, "train_speed(iter/s)": 1.528071 }, { "acc": 0.9833004, "epoch": 14.883993438012656, "grad_norm": 1.7227816581726074, "learning_rate": 8.41388979812947e-06, "loss": 0.0815944, "memory(GiB)": 13.7, "step": 31755, "train_speed(iter/s)": 1.528068 }, { "acc": 0.9682292, "epoch": 14.88633700492149, "grad_norm": 11.354930877685547, "learning_rate": 8.413323417632613e-06, "loss": 0.08957014, "memory(GiB)": 13.7, "step": 31760, "train_speed(iter/s)": 1.528067 }, { "acc": 0.96750011, "epoch": 14.888680571830326, "grad_norm": 0.17246724665164948, "learning_rate": 8.412756955102693e-06, "loss": 0.07732807, "memory(GiB)": 13.7, "step": 31765, "train_speed(iter/s)": 1.528076 }, { "acc": 0.98375597, "epoch": 14.89102413873916, "grad_norm": 5.461164951324463, "learning_rate": 8.412190410553325e-06, "loss": 0.06637209, "memory(GiB)": 13.7, "step": 31770, "train_speed(iter/s)": 1.528082 }, { "acc": 0.98562498, "epoch": 14.893367705647997, "grad_norm": 0.0022677008528262377, "learning_rate": 8.411623783998131e-06, "loss": 0.07229491, "memory(GiB)": 13.7, "step": 31775, "train_speed(iter/s)": 1.52808 }, { "acc": 0.9862752, "epoch": 14.89571127255683, "grad_norm": 3.3825619220733643, "learning_rate": 8.411057075450724e-06, "loss": 0.05303145, "memory(GiB)": 13.7, "step": 31780, "train_speed(iter/s)": 1.528084 }, { "acc": 0.98311014, "epoch": 14.898054839465667, "grad_norm": 1.0780481100082397, "learning_rate": 8.410490284924732e-06, "loss": 0.06365409, "memory(GiB)": 13.7, "step": 31785, "train_speed(iter/s)": 1.528084 }, { "acc": 0.96937504, "epoch": 14.900398406374501, "grad_norm": 5.969022750854492, "learning_rate": 8.409923412433776e-06, "loss": 0.11259302, "memory(GiB)": 13.7, "step": 31790, "train_speed(iter/s)": 1.528087 }, { "acc": 0.97387466, "epoch": 14.902741973283337, "grad_norm": 5.1003923416137695, "learning_rate": 8.409356457991483e-06, "loss": 0.21885345, "memory(GiB)": 13.7, "step": 31795, "train_speed(iter/s)": 1.528089 }, { "acc": 0.96958332, "epoch": 14.905085540192172, "grad_norm": 5.944925785064697, "learning_rate": 8.408789421611476e-06, "loss": 0.10536301, "memory(GiB)": 13.7, "step": 31800, "train_speed(iter/s)": 1.528096 }, { "acc": 0.99300594, "epoch": 14.907429107101008, "grad_norm": 5.011906623840332, "learning_rate": 8.408222303307391e-06, "loss": 0.03231833, "memory(GiB)": 13.7, "step": 31805, "train_speed(iter/s)": 1.528104 }, { "acc": 0.98517361, "epoch": 14.909772674009844, "grad_norm": 0.008131365291774273, "learning_rate": 8.407655103092857e-06, "loss": 0.07960238, "memory(GiB)": 13.7, "step": 31810, "train_speed(iter/s)": 1.528113 }, { "acc": 0.9749279, "epoch": 14.912116240918678, "grad_norm": 0.8114452362060547, "learning_rate": 8.407087820981508e-06, "loss": 0.13856683, "memory(GiB)": 13.7, "step": 31815, "train_speed(iter/s)": 1.528116 }, { "acc": 0.96933613, "epoch": 14.914459807827514, "grad_norm": 7.210229396820068, "learning_rate": 8.406520456986978e-06, "loss": 0.10934043, "memory(GiB)": 13.7, "step": 31820, "train_speed(iter/s)": 1.528124 }, { "acc": 0.98651581, "epoch": 14.916803374736348, "grad_norm": 5.984022617340088, "learning_rate": 8.405953011122907e-06, "loss": 0.07825161, "memory(GiB)": 13.7, "step": 31825, "train_speed(iter/s)": 1.528128 }, { "acc": 0.975, "epoch": 14.919146941645185, "grad_norm": 3.424567222595215, "learning_rate": 8.405385483402933e-06, "loss": 0.1310397, "memory(GiB)": 13.7, "step": 31830, "train_speed(iter/s)": 1.528131 }, { "acc": 0.97791395, "epoch": 14.921490508554019, "grad_norm": 1.4789701700210571, "learning_rate": 8.404817873840696e-06, "loss": 0.15756893, "memory(GiB)": 13.7, "step": 31835, "train_speed(iter/s)": 1.528141 }, { "acc": 0.97906656, "epoch": 14.923834075462855, "grad_norm": 242.79849243164062, "learning_rate": 8.404250182449845e-06, "loss": 0.13794556, "memory(GiB)": 13.7, "step": 31840, "train_speed(iter/s)": 1.528148 }, { "acc": 0.96519604, "epoch": 14.92617764237169, "grad_norm": 3.6912002563476562, "learning_rate": 8.403682409244019e-06, "loss": 0.12453669, "memory(GiB)": 13.7, "step": 31845, "train_speed(iter/s)": 1.528163 }, { "acc": 0.97942219, "epoch": 14.928521209280525, "grad_norm": 3.960618734359741, "learning_rate": 8.40311455423687e-06, "loss": 0.16165013, "memory(GiB)": 13.7, "step": 31850, "train_speed(iter/s)": 1.528165 }, { "acc": 0.96583328, "epoch": 14.93086477618936, "grad_norm": 4.557229042053223, "learning_rate": 8.402546617442043e-06, "loss": 0.12135787, "memory(GiB)": 13.7, "step": 31855, "train_speed(iter/s)": 1.528176 }, { "acc": 0.96875401, "epoch": 14.933208343098196, "grad_norm": 4.183929443359375, "learning_rate": 8.401978598873195e-06, "loss": 0.11114062, "memory(GiB)": 13.7, "step": 31860, "train_speed(iter/s)": 1.528183 }, { "acc": 0.98146906, "epoch": 14.93555191000703, "grad_norm": 1.2102336883544922, "learning_rate": 8.401410498543975e-06, "loss": 0.12035308, "memory(GiB)": 13.7, "step": 31865, "train_speed(iter/s)": 1.528202 }, { "acc": 0.96642323, "epoch": 14.937895476915866, "grad_norm": 3.242371082305908, "learning_rate": 8.400842316468039e-06, "loss": 0.10614369, "memory(GiB)": 13.7, "step": 31870, "train_speed(iter/s)": 1.528198 }, { "acc": 0.98946428, "epoch": 14.9402390438247, "grad_norm": 5.0994873046875, "learning_rate": 8.400274052659044e-06, "loss": 0.03009011, "memory(GiB)": 13.7, "step": 31875, "train_speed(iter/s)": 1.528206 }, { "acc": 0.99134874, "epoch": 14.942582610733536, "grad_norm": 2.868360757827759, "learning_rate": 8.399705707130652e-06, "loss": 0.06773846, "memory(GiB)": 13.7, "step": 31880, "train_speed(iter/s)": 1.528225 }, { "acc": 0.97566471, "epoch": 14.944926177642373, "grad_norm": 4.232818126678467, "learning_rate": 8.399137279896522e-06, "loss": 0.09603149, "memory(GiB)": 13.7, "step": 31885, "train_speed(iter/s)": 1.528223 }, { "acc": 0.97290173, "epoch": 14.947269744551207, "grad_norm": 5.2353644371032715, "learning_rate": 8.398568770970314e-06, "loss": 0.153496, "memory(GiB)": 13.7, "step": 31890, "train_speed(iter/s)": 1.52823 }, { "acc": 0.97166119, "epoch": 14.949613311460043, "grad_norm": 1.7090415954589844, "learning_rate": 8.3980001803657e-06, "loss": 0.11982461, "memory(GiB)": 13.7, "step": 31895, "train_speed(iter/s)": 1.528243 }, { "acc": 0.97888889, "epoch": 14.951956878368877, "grad_norm": 2.6519675254821777, "learning_rate": 8.397431508096342e-06, "loss": 0.05728111, "memory(GiB)": 13.7, "step": 31900, "train_speed(iter/s)": 1.528245 }, { "acc": 0.9875, "epoch": 14.954300445277713, "grad_norm": 17.58837890625, "learning_rate": 8.396862754175909e-06, "loss": 0.07440722, "memory(GiB)": 13.7, "step": 31905, "train_speed(iter/s)": 1.528243 }, { "acc": 0.98312502, "epoch": 14.956644012186548, "grad_norm": 4.218247413635254, "learning_rate": 8.396293918618074e-06, "loss": 0.07921723, "memory(GiB)": 13.7, "step": 31910, "train_speed(iter/s)": 1.528245 }, { "acc": 0.9953125, "epoch": 14.958987579095384, "grad_norm": 1.8922985792160034, "learning_rate": 8.39572500143651e-06, "loss": 0.06780259, "memory(GiB)": 13.7, "step": 31915, "train_speed(iter/s)": 1.528243 }, { "acc": 0.99438848, "epoch": 14.961331146004218, "grad_norm": 37.5294189453125, "learning_rate": 8.395156002644893e-06, "loss": 0.02624444, "memory(GiB)": 13.7, "step": 31920, "train_speed(iter/s)": 1.528252 }, { "acc": 0.97691422, "epoch": 14.963674712913054, "grad_norm": 5.254802227020264, "learning_rate": 8.394586922256894e-06, "loss": 0.08961014, "memory(GiB)": 13.7, "step": 31925, "train_speed(iter/s)": 1.528261 }, { "acc": 0.9809226, "epoch": 14.966018279821888, "grad_norm": 2.870513677597046, "learning_rate": 8.394017760286197e-06, "loss": 0.09151779, "memory(GiB)": 13.7, "step": 31930, "train_speed(iter/s)": 1.528265 }, { "acc": 0.98558674, "epoch": 14.968361846730724, "grad_norm": 11.06175708770752, "learning_rate": 8.393448516746481e-06, "loss": 0.07966841, "memory(GiB)": 13.7, "step": 31935, "train_speed(iter/s)": 1.528275 }, { "acc": 0.97747355, "epoch": 14.970705413639559, "grad_norm": 1.678492546081543, "learning_rate": 8.392879191651431e-06, "loss": 0.10066018, "memory(GiB)": 13.7, "step": 31940, "train_speed(iter/s)": 1.528278 }, { "acc": 0.9763339, "epoch": 14.973048980548395, "grad_norm": 0.11514222621917725, "learning_rate": 8.39230978501473e-06, "loss": 0.11045758, "memory(GiB)": 13.7, "step": 31945, "train_speed(iter/s)": 1.528286 }, { "acc": 0.97293015, "epoch": 14.97539254745723, "grad_norm": 5.639599800109863, "learning_rate": 8.391740296850065e-06, "loss": 0.14351552, "memory(GiB)": 13.7, "step": 31950, "train_speed(iter/s)": 1.528301 }, { "acc": 0.98818455, "epoch": 14.977736114366065, "grad_norm": 4.20274543762207, "learning_rate": 8.391170727171122e-06, "loss": 0.0675683, "memory(GiB)": 13.7, "step": 31955, "train_speed(iter/s)": 1.528306 }, { "acc": 0.98111115, "epoch": 14.980079681274901, "grad_norm": 5.327795505523682, "learning_rate": 8.390601075991598e-06, "loss": 0.05161208, "memory(GiB)": 13.7, "step": 31960, "train_speed(iter/s)": 1.528317 }, { "acc": 0.97150307, "epoch": 14.982423248183736, "grad_norm": 0.020233862102031708, "learning_rate": 8.390031343325179e-06, "loss": 0.10260506, "memory(GiB)": 13.7, "step": 31965, "train_speed(iter/s)": 1.528325 }, { "acc": 0.97369051, "epoch": 14.984766815092572, "grad_norm": 4.5908637046813965, "learning_rate": 8.389461529185562e-06, "loss": 0.06655767, "memory(GiB)": 13.7, "step": 31970, "train_speed(iter/s)": 1.528327 }, { "acc": 0.98312502, "epoch": 14.987110382001406, "grad_norm": 3.087355375289917, "learning_rate": 8.388891633586442e-06, "loss": 0.05125086, "memory(GiB)": 13.7, "step": 31975, "train_speed(iter/s)": 1.528323 }, { "acc": 0.98132935, "epoch": 14.989453948910242, "grad_norm": 4.24842643737793, "learning_rate": 8.388321656541522e-06, "loss": 0.05361447, "memory(GiB)": 13.7, "step": 31980, "train_speed(iter/s)": 1.528322 }, { "acc": 0.98504009, "epoch": 14.991797515819076, "grad_norm": 3.6553244590759277, "learning_rate": 8.387751598064498e-06, "loss": 0.07006903, "memory(GiB)": 13.7, "step": 31985, "train_speed(iter/s)": 1.528328 }, { "acc": 0.9754158, "epoch": 14.994141082727912, "grad_norm": 4.962047576904297, "learning_rate": 8.387181458169072e-06, "loss": 0.11824102, "memory(GiB)": 13.7, "step": 31990, "train_speed(iter/s)": 1.528327 }, { "acc": 0.98551464, "epoch": 14.996484649636747, "grad_norm": 3.6064398288726807, "learning_rate": 8.38661123686895e-06, "loss": 0.07878598, "memory(GiB)": 13.7, "step": 31995, "train_speed(iter/s)": 1.528333 }, { "acc": 0.97790184, "epoch": 14.998828216545583, "grad_norm": 3.0765280723571777, "learning_rate": 8.38604093417784e-06, "loss": 0.07643752, "memory(GiB)": 13.7, "step": 32000, "train_speed(iter/s)": 1.528336 }, { "acc": 0.9879035, "epoch": 15.001171783454417, "grad_norm": 1.935653567314148, "learning_rate": 8.385470550109448e-06, "loss": 0.12588508, "memory(GiB)": 13.7, "step": 32005, "train_speed(iter/s)": 1.528273 }, { "acc": 0.99571428, "epoch": 15.003515350363253, "grad_norm": 0.06329460442066193, "learning_rate": 8.384900084677484e-06, "loss": 0.02420722, "memory(GiB)": 13.7, "step": 32010, "train_speed(iter/s)": 1.528265 }, { "acc": 0.98500004, "epoch": 15.005858917272088, "grad_norm": 4.899120807647705, "learning_rate": 8.38432953789566e-06, "loss": 0.12017026, "memory(GiB)": 13.7, "step": 32015, "train_speed(iter/s)": 1.528257 }, { "acc": 0.97768803, "epoch": 15.008202484180924, "grad_norm": 5.378011226654053, "learning_rate": 8.383758909777691e-06, "loss": 0.06687456, "memory(GiB)": 13.7, "step": 32020, "train_speed(iter/s)": 1.528266 }, { "acc": 0.98111115, "epoch": 15.010546051089758, "grad_norm": 0.6822823286056519, "learning_rate": 8.383188200337294e-06, "loss": 0.07047853, "memory(GiB)": 13.7, "step": 32025, "train_speed(iter/s)": 1.528279 }, { "acc": 0.98212795, "epoch": 15.012889617998594, "grad_norm": 2.3163042068481445, "learning_rate": 8.382617409588184e-06, "loss": 0.05503693, "memory(GiB)": 13.7, "step": 32030, "train_speed(iter/s)": 1.528279 }, { "acc": 0.97442818, "epoch": 15.015233184907428, "grad_norm": 5.199117183685303, "learning_rate": 8.382046537544082e-06, "loss": 0.08540806, "memory(GiB)": 13.7, "step": 32035, "train_speed(iter/s)": 1.528299 }, { "acc": 0.9681612, "epoch": 15.017576751816264, "grad_norm": 5.895695686340332, "learning_rate": 8.381475584218713e-06, "loss": 0.09713349, "memory(GiB)": 13.7, "step": 32040, "train_speed(iter/s)": 1.528306 }, { "acc": 0.96937504, "epoch": 15.0199203187251, "grad_norm": 5.946422576904297, "learning_rate": 8.380904549625796e-06, "loss": 0.18824539, "memory(GiB)": 13.7, "step": 32045, "train_speed(iter/s)": 1.528315 }, { "acc": 0.98239307, "epoch": 15.022263885633935, "grad_norm": 17.116764068603516, "learning_rate": 8.380333433779059e-06, "loss": 0.10093572, "memory(GiB)": 13.7, "step": 32050, "train_speed(iter/s)": 1.528317 }, { "acc": 0.98464241, "epoch": 15.02460745254277, "grad_norm": 1.4552147388458252, "learning_rate": 8.379762236692232e-06, "loss": 0.07062283, "memory(GiB)": 13.7, "step": 32055, "train_speed(iter/s)": 1.528312 }, { "acc": 0.97982359, "epoch": 15.026951019451605, "grad_norm": 6.007373332977295, "learning_rate": 8.379190958379042e-06, "loss": 0.15200562, "memory(GiB)": 13.7, "step": 32060, "train_speed(iter/s)": 1.528312 }, { "acc": 0.98059654, "epoch": 15.029294586360441, "grad_norm": 5.259584903717041, "learning_rate": 8.37861959885322e-06, "loss": 0.04384832, "memory(GiB)": 13.7, "step": 32065, "train_speed(iter/s)": 1.528326 }, { "acc": 0.98272495, "epoch": 15.031638153269276, "grad_norm": 4.0668158531188965, "learning_rate": 8.378048158128503e-06, "loss": 0.08238924, "memory(GiB)": 13.7, "step": 32070, "train_speed(iter/s)": 1.528331 }, { "acc": 0.98519497, "epoch": 15.033981720178112, "grad_norm": 0.15067113935947418, "learning_rate": 8.37747663621862e-06, "loss": 0.07559826, "memory(GiB)": 13.7, "step": 32075, "train_speed(iter/s)": 1.52834 }, { "acc": 0.99250002, "epoch": 15.036325287086946, "grad_norm": 4.734917163848877, "learning_rate": 8.376905033137318e-06, "loss": 0.03250241, "memory(GiB)": 13.7, "step": 32080, "train_speed(iter/s)": 1.528348 }, { "acc": 0.96970234, "epoch": 15.038668853995782, "grad_norm": 10.116412162780762, "learning_rate": 8.376333348898328e-06, "loss": 0.16535223, "memory(GiB)": 13.7, "step": 32085, "train_speed(iter/s)": 1.528349 }, { "acc": 0.96268473, "epoch": 15.041012420904616, "grad_norm": 1.7831976413726807, "learning_rate": 8.375761583515397e-06, "loss": 0.15883178, "memory(GiB)": 13.7, "step": 32090, "train_speed(iter/s)": 1.528361 }, { "acc": 0.9833334, "epoch": 15.043355987813452, "grad_norm": 4.346427917480469, "learning_rate": 8.375189737002265e-06, "loss": 0.08000324, "memory(GiB)": 13.7, "step": 32095, "train_speed(iter/s)": 1.528361 }, { "acc": 0.97147732, "epoch": 15.045699554722287, "grad_norm": 0.45868515968322754, "learning_rate": 8.374617809372678e-06, "loss": 0.11436853, "memory(GiB)": 13.7, "step": 32100, "train_speed(iter/s)": 1.528365 }, { "acc": 0.98770294, "epoch": 15.048043121631123, "grad_norm": 2.2190968990325928, "learning_rate": 8.374045800640383e-06, "loss": 0.06353271, "memory(GiB)": 13.7, "step": 32105, "train_speed(iter/s)": 1.528376 }, { "acc": 0.97145844, "epoch": 15.050386688539957, "grad_norm": 3.22021484375, "learning_rate": 8.373473710819133e-06, "loss": 0.07199821, "memory(GiB)": 13.7, "step": 32110, "train_speed(iter/s)": 1.528385 }, { "acc": 0.98871298, "epoch": 15.052730255448793, "grad_norm": 0.2996838092803955, "learning_rate": 8.372901539922674e-06, "loss": 0.07691361, "memory(GiB)": 13.7, "step": 32115, "train_speed(iter/s)": 1.528407 }, { "acc": 0.98666668, "epoch": 15.055073822357627, "grad_norm": 1.2343807220458984, "learning_rate": 8.372329287964762e-06, "loss": 0.02737507, "memory(GiB)": 13.7, "step": 32120, "train_speed(iter/s)": 1.528413 }, { "acc": 0.9734375, "epoch": 15.057417389266464, "grad_norm": 2.7024717330932617, "learning_rate": 8.37175695495915e-06, "loss": 0.16733009, "memory(GiB)": 13.7, "step": 32125, "train_speed(iter/s)": 1.528432 }, { "acc": 0.96971855, "epoch": 15.0597609561753, "grad_norm": 4.4310526847839355, "learning_rate": 8.371184540919597e-06, "loss": 0.17479054, "memory(GiB)": 13.7, "step": 32130, "train_speed(iter/s)": 1.528437 }, { "acc": 0.98467264, "epoch": 15.062104523084134, "grad_norm": 10.398026466369629, "learning_rate": 8.37061204585986e-06, "loss": 0.04766721, "memory(GiB)": 13.7, "step": 32135, "train_speed(iter/s)": 1.528455 }, { "acc": 0.99149761, "epoch": 15.06444808999297, "grad_norm": 1.5607842206954956, "learning_rate": 8.370039469793703e-06, "loss": 0.06903625, "memory(GiB)": 13.7, "step": 32140, "train_speed(iter/s)": 1.528467 }, { "acc": 0.9802083, "epoch": 15.066791656901804, "grad_norm": 8.556640625, "learning_rate": 8.369466812734886e-06, "loss": 0.08667737, "memory(GiB)": 13.7, "step": 32145, "train_speed(iter/s)": 1.528479 }, { "acc": 0.98083668, "epoch": 15.06913522381064, "grad_norm": 2.9544692039489746, "learning_rate": 8.368894074697175e-06, "loss": 0.09900336, "memory(GiB)": 13.7, "step": 32150, "train_speed(iter/s)": 1.528488 }, { "acc": 0.98583336, "epoch": 15.071478790719475, "grad_norm": 4.077620983123779, "learning_rate": 8.368321255694335e-06, "loss": 0.06089103, "memory(GiB)": 13.7, "step": 32155, "train_speed(iter/s)": 1.528482 }, { "acc": 0.98589287, "epoch": 15.07382235762831, "grad_norm": 2.8000519275665283, "learning_rate": 8.367748355740137e-06, "loss": 0.0935184, "memory(GiB)": 13.7, "step": 32160, "train_speed(iter/s)": 1.528492 }, { "acc": 0.98224649, "epoch": 15.076165924537145, "grad_norm": 1.9400376081466675, "learning_rate": 8.367175374848353e-06, "loss": 0.12022181, "memory(GiB)": 13.7, "step": 32165, "train_speed(iter/s)": 1.528508 }, { "acc": 0.98187504, "epoch": 15.078509491445981, "grad_norm": 1.0656105279922485, "learning_rate": 8.36660231303275e-06, "loss": 0.08630547, "memory(GiB)": 13.7, "step": 32170, "train_speed(iter/s)": 1.528519 }, { "acc": 0.97922859, "epoch": 15.080853058354815, "grad_norm": 12.512736320495605, "learning_rate": 8.366029170307108e-06, "loss": 0.08174435, "memory(GiB)": 13.7, "step": 32175, "train_speed(iter/s)": 1.52851 }, { "acc": 0.9770834, "epoch": 15.083196625263652, "grad_norm": 3.476562261581421, "learning_rate": 8.365455946685201e-06, "loss": 0.1727416, "memory(GiB)": 13.7, "step": 32180, "train_speed(iter/s)": 1.528519 }, { "acc": 0.97881947, "epoch": 15.085540192172486, "grad_norm": 6.447153091430664, "learning_rate": 8.364882642180806e-06, "loss": 0.14893136, "memory(GiB)": 13.7, "step": 32185, "train_speed(iter/s)": 1.528524 }, { "acc": 0.97261372, "epoch": 15.087883759081322, "grad_norm": 6.376333236694336, "learning_rate": 8.36430925680771e-06, "loss": 0.13242257, "memory(GiB)": 13.7, "step": 32190, "train_speed(iter/s)": 1.528544 }, { "acc": 0.96673069, "epoch": 15.090227325990156, "grad_norm": 5.683380126953125, "learning_rate": 8.363735790579685e-06, "loss": 0.15273571, "memory(GiB)": 13.7, "step": 32195, "train_speed(iter/s)": 1.528561 }, { "acc": 0.97645512, "epoch": 15.092570892898992, "grad_norm": 2.0922129154205322, "learning_rate": 8.363162243510523e-06, "loss": 0.10491686, "memory(GiB)": 13.7, "step": 32200, "train_speed(iter/s)": 1.528579 }, { "acc": 0.97466354, "epoch": 15.094914459807827, "grad_norm": 4.716965198516846, "learning_rate": 8.362588615614007e-06, "loss": 0.08571674, "memory(GiB)": 13.7, "step": 32205, "train_speed(iter/s)": 1.52859 }, { "acc": 0.99134922, "epoch": 15.097258026716663, "grad_norm": 0.35495299100875854, "learning_rate": 8.362014906903928e-06, "loss": 0.06319287, "memory(GiB)": 13.7, "step": 32210, "train_speed(iter/s)": 1.528609 }, { "acc": 0.98556547, "epoch": 15.099601593625499, "grad_norm": 3.4094035625457764, "learning_rate": 8.361441117394072e-06, "loss": 0.0906688, "memory(GiB)": 13.7, "step": 32215, "train_speed(iter/s)": 1.528608 }, { "acc": 0.9856534, "epoch": 15.101945160534333, "grad_norm": 3.1394736766815186, "learning_rate": 8.360867247098235e-06, "loss": 0.09687808, "memory(GiB)": 13.7, "step": 32220, "train_speed(iter/s)": 1.528611 }, { "acc": 0.98149624, "epoch": 15.10428872744317, "grad_norm": 1.756294846534729, "learning_rate": 8.360293296030208e-06, "loss": 0.10811534, "memory(GiB)": 13.7, "step": 32225, "train_speed(iter/s)": 1.528628 }, { "acc": 0.96625004, "epoch": 15.106632294352003, "grad_norm": 11.475092887878418, "learning_rate": 8.359719264203787e-06, "loss": 0.18674656, "memory(GiB)": 13.7, "step": 32230, "train_speed(iter/s)": 1.528629 }, { "acc": 0.96871195, "epoch": 15.10897586126084, "grad_norm": 10.102724075317383, "learning_rate": 8.359145151632773e-06, "loss": 0.15705028, "memory(GiB)": 13.7, "step": 32235, "train_speed(iter/s)": 1.528643 }, { "acc": 0.984375, "epoch": 15.111319428169674, "grad_norm": 53.078277587890625, "learning_rate": 8.358570958330962e-06, "loss": 0.07920046, "memory(GiB)": 13.7, "step": 32240, "train_speed(iter/s)": 1.528646 }, { "acc": 0.9760416, "epoch": 15.11366299507851, "grad_norm": 3.676405429840088, "learning_rate": 8.357996684312158e-06, "loss": 0.08110074, "memory(GiB)": 13.7, "step": 32245, "train_speed(iter/s)": 1.528647 }, { "acc": 0.96985693, "epoch": 15.116006561987344, "grad_norm": 4.874732494354248, "learning_rate": 8.357422329590163e-06, "loss": 0.12566901, "memory(GiB)": 13.7, "step": 32250, "train_speed(iter/s)": 1.528659 }, { "acc": 0.98727703, "epoch": 15.11835012889618, "grad_norm": 1.5263972282409668, "learning_rate": 8.356847894178784e-06, "loss": 0.04693717, "memory(GiB)": 13.7, "step": 32255, "train_speed(iter/s)": 1.528665 }, { "acc": 0.98104172, "epoch": 15.120693695805015, "grad_norm": 6.44106912612915, "learning_rate": 8.356273378091827e-06, "loss": 0.06942326, "memory(GiB)": 13.7, "step": 32260, "train_speed(iter/s)": 1.528671 }, { "acc": 0.96615705, "epoch": 15.12303726271385, "grad_norm": 5.032894134521484, "learning_rate": 8.355698781343103e-06, "loss": 0.12332828, "memory(GiB)": 13.7, "step": 32265, "train_speed(iter/s)": 1.528675 }, { "acc": 0.98343754, "epoch": 15.125380829622685, "grad_norm": 3.5603058338165283, "learning_rate": 8.355124103946424e-06, "loss": 0.08805977, "memory(GiB)": 13.7, "step": 32270, "train_speed(iter/s)": 1.528684 }, { "acc": 0.98307295, "epoch": 15.127724396531521, "grad_norm": 4.290195465087891, "learning_rate": 8.354549345915603e-06, "loss": 0.07370067, "memory(GiB)": 13.7, "step": 32275, "train_speed(iter/s)": 1.528683 }, { "acc": 0.96713829, "epoch": 15.130067963440355, "grad_norm": 1.6098765134811401, "learning_rate": 8.353974507264455e-06, "loss": 0.11072801, "memory(GiB)": 13.7, "step": 32280, "train_speed(iter/s)": 1.528694 }, { "acc": 0.9760416, "epoch": 15.132411530349192, "grad_norm": 0.25712889432907104, "learning_rate": 8.353399588006793e-06, "loss": 0.14207484, "memory(GiB)": 13.7, "step": 32285, "train_speed(iter/s)": 1.528698 }, { "acc": 0.988447, "epoch": 15.134755097258028, "grad_norm": 0.006155718117952347, "learning_rate": 8.352824588156444e-06, "loss": 0.07691959, "memory(GiB)": 13.7, "step": 32290, "train_speed(iter/s)": 1.528723 }, { "acc": 0.98476686, "epoch": 15.137098664166862, "grad_norm": 8.472357749938965, "learning_rate": 8.352249507727222e-06, "loss": 0.08789728, "memory(GiB)": 13.7, "step": 32295, "train_speed(iter/s)": 1.528727 }, { "acc": 0.99106064, "epoch": 15.139442231075698, "grad_norm": 4.366355895996094, "learning_rate": 8.351674346732954e-06, "loss": 0.05450536, "memory(GiB)": 13.7, "step": 32300, "train_speed(iter/s)": 1.52874 }, { "acc": 0.98135414, "epoch": 15.141785797984532, "grad_norm": 6.1054301261901855, "learning_rate": 8.351099105187466e-06, "loss": 0.09104311, "memory(GiB)": 13.7, "step": 32305, "train_speed(iter/s)": 1.528738 }, { "acc": 0.96725693, "epoch": 15.144129364893368, "grad_norm": 3.152426242828369, "learning_rate": 8.350523783104583e-06, "loss": 0.1247983, "memory(GiB)": 13.7, "step": 32310, "train_speed(iter/s)": 1.528746 }, { "acc": 0.98182535, "epoch": 15.146472931802203, "grad_norm": 7.130338191986084, "learning_rate": 8.349948380498132e-06, "loss": 0.0602185, "memory(GiB)": 13.7, "step": 32315, "train_speed(iter/s)": 1.528744 }, { "acc": 0.95863094, "epoch": 15.148816498711039, "grad_norm": 6.108010292053223, "learning_rate": 8.349372897381948e-06, "loss": 0.17291794, "memory(GiB)": 13.7, "step": 32320, "train_speed(iter/s)": 1.528769 }, { "acc": 0.9822917, "epoch": 15.151160065619873, "grad_norm": 3.8247838020324707, "learning_rate": 8.34879733376986e-06, "loss": 0.12442477, "memory(GiB)": 13.7, "step": 32325, "train_speed(iter/s)": 1.52878 }, { "acc": 0.9848959, "epoch": 15.15350363252871, "grad_norm": 5.973920822143555, "learning_rate": 8.348221689675705e-06, "loss": 0.0852944, "memory(GiB)": 13.7, "step": 32330, "train_speed(iter/s)": 1.528807 }, { "acc": 0.98125, "epoch": 15.155847199437543, "grad_norm": 0.14188525080680847, "learning_rate": 8.347645965113315e-06, "loss": 0.08457071, "memory(GiB)": 13.7, "step": 32335, "train_speed(iter/s)": 1.528801 }, { "acc": 0.97557545, "epoch": 15.15819076634638, "grad_norm": 3.380239486694336, "learning_rate": 8.347070160096538e-06, "loss": 0.09866661, "memory(GiB)": 13.7, "step": 32340, "train_speed(iter/s)": 1.528811 }, { "acc": 0.98113098, "epoch": 15.160534333255214, "grad_norm": 2.8606982231140137, "learning_rate": 8.346494274639204e-06, "loss": 0.07871647, "memory(GiB)": 13.7, "step": 32345, "train_speed(iter/s)": 1.528809 }, { "acc": 0.98322458, "epoch": 15.16287790016405, "grad_norm": 4.987485885620117, "learning_rate": 8.345918308755163e-06, "loss": 0.1108272, "memory(GiB)": 13.7, "step": 32350, "train_speed(iter/s)": 1.528814 }, { "acc": 0.96978235, "epoch": 15.165221467072884, "grad_norm": 7.248724937438965, "learning_rate": 8.345342262458256e-06, "loss": 0.09052281, "memory(GiB)": 13.7, "step": 32355, "train_speed(iter/s)": 1.528812 }, { "acc": 0.97828321, "epoch": 15.16756503398172, "grad_norm": 8.861250877380371, "learning_rate": 8.344766135762328e-06, "loss": 0.1177537, "memory(GiB)": 13.7, "step": 32360, "train_speed(iter/s)": 1.528826 }, { "acc": 0.97180557, "epoch": 15.169908600890555, "grad_norm": 0.3921133279800415, "learning_rate": 8.344189928681232e-06, "loss": 0.0854491, "memory(GiB)": 13.7, "step": 32365, "train_speed(iter/s)": 1.528824 }, { "acc": 0.98855114, "epoch": 15.17225216779939, "grad_norm": 8.63416862487793, "learning_rate": 8.34361364122881e-06, "loss": 0.03378107, "memory(GiB)": 13.7, "step": 32370, "train_speed(iter/s)": 1.528843 }, { "acc": 0.95967264, "epoch": 15.174595734708227, "grad_norm": 3.554008960723877, "learning_rate": 8.343037273418923e-06, "loss": 0.09337696, "memory(GiB)": 13.7, "step": 32375, "train_speed(iter/s)": 1.528854 }, { "acc": 0.97814779, "epoch": 15.176939301617061, "grad_norm": 0.7583684921264648, "learning_rate": 8.34246082526542e-06, "loss": 0.0751977, "memory(GiB)": 13.7, "step": 32380, "train_speed(iter/s)": 1.528864 }, { "acc": 0.97733135, "epoch": 15.179282868525897, "grad_norm": 12.852027893066406, "learning_rate": 8.341884296782157e-06, "loss": 0.15597603, "memory(GiB)": 13.7, "step": 32385, "train_speed(iter/s)": 1.528873 }, { "acc": 0.98291664, "epoch": 15.181626435434731, "grad_norm": 5.091183185577393, "learning_rate": 8.341307687982994e-06, "loss": 0.12035517, "memory(GiB)": 13.7, "step": 32390, "train_speed(iter/s)": 1.528874 }, { "acc": 0.981007, "epoch": 15.183970002343568, "grad_norm": 6.134565830230713, "learning_rate": 8.34073099888179e-06, "loss": 0.04550097, "memory(GiB)": 13.7, "step": 32395, "train_speed(iter/s)": 1.528872 }, { "acc": 0.98701754, "epoch": 15.186313569252402, "grad_norm": 4.938448905944824, "learning_rate": 8.340154229492406e-06, "loss": 0.06302434, "memory(GiB)": 13.7, "step": 32400, "train_speed(iter/s)": 1.528875 }, { "acc": 0.9864584, "epoch": 15.188657136161238, "grad_norm": 2.006566047668457, "learning_rate": 8.339577379828706e-06, "loss": 0.11603692, "memory(GiB)": 13.7, "step": 32405, "train_speed(iter/s)": 1.528876 }, { "acc": 0.97904758, "epoch": 15.191000703070072, "grad_norm": 7.449853897094727, "learning_rate": 8.339000449904554e-06, "loss": 0.09151698, "memory(GiB)": 13.7, "step": 32410, "train_speed(iter/s)": 1.528889 }, { "acc": 0.98524303, "epoch": 15.193344269978908, "grad_norm": 1.360877275466919, "learning_rate": 8.33842343973382e-06, "loss": 0.05771483, "memory(GiB)": 13.7, "step": 32415, "train_speed(iter/s)": 1.528897 }, { "acc": 0.97046127, "epoch": 15.195687836887743, "grad_norm": 0.015340308658778667, "learning_rate": 8.337846349330371e-06, "loss": 0.09014434, "memory(GiB)": 13.7, "step": 32420, "train_speed(iter/s)": 1.528915 }, { "acc": 0.97000008, "epoch": 15.198031403796579, "grad_norm": 4.633436679840088, "learning_rate": 8.337269178708081e-06, "loss": 0.09550866, "memory(GiB)": 13.7, "step": 32425, "train_speed(iter/s)": 1.528932 }, { "acc": 0.98638887, "epoch": 15.200374970705413, "grad_norm": 0.13638338446617126, "learning_rate": 8.336691927880824e-06, "loss": 0.04836314, "memory(GiB)": 13.7, "step": 32430, "train_speed(iter/s)": 1.528934 }, { "acc": 0.9635417, "epoch": 15.202718537614249, "grad_norm": 4.9502482414245605, "learning_rate": 8.33611459686247e-06, "loss": 0.08106847, "memory(GiB)": 13.7, "step": 32435, "train_speed(iter/s)": 1.528934 }, { "acc": 0.99020834, "epoch": 15.205062104523083, "grad_norm": 1.6585259437561035, "learning_rate": 8.335537185666903e-06, "loss": 0.02950533, "memory(GiB)": 13.7, "step": 32440, "train_speed(iter/s)": 1.528946 }, { "acc": 0.98238125, "epoch": 15.20740567143192, "grad_norm": 4.686163425445557, "learning_rate": 8.334959694307998e-06, "loss": 0.07790536, "memory(GiB)": 13.7, "step": 32445, "train_speed(iter/s)": 1.528956 }, { "acc": 0.96987181, "epoch": 15.209749238340754, "grad_norm": 5.209589004516602, "learning_rate": 8.334382122799635e-06, "loss": 0.19142156, "memory(GiB)": 13.7, "step": 32450, "train_speed(iter/s)": 1.52897 }, { "acc": 0.98416672, "epoch": 15.21209280524959, "grad_norm": 2.8074560165405273, "learning_rate": 8.333804471155698e-06, "loss": 0.04111942, "memory(GiB)": 13.7, "step": 32455, "train_speed(iter/s)": 1.528972 }, { "acc": 0.9828125, "epoch": 15.214436372158426, "grad_norm": 1.0485841035842896, "learning_rate": 8.333226739390072e-06, "loss": 0.07952918, "memory(GiB)": 13.7, "step": 32460, "train_speed(iter/s)": 1.528981 }, { "acc": 0.98845234, "epoch": 15.21677993906726, "grad_norm": 0.4640306234359741, "learning_rate": 8.332648927516644e-06, "loss": 0.03769557, "memory(GiB)": 13.7, "step": 32465, "train_speed(iter/s)": 1.528979 }, { "acc": 0.97659721, "epoch": 15.219123505976096, "grad_norm": 4.296892166137695, "learning_rate": 8.332071035549305e-06, "loss": 0.0754414, "memory(GiB)": 13.7, "step": 32470, "train_speed(iter/s)": 1.52897 }, { "acc": 0.98010426, "epoch": 15.22146707288493, "grad_norm": 4.982559680938721, "learning_rate": 8.33149306350194e-06, "loss": 0.08835628, "memory(GiB)": 13.7, "step": 32475, "train_speed(iter/s)": 1.528966 }, { "acc": 0.97835054, "epoch": 15.223810639793767, "grad_norm": 4.621241092681885, "learning_rate": 8.330915011388449e-06, "loss": 0.12427683, "memory(GiB)": 13.7, "step": 32480, "train_speed(iter/s)": 1.52898 }, { "acc": 0.98314734, "epoch": 15.226154206702601, "grad_norm": 5.61508321762085, "learning_rate": 8.33033687922272e-06, "loss": 0.07098885, "memory(GiB)": 13.7, "step": 32485, "train_speed(iter/s)": 1.528993 }, { "acc": 0.98393221, "epoch": 15.228497773611437, "grad_norm": 3.88340163230896, "learning_rate": 8.329758667018652e-06, "loss": 0.06178219, "memory(GiB)": 13.7, "step": 32490, "train_speed(iter/s)": 1.529004 }, { "acc": 0.98447914, "epoch": 15.230841340520271, "grad_norm": 1.5747498273849487, "learning_rate": 8.329180374790143e-06, "loss": 0.04409572, "memory(GiB)": 13.7, "step": 32495, "train_speed(iter/s)": 1.529005 }, { "acc": 0.97979174, "epoch": 15.233184907429107, "grad_norm": 11.641081809997559, "learning_rate": 8.328602002551093e-06, "loss": 0.06814356, "memory(GiB)": 13.7, "step": 32500, "train_speed(iter/s)": 1.52901 }, { "acc": 0.98395834, "epoch": 15.235528474337942, "grad_norm": 5.625866889953613, "learning_rate": 8.328023550315405e-06, "loss": 0.04906783, "memory(GiB)": 13.7, "step": 32505, "train_speed(iter/s)": 1.52902 }, { "acc": 0.96917267, "epoch": 15.237872041246778, "grad_norm": 17.88514518737793, "learning_rate": 8.327445018096983e-06, "loss": 0.1442144, "memory(GiB)": 13.7, "step": 32510, "train_speed(iter/s)": 1.529037 }, { "acc": 0.97788696, "epoch": 15.240215608155612, "grad_norm": 2.3965415954589844, "learning_rate": 8.326866405909732e-06, "loss": 0.09791618, "memory(GiB)": 13.7, "step": 32515, "train_speed(iter/s)": 1.52904 }, { "acc": 0.9670536, "epoch": 15.242559175064448, "grad_norm": 4.380784511566162, "learning_rate": 8.326287713767561e-06, "loss": 0.14867854, "memory(GiB)": 13.7, "step": 32520, "train_speed(iter/s)": 1.529052 }, { "acc": 0.98800602, "epoch": 15.244902741973283, "grad_norm": 5.747936248779297, "learning_rate": 8.325708941684379e-06, "loss": 0.04947968, "memory(GiB)": 13.7, "step": 32525, "train_speed(iter/s)": 1.529061 }, { "acc": 0.97638264, "epoch": 15.247246308882119, "grad_norm": 8.265270233154297, "learning_rate": 8.325130089674099e-06, "loss": 0.11336746, "memory(GiB)": 13.7, "step": 32530, "train_speed(iter/s)": 1.529065 }, { "acc": 0.99571428, "epoch": 15.249589875790953, "grad_norm": 5.610602378845215, "learning_rate": 8.324551157750633e-06, "loss": 0.02660502, "memory(GiB)": 13.7, "step": 32535, "train_speed(iter/s)": 1.529071 }, { "acc": 0.97562504, "epoch": 15.251933442699789, "grad_norm": 2.930014133453369, "learning_rate": 8.323972145927899e-06, "loss": 0.09994618, "memory(GiB)": 13.7, "step": 32540, "train_speed(iter/s)": 1.529073 }, { "acc": 0.98395834, "epoch": 15.254277009608625, "grad_norm": 5.408086776733398, "learning_rate": 8.32339305421981e-06, "loss": 0.0612384, "memory(GiB)": 13.7, "step": 32545, "train_speed(iter/s)": 1.529075 }, { "acc": 0.97960224, "epoch": 15.25662057651746, "grad_norm": 6.240509033203125, "learning_rate": 8.322813882640291e-06, "loss": 0.12320048, "memory(GiB)": 13.7, "step": 32550, "train_speed(iter/s)": 1.52908 }, { "acc": 0.96195517, "epoch": 15.258964143426295, "grad_norm": 11.785581588745117, "learning_rate": 8.322234631203261e-06, "loss": 0.19104586, "memory(GiB)": 13.7, "step": 32555, "train_speed(iter/s)": 1.52908 }, { "acc": 0.97814941, "epoch": 15.26130771033513, "grad_norm": 4.0109028816223145, "learning_rate": 8.321655299922643e-06, "loss": 0.16133691, "memory(GiB)": 13.7, "step": 32560, "train_speed(iter/s)": 1.529088 }, { "acc": 0.99548607, "epoch": 15.263651277243966, "grad_norm": 0.4654468595981598, "learning_rate": 8.321075888812362e-06, "loss": 0.02279152, "memory(GiB)": 13.7, "step": 32565, "train_speed(iter/s)": 1.529085 }, { "acc": 0.97611485, "epoch": 15.2659948441528, "grad_norm": 9.627829551696777, "learning_rate": 8.320496397886344e-06, "loss": 0.10674061, "memory(GiB)": 13.7, "step": 32570, "train_speed(iter/s)": 1.529092 }, { "acc": 0.9803957, "epoch": 15.268338411061636, "grad_norm": 3.03979754447937, "learning_rate": 8.319916827158522e-06, "loss": 0.14312809, "memory(GiB)": 13.7, "step": 32575, "train_speed(iter/s)": 1.529098 }, { "acc": 0.98353558, "epoch": 15.27068197797047, "grad_norm": 3.185985565185547, "learning_rate": 8.319337176642824e-06, "loss": 0.07596883, "memory(GiB)": 13.7, "step": 32580, "train_speed(iter/s)": 1.529109 }, { "acc": 0.96812534, "epoch": 15.273025544879307, "grad_norm": 4.555654525756836, "learning_rate": 8.318757446353183e-06, "loss": 0.11292992, "memory(GiB)": 13.7, "step": 32585, "train_speed(iter/s)": 1.52912 }, { "acc": 0.95541668, "epoch": 15.275369111788141, "grad_norm": 7.221207141876221, "learning_rate": 8.318177636303533e-06, "loss": 0.28492074, "memory(GiB)": 13.7, "step": 32590, "train_speed(iter/s)": 1.529124 }, { "acc": 0.98083324, "epoch": 15.277712678696977, "grad_norm": 2.038130521774292, "learning_rate": 8.317597746507816e-06, "loss": 0.07999698, "memory(GiB)": 13.7, "step": 32595, "train_speed(iter/s)": 1.529138 }, { "acc": 0.98320198, "epoch": 15.280056245605811, "grad_norm": 2.903691053390503, "learning_rate": 8.317017776979963e-06, "loss": 0.10006952, "memory(GiB)": 13.7, "step": 32600, "train_speed(iter/s)": 1.529142 }, { "acc": 0.9837698, "epoch": 15.282399812514647, "grad_norm": 5.31784200668335, "learning_rate": 8.31643772773392e-06, "loss": 0.09123058, "memory(GiB)": 13.7, "step": 32605, "train_speed(iter/s)": 1.529152 }, { "acc": 0.96035719, "epoch": 15.284743379423482, "grad_norm": 3.312462568283081, "learning_rate": 8.315857598783628e-06, "loss": 0.16518401, "memory(GiB)": 13.7, "step": 32610, "train_speed(iter/s)": 1.529152 }, { "acc": 0.96724205, "epoch": 15.287086946332318, "grad_norm": 2.7577571868896484, "learning_rate": 8.31527739014303e-06, "loss": 0.14239116, "memory(GiB)": 13.7, "step": 32615, "train_speed(iter/s)": 1.529167 }, { "acc": 0.9833333, "epoch": 15.289430513241154, "grad_norm": 4.086915493011475, "learning_rate": 8.314697101826075e-06, "loss": 0.10556885, "memory(GiB)": 13.7, "step": 32620, "train_speed(iter/s)": 1.529169 }, { "acc": 0.98388882, "epoch": 15.291774080149988, "grad_norm": 3.5752077102661133, "learning_rate": 8.314116733846707e-06, "loss": 0.10334831, "memory(GiB)": 13.7, "step": 32625, "train_speed(iter/s)": 1.52917 }, { "acc": 0.98112736, "epoch": 15.294117647058824, "grad_norm": 4.678642749786377, "learning_rate": 8.31353628621888e-06, "loss": 0.10324349, "memory(GiB)": 13.7, "step": 32630, "train_speed(iter/s)": 1.529172 }, { "acc": 0.96855621, "epoch": 15.296461213967659, "grad_norm": 7.2788543701171875, "learning_rate": 8.312955758956547e-06, "loss": 0.1598527, "memory(GiB)": 13.7, "step": 32635, "train_speed(iter/s)": 1.529178 }, { "acc": 0.97613506, "epoch": 15.298804780876495, "grad_norm": 5.548000812530518, "learning_rate": 8.312375152073658e-06, "loss": 0.08829572, "memory(GiB)": 13.7, "step": 32640, "train_speed(iter/s)": 1.529174 }, { "acc": 0.97850695, "epoch": 15.301148347785329, "grad_norm": 7.697466850280762, "learning_rate": 8.31179446558417e-06, "loss": 0.0981213, "memory(GiB)": 13.7, "step": 32645, "train_speed(iter/s)": 1.529185 }, { "acc": 0.98182545, "epoch": 15.303491914694165, "grad_norm": 0.2647571265697479, "learning_rate": 8.311213699502042e-06, "loss": 0.06375217, "memory(GiB)": 13.7, "step": 32650, "train_speed(iter/s)": 1.529201 }, { "acc": 0.99291668, "epoch": 15.305835481603, "grad_norm": 3.2384817600250244, "learning_rate": 8.310632853841233e-06, "loss": 0.05162916, "memory(GiB)": 13.7, "step": 32655, "train_speed(iter/s)": 1.529214 }, { "acc": 0.98967266, "epoch": 15.308179048511835, "grad_norm": 5.434119701385498, "learning_rate": 8.310051928615706e-06, "loss": 0.06004335, "memory(GiB)": 13.7, "step": 32660, "train_speed(iter/s)": 1.529218 }, { "acc": 0.98183985, "epoch": 15.31052261542067, "grad_norm": 4.089189529418945, "learning_rate": 8.309470923839422e-06, "loss": 0.07273575, "memory(GiB)": 13.7, "step": 32665, "train_speed(iter/s)": 1.52922 }, { "acc": 0.97987041, "epoch": 15.312866182329506, "grad_norm": 3.0648715496063232, "learning_rate": 8.308889839526349e-06, "loss": 0.11560216, "memory(GiB)": 13.7, "step": 32670, "train_speed(iter/s)": 1.529219 }, { "acc": 0.98172646, "epoch": 15.31520974923834, "grad_norm": 3.2059593200683594, "learning_rate": 8.30830867569045e-06, "loss": 0.14513855, "memory(GiB)": 13.7, "step": 32675, "train_speed(iter/s)": 1.529225 }, { "acc": 0.98454857, "epoch": 15.317553316147176, "grad_norm": 4.405060768127441, "learning_rate": 8.307727432345702e-06, "loss": 0.04639862, "memory(GiB)": 13.7, "step": 32680, "train_speed(iter/s)": 1.529232 }, { "acc": 0.98731842, "epoch": 15.31989688305601, "grad_norm": 2.235413074493408, "learning_rate": 8.30714610950607e-06, "loss": 0.07274425, "memory(GiB)": 13.7, "step": 32685, "train_speed(iter/s)": 1.529241 }, { "acc": 0.98350925, "epoch": 15.322240449964847, "grad_norm": 0.0817086473107338, "learning_rate": 8.306564707185527e-06, "loss": 0.11416889, "memory(GiB)": 13.7, "step": 32690, "train_speed(iter/s)": 1.529264 }, { "acc": 0.98312502, "epoch": 15.324584016873683, "grad_norm": 2.832656145095825, "learning_rate": 8.305983225398052e-06, "loss": 0.0595437, "memory(GiB)": 13.7, "step": 32695, "train_speed(iter/s)": 1.529284 }, { "acc": 0.98738098, "epoch": 15.326927583782517, "grad_norm": 4.02153205871582, "learning_rate": 8.305401664157617e-06, "loss": 0.0959566, "memory(GiB)": 13.7, "step": 32700, "train_speed(iter/s)": 1.52928 }, { "acc": 0.9725297, "epoch": 15.329271150691353, "grad_norm": 19.990774154663086, "learning_rate": 8.304820023478207e-06, "loss": 0.17592096, "memory(GiB)": 13.7, "step": 32705, "train_speed(iter/s)": 1.529275 }, { "acc": 0.98675594, "epoch": 15.331614717600187, "grad_norm": 4.511055946350098, "learning_rate": 8.304238303373796e-06, "loss": 0.05931038, "memory(GiB)": 13.7, "step": 32710, "train_speed(iter/s)": 1.529269 }, { "acc": 0.97430553, "epoch": 15.333958284509023, "grad_norm": 0.9025149345397949, "learning_rate": 8.30365650385837e-06, "loss": 0.09939082, "memory(GiB)": 13.7, "step": 32715, "train_speed(iter/s)": 1.529274 }, { "acc": 0.98083324, "epoch": 15.336301851417858, "grad_norm": 1.4409364461898804, "learning_rate": 8.303074624945915e-06, "loss": 0.05097795, "memory(GiB)": 13.7, "step": 32720, "train_speed(iter/s)": 1.529289 }, { "acc": 0.99215279, "epoch": 15.338645418326694, "grad_norm": 4.236731052398682, "learning_rate": 8.302492666650415e-06, "loss": 0.03279206, "memory(GiB)": 13.7, "step": 32725, "train_speed(iter/s)": 1.529297 }, { "acc": 0.95484228, "epoch": 15.340988985235528, "grad_norm": 3.4927279949188232, "learning_rate": 8.30191062898586e-06, "loss": 0.22686038, "memory(GiB)": 13.7, "step": 32730, "train_speed(iter/s)": 1.529301 }, { "acc": 0.98227501, "epoch": 15.343332552144364, "grad_norm": 5.602095603942871, "learning_rate": 8.301328511966239e-06, "loss": 0.07082675, "memory(GiB)": 13.7, "step": 32735, "train_speed(iter/s)": 1.529285 }, { "acc": 0.98811054, "epoch": 15.345676119053199, "grad_norm": 5.669442653656006, "learning_rate": 8.300746315605544e-06, "loss": 0.05173243, "memory(GiB)": 13.7, "step": 32740, "train_speed(iter/s)": 1.529295 }, { "acc": 0.98336315, "epoch": 15.348019685962035, "grad_norm": 3.485811710357666, "learning_rate": 8.300164039917771e-06, "loss": 0.07421097, "memory(GiB)": 13.7, "step": 32745, "train_speed(iter/s)": 1.529293 }, { "acc": 0.9791338, "epoch": 15.350363252870869, "grad_norm": 5.1357035636901855, "learning_rate": 8.299581684916913e-06, "loss": 0.08287924, "memory(GiB)": 13.7, "step": 32750, "train_speed(iter/s)": 1.52929 }, { "acc": 0.9895834, "epoch": 15.352706819779705, "grad_norm": 4.3540849685668945, "learning_rate": 8.298999250616971e-06, "loss": 0.02574186, "memory(GiB)": 13.7, "step": 32755, "train_speed(iter/s)": 1.529304 }, { "acc": 0.9916667, "epoch": 15.35505038668854, "grad_norm": 4.080290794372559, "learning_rate": 8.298416737031943e-06, "loss": 0.04195997, "memory(GiB)": 13.7, "step": 32760, "train_speed(iter/s)": 1.529315 }, { "acc": 0.97268314, "epoch": 15.357393953597375, "grad_norm": 4.263672351837158, "learning_rate": 8.297834144175831e-06, "loss": 0.1680845, "memory(GiB)": 13.7, "step": 32765, "train_speed(iter/s)": 1.52932 }, { "acc": 0.97296391, "epoch": 15.35973752050621, "grad_norm": 2.914839267730713, "learning_rate": 8.29725147206264e-06, "loss": 0.07852159, "memory(GiB)": 13.7, "step": 32770, "train_speed(iter/s)": 1.529322 }, { "acc": 0.97279758, "epoch": 15.362081087415046, "grad_norm": 3.5386431217193604, "learning_rate": 8.296668720706373e-06, "loss": 0.12481971, "memory(GiB)": 13.7, "step": 32775, "train_speed(iter/s)": 1.52933 }, { "acc": 0.97799025, "epoch": 15.36442465432388, "grad_norm": 3.9015283584594727, "learning_rate": 8.296085890121038e-06, "loss": 0.0906564, "memory(GiB)": 13.7, "step": 32780, "train_speed(iter/s)": 1.529335 }, { "acc": 0.9916666, "epoch": 15.366768221232716, "grad_norm": 5.164923191070557, "learning_rate": 8.295502980320645e-06, "loss": 0.04411765, "memory(GiB)": 13.7, "step": 32785, "train_speed(iter/s)": 1.529345 }, { "acc": 0.97896786, "epoch": 15.369111788141552, "grad_norm": 4.965769290924072, "learning_rate": 8.294919991319205e-06, "loss": 0.10154141, "memory(GiB)": 13.7, "step": 32790, "train_speed(iter/s)": 1.529351 }, { "acc": 0.9739584, "epoch": 15.371455355050387, "grad_norm": 5.003619194030762, "learning_rate": 8.294336923130734e-06, "loss": 0.14290788, "memory(GiB)": 13.7, "step": 32795, "train_speed(iter/s)": 1.529358 }, { "acc": 0.99503975, "epoch": 15.373798921959223, "grad_norm": 3.7629101276397705, "learning_rate": 8.293753775769245e-06, "loss": 0.04096001, "memory(GiB)": 13.7, "step": 32800, "train_speed(iter/s)": 1.529352 }, { "acc": 0.98999624, "epoch": 15.376142488868057, "grad_norm": 3.145047187805176, "learning_rate": 8.29317054924875e-06, "loss": 0.11436441, "memory(GiB)": 13.7, "step": 32805, "train_speed(iter/s)": 1.529369 }, { "acc": 0.97840271, "epoch": 15.378486055776893, "grad_norm": 0.450340211391449, "learning_rate": 8.292587243583274e-06, "loss": 0.12474028, "memory(GiB)": 13.7, "step": 32810, "train_speed(iter/s)": 1.529379 }, { "acc": 0.98483944, "epoch": 15.380829622685727, "grad_norm": 3.3187060356140137, "learning_rate": 8.292003858786837e-06, "loss": 0.05488944, "memory(GiB)": 13.7, "step": 32815, "train_speed(iter/s)": 1.529395 }, { "acc": 0.97620049, "epoch": 15.383173189594563, "grad_norm": 9.28001880645752, "learning_rate": 8.291420394873461e-06, "loss": 0.12610412, "memory(GiB)": 13.7, "step": 32820, "train_speed(iter/s)": 1.529402 }, { "acc": 0.98264885, "epoch": 15.385516756503398, "grad_norm": 2.551767110824585, "learning_rate": 8.29083685185717e-06, "loss": 0.07260675, "memory(GiB)": 13.7, "step": 32825, "train_speed(iter/s)": 1.52941 }, { "acc": 0.9708334, "epoch": 15.387860323412234, "grad_norm": 5.097208499908447, "learning_rate": 8.29025322975199e-06, "loss": 0.0910927, "memory(GiB)": 13.7, "step": 32830, "train_speed(iter/s)": 1.529422 }, { "acc": 0.97725697, "epoch": 15.390203890321068, "grad_norm": 8.155075073242188, "learning_rate": 8.28966952857195e-06, "loss": 0.09296013, "memory(GiB)": 13.7, "step": 32835, "train_speed(iter/s)": 1.529412 }, { "acc": 0.96520824, "epoch": 15.392547457229904, "grad_norm": 6.438924789428711, "learning_rate": 8.28908574833108e-06, "loss": 0.09750215, "memory(GiB)": 13.7, "step": 32840, "train_speed(iter/s)": 1.529418 }, { "acc": 0.98156948, "epoch": 15.394891024138738, "grad_norm": 12.237375259399414, "learning_rate": 8.288501889043413e-06, "loss": 0.08701982, "memory(GiB)": 13.7, "step": 32845, "train_speed(iter/s)": 1.529437 }, { "acc": 0.98046131, "epoch": 15.397234591047575, "grad_norm": 1.3719141483306885, "learning_rate": 8.287917950722982e-06, "loss": 0.0524068, "memory(GiB)": 13.7, "step": 32850, "train_speed(iter/s)": 1.529439 }, { "acc": 0.97550592, "epoch": 15.399578157956409, "grad_norm": 0.5551128387451172, "learning_rate": 8.287333933383826e-06, "loss": 0.08186159, "memory(GiB)": 13.7, "step": 32855, "train_speed(iter/s)": 1.529438 }, { "acc": 0.97488098, "epoch": 15.401921724865245, "grad_norm": 5.443717002868652, "learning_rate": 8.286749837039978e-06, "loss": 0.05612141, "memory(GiB)": 13.7, "step": 32860, "train_speed(iter/s)": 1.529448 }, { "acc": 0.97615528, "epoch": 15.404265291774081, "grad_norm": 3.2281007766723633, "learning_rate": 8.28616566170548e-06, "loss": 0.14173822, "memory(GiB)": 13.7, "step": 32865, "train_speed(iter/s)": 1.529472 }, { "acc": 0.97778845, "epoch": 15.406608858682915, "grad_norm": 5.024699687957764, "learning_rate": 8.285581407394376e-06, "loss": 0.07677922, "memory(GiB)": 13.7, "step": 32870, "train_speed(iter/s)": 1.529478 }, { "acc": 0.98041134, "epoch": 15.408952425591751, "grad_norm": 5.143198013305664, "learning_rate": 8.284997074120705e-06, "loss": 0.12019782, "memory(GiB)": 13.7, "step": 32875, "train_speed(iter/s)": 1.529493 }, { "acc": 0.97493057, "epoch": 15.411295992500586, "grad_norm": 7.21963357925415, "learning_rate": 8.284412661898514e-06, "loss": 0.08474415, "memory(GiB)": 13.7, "step": 32880, "train_speed(iter/s)": 1.529498 }, { "acc": 0.96896782, "epoch": 15.413639559409422, "grad_norm": 5.4211626052856445, "learning_rate": 8.283828170741854e-06, "loss": 0.13992469, "memory(GiB)": 13.7, "step": 32885, "train_speed(iter/s)": 1.529515 }, { "acc": 0.98149538, "epoch": 15.415983126318256, "grad_norm": 7.238319396972656, "learning_rate": 8.283243600664768e-06, "loss": 0.0636553, "memory(GiB)": 13.7, "step": 32890, "train_speed(iter/s)": 1.52952 }, { "acc": 0.98033571, "epoch": 15.418326693227092, "grad_norm": 3.418931007385254, "learning_rate": 8.282658951681313e-06, "loss": 0.10357935, "memory(GiB)": 13.7, "step": 32895, "train_speed(iter/s)": 1.529532 }, { "acc": 0.97434521, "epoch": 15.420670260135926, "grad_norm": 7.593234062194824, "learning_rate": 8.282074223805538e-06, "loss": 0.10543315, "memory(GiB)": 13.7, "step": 32900, "train_speed(iter/s)": 1.529552 }, { "acc": 0.97354164, "epoch": 15.423013827044763, "grad_norm": 7.98154878616333, "learning_rate": 8.2814894170515e-06, "loss": 0.1192611, "memory(GiB)": 13.7, "step": 32905, "train_speed(iter/s)": 1.529562 }, { "acc": 0.98398809, "epoch": 15.425357393953597, "grad_norm": 5.935365200042725, "learning_rate": 8.280904531433257e-06, "loss": 0.08536411, "memory(GiB)": 13.7, "step": 32910, "train_speed(iter/s)": 1.529573 }, { "acc": 0.98047619, "epoch": 15.427700960862433, "grad_norm": 20.01874351501465, "learning_rate": 8.280319566964862e-06, "loss": 0.11673245, "memory(GiB)": 13.7, "step": 32915, "train_speed(iter/s)": 1.529589 }, { "acc": 0.9925252, "epoch": 15.430044527771267, "grad_norm": 28.50425910949707, "learning_rate": 8.279734523660384e-06, "loss": 0.05378211, "memory(GiB)": 13.7, "step": 32920, "train_speed(iter/s)": 1.529594 }, { "acc": 0.97587795, "epoch": 15.432388094680103, "grad_norm": 5.35567045211792, "learning_rate": 8.279149401533877e-06, "loss": 0.11728179, "memory(GiB)": 13.7, "step": 32925, "train_speed(iter/s)": 1.529596 }, { "acc": 0.99229164, "epoch": 15.434731661588938, "grad_norm": 1.8045488595962524, "learning_rate": 8.278564200599413e-06, "loss": 0.03988401, "memory(GiB)": 13.7, "step": 32930, "train_speed(iter/s)": 1.529604 }, { "acc": 0.97286854, "epoch": 15.437075228497774, "grad_norm": 22.45305824279785, "learning_rate": 8.277978920871055e-06, "loss": 0.09761856, "memory(GiB)": 13.7, "step": 32935, "train_speed(iter/s)": 1.529604 }, { "acc": 0.984375, "epoch": 15.43941879540661, "grad_norm": 1.227057933807373, "learning_rate": 8.277393562362869e-06, "loss": 0.11127281, "memory(GiB)": 13.7, "step": 32940, "train_speed(iter/s)": 1.529606 }, { "acc": 0.98041668, "epoch": 15.441762362315444, "grad_norm": 2.9358208179473877, "learning_rate": 8.276808125088928e-06, "loss": 0.04819521, "memory(GiB)": 13.7, "step": 32945, "train_speed(iter/s)": 1.52961 }, { "acc": 0.97250004, "epoch": 15.44410592922428, "grad_norm": 2.4070470333099365, "learning_rate": 8.276222609063304e-06, "loss": 0.15372999, "memory(GiB)": 13.7, "step": 32950, "train_speed(iter/s)": 1.529626 }, { "acc": 0.96573315, "epoch": 15.446449496133114, "grad_norm": 6.438894748687744, "learning_rate": 8.275637014300068e-06, "loss": 0.24028535, "memory(GiB)": 13.7, "step": 32955, "train_speed(iter/s)": 1.529644 }, { "acc": 0.98819447, "epoch": 15.44879306304195, "grad_norm": 2.9755783081054688, "learning_rate": 8.275051340813299e-06, "loss": 0.03548786, "memory(GiB)": 13.7, "step": 32960, "train_speed(iter/s)": 1.52965 }, { "acc": 0.96556549, "epoch": 15.451136629950785, "grad_norm": 4.4628167152404785, "learning_rate": 8.274465588617074e-06, "loss": 0.07737147, "memory(GiB)": 13.7, "step": 32965, "train_speed(iter/s)": 1.529665 }, { "acc": 0.9476881, "epoch": 15.453480196859621, "grad_norm": 9.112469673156738, "learning_rate": 8.273879757725473e-06, "loss": 0.24066005, "memory(GiB)": 13.7, "step": 32970, "train_speed(iter/s)": 1.529681 }, { "acc": 0.97444134, "epoch": 15.455823763768455, "grad_norm": 6.281894207000732, "learning_rate": 8.273293848152574e-06, "loss": 0.08936878, "memory(GiB)": 13.7, "step": 32975, "train_speed(iter/s)": 1.52969 }, { "acc": 0.9778409, "epoch": 15.458167330677291, "grad_norm": 7.497045516967773, "learning_rate": 8.272707859912465e-06, "loss": 0.08938324, "memory(GiB)": 13.7, "step": 32980, "train_speed(iter/s)": 1.529698 }, { "acc": 0.975, "epoch": 15.460510897586126, "grad_norm": 7.084989070892334, "learning_rate": 8.272121793019227e-06, "loss": 0.08776667, "memory(GiB)": 13.7, "step": 32985, "train_speed(iter/s)": 1.529705 }, { "acc": 0.98239584, "epoch": 15.462854464494962, "grad_norm": 3.5075645446777344, "learning_rate": 8.271535647486951e-06, "loss": 0.06998554, "memory(GiB)": 13.7, "step": 32990, "train_speed(iter/s)": 1.529713 }, { "acc": 0.98819447, "epoch": 15.465198031403796, "grad_norm": 8.865487098693848, "learning_rate": 8.270949423329725e-06, "loss": 0.04953539, "memory(GiB)": 13.7, "step": 32995, "train_speed(iter/s)": 1.52973 }, { "acc": 0.99035797, "epoch": 15.467541598312632, "grad_norm": 1.1877647638320923, "learning_rate": 8.270363120561637e-06, "loss": 0.06754252, "memory(GiB)": 13.7, "step": 33000, "train_speed(iter/s)": 1.529737 }, { "acc": 0.98519344, "epoch": 15.469885165221466, "grad_norm": 2.4937868118286133, "learning_rate": 8.269776739196782e-06, "loss": 0.12474185, "memory(GiB)": 13.7, "step": 33005, "train_speed(iter/s)": 1.529741 }, { "acc": 0.96950893, "epoch": 15.472228732130302, "grad_norm": 2.039078712463379, "learning_rate": 8.269190279249256e-06, "loss": 0.10959327, "memory(GiB)": 13.7, "step": 33010, "train_speed(iter/s)": 1.529747 }, { "acc": 0.98321438, "epoch": 15.474572299039137, "grad_norm": 0.1255902796983719, "learning_rate": 8.268603740733152e-06, "loss": 0.07467465, "memory(GiB)": 13.7, "step": 33015, "train_speed(iter/s)": 1.529753 }, { "acc": 0.99020834, "epoch": 15.476915865947973, "grad_norm": 4.064854621887207, "learning_rate": 8.268017123662573e-06, "loss": 0.08131881, "memory(GiB)": 13.7, "step": 33020, "train_speed(iter/s)": 1.52974 }, { "acc": 0.98791037, "epoch": 15.479259432856807, "grad_norm": 2.914081573486328, "learning_rate": 8.267430428051615e-06, "loss": 0.06224814, "memory(GiB)": 13.7, "step": 33025, "train_speed(iter/s)": 1.529742 }, { "acc": 0.99416666, "epoch": 15.481602999765643, "grad_norm": 3.8727633953094482, "learning_rate": 8.266843653914383e-06, "loss": 0.02871041, "memory(GiB)": 13.7, "step": 33030, "train_speed(iter/s)": 1.529743 }, { "acc": 0.96389885, "epoch": 15.48394656667448, "grad_norm": 6.876461505889893, "learning_rate": 8.266256801264981e-06, "loss": 0.17386234, "memory(GiB)": 13.7, "step": 33035, "train_speed(iter/s)": 1.529745 }, { "acc": 0.99821434, "epoch": 15.486290133583314, "grad_norm": 4.3629469871521, "learning_rate": 8.265669870117514e-06, "loss": 0.01956372, "memory(GiB)": 13.7, "step": 33040, "train_speed(iter/s)": 1.52976 }, { "acc": 0.9747159, "epoch": 15.48863370049215, "grad_norm": 3.0588016510009766, "learning_rate": 8.26508286048609e-06, "loss": 0.07727869, "memory(GiB)": 13.7, "step": 33045, "train_speed(iter/s)": 1.529764 }, { "acc": 0.97529764, "epoch": 15.490977267400984, "grad_norm": 6.640040397644043, "learning_rate": 8.264495772384818e-06, "loss": 0.09537288, "memory(GiB)": 13.7, "step": 33050, "train_speed(iter/s)": 1.529749 }, { "acc": 0.98327465, "epoch": 15.49332083430982, "grad_norm": 2.3944671154022217, "learning_rate": 8.263908605827812e-06, "loss": 0.07026986, "memory(GiB)": 13.7, "step": 33055, "train_speed(iter/s)": 1.529756 }, { "acc": 0.97270832, "epoch": 15.495664401218654, "grad_norm": 4.834174633026123, "learning_rate": 8.263321360829187e-06, "loss": 0.11662567, "memory(GiB)": 13.7, "step": 33060, "train_speed(iter/s)": 1.529748 }, { "acc": 0.97614584, "epoch": 15.49800796812749, "grad_norm": 1.1596914529800415, "learning_rate": 8.262734037403052e-06, "loss": 0.13372619, "memory(GiB)": 13.7, "step": 33065, "train_speed(iter/s)": 1.529745 }, { "acc": 0.98925266, "epoch": 15.500351535036325, "grad_norm": 3.716935634613037, "learning_rate": 8.262146635563528e-06, "loss": 0.05302117, "memory(GiB)": 13.7, "step": 33070, "train_speed(iter/s)": 1.529749 }, { "acc": 0.98974543, "epoch": 15.50269510194516, "grad_norm": 3.927076578140259, "learning_rate": 8.261559155324735e-06, "loss": 0.05542308, "memory(GiB)": 13.7, "step": 33075, "train_speed(iter/s)": 1.529751 }, { "acc": 0.98641481, "epoch": 15.505038668853995, "grad_norm": 5.467989444732666, "learning_rate": 8.260971596700793e-06, "loss": 0.09568181, "memory(GiB)": 13.7, "step": 33080, "train_speed(iter/s)": 1.529753 }, { "acc": 0.97875004, "epoch": 15.507382235762831, "grad_norm": 6.280163288116455, "learning_rate": 8.260383959705824e-06, "loss": 0.07556177, "memory(GiB)": 13.7, "step": 33085, "train_speed(iter/s)": 1.529751 }, { "acc": 0.96423607, "epoch": 15.509725802671666, "grad_norm": 7.797117710113525, "learning_rate": 8.259796244353958e-06, "loss": 0.1464159, "memory(GiB)": 13.7, "step": 33090, "train_speed(iter/s)": 1.52976 }, { "acc": 0.97866879, "epoch": 15.512069369580502, "grad_norm": 9.9148530960083, "learning_rate": 8.259208450659315e-06, "loss": 0.1315109, "memory(GiB)": 13.7, "step": 33095, "train_speed(iter/s)": 1.529772 }, { "acc": 0.97774315, "epoch": 15.514412936489336, "grad_norm": 6.045255184173584, "learning_rate": 8.258620578636027e-06, "loss": 0.08705158, "memory(GiB)": 13.7, "step": 33100, "train_speed(iter/s)": 1.529784 }, { "acc": 0.98407192, "epoch": 15.516756503398172, "grad_norm": 1.5830408334732056, "learning_rate": 8.258032628298226e-06, "loss": 0.10165514, "memory(GiB)": 13.7, "step": 33105, "train_speed(iter/s)": 1.529778 }, { "acc": 0.97967262, "epoch": 15.519100070307008, "grad_norm": 1.4454066753387451, "learning_rate": 8.257444599660038e-06, "loss": 0.09943517, "memory(GiB)": 13.7, "step": 33110, "train_speed(iter/s)": 1.529797 }, { "acc": 0.98083344, "epoch": 15.521443637215842, "grad_norm": 8.195761680603027, "learning_rate": 8.256856492735606e-06, "loss": 0.09568534, "memory(GiB)": 13.7, "step": 33115, "train_speed(iter/s)": 1.529808 }, { "acc": 0.98723221, "epoch": 15.523787204124678, "grad_norm": 1.8264943361282349, "learning_rate": 8.25626830753906e-06, "loss": 0.04039619, "memory(GiB)": 13.7, "step": 33120, "train_speed(iter/s)": 1.529825 }, { "acc": 0.97211409, "epoch": 15.526130771033513, "grad_norm": 7.795262813568115, "learning_rate": 8.25568004408454e-06, "loss": 0.17327754, "memory(GiB)": 13.7, "step": 33125, "train_speed(iter/s)": 1.52983 }, { "acc": 0.97689638, "epoch": 15.528474337942349, "grad_norm": 5.2421393394470215, "learning_rate": 8.255091702386185e-06, "loss": 0.06989573, "memory(GiB)": 13.7, "step": 33130, "train_speed(iter/s)": 1.529831 }, { "acc": 0.97466526, "epoch": 15.530817904851183, "grad_norm": 4.401104927062988, "learning_rate": 8.254503282458139e-06, "loss": 0.1456275, "memory(GiB)": 13.7, "step": 33135, "train_speed(iter/s)": 1.529838 }, { "acc": 0.97469692, "epoch": 15.53316147176002, "grad_norm": 4.588022708892822, "learning_rate": 8.253914784314544e-06, "loss": 0.08705753, "memory(GiB)": 13.7, "step": 33140, "train_speed(iter/s)": 1.52984 }, { "acc": 0.96187496, "epoch": 15.535505038668854, "grad_norm": 8.817615509033203, "learning_rate": 8.253326207969547e-06, "loss": 0.2085784, "memory(GiB)": 13.7, "step": 33145, "train_speed(iter/s)": 1.529853 }, { "acc": 0.97409725, "epoch": 15.53784860557769, "grad_norm": 3.309469223022461, "learning_rate": 8.252737553437292e-06, "loss": 0.10199832, "memory(GiB)": 13.7, "step": 33150, "train_speed(iter/s)": 1.529861 }, { "acc": 0.97770834, "epoch": 15.540192172486524, "grad_norm": 4.773796081542969, "learning_rate": 8.252148820731932e-06, "loss": 0.06205159, "memory(GiB)": 13.7, "step": 33155, "train_speed(iter/s)": 1.529871 }, { "acc": 0.98146782, "epoch": 15.54253573939536, "grad_norm": 1.0625920295715332, "learning_rate": 8.251560009867617e-06, "loss": 0.08198149, "memory(GiB)": 13.7, "step": 33160, "train_speed(iter/s)": 1.529879 }, { "acc": 0.97288189, "epoch": 15.544879306304194, "grad_norm": 3.879793405532837, "learning_rate": 8.250971120858498e-06, "loss": 0.10625772, "memory(GiB)": 13.7, "step": 33165, "train_speed(iter/s)": 1.529889 }, { "acc": 0.98649998, "epoch": 15.54722287321303, "grad_norm": 3.4375064373016357, "learning_rate": 8.250382153718735e-06, "loss": 0.04567677, "memory(GiB)": 13.7, "step": 33170, "train_speed(iter/s)": 1.529897 }, { "acc": 0.98604164, "epoch": 15.549566440121865, "grad_norm": 2.7561652660369873, "learning_rate": 8.24979310846248e-06, "loss": 0.0652953, "memory(GiB)": 13.7, "step": 33175, "train_speed(iter/s)": 1.529901 }, { "acc": 0.97729168, "epoch": 15.5519100070307, "grad_norm": 5.412046432495117, "learning_rate": 8.249203985103894e-06, "loss": 0.08457004, "memory(GiB)": 13.7, "step": 33180, "train_speed(iter/s)": 1.529916 }, { "acc": 0.98106308, "epoch": 15.554253573939537, "grad_norm": 4.498180866241455, "learning_rate": 8.248614783657137e-06, "loss": 0.15585303, "memory(GiB)": 13.7, "step": 33185, "train_speed(iter/s)": 1.529913 }, { "acc": 0.97870827, "epoch": 15.556597140848371, "grad_norm": 1.3782278299331665, "learning_rate": 8.24802550413637e-06, "loss": 0.12249584, "memory(GiB)": 13.7, "step": 33190, "train_speed(iter/s)": 1.529919 }, { "acc": 0.96281662, "epoch": 15.558940707757207, "grad_norm": 5.1006178855896, "learning_rate": 8.24743614655576e-06, "loss": 0.11668446, "memory(GiB)": 13.7, "step": 33195, "train_speed(iter/s)": 1.52993 }, { "acc": 0.97921305, "epoch": 15.561284274666042, "grad_norm": 4.077541351318359, "learning_rate": 8.246846710929472e-06, "loss": 0.13125808, "memory(GiB)": 13.7, "step": 33200, "train_speed(iter/s)": 1.529934 }, { "acc": 0.96208344, "epoch": 15.563627841574878, "grad_norm": 4.642343044281006, "learning_rate": 8.246257197271675e-06, "loss": 0.17862904, "memory(GiB)": 13.7, "step": 33205, "train_speed(iter/s)": 1.529945 }, { "acc": 0.98779602, "epoch": 15.565971408483712, "grad_norm": 2.823559045791626, "learning_rate": 8.245667605596537e-06, "loss": 0.07210838, "memory(GiB)": 13.7, "step": 33210, "train_speed(iter/s)": 1.52995 }, { "acc": 0.97863092, "epoch": 15.568314975392548, "grad_norm": 4.284489631652832, "learning_rate": 8.24507793591823e-06, "loss": 0.10756903, "memory(GiB)": 13.7, "step": 33215, "train_speed(iter/s)": 1.529961 }, { "acc": 0.98182116, "epoch": 15.570658542301382, "grad_norm": 6.190539360046387, "learning_rate": 8.24448818825093e-06, "loss": 0.06634914, "memory(GiB)": 13.7, "step": 33220, "train_speed(iter/s)": 1.529972 }, { "acc": 0.99040184, "epoch": 15.573002109210218, "grad_norm": 1.1913553476333618, "learning_rate": 8.243898362608811e-06, "loss": 0.07524495, "memory(GiB)": 13.7, "step": 33225, "train_speed(iter/s)": 1.529976 }, { "acc": 0.9824852, "epoch": 15.575345676119053, "grad_norm": 2.4609413146972656, "learning_rate": 8.243308459006053e-06, "loss": 0.10661006, "memory(GiB)": 13.7, "step": 33230, "train_speed(iter/s)": 1.529997 }, { "acc": 0.9927084, "epoch": 15.577689243027889, "grad_norm": 6.951370716094971, "learning_rate": 8.24271847745683e-06, "loss": 0.05026245, "memory(GiB)": 13.7, "step": 33235, "train_speed(iter/s)": 1.530008 }, { "acc": 0.96907787, "epoch": 15.580032809936723, "grad_norm": 6.4616475105285645, "learning_rate": 8.242128417975328e-06, "loss": 0.07837151, "memory(GiB)": 13.7, "step": 33240, "train_speed(iter/s)": 1.530011 }, { "acc": 0.97875004, "epoch": 15.58237637684556, "grad_norm": 1.8861932754516602, "learning_rate": 8.241538280575726e-06, "loss": 0.14125414, "memory(GiB)": 13.7, "step": 33245, "train_speed(iter/s)": 1.530019 }, { "acc": 0.99199409, "epoch": 15.584719943754394, "grad_norm": 2.5179340839385986, "learning_rate": 8.240948065272214e-06, "loss": 0.05746164, "memory(GiB)": 13.7, "step": 33250, "train_speed(iter/s)": 1.530035 }, { "acc": 0.98690472, "epoch": 15.58706351066323, "grad_norm": 11.185967445373535, "learning_rate": 8.240357772078977e-06, "loss": 0.08150123, "memory(GiB)": 13.7, "step": 33255, "train_speed(iter/s)": 1.530034 }, { "acc": 0.97785034, "epoch": 15.589407077572064, "grad_norm": 3.9985461235046387, "learning_rate": 8.239767401010198e-06, "loss": 0.11140813, "memory(GiB)": 13.7, "step": 33260, "train_speed(iter/s)": 1.530034 }, { "acc": 0.97957792, "epoch": 15.5917506444809, "grad_norm": 0.03325656056404114, "learning_rate": 8.239176952080078e-06, "loss": 0.0758269, "memory(GiB)": 13.7, "step": 33265, "train_speed(iter/s)": 1.530048 }, { "acc": 0.97515869, "epoch": 15.594094211389734, "grad_norm": 6.461987018585205, "learning_rate": 8.238586425302801e-06, "loss": 0.10214701, "memory(GiB)": 13.7, "step": 33270, "train_speed(iter/s)": 1.530054 }, { "acc": 0.97244053, "epoch": 15.59643777829857, "grad_norm": 9.33299732208252, "learning_rate": 8.237995820692565e-06, "loss": 0.14578156, "memory(GiB)": 13.7, "step": 33275, "train_speed(iter/s)": 1.530059 }, { "acc": 0.98770828, "epoch": 15.598781345207406, "grad_norm": 7.221110820770264, "learning_rate": 8.237405138263565e-06, "loss": 0.0577598, "memory(GiB)": 13.7, "step": 33280, "train_speed(iter/s)": 1.530076 }, { "acc": 0.99071426, "epoch": 15.60112491211624, "grad_norm": 5.763730049133301, "learning_rate": 8.23681437803e-06, "loss": 0.06340653, "memory(GiB)": 13.7, "step": 33285, "train_speed(iter/s)": 1.530088 }, { "acc": 0.97979164, "epoch": 15.603468479025077, "grad_norm": 0.24034534394741058, "learning_rate": 8.236223540006071e-06, "loss": 0.08508371, "memory(GiB)": 13.7, "step": 33290, "train_speed(iter/s)": 1.530087 }, { "acc": 0.99079857, "epoch": 15.605812045933911, "grad_norm": 3.0606133937835693, "learning_rate": 8.235632624205974e-06, "loss": 0.06328309, "memory(GiB)": 13.7, "step": 33295, "train_speed(iter/s)": 1.530092 }, { "acc": 0.97939482, "epoch": 15.608155612842747, "grad_norm": 25.38407325744629, "learning_rate": 8.235041630643922e-06, "loss": 0.12577889, "memory(GiB)": 13.7, "step": 33300, "train_speed(iter/s)": 1.53011 }, { "acc": 0.97875004, "epoch": 15.610499179751582, "grad_norm": 8.213727951049805, "learning_rate": 8.234450559334113e-06, "loss": 0.11604317, "memory(GiB)": 13.7, "step": 33305, "train_speed(iter/s)": 1.530122 }, { "acc": 0.97054625, "epoch": 15.612842746660418, "grad_norm": 6.7114667892456055, "learning_rate": 8.233859410290756e-06, "loss": 0.13832309, "memory(GiB)": 13.7, "step": 33310, "train_speed(iter/s)": 1.530136 }, { "acc": 0.96633472, "epoch": 15.615186313569252, "grad_norm": 10.47932243347168, "learning_rate": 8.233268183528064e-06, "loss": 0.10197325, "memory(GiB)": 13.7, "step": 33315, "train_speed(iter/s)": 1.530133 }, { "acc": 0.9760416, "epoch": 15.617529880478088, "grad_norm": 0.9377831220626831, "learning_rate": 8.232676879060241e-06, "loss": 0.09023795, "memory(GiB)": 13.7, "step": 33320, "train_speed(iter/s)": 1.530142 }, { "acc": 0.97585564, "epoch": 15.619873447386922, "grad_norm": 4.717864513397217, "learning_rate": 8.232085496901508e-06, "loss": 0.07436432, "memory(GiB)": 13.7, "step": 33325, "train_speed(iter/s)": 1.53014 }, { "acc": 0.98647728, "epoch": 15.622217014295758, "grad_norm": 4.4118452072143555, "learning_rate": 8.231494037066076e-06, "loss": 0.06548482, "memory(GiB)": 13.7, "step": 33330, "train_speed(iter/s)": 1.530148 }, { "acc": 0.96502523, "epoch": 15.624560581204593, "grad_norm": 5.923543930053711, "learning_rate": 8.23090249956816e-06, "loss": 0.08168821, "memory(GiB)": 13.7, "step": 33335, "train_speed(iter/s)": 1.530156 }, { "acc": 0.96756945, "epoch": 15.626904148113429, "grad_norm": 8.726611137390137, "learning_rate": 8.230310884421982e-06, "loss": 0.15996656, "memory(GiB)": 13.7, "step": 33340, "train_speed(iter/s)": 1.530159 }, { "acc": 0.97955046, "epoch": 15.629247715022263, "grad_norm": 10.888360977172852, "learning_rate": 8.229719191641762e-06, "loss": 0.05797088, "memory(GiB)": 13.7, "step": 33345, "train_speed(iter/s)": 1.530163 }, { "acc": 0.9874053, "epoch": 15.6315912819311, "grad_norm": 4.141604423522949, "learning_rate": 8.22912742124172e-06, "loss": 0.09243165, "memory(GiB)": 13.7, "step": 33350, "train_speed(iter/s)": 1.530155 }, { "acc": 0.98647814, "epoch": 15.633934848839935, "grad_norm": 3.4492616653442383, "learning_rate": 8.228535573236081e-06, "loss": 0.0682029, "memory(GiB)": 13.7, "step": 33355, "train_speed(iter/s)": 1.530169 }, { "acc": 0.98099365, "epoch": 15.63627841574877, "grad_norm": 5.858780860900879, "learning_rate": 8.227943647639073e-06, "loss": 0.06742801, "memory(GiB)": 13.7, "step": 33360, "train_speed(iter/s)": 1.530174 }, { "acc": 0.9840909, "epoch": 15.638621982657606, "grad_norm": 5.653415203094482, "learning_rate": 8.227351644464924e-06, "loss": 0.07407418, "memory(GiB)": 13.7, "step": 33365, "train_speed(iter/s)": 1.530179 }, { "acc": 0.97539072, "epoch": 15.64096554956644, "grad_norm": 4.195214748382568, "learning_rate": 8.22675956372786e-06, "loss": 0.11161622, "memory(GiB)": 13.7, "step": 33370, "train_speed(iter/s)": 1.530181 }, { "acc": 0.98615265, "epoch": 15.643309116475276, "grad_norm": 5.358078956604004, "learning_rate": 8.226167405442116e-06, "loss": 0.07435356, "memory(GiB)": 13.7, "step": 33375, "train_speed(iter/s)": 1.530186 }, { "acc": 0.98736115, "epoch": 15.64565268338411, "grad_norm": 2.3554770946502686, "learning_rate": 8.225575169621925e-06, "loss": 0.02902703, "memory(GiB)": 13.7, "step": 33380, "train_speed(iter/s)": 1.530196 }, { "acc": 0.96859379, "epoch": 15.647996250292946, "grad_norm": 3.4897592067718506, "learning_rate": 8.224982856281521e-06, "loss": 0.11035866, "memory(GiB)": 13.7, "step": 33385, "train_speed(iter/s)": 1.530202 }, { "acc": 0.98793564, "epoch": 15.65033981720178, "grad_norm": 2.1407318115234375, "learning_rate": 8.224390465435145e-06, "loss": 0.06367784, "memory(GiB)": 13.7, "step": 33390, "train_speed(iter/s)": 1.530216 }, { "acc": 0.98588066, "epoch": 15.652683384110617, "grad_norm": 0.45117998123168945, "learning_rate": 8.223797997097032e-06, "loss": 0.07056832, "memory(GiB)": 13.7, "step": 33395, "train_speed(iter/s)": 1.530225 }, { "acc": 0.98695889, "epoch": 15.655026951019451, "grad_norm": 5.651977062225342, "learning_rate": 8.223205451281422e-06, "loss": 0.08838753, "memory(GiB)": 13.7, "step": 33400, "train_speed(iter/s)": 1.530221 }, { "acc": 0.95814219, "epoch": 15.657370517928287, "grad_norm": 10.405949592590332, "learning_rate": 8.222612828002562e-06, "loss": 0.15311061, "memory(GiB)": 13.7, "step": 33405, "train_speed(iter/s)": 1.530234 }, { "acc": 0.96958332, "epoch": 15.659714084837121, "grad_norm": 7.995851993560791, "learning_rate": 8.222020127274696e-06, "loss": 0.23537693, "memory(GiB)": 13.7, "step": 33410, "train_speed(iter/s)": 1.530238 }, { "acc": 0.98726187, "epoch": 15.662057651745958, "grad_norm": 2.671295642852783, "learning_rate": 8.22142734911207e-06, "loss": 0.09075084, "memory(GiB)": 13.7, "step": 33415, "train_speed(iter/s)": 1.530244 }, { "acc": 0.98840275, "epoch": 15.664401218654792, "grad_norm": 3.586344003677368, "learning_rate": 8.220834493528931e-06, "loss": 0.08150055, "memory(GiB)": 13.7, "step": 33420, "train_speed(iter/s)": 1.530251 }, { "acc": 0.9762743, "epoch": 15.666744785563628, "grad_norm": 8.107171058654785, "learning_rate": 8.220241560539532e-06, "loss": 0.08393099, "memory(GiB)": 13.7, "step": 33425, "train_speed(iter/s)": 1.530254 }, { "acc": 0.97701397, "epoch": 15.669088352472464, "grad_norm": 2.879345178604126, "learning_rate": 8.219648550158123e-06, "loss": 0.10451453, "memory(GiB)": 13.7, "step": 33430, "train_speed(iter/s)": 1.530264 }, { "acc": 0.98094826, "epoch": 15.671431919381298, "grad_norm": 15.946455955505371, "learning_rate": 8.219055462398958e-06, "loss": 0.08222855, "memory(GiB)": 13.7, "step": 33435, "train_speed(iter/s)": 1.530266 }, { "acc": 0.97150879, "epoch": 15.673775486290134, "grad_norm": 5.794370174407959, "learning_rate": 8.218462297276295e-06, "loss": 0.14885671, "memory(GiB)": 13.7, "step": 33440, "train_speed(iter/s)": 1.530276 }, { "acc": 0.98522358, "epoch": 15.676119053198969, "grad_norm": 4.837802886962891, "learning_rate": 8.217869054804387e-06, "loss": 0.04786087, "memory(GiB)": 13.7, "step": 33445, "train_speed(iter/s)": 1.53028 }, { "acc": 0.98309031, "epoch": 15.678462620107805, "grad_norm": 13.255080223083496, "learning_rate": 8.217275734997499e-06, "loss": 0.12373235, "memory(GiB)": 13.7, "step": 33450, "train_speed(iter/s)": 1.530288 }, { "acc": 0.98062496, "epoch": 15.680806187016639, "grad_norm": 1.9875574111938477, "learning_rate": 8.216682337869889e-06, "loss": 0.08299319, "memory(GiB)": 13.7, "step": 33455, "train_speed(iter/s)": 1.53029 }, { "acc": 0.98300505, "epoch": 15.683149753925475, "grad_norm": 4.701588153839111, "learning_rate": 8.216088863435823e-06, "loss": 0.09213427, "memory(GiB)": 13.7, "step": 33460, "train_speed(iter/s)": 1.530302 }, { "acc": 0.98205814, "epoch": 15.68549332083431, "grad_norm": 5.505649566650391, "learning_rate": 8.215495311709562e-06, "loss": 0.08407223, "memory(GiB)": 13.7, "step": 33465, "train_speed(iter/s)": 1.530307 }, { "acc": 0.98538189, "epoch": 15.687836887743146, "grad_norm": 6.862853527069092, "learning_rate": 8.214901682705378e-06, "loss": 0.09514471, "memory(GiB)": 13.7, "step": 33470, "train_speed(iter/s)": 1.530307 }, { "acc": 0.9731102, "epoch": 15.69018045465198, "grad_norm": 6.099068641662598, "learning_rate": 8.21430797643754e-06, "loss": 0.10135072, "memory(GiB)": 13.7, "step": 33475, "train_speed(iter/s)": 1.530308 }, { "acc": 0.97221107, "epoch": 15.692524021560816, "grad_norm": 11.20064926147461, "learning_rate": 8.213714192920313e-06, "loss": 0.12177817, "memory(GiB)": 13.7, "step": 33480, "train_speed(iter/s)": 1.530334 }, { "acc": 0.97448492, "epoch": 15.69486758846965, "grad_norm": 31.966493606567383, "learning_rate": 8.213120332167972e-06, "loss": 0.1943197, "memory(GiB)": 13.7, "step": 33485, "train_speed(iter/s)": 1.530337 }, { "acc": 0.98193169, "epoch": 15.697211155378486, "grad_norm": 3.8262226581573486, "learning_rate": 8.212526394194796e-06, "loss": 0.0706671, "memory(GiB)": 13.7, "step": 33490, "train_speed(iter/s)": 1.530341 }, { "acc": 0.98061008, "epoch": 15.69955472228732, "grad_norm": 1.172873854637146, "learning_rate": 8.211932379015058e-06, "loss": 0.10801141, "memory(GiB)": 13.7, "step": 33495, "train_speed(iter/s)": 1.530348 }, { "acc": 0.97842264, "epoch": 15.701898289196157, "grad_norm": 5.631064414978027, "learning_rate": 8.211338286643032e-06, "loss": 0.08200157, "memory(GiB)": 13.7, "step": 33500, "train_speed(iter/s)": 1.53035 }, { "acc": 0.97475281, "epoch": 15.704241856104991, "grad_norm": 2.419621467590332, "learning_rate": 8.210744117093004e-06, "loss": 0.1045529, "memory(GiB)": 13.7, "step": 33505, "train_speed(iter/s)": 1.530361 }, { "acc": 0.98096657, "epoch": 15.706585423013827, "grad_norm": 5.920021057128906, "learning_rate": 8.210149870379254e-06, "loss": 0.08310446, "memory(GiB)": 13.7, "step": 33510, "train_speed(iter/s)": 1.530376 }, { "acc": 0.98047619, "epoch": 15.708928989922661, "grad_norm": 0.9845284223556519, "learning_rate": 8.209555546516066e-06, "loss": 0.11360154, "memory(GiB)": 13.7, "step": 33515, "train_speed(iter/s)": 1.530396 }, { "acc": 0.97334328, "epoch": 15.711272556831497, "grad_norm": 2.6523916721343994, "learning_rate": 8.208961145517725e-06, "loss": 0.12664857, "memory(GiB)": 13.7, "step": 33520, "train_speed(iter/s)": 1.530407 }, { "acc": 0.98331347, "epoch": 15.713616123740334, "grad_norm": 1.919640302658081, "learning_rate": 8.20836666739852e-06, "loss": 0.12109735, "memory(GiB)": 13.7, "step": 33525, "train_speed(iter/s)": 1.530412 }, { "acc": 0.9840539, "epoch": 15.715959690649168, "grad_norm": 3.364039897918701, "learning_rate": 8.207772112172737e-06, "loss": 0.0627835, "memory(GiB)": 13.7, "step": 33530, "train_speed(iter/s)": 1.530416 }, { "acc": 0.97919016, "epoch": 15.718303257558004, "grad_norm": 6.051022052764893, "learning_rate": 8.20717747985467e-06, "loss": 0.04482907, "memory(GiB)": 13.7, "step": 33535, "train_speed(iter/s)": 1.530434 }, { "acc": 0.97633934, "epoch": 15.720646824466838, "grad_norm": 4.072787761688232, "learning_rate": 8.20658277045861e-06, "loss": 0.13420457, "memory(GiB)": 13.7, "step": 33540, "train_speed(iter/s)": 1.530434 }, { "acc": 0.96303806, "epoch": 15.722990391375674, "grad_norm": 12.486966133117676, "learning_rate": 8.205987983998854e-06, "loss": 0.17174679, "memory(GiB)": 13.7, "step": 33545, "train_speed(iter/s)": 1.530448 }, { "acc": 0.97677422, "epoch": 15.725333958284509, "grad_norm": 6.007805347442627, "learning_rate": 8.205393120489698e-06, "loss": 0.129559, "memory(GiB)": 13.7, "step": 33550, "train_speed(iter/s)": 1.530468 }, { "acc": 0.98145828, "epoch": 15.727677525193345, "grad_norm": 0.2963337302207947, "learning_rate": 8.20479817994544e-06, "loss": 0.06271133, "memory(GiB)": 13.7, "step": 33555, "train_speed(iter/s)": 1.530465 }, { "acc": 0.9791667, "epoch": 15.730021092102179, "grad_norm": 5.8491291999816895, "learning_rate": 8.20420316238038e-06, "loss": 0.07880697, "memory(GiB)": 13.7, "step": 33560, "train_speed(iter/s)": 1.530469 }, { "acc": 0.97906437, "epoch": 15.732364659011015, "grad_norm": 4.506622791290283, "learning_rate": 8.203608067808822e-06, "loss": 0.10080447, "memory(GiB)": 13.7, "step": 33565, "train_speed(iter/s)": 1.530478 }, { "acc": 0.97379465, "epoch": 15.73470822591985, "grad_norm": 4.726933002471924, "learning_rate": 8.203012896245069e-06, "loss": 0.07639239, "memory(GiB)": 13.7, "step": 33570, "train_speed(iter/s)": 1.53048 }, { "acc": 0.98673143, "epoch": 15.737051792828685, "grad_norm": 2.181941270828247, "learning_rate": 8.202417647703426e-06, "loss": 0.09331034, "memory(GiB)": 13.7, "step": 33575, "train_speed(iter/s)": 1.530492 }, { "acc": 0.9760417, "epoch": 15.73939535973752, "grad_norm": 5.217173099517822, "learning_rate": 8.201822322198203e-06, "loss": 0.07045478, "memory(GiB)": 13.7, "step": 33580, "train_speed(iter/s)": 1.530491 }, { "acc": 0.97910852, "epoch": 15.741738926646356, "grad_norm": 0.35943540930747986, "learning_rate": 8.201226919743707e-06, "loss": 0.06968752, "memory(GiB)": 13.7, "step": 33585, "train_speed(iter/s)": 1.530494 }, { "acc": 0.96193457, "epoch": 15.74408249355519, "grad_norm": 6.54160213470459, "learning_rate": 8.200631440354252e-06, "loss": 0.12207493, "memory(GiB)": 13.7, "step": 33590, "train_speed(iter/s)": 1.530502 }, { "acc": 0.96359844, "epoch": 15.746426060464026, "grad_norm": 2.0054454803466797, "learning_rate": 8.200035884044152e-06, "loss": 0.0862518, "memory(GiB)": 13.7, "step": 33595, "train_speed(iter/s)": 1.530519 }, { "acc": 0.96607141, "epoch": 15.748769627372862, "grad_norm": 12.889423370361328, "learning_rate": 8.199440250827719e-06, "loss": 0.27654667, "memory(GiB)": 13.7, "step": 33600, "train_speed(iter/s)": 1.530528 }, { "acc": 0.98562336, "epoch": 15.751113194281697, "grad_norm": 0.035584546625614166, "learning_rate": 8.198844540719271e-06, "loss": 0.0846085, "memory(GiB)": 13.7, "step": 33605, "train_speed(iter/s)": 1.530538 }, { "acc": 0.96489582, "epoch": 15.753456761190533, "grad_norm": 6.30051326751709, "learning_rate": 8.19824875373313e-06, "loss": 0.1255861, "memory(GiB)": 13.7, "step": 33610, "train_speed(iter/s)": 1.530541 }, { "acc": 0.96944447, "epoch": 15.755800328099367, "grad_norm": 4.247738838195801, "learning_rate": 8.197652889883612e-06, "loss": 0.09171334, "memory(GiB)": 13.7, "step": 33615, "train_speed(iter/s)": 1.53055 }, { "acc": 0.96425047, "epoch": 15.758143895008203, "grad_norm": 93.13969421386719, "learning_rate": 8.197056949185044e-06, "loss": 0.11774445, "memory(GiB)": 13.7, "step": 33620, "train_speed(iter/s)": 1.530568 }, { "acc": 0.96903667, "epoch": 15.760487461917037, "grad_norm": 3.3069911003112793, "learning_rate": 8.196460931651746e-06, "loss": 0.09786891, "memory(GiB)": 13.7, "step": 33625, "train_speed(iter/s)": 1.530561 }, { "acc": 0.98028851, "epoch": 15.762831028825874, "grad_norm": 7.159064292907715, "learning_rate": 8.195864837298047e-06, "loss": 0.1110762, "memory(GiB)": 13.7, "step": 33630, "train_speed(iter/s)": 1.530572 }, { "acc": 0.97354164, "epoch": 15.765174595734708, "grad_norm": 4.145695686340332, "learning_rate": 8.195268666138277e-06, "loss": 0.13067604, "memory(GiB)": 13.7, "step": 33635, "train_speed(iter/s)": 1.530587 }, { "acc": 0.98847599, "epoch": 15.767518162643544, "grad_norm": 3.160022020339966, "learning_rate": 8.194672418186764e-06, "loss": 0.06197302, "memory(GiB)": 13.7, "step": 33640, "train_speed(iter/s)": 1.530592 }, { "acc": 0.96411285, "epoch": 15.769861729552378, "grad_norm": 6.873016834259033, "learning_rate": 8.194076093457837e-06, "loss": 0.13672708, "memory(GiB)": 13.7, "step": 33645, "train_speed(iter/s)": 1.530606 }, { "acc": 0.97269468, "epoch": 15.772205296461214, "grad_norm": 1.0795081853866577, "learning_rate": 8.193479691965836e-06, "loss": 0.11762296, "memory(GiB)": 13.7, "step": 33650, "train_speed(iter/s)": 1.530607 }, { "acc": 0.98699999, "epoch": 15.774548863370049, "grad_norm": 1.41227388381958, "learning_rate": 8.192883213725089e-06, "loss": 0.05900723, "memory(GiB)": 13.7, "step": 33655, "train_speed(iter/s)": 1.530623 }, { "acc": 0.98745041, "epoch": 15.776892430278885, "grad_norm": 3.002852439880371, "learning_rate": 8.192286658749942e-06, "loss": 0.03980519, "memory(GiB)": 13.7, "step": 33660, "train_speed(iter/s)": 1.530638 }, { "acc": 0.98824406, "epoch": 15.779235997187719, "grad_norm": 2.9667067527770996, "learning_rate": 8.191690027054726e-06, "loss": 0.06249572, "memory(GiB)": 13.7, "step": 33665, "train_speed(iter/s)": 1.53064 }, { "acc": 0.97818813, "epoch": 15.781579564096555, "grad_norm": 2.6217198371887207, "learning_rate": 8.191093318653787e-06, "loss": 0.13770608, "memory(GiB)": 13.7, "step": 33670, "train_speed(iter/s)": 1.530646 }, { "acc": 0.97008018, "epoch": 15.783923131005391, "grad_norm": 3.3127448558807373, "learning_rate": 8.190496533561466e-06, "loss": 0.10258358, "memory(GiB)": 13.7, "step": 33675, "train_speed(iter/s)": 1.530675 }, { "acc": 0.96541624, "epoch": 15.786266697914225, "grad_norm": 11.18585205078125, "learning_rate": 8.189899671792107e-06, "loss": 0.16850219, "memory(GiB)": 13.7, "step": 33680, "train_speed(iter/s)": 1.53068 }, { "acc": 0.97195177, "epoch": 15.788610264823062, "grad_norm": 6.388521194458008, "learning_rate": 8.189302733360059e-06, "loss": 0.14647305, "memory(GiB)": 13.7, "step": 33685, "train_speed(iter/s)": 1.530682 }, { "acc": 0.98133926, "epoch": 15.790953831731896, "grad_norm": 4.7837300300598145, "learning_rate": 8.188705718279669e-06, "loss": 0.04768805, "memory(GiB)": 13.7, "step": 33690, "train_speed(iter/s)": 1.530698 }, { "acc": 0.97390623, "epoch": 15.793297398640732, "grad_norm": 4.375538349151611, "learning_rate": 8.18810862656529e-06, "loss": 0.10783396, "memory(GiB)": 13.7, "step": 33695, "train_speed(iter/s)": 1.530711 }, { "acc": 0.9979167, "epoch": 15.795640965549566, "grad_norm": 0.09006400406360626, "learning_rate": 8.18751145823127e-06, "loss": 0.03303746, "memory(GiB)": 13.7, "step": 33700, "train_speed(iter/s)": 1.530717 }, { "acc": 0.99499454, "epoch": 15.797984532458402, "grad_norm": 3.9078593254089355, "learning_rate": 8.186914213291964e-06, "loss": 0.05266752, "memory(GiB)": 13.7, "step": 33705, "train_speed(iter/s)": 1.530709 }, { "acc": 0.98819132, "epoch": 15.800328099367237, "grad_norm": 2.6888043880462646, "learning_rate": 8.186316891761729e-06, "loss": 0.08565442, "memory(GiB)": 13.7, "step": 33710, "train_speed(iter/s)": 1.530719 }, { "acc": 0.97696428, "epoch": 15.802671666276073, "grad_norm": 4.647353649139404, "learning_rate": 8.18571949365492e-06, "loss": 0.10362811, "memory(GiB)": 13.7, "step": 33715, "train_speed(iter/s)": 1.530737 }, { "acc": 0.97227182, "epoch": 15.805015233184907, "grad_norm": 6.412948131561279, "learning_rate": 8.185122018985902e-06, "loss": 0.14203432, "memory(GiB)": 13.7, "step": 33720, "train_speed(iter/s)": 1.530751 }, { "acc": 0.97279758, "epoch": 15.807358800093743, "grad_norm": 3.869640588760376, "learning_rate": 8.184524467769032e-06, "loss": 0.11673136, "memory(GiB)": 13.7, "step": 33725, "train_speed(iter/s)": 1.530758 }, { "acc": 0.98447418, "epoch": 15.809702367002577, "grad_norm": 7.314418315887451, "learning_rate": 8.183926840018675e-06, "loss": 0.06058142, "memory(GiB)": 13.7, "step": 33730, "train_speed(iter/s)": 1.530764 }, { "acc": 0.96807289, "epoch": 15.812045933911413, "grad_norm": 2.189037322998047, "learning_rate": 8.183329135749193e-06, "loss": 0.16344794, "memory(GiB)": 13.7, "step": 33735, "train_speed(iter/s)": 1.530764 }, { "acc": 0.98270836, "epoch": 15.814389500820248, "grad_norm": 60.75065994262695, "learning_rate": 8.182731354974956e-06, "loss": 0.05748336, "memory(GiB)": 13.7, "step": 33740, "train_speed(iter/s)": 1.53077 }, { "acc": 0.97441921, "epoch": 15.816733067729084, "grad_norm": 3.406919002532959, "learning_rate": 8.182133497710332e-06, "loss": 0.14736397, "memory(GiB)": 13.7, "step": 33745, "train_speed(iter/s)": 1.53078 }, { "acc": 0.98712797, "epoch": 15.819076634637918, "grad_norm": 2.2657742500305176, "learning_rate": 8.18153556396969e-06, "loss": 0.05509972, "memory(GiB)": 13.7, "step": 33750, "train_speed(iter/s)": 1.530784 }, { "acc": 0.97833672, "epoch": 15.821420201546754, "grad_norm": 3.4423909187316895, "learning_rate": 8.180937553767406e-06, "loss": 0.09378448, "memory(GiB)": 13.7, "step": 33755, "train_speed(iter/s)": 1.530801 }, { "acc": 0.98078365, "epoch": 15.823763768455589, "grad_norm": 3.2899909019470215, "learning_rate": 8.180339467117849e-06, "loss": 0.08615562, "memory(GiB)": 13.7, "step": 33760, "train_speed(iter/s)": 1.530811 }, { "acc": 0.98447914, "epoch": 15.826107335364425, "grad_norm": 4.272223949432373, "learning_rate": 8.1797413040354e-06, "loss": 0.05687937, "memory(GiB)": 13.7, "step": 33765, "train_speed(iter/s)": 1.530807 }, { "acc": 0.98354168, "epoch": 15.82845090227326, "grad_norm": 2.702359676361084, "learning_rate": 8.179143064534433e-06, "loss": 0.05060191, "memory(GiB)": 13.7, "step": 33770, "train_speed(iter/s)": 1.530807 }, { "acc": 0.97193451, "epoch": 15.830794469182095, "grad_norm": 3.5306408405303955, "learning_rate": 8.178544748629328e-06, "loss": 0.16764947, "memory(GiB)": 13.7, "step": 33775, "train_speed(iter/s)": 1.530806 }, { "acc": 0.9885416, "epoch": 15.833138036090931, "grad_norm": 3.3757591247558594, "learning_rate": 8.177946356334471e-06, "loss": 0.05668638, "memory(GiB)": 13.7, "step": 33780, "train_speed(iter/s)": 1.530809 }, { "acc": 0.97234783, "epoch": 15.835481602999765, "grad_norm": 5.179317951202393, "learning_rate": 8.177347887664241e-06, "loss": 0.11554269, "memory(GiB)": 13.7, "step": 33785, "train_speed(iter/s)": 1.53082 }, { "acc": 0.97466345, "epoch": 15.837825169908601, "grad_norm": 0.19121497869491577, "learning_rate": 8.176749342633026e-06, "loss": 0.10572729, "memory(GiB)": 13.7, "step": 33790, "train_speed(iter/s)": 1.530811 }, { "acc": 0.97455349, "epoch": 15.840168736817436, "grad_norm": 6.763711452484131, "learning_rate": 8.17615072125521e-06, "loss": 0.07679028, "memory(GiB)": 13.7, "step": 33795, "train_speed(iter/s)": 1.530822 }, { "acc": 0.97383013, "epoch": 15.842512303726272, "grad_norm": 8.821433067321777, "learning_rate": 8.175552023545184e-06, "loss": 0.11511335, "memory(GiB)": 13.7, "step": 33800, "train_speed(iter/s)": 1.530824 }, { "acc": 0.9833333, "epoch": 15.844855870635106, "grad_norm": 4.438462734222412, "learning_rate": 8.174953249517338e-06, "loss": 0.04138583, "memory(GiB)": 13.7, "step": 33805, "train_speed(iter/s)": 1.53083 }, { "acc": 0.979072, "epoch": 15.847199437543942, "grad_norm": 10.406952857971191, "learning_rate": 8.174354399186066e-06, "loss": 0.08039845, "memory(GiB)": 13.7, "step": 33810, "train_speed(iter/s)": 1.530839 }, { "acc": 0.9764782, "epoch": 15.849543004452777, "grad_norm": 7.275729656219482, "learning_rate": 8.17375547256576e-06, "loss": 0.09515657, "memory(GiB)": 13.7, "step": 33815, "train_speed(iter/s)": 1.530849 }, { "acc": 0.96889877, "epoch": 15.851886571361613, "grad_norm": 8.605268478393555, "learning_rate": 8.173156469670816e-06, "loss": 0.16812732, "memory(GiB)": 13.7, "step": 33820, "train_speed(iter/s)": 1.530857 }, { "acc": 0.9790659, "epoch": 15.854230138270447, "grad_norm": 5.287258148193359, "learning_rate": 8.172557390515634e-06, "loss": 0.11271574, "memory(GiB)": 13.7, "step": 33825, "train_speed(iter/s)": 1.530862 }, { "acc": 0.98083334, "epoch": 15.856573705179283, "grad_norm": 4.295576095581055, "learning_rate": 8.171958235114615e-06, "loss": 0.12749047, "memory(GiB)": 13.7, "step": 33830, "train_speed(iter/s)": 1.530874 }, { "acc": 0.9822917, "epoch": 15.858917272088117, "grad_norm": 7.2233123779296875, "learning_rate": 8.171359003482158e-06, "loss": 0.10821249, "memory(GiB)": 13.7, "step": 33835, "train_speed(iter/s)": 1.530885 }, { "acc": 0.96875, "epoch": 15.861260838996953, "grad_norm": 8.284433364868164, "learning_rate": 8.170759695632668e-06, "loss": 0.09946694, "memory(GiB)": 13.7, "step": 33840, "train_speed(iter/s)": 1.530894 }, { "acc": 0.98687496, "epoch": 15.86360440590579, "grad_norm": 1.4665793180465698, "learning_rate": 8.170160311580549e-06, "loss": 0.09990413, "memory(GiB)": 13.7, "step": 33845, "train_speed(iter/s)": 1.530902 }, { "acc": 0.97979164, "epoch": 15.865947972814624, "grad_norm": 5.512117385864258, "learning_rate": 8.169560851340213e-06, "loss": 0.09894555, "memory(GiB)": 13.7, "step": 33850, "train_speed(iter/s)": 1.530908 }, { "acc": 0.98083334, "epoch": 15.86829153972346, "grad_norm": 5.7166852951049805, "learning_rate": 8.168961314926061e-06, "loss": 0.06706718, "memory(GiB)": 13.7, "step": 33855, "train_speed(iter/s)": 1.530917 }, { "acc": 0.97557545, "epoch": 15.870635106632294, "grad_norm": 4.064847946166992, "learning_rate": 8.16836170235251e-06, "loss": 0.11642888, "memory(GiB)": 13.7, "step": 33860, "train_speed(iter/s)": 1.530923 }, { "acc": 0.97958336, "epoch": 15.87297867354113, "grad_norm": 4.73087215423584, "learning_rate": 8.16776201363397e-06, "loss": 0.07650501, "memory(GiB)": 13.7, "step": 33865, "train_speed(iter/s)": 1.530912 }, { "acc": 0.98049107, "epoch": 15.875322240449965, "grad_norm": 5.501094341278076, "learning_rate": 8.167162248784857e-06, "loss": 0.12113124, "memory(GiB)": 13.7, "step": 33870, "train_speed(iter/s)": 1.530919 }, { "acc": 0.9760417, "epoch": 15.8776658073588, "grad_norm": 6.3388190269470215, "learning_rate": 8.166562407819588e-06, "loss": 0.07454684, "memory(GiB)": 13.7, "step": 33875, "train_speed(iter/s)": 1.530924 }, { "acc": 0.97181549, "epoch": 15.880009374267635, "grad_norm": 5.407827377319336, "learning_rate": 8.165962490752577e-06, "loss": 0.141105, "memory(GiB)": 13.7, "step": 33880, "train_speed(iter/s)": 1.530929 }, { "acc": 0.99125004, "epoch": 15.882352941176471, "grad_norm": 2.799851179122925, "learning_rate": 8.16536249759825e-06, "loss": 0.0269696, "memory(GiB)": 13.7, "step": 33885, "train_speed(iter/s)": 1.530945 }, { "acc": 0.97592421, "epoch": 15.884696508085305, "grad_norm": 0.10092326253652573, "learning_rate": 8.164762428371024e-06, "loss": 0.12540121, "memory(GiB)": 13.7, "step": 33890, "train_speed(iter/s)": 1.530954 }, { "acc": 0.97835388, "epoch": 15.887040074994141, "grad_norm": 1.246522307395935, "learning_rate": 8.164162283085326e-06, "loss": 0.05990705, "memory(GiB)": 13.7, "step": 33895, "train_speed(iter/s)": 1.53096 }, { "acc": 0.98892546, "epoch": 15.889383641902976, "grad_norm": 3.427255153656006, "learning_rate": 8.163562061755582e-06, "loss": 0.02114592, "memory(GiB)": 13.7, "step": 33900, "train_speed(iter/s)": 1.530953 }, { "acc": 0.97854385, "epoch": 15.891727208811812, "grad_norm": 4.097299575805664, "learning_rate": 8.162961764396214e-06, "loss": 0.11474195, "memory(GiB)": 13.7, "step": 33905, "train_speed(iter/s)": 1.530951 }, { "acc": 0.9666666, "epoch": 15.894070775720646, "grad_norm": 3.2447760105133057, "learning_rate": 8.162361391021655e-06, "loss": 0.13093344, "memory(GiB)": 13.7, "step": 33910, "train_speed(iter/s)": 1.530953 }, { "acc": 0.97321272, "epoch": 15.896414342629482, "grad_norm": 6.039539813995361, "learning_rate": 8.161760941646337e-06, "loss": 0.13052238, "memory(GiB)": 13.7, "step": 33915, "train_speed(iter/s)": 1.530967 }, { "acc": 0.97768736, "epoch": 15.898757909538318, "grad_norm": 3.0282371044158936, "learning_rate": 8.16116041628469e-06, "loss": 0.06704711, "memory(GiB)": 13.7, "step": 33920, "train_speed(iter/s)": 1.530976 }, { "acc": 0.98119049, "epoch": 15.901101476447153, "grad_norm": 5.289542198181152, "learning_rate": 8.160559814951151e-06, "loss": 0.09817747, "memory(GiB)": 13.7, "step": 33925, "train_speed(iter/s)": 1.530991 }, { "acc": 0.9760416, "epoch": 15.903445043355989, "grad_norm": 7.092075347900391, "learning_rate": 8.159959137660156e-06, "loss": 0.13653669, "memory(GiB)": 13.7, "step": 33930, "train_speed(iter/s)": 1.530992 }, { "acc": 0.99011364, "epoch": 15.905788610264823, "grad_norm": 2.3421709537506104, "learning_rate": 8.159358384426144e-06, "loss": 0.0688725, "memory(GiB)": 13.7, "step": 33935, "train_speed(iter/s)": 1.530987 }, { "acc": 0.97687712, "epoch": 15.908132177173659, "grad_norm": 1.6452362537384033, "learning_rate": 8.158757555263553e-06, "loss": 0.11457908, "memory(GiB)": 13.7, "step": 33940, "train_speed(iter/s)": 1.530986 }, { "acc": 0.98320885, "epoch": 15.910475744082493, "grad_norm": 2.64475417137146, "learning_rate": 8.158156650186826e-06, "loss": 0.10254625, "memory(GiB)": 13.7, "step": 33945, "train_speed(iter/s)": 1.530988 }, { "acc": 0.98763313, "epoch": 15.91281931099133, "grad_norm": 6.493328094482422, "learning_rate": 8.157555669210408e-06, "loss": 0.05925907, "memory(GiB)": 13.7, "step": 33950, "train_speed(iter/s)": 1.530989 }, { "acc": 0.98187504, "epoch": 15.915162877900164, "grad_norm": 4.995184898376465, "learning_rate": 8.156954612348741e-06, "loss": 0.09347796, "memory(GiB)": 13.7, "step": 33955, "train_speed(iter/s)": 1.530998 }, { "acc": 0.98893766, "epoch": 15.917506444809, "grad_norm": 2.262054681777954, "learning_rate": 8.156353479616277e-06, "loss": 0.07475744, "memory(GiB)": 13.7, "step": 33960, "train_speed(iter/s)": 1.531005 }, { "acc": 0.98946428, "epoch": 15.919850011717834, "grad_norm": 0.025147635489702225, "learning_rate": 8.155752271027461e-06, "loss": 0.03522244, "memory(GiB)": 13.7, "step": 33965, "train_speed(iter/s)": 1.531003 }, { "acc": 0.97656002, "epoch": 15.92219357862667, "grad_norm": 6.225302219390869, "learning_rate": 8.155150986596748e-06, "loss": 0.10397296, "memory(GiB)": 13.7, "step": 33970, "train_speed(iter/s)": 1.531014 }, { "acc": 0.98341227, "epoch": 15.924537145535504, "grad_norm": 5.633376121520996, "learning_rate": 8.15454962633859e-06, "loss": 0.09811634, "memory(GiB)": 13.7, "step": 33975, "train_speed(iter/s)": 1.531028 }, { "acc": 0.96492662, "epoch": 15.92688071244434, "grad_norm": 9.576289176940918, "learning_rate": 8.153948190267438e-06, "loss": 0.18121305, "memory(GiB)": 13.7, "step": 33980, "train_speed(iter/s)": 1.531032 }, { "acc": 0.97446346, "epoch": 15.929224279353175, "grad_norm": 4.6526408195495605, "learning_rate": 8.153346678397754e-06, "loss": 0.1264514, "memory(GiB)": 13.7, "step": 33985, "train_speed(iter/s)": 1.531025 }, { "acc": 0.96990814, "epoch": 15.931567846262011, "grad_norm": 7.474478721618652, "learning_rate": 8.152745090743992e-06, "loss": 0.11505706, "memory(GiB)": 13.7, "step": 33990, "train_speed(iter/s)": 1.531026 }, { "acc": 0.98160963, "epoch": 15.933911413170845, "grad_norm": 5.776485919952393, "learning_rate": 8.152143427320614e-06, "loss": 0.10098128, "memory(GiB)": 13.7, "step": 33995, "train_speed(iter/s)": 1.531039 }, { "acc": 0.98611107, "epoch": 15.936254980079681, "grad_norm": 2.5417518615722656, "learning_rate": 8.151541688142082e-06, "loss": 0.1038383, "memory(GiB)": 13.7, "step": 34000, "train_speed(iter/s)": 1.53105 }, { "acc": 0.9739583, "epoch": 15.938598546988516, "grad_norm": 5.762083053588867, "learning_rate": 8.15093987322286e-06, "loss": 0.09581477, "memory(GiB)": 13.7, "step": 34005, "train_speed(iter/s)": 1.53106 }, { "acc": 0.97374992, "epoch": 15.940942113897352, "grad_norm": 2.0447826385498047, "learning_rate": 8.150337982577416e-06, "loss": 0.08189223, "memory(GiB)": 13.7, "step": 34010, "train_speed(iter/s)": 1.531067 }, { "acc": 0.98065357, "epoch": 15.943285680806188, "grad_norm": 2.4039106369018555, "learning_rate": 8.149736016220213e-06, "loss": 0.07453883, "memory(GiB)": 13.7, "step": 34015, "train_speed(iter/s)": 1.531063 }, { "acc": 0.96916132, "epoch": 15.945629247715022, "grad_norm": 4.318210601806641, "learning_rate": 8.149133974165721e-06, "loss": 0.14951804, "memory(GiB)": 13.7, "step": 34020, "train_speed(iter/s)": 1.531072 }, { "acc": 0.9926136, "epoch": 15.947972814623858, "grad_norm": 4.415242671966553, "learning_rate": 8.148531856428413e-06, "loss": 0.06100228, "memory(GiB)": 13.7, "step": 34025, "train_speed(iter/s)": 1.531086 }, { "acc": 0.98490524, "epoch": 15.950316381532692, "grad_norm": 4.667230129241943, "learning_rate": 8.14792966302276e-06, "loss": 0.05536446, "memory(GiB)": 13.7, "step": 34030, "train_speed(iter/s)": 1.531098 }, { "acc": 0.96978168, "epoch": 15.952659948441529, "grad_norm": 0.030364954844117165, "learning_rate": 8.14732739396324e-06, "loss": 0.15521958, "memory(GiB)": 13.7, "step": 34035, "train_speed(iter/s)": 1.531108 }, { "acc": 0.98538189, "epoch": 15.955003515350363, "grad_norm": 4.082118034362793, "learning_rate": 8.146725049264328e-06, "loss": 0.09463315, "memory(GiB)": 13.7, "step": 34040, "train_speed(iter/s)": 1.531097 }, { "acc": 0.97359619, "epoch": 15.957347082259199, "grad_norm": 3.7117042541503906, "learning_rate": 8.146122628940501e-06, "loss": 0.11709645, "memory(GiB)": 13.7, "step": 34045, "train_speed(iter/s)": 1.531105 }, { "acc": 0.97653103, "epoch": 15.959690649168033, "grad_norm": 0.11994272470474243, "learning_rate": 8.14552013300624e-06, "loss": 0.05615819, "memory(GiB)": 13.7, "step": 34050, "train_speed(iter/s)": 1.531112 }, { "acc": 0.97718134, "epoch": 15.96203421607687, "grad_norm": 4.584234237670898, "learning_rate": 8.144917561476027e-06, "loss": 0.10990046, "memory(GiB)": 13.7, "step": 34055, "train_speed(iter/s)": 1.531109 }, { "acc": 0.98883934, "epoch": 15.964377782985704, "grad_norm": 4.800717830657959, "learning_rate": 8.144314914364348e-06, "loss": 0.05429015, "memory(GiB)": 13.7, "step": 34060, "train_speed(iter/s)": 1.531113 }, { "acc": 0.99636173, "epoch": 15.96672134989454, "grad_norm": 2.261181116104126, "learning_rate": 8.143712191685686e-06, "loss": 0.01247025, "memory(GiB)": 13.7, "step": 34065, "train_speed(iter/s)": 1.531118 }, { "acc": 0.97749996, "epoch": 15.969064916803374, "grad_norm": 0.4422866702079773, "learning_rate": 8.143109393454525e-06, "loss": 0.09747854, "memory(GiB)": 13.7, "step": 34070, "train_speed(iter/s)": 1.531133 }, { "acc": 0.98363094, "epoch": 15.97140848371221, "grad_norm": 3.461470127105713, "learning_rate": 8.142506519685364e-06, "loss": 0.07541933, "memory(GiB)": 13.7, "step": 34075, "train_speed(iter/s)": 1.531139 }, { "acc": 0.97909718, "epoch": 15.973752050621044, "grad_norm": 7.27617883682251, "learning_rate": 8.141903570392687e-06, "loss": 0.07320507, "memory(GiB)": 13.7, "step": 34080, "train_speed(iter/s)": 1.531146 }, { "acc": 0.985742, "epoch": 15.97609561752988, "grad_norm": 1.2412817478179932, "learning_rate": 8.141300545590987e-06, "loss": 0.07572184, "memory(GiB)": 13.7, "step": 34085, "train_speed(iter/s)": 1.531143 }, { "acc": 0.96472225, "epoch": 15.978439184438717, "grad_norm": 3.3498799800872803, "learning_rate": 8.14069744529476e-06, "loss": 0.10239601, "memory(GiB)": 13.7, "step": 34090, "train_speed(iter/s)": 1.531137 }, { "acc": 0.99100876, "epoch": 15.98078275134755, "grad_norm": 3.7904701232910156, "learning_rate": 8.140094269518503e-06, "loss": 0.06021954, "memory(GiB)": 13.7, "step": 34095, "train_speed(iter/s)": 1.531149 }, { "acc": 0.97479162, "epoch": 15.983126318256387, "grad_norm": 5.710752487182617, "learning_rate": 8.139491018276715e-06, "loss": 0.14113548, "memory(GiB)": 13.7, "step": 34100, "train_speed(iter/s)": 1.53115 }, { "acc": 0.9676754, "epoch": 15.985469885165221, "grad_norm": 7.359822750091553, "learning_rate": 8.138887691583896e-06, "loss": 0.14997182, "memory(GiB)": 13.7, "step": 34105, "train_speed(iter/s)": 1.53116 }, { "acc": 0.98500004, "epoch": 15.987813452074057, "grad_norm": 0.49598023295402527, "learning_rate": 8.138284289454547e-06, "loss": 0.06556222, "memory(GiB)": 13.7, "step": 34110, "train_speed(iter/s)": 1.531178 }, { "acc": 0.98520832, "epoch": 15.990157018982892, "grad_norm": 7.393469333648682, "learning_rate": 8.137680811903171e-06, "loss": 0.06240608, "memory(GiB)": 13.7, "step": 34115, "train_speed(iter/s)": 1.531175 }, { "acc": 0.97486858, "epoch": 15.992500585891728, "grad_norm": 6.260555744171143, "learning_rate": 8.137077258944275e-06, "loss": 0.12593496, "memory(GiB)": 13.7, "step": 34120, "train_speed(iter/s)": 1.53118 }, { "acc": 0.98934975, "epoch": 15.994844152800562, "grad_norm": 2.838833808898926, "learning_rate": 8.136473630592367e-06, "loss": 0.08069693, "memory(GiB)": 13.7, "step": 34125, "train_speed(iter/s)": 1.531181 }, { "acc": 0.97786865, "epoch": 15.997187719709398, "grad_norm": 5.5510711669921875, "learning_rate": 8.135869926861955e-06, "loss": 0.12224972, "memory(GiB)": 13.7, "step": 34130, "train_speed(iter/s)": 1.531191 }, { "acc": 0.98354168, "epoch": 15.999531286618232, "grad_norm": 2.5239624977111816, "learning_rate": 8.135266147767549e-06, "loss": 0.05442886, "memory(GiB)": 13.7, "step": 34135, "train_speed(iter/s)": 1.531182 }, { "acc": 0.96479168, "epoch": 16.001874853527067, "grad_norm": 2.668348550796509, "learning_rate": 8.134662293323665e-06, "loss": 0.12138181, "memory(GiB)": 13.7, "step": 34140, "train_speed(iter/s)": 1.531147 }, { "acc": 0.96341476, "epoch": 16.004218420435905, "grad_norm": 10.365741729736328, "learning_rate": 8.134058363544817e-06, "loss": 0.19906974, "memory(GiB)": 13.7, "step": 34145, "train_speed(iter/s)": 1.531154 }, { "acc": 0.98351192, "epoch": 16.00656198734474, "grad_norm": 2.0210323333740234, "learning_rate": 8.133454358445517e-06, "loss": 0.06433442, "memory(GiB)": 13.7, "step": 34150, "train_speed(iter/s)": 1.531154 }, { "acc": 0.98050594, "epoch": 16.008905554253573, "grad_norm": 5.705018043518066, "learning_rate": 8.13285027804029e-06, "loss": 0.04752081, "memory(GiB)": 13.7, "step": 34155, "train_speed(iter/s)": 1.531156 }, { "acc": 0.98916664, "epoch": 16.011249121162408, "grad_norm": 0.01785924658179283, "learning_rate": 8.13224612234365e-06, "loss": 0.03863907, "memory(GiB)": 13.7, "step": 34160, "train_speed(iter/s)": 1.53117 }, { "acc": 0.98704491, "epoch": 16.013592688071245, "grad_norm": 1.072129726409912, "learning_rate": 8.131641891370125e-06, "loss": 0.10942142, "memory(GiB)": 13.7, "step": 34165, "train_speed(iter/s)": 1.531175 }, { "acc": 0.96219692, "epoch": 16.01593625498008, "grad_norm": 9.884819030761719, "learning_rate": 8.131037585134234e-06, "loss": 0.12085737, "memory(GiB)": 13.7, "step": 34170, "train_speed(iter/s)": 1.531184 }, { "acc": 0.98322916, "epoch": 16.018279821888914, "grad_norm": 2.4243736267089844, "learning_rate": 8.130433203650505e-06, "loss": 0.06666981, "memory(GiB)": 13.7, "step": 34175, "train_speed(iter/s)": 1.531196 }, { "acc": 0.9740366, "epoch": 16.020623388797752, "grad_norm": 3.1275978088378906, "learning_rate": 8.129828746933463e-06, "loss": 0.11550815, "memory(GiB)": 13.7, "step": 34180, "train_speed(iter/s)": 1.531199 }, { "acc": 0.97498512, "epoch": 16.022966955706586, "grad_norm": 2.792703866958618, "learning_rate": 8.12922421499764e-06, "loss": 0.06542962, "memory(GiB)": 13.7, "step": 34185, "train_speed(iter/s)": 1.53121 }, { "acc": 0.98786869, "epoch": 16.02531052261542, "grad_norm": 5.749772071838379, "learning_rate": 8.128619607857564e-06, "loss": 0.06152383, "memory(GiB)": 13.7, "step": 34190, "train_speed(iter/s)": 1.531213 }, { "acc": 0.97476196, "epoch": 16.027654089524255, "grad_norm": 5.863595485687256, "learning_rate": 8.128014925527769e-06, "loss": 0.11786451, "memory(GiB)": 13.7, "step": 34195, "train_speed(iter/s)": 1.53124 }, { "acc": 0.98585072, "epoch": 16.029997656433093, "grad_norm": 0.8924685716629028, "learning_rate": 8.127410168022792e-06, "loss": 0.03488725, "memory(GiB)": 13.7, "step": 34200, "train_speed(iter/s)": 1.531232 }, { "acc": 0.98123512, "epoch": 16.032341223341927, "grad_norm": 11.025349617004395, "learning_rate": 8.126805335357166e-06, "loss": 0.11483294, "memory(GiB)": 13.7, "step": 34205, "train_speed(iter/s)": 1.531237 }, { "acc": 0.97468748, "epoch": 16.03468479025076, "grad_norm": 4.0895490646362305, "learning_rate": 8.12620042754543e-06, "loss": 0.08623388, "memory(GiB)": 13.7, "step": 34210, "train_speed(iter/s)": 1.53124 }, { "acc": 0.98529768, "epoch": 16.037028357159596, "grad_norm": 0.1761837601661682, "learning_rate": 8.125595444602124e-06, "loss": 0.08891083, "memory(GiB)": 13.7, "step": 34215, "train_speed(iter/s)": 1.531234 }, { "acc": 0.98687496, "epoch": 16.039371924068433, "grad_norm": 0.1796931028366089, "learning_rate": 8.124990386541792e-06, "loss": 0.04231497, "memory(GiB)": 13.7, "step": 34220, "train_speed(iter/s)": 1.53124 }, { "acc": 0.98909817, "epoch": 16.041715490977268, "grad_norm": 0.3474147915840149, "learning_rate": 8.124385253378976e-06, "loss": 0.06921636, "memory(GiB)": 13.7, "step": 34225, "train_speed(iter/s)": 1.531252 }, { "acc": 0.98206844, "epoch": 16.044059057886102, "grad_norm": 1.2263638973236084, "learning_rate": 8.12378004512822e-06, "loss": 0.09125011, "memory(GiB)": 13.7, "step": 34230, "train_speed(iter/s)": 1.531249 }, { "acc": 0.96702385, "epoch": 16.046402624794936, "grad_norm": 6.944718360900879, "learning_rate": 8.123174761804073e-06, "loss": 0.146574, "memory(GiB)": 13.7, "step": 34235, "train_speed(iter/s)": 1.531247 }, { "acc": 0.99363976, "epoch": 16.048746191703774, "grad_norm": 0.039395324885845184, "learning_rate": 8.122569403421084e-06, "loss": 0.055925, "memory(GiB)": 13.7, "step": 34240, "train_speed(iter/s)": 1.53126 }, { "acc": 0.9825839, "epoch": 16.05108975861261, "grad_norm": 7.624049186706543, "learning_rate": 8.121963969993801e-06, "loss": 0.07634333, "memory(GiB)": 13.7, "step": 34245, "train_speed(iter/s)": 1.531267 }, { "acc": 0.98275242, "epoch": 16.053433325521443, "grad_norm": 5.115231990814209, "learning_rate": 8.121358461536782e-06, "loss": 0.09253109, "memory(GiB)": 13.7, "step": 34250, "train_speed(iter/s)": 1.531264 }, { "acc": 0.98631945, "epoch": 16.05577689243028, "grad_norm": 4.191227436065674, "learning_rate": 8.120752878064578e-06, "loss": 0.06153847, "memory(GiB)": 13.7, "step": 34255, "train_speed(iter/s)": 1.531259 }, { "acc": 0.9791667, "epoch": 16.058120459339115, "grad_norm": 2.0162558555603027, "learning_rate": 8.120147219591742e-06, "loss": 0.07675868, "memory(GiB)": 13.7, "step": 34260, "train_speed(iter/s)": 1.531259 }, { "acc": 0.97976189, "epoch": 16.06046402624795, "grad_norm": 3.9031059741973877, "learning_rate": 8.11954148613284e-06, "loss": 0.08474159, "memory(GiB)": 13.7, "step": 34265, "train_speed(iter/s)": 1.531259 }, { "acc": 0.96249008, "epoch": 16.062807593156784, "grad_norm": 8.346068382263184, "learning_rate": 8.118935677702425e-06, "loss": 0.17443011, "memory(GiB)": 13.7, "step": 34270, "train_speed(iter/s)": 1.53127 }, { "acc": 0.97779016, "epoch": 16.06515116006562, "grad_norm": 3.6822845935821533, "learning_rate": 8.118329794315063e-06, "loss": 0.09077644, "memory(GiB)": 13.7, "step": 34275, "train_speed(iter/s)": 1.531282 }, { "acc": 0.97730045, "epoch": 16.067494726974456, "grad_norm": 9.095252990722656, "learning_rate": 8.117723835985313e-06, "loss": 0.09256363, "memory(GiB)": 13.7, "step": 34280, "train_speed(iter/s)": 1.531301 }, { "acc": 0.9776042, "epoch": 16.06983829388329, "grad_norm": 2.8885722160339355, "learning_rate": 8.117117802727746e-06, "loss": 0.06790243, "memory(GiB)": 13.7, "step": 34285, "train_speed(iter/s)": 1.531303 }, { "acc": 0.9766613, "epoch": 16.072181860792124, "grad_norm": 4.0829668045043945, "learning_rate": 8.116511694556923e-06, "loss": 0.13056155, "memory(GiB)": 13.7, "step": 34290, "train_speed(iter/s)": 1.531305 }, { "acc": 0.98395834, "epoch": 16.074525427700962, "grad_norm": 3.8415958881378174, "learning_rate": 8.115905511487418e-06, "loss": 0.04161515, "memory(GiB)": 13.7, "step": 34295, "train_speed(iter/s)": 1.53131 }, { "acc": 0.98399954, "epoch": 16.076868994609796, "grad_norm": 48.676517486572266, "learning_rate": 8.115299253533797e-06, "loss": 0.08262424, "memory(GiB)": 13.7, "step": 34300, "train_speed(iter/s)": 1.531334 }, { "acc": 0.988447, "epoch": 16.07921256151863, "grad_norm": 0.19393572211265564, "learning_rate": 8.114692920710638e-06, "loss": 0.0874562, "memory(GiB)": 13.7, "step": 34305, "train_speed(iter/s)": 1.531332 }, { "acc": 0.98369389, "epoch": 16.081556128427465, "grad_norm": 4.3770670890808105, "learning_rate": 8.114086513032512e-06, "loss": 0.08661814, "memory(GiB)": 13.7, "step": 34310, "train_speed(iter/s)": 1.531352 }, { "acc": 0.97378788, "epoch": 16.083899695336303, "grad_norm": 14.060773849487305, "learning_rate": 8.113480030513992e-06, "loss": 0.14208468, "memory(GiB)": 13.7, "step": 34315, "train_speed(iter/s)": 1.53137 }, { "acc": 0.98104172, "epoch": 16.086243262245137, "grad_norm": 2.7958731651306152, "learning_rate": 8.112873473169661e-06, "loss": 0.05319699, "memory(GiB)": 13.7, "step": 34320, "train_speed(iter/s)": 1.531376 }, { "acc": 0.9770834, "epoch": 16.08858682915397, "grad_norm": 4.528419017791748, "learning_rate": 8.112266841014096e-06, "loss": 0.12851763, "memory(GiB)": 13.7, "step": 34325, "train_speed(iter/s)": 1.531386 }, { "acc": 0.98090801, "epoch": 16.090930396062806, "grad_norm": 3.152146816253662, "learning_rate": 8.11166013406188e-06, "loss": 0.07583264, "memory(GiB)": 13.7, "step": 34330, "train_speed(iter/s)": 1.531393 }, { "acc": 0.98202953, "epoch": 16.093273962971644, "grad_norm": 3.4054200649261475, "learning_rate": 8.111053352327594e-06, "loss": 0.10409179, "memory(GiB)": 13.7, "step": 34335, "train_speed(iter/s)": 1.531406 }, { "acc": 0.9859046, "epoch": 16.095617529880478, "grad_norm": 0.2121478021144867, "learning_rate": 8.110446495825825e-06, "loss": 0.10452034, "memory(GiB)": 13.7, "step": 34340, "train_speed(iter/s)": 1.5314 }, { "acc": 0.98081474, "epoch": 16.097961096789312, "grad_norm": 1.622409462928772, "learning_rate": 8.109839564571161e-06, "loss": 0.07797921, "memory(GiB)": 13.7, "step": 34345, "train_speed(iter/s)": 1.5314 }, { "acc": 0.98258934, "epoch": 16.10030466369815, "grad_norm": 5.270715713500977, "learning_rate": 8.109232558578185e-06, "loss": 0.06910995, "memory(GiB)": 13.7, "step": 34350, "train_speed(iter/s)": 1.531395 }, { "acc": 0.98857145, "epoch": 16.102648230606984, "grad_norm": 2.376206398010254, "learning_rate": 8.108625477861493e-06, "loss": 0.04153489, "memory(GiB)": 13.7, "step": 34355, "train_speed(iter/s)": 1.531399 }, { "acc": 0.98353701, "epoch": 16.10499179751582, "grad_norm": 9.29220962524414, "learning_rate": 8.108018322435678e-06, "loss": 0.12312788, "memory(GiB)": 13.7, "step": 34360, "train_speed(iter/s)": 1.531416 }, { "acc": 0.97155132, "epoch": 16.107335364424653, "grad_norm": 5.425060272216797, "learning_rate": 8.107411092315329e-06, "loss": 0.09291991, "memory(GiB)": 13.7, "step": 34365, "train_speed(iter/s)": 1.531421 }, { "acc": 0.9791667, "epoch": 16.10967893133349, "grad_norm": 0.02366757206618786, "learning_rate": 8.106803787515044e-06, "loss": 0.10294274, "memory(GiB)": 13.7, "step": 34370, "train_speed(iter/s)": 1.531421 }, { "acc": 0.97535715, "epoch": 16.112022498242325, "grad_norm": 15.89481258392334, "learning_rate": 8.106196408049423e-06, "loss": 0.1001798, "memory(GiB)": 13.7, "step": 34375, "train_speed(iter/s)": 1.531432 }, { "acc": 0.98291664, "epoch": 16.11436606515116, "grad_norm": 5.491997718811035, "learning_rate": 8.105588953933062e-06, "loss": 0.06732264, "memory(GiB)": 13.7, "step": 34380, "train_speed(iter/s)": 1.531435 }, { "acc": 0.96395836, "epoch": 16.116709632059994, "grad_norm": 3.5721068382263184, "learning_rate": 8.104981425180562e-06, "loss": 0.1394233, "memory(GiB)": 13.7, "step": 34385, "train_speed(iter/s)": 1.531422 }, { "acc": 0.97912912, "epoch": 16.11905319896883, "grad_norm": 3.4348526000976562, "learning_rate": 8.10437382180653e-06, "loss": 0.10089154, "memory(GiB)": 13.7, "step": 34390, "train_speed(iter/s)": 1.531424 }, { "acc": 0.96652775, "epoch": 16.121396765877666, "grad_norm": 3.9078257083892822, "learning_rate": 8.103766143825568e-06, "loss": 0.12522906, "memory(GiB)": 13.7, "step": 34395, "train_speed(iter/s)": 1.531435 }, { "acc": 0.97661705, "epoch": 16.1237403327865, "grad_norm": 17.672529220581055, "learning_rate": 8.103158391252281e-06, "loss": 0.08998702, "memory(GiB)": 13.7, "step": 34400, "train_speed(iter/s)": 1.531432 }, { "acc": 0.98745041, "epoch": 16.126083899695335, "grad_norm": 1.1202667951583862, "learning_rate": 8.10255056410128e-06, "loss": 0.0625859, "memory(GiB)": 13.7, "step": 34405, "train_speed(iter/s)": 1.531452 }, { "acc": 0.98767366, "epoch": 16.128427466604172, "grad_norm": 3.055610418319702, "learning_rate": 8.101942662387177e-06, "loss": 0.05975161, "memory(GiB)": 13.7, "step": 34410, "train_speed(iter/s)": 1.531465 }, { "acc": 0.98568459, "epoch": 16.130771033513007, "grad_norm": 6.728300094604492, "learning_rate": 8.10133468612458e-06, "loss": 0.08764142, "memory(GiB)": 13.7, "step": 34415, "train_speed(iter/s)": 1.53147 }, { "acc": 0.9825695, "epoch": 16.13311460042184, "grad_norm": 7.166365623474121, "learning_rate": 8.100726635328104e-06, "loss": 0.12889864, "memory(GiB)": 13.7, "step": 34420, "train_speed(iter/s)": 1.531471 }, { "acc": 0.97819939, "epoch": 16.13545816733068, "grad_norm": 6.404716491699219, "learning_rate": 8.100118510012364e-06, "loss": 0.09008272, "memory(GiB)": 13.7, "step": 34425, "train_speed(iter/s)": 1.531479 }, { "acc": 0.97592258, "epoch": 16.137801734239513, "grad_norm": 8.34139633178711, "learning_rate": 8.099510310191978e-06, "loss": 0.10230051, "memory(GiB)": 13.7, "step": 34430, "train_speed(iter/s)": 1.531485 }, { "acc": 0.98026047, "epoch": 16.140145301148348, "grad_norm": 3.0660884380340576, "learning_rate": 8.098902035881568e-06, "loss": 0.11584802, "memory(GiB)": 13.7, "step": 34435, "train_speed(iter/s)": 1.531486 }, { "acc": 0.988447, "epoch": 16.142488868057182, "grad_norm": 1.5443496704101562, "learning_rate": 8.098293687095748e-06, "loss": 0.06692043, "memory(GiB)": 13.7, "step": 34440, "train_speed(iter/s)": 1.531497 }, { "acc": 0.98888893, "epoch": 16.14483243496602, "grad_norm": 1.045325517654419, "learning_rate": 8.097685263849148e-06, "loss": 0.09511784, "memory(GiB)": 13.7, "step": 34445, "train_speed(iter/s)": 1.531495 }, { "acc": 0.96511364, "epoch": 16.147176001874854, "grad_norm": 6.1852827072143555, "learning_rate": 8.097076766156387e-06, "loss": 0.23940647, "memory(GiB)": 13.7, "step": 34450, "train_speed(iter/s)": 1.531504 }, { "acc": 0.9885417, "epoch": 16.14951956878369, "grad_norm": 4.062901020050049, "learning_rate": 8.096468194032094e-06, "loss": 0.03647257, "memory(GiB)": 13.7, "step": 34455, "train_speed(iter/s)": 1.531511 }, { "acc": 0.96613092, "epoch": 16.151863135692523, "grad_norm": 7.843287944793701, "learning_rate": 8.095859547490897e-06, "loss": 0.09805605, "memory(GiB)": 13.7, "step": 34460, "train_speed(iter/s)": 1.53151 }, { "acc": 0.9742836, "epoch": 16.15420670260136, "grad_norm": 0.3967777192592621, "learning_rate": 8.095250826547427e-06, "loss": 0.0817854, "memory(GiB)": 13.7, "step": 34465, "train_speed(iter/s)": 1.531506 }, { "acc": 0.98104172, "epoch": 16.156550269510195, "grad_norm": 7.047272205352783, "learning_rate": 8.094642031216314e-06, "loss": 0.06566986, "memory(GiB)": 13.7, "step": 34470, "train_speed(iter/s)": 1.531528 }, { "acc": 0.98812504, "epoch": 16.15889383641903, "grad_norm": 0.17793801426887512, "learning_rate": 8.09403316151219e-06, "loss": 0.03316906, "memory(GiB)": 13.7, "step": 34475, "train_speed(iter/s)": 1.531546 }, { "acc": 0.99197922, "epoch": 16.161237403327863, "grad_norm": 4.497366428375244, "learning_rate": 8.09342421744969e-06, "loss": 0.03217009, "memory(GiB)": 13.7, "step": 34480, "train_speed(iter/s)": 1.531556 }, { "acc": 0.97830362, "epoch": 16.1635809702367, "grad_norm": 18.73745346069336, "learning_rate": 8.092815199043457e-06, "loss": 0.07358723, "memory(GiB)": 13.7, "step": 34485, "train_speed(iter/s)": 1.531552 }, { "acc": 0.98812504, "epoch": 16.165924537145536, "grad_norm": 6.338849067687988, "learning_rate": 8.092206106308124e-06, "loss": 0.05150626, "memory(GiB)": 13.7, "step": 34490, "train_speed(iter/s)": 1.531566 }, { "acc": 0.97580357, "epoch": 16.16826810405437, "grad_norm": 3.9166624546051025, "learning_rate": 8.091596939258333e-06, "loss": 0.08962454, "memory(GiB)": 13.7, "step": 34495, "train_speed(iter/s)": 1.53158 }, { "acc": 0.98484383, "epoch": 16.170611670963208, "grad_norm": 1.9469789266586304, "learning_rate": 8.090987697908726e-06, "loss": 0.06891379, "memory(GiB)": 13.7, "step": 34500, "train_speed(iter/s)": 1.531594 }, { "acc": 0.97990532, "epoch": 16.172955237872042, "grad_norm": 0.8192656636238098, "learning_rate": 8.09037838227395e-06, "loss": 0.10472026, "memory(GiB)": 13.7, "step": 34505, "train_speed(iter/s)": 1.531602 }, { "acc": 0.97830353, "epoch": 16.175298804780876, "grad_norm": 3.34662127494812, "learning_rate": 8.089768992368647e-06, "loss": 0.1143223, "memory(GiB)": 13.7, "step": 34510, "train_speed(iter/s)": 1.531609 }, { "acc": 0.97706852, "epoch": 16.17764237168971, "grad_norm": 2.2281317710876465, "learning_rate": 8.089159528207466e-06, "loss": 0.12265855, "memory(GiB)": 13.7, "step": 34515, "train_speed(iter/s)": 1.531628 }, { "acc": 0.98506947, "epoch": 16.17998593859855, "grad_norm": 3.8707797527313232, "learning_rate": 8.088549989805058e-06, "loss": 0.05998479, "memory(GiB)": 13.7, "step": 34520, "train_speed(iter/s)": 1.531642 }, { "acc": 0.9715477, "epoch": 16.182329505507383, "grad_norm": 7.9049811363220215, "learning_rate": 8.087940377176074e-06, "loss": 0.1232886, "memory(GiB)": 13.7, "step": 34525, "train_speed(iter/s)": 1.531654 }, { "acc": 0.97247925, "epoch": 16.184673072416217, "grad_norm": 2.739682197570801, "learning_rate": 8.087330690335164e-06, "loss": 0.11076885, "memory(GiB)": 13.7, "step": 34530, "train_speed(iter/s)": 1.531654 }, { "acc": 0.98150787, "epoch": 16.18701663932505, "grad_norm": 2.9182088375091553, "learning_rate": 8.086720929296987e-06, "loss": 0.10523871, "memory(GiB)": 13.7, "step": 34535, "train_speed(iter/s)": 1.531656 }, { "acc": 0.96577377, "epoch": 16.18936020623389, "grad_norm": 5.7277092933654785, "learning_rate": 8.086111094076196e-06, "loss": 0.15591824, "memory(GiB)": 13.7, "step": 34540, "train_speed(iter/s)": 1.531672 }, { "acc": 0.98988094, "epoch": 16.191703773142724, "grad_norm": 0.6211907267570496, "learning_rate": 8.085501184687454e-06, "loss": 0.05714403, "memory(GiB)": 13.7, "step": 34545, "train_speed(iter/s)": 1.531676 }, { "acc": 0.96851196, "epoch": 16.194047340051558, "grad_norm": 451.3318176269531, "learning_rate": 8.084891201145417e-06, "loss": 0.11931419, "memory(GiB)": 13.7, "step": 34550, "train_speed(iter/s)": 1.531675 }, { "acc": 0.95333328, "epoch": 16.196390906960392, "grad_norm": 4.430492877960205, "learning_rate": 8.08428114346475e-06, "loss": 0.13277541, "memory(GiB)": 13.7, "step": 34555, "train_speed(iter/s)": 1.531699 }, { "acc": 0.97654839, "epoch": 16.19873447386923, "grad_norm": 6.128765106201172, "learning_rate": 8.08367101166011e-06, "loss": 0.09529561, "memory(GiB)": 13.7, "step": 34560, "train_speed(iter/s)": 1.531698 }, { "acc": 0.98125, "epoch": 16.201078040778064, "grad_norm": 4.202198505401611, "learning_rate": 8.083060805746176e-06, "loss": 0.10555338, "memory(GiB)": 13.7, "step": 34565, "train_speed(iter/s)": 1.531694 }, { "acc": 0.9802083, "epoch": 16.2034216076869, "grad_norm": 1.7490183115005493, "learning_rate": 8.082450525737603e-06, "loss": 0.08099709, "memory(GiB)": 13.7, "step": 34570, "train_speed(iter/s)": 1.5317 }, { "acc": 0.96696424, "epoch": 16.205765174595733, "grad_norm": 7.362743854522705, "learning_rate": 8.081840171649064e-06, "loss": 0.09497609, "memory(GiB)": 13.7, "step": 34575, "train_speed(iter/s)": 1.531709 }, { "acc": 0.98781252, "epoch": 16.20810874150457, "grad_norm": 1.59811270236969, "learning_rate": 8.081229743495232e-06, "loss": 0.08064668, "memory(GiB)": 13.7, "step": 34580, "train_speed(iter/s)": 1.531715 }, { "acc": 0.96168022, "epoch": 16.210452308413405, "grad_norm": 6.066680908203125, "learning_rate": 8.080619241290776e-06, "loss": 0.16424019, "memory(GiB)": 13.7, "step": 34585, "train_speed(iter/s)": 1.531726 }, { "acc": 0.98000002, "epoch": 16.21279587532224, "grad_norm": 0.04471806809306145, "learning_rate": 8.080008665050372e-06, "loss": 0.06939429, "memory(GiB)": 13.7, "step": 34590, "train_speed(iter/s)": 1.531723 }, { "acc": 0.95494051, "epoch": 16.215139442231077, "grad_norm": 4.0402326583862305, "learning_rate": 8.0793980147887e-06, "loss": 0.12834926, "memory(GiB)": 13.7, "step": 34595, "train_speed(iter/s)": 1.531725 }, { "acc": 0.9802949, "epoch": 16.21748300913991, "grad_norm": 0.7038148045539856, "learning_rate": 8.078787290520433e-06, "loss": 0.12052307, "memory(GiB)": 13.7, "step": 34600, "train_speed(iter/s)": 1.531735 }, { "acc": 0.96985121, "epoch": 16.219826576048746, "grad_norm": 2.2332026958465576, "learning_rate": 8.07817649226025e-06, "loss": 0.13555727, "memory(GiB)": 13.7, "step": 34605, "train_speed(iter/s)": 1.531744 }, { "acc": 0.98738098, "epoch": 16.22217014295758, "grad_norm": 4.248812675476074, "learning_rate": 8.077565620022837e-06, "loss": 0.06841795, "memory(GiB)": 13.7, "step": 34610, "train_speed(iter/s)": 1.531763 }, { "acc": 0.97442703, "epoch": 16.224513709866418, "grad_norm": 11.415604591369629, "learning_rate": 8.076954673822875e-06, "loss": 0.11878434, "memory(GiB)": 13.7, "step": 34615, "train_speed(iter/s)": 1.531767 }, { "acc": 0.98769846, "epoch": 16.226857276775252, "grad_norm": 2.4702510833740234, "learning_rate": 8.076343653675051e-06, "loss": 0.06963421, "memory(GiB)": 13.7, "step": 34620, "train_speed(iter/s)": 1.531769 }, { "acc": 0.97201385, "epoch": 16.229200843684087, "grad_norm": 3.219550371170044, "learning_rate": 8.07573255959405e-06, "loss": 0.1183149, "memory(GiB)": 13.7, "step": 34625, "train_speed(iter/s)": 1.531782 }, { "acc": 0.97741594, "epoch": 16.23154441059292, "grad_norm": 2.9363560676574707, "learning_rate": 8.07512139159456e-06, "loss": 0.10228002, "memory(GiB)": 13.7, "step": 34630, "train_speed(iter/s)": 1.531786 }, { "acc": 0.98125, "epoch": 16.23388797750176, "grad_norm": 0.7198861241340637, "learning_rate": 8.074510149691274e-06, "loss": 0.07519456, "memory(GiB)": 13.7, "step": 34635, "train_speed(iter/s)": 1.531788 }, { "acc": 0.98961887, "epoch": 16.236231544410593, "grad_norm": 4.360238075256348, "learning_rate": 8.073898833898882e-06, "loss": 0.07025306, "memory(GiB)": 13.7, "step": 34640, "train_speed(iter/s)": 1.531786 }, { "acc": 0.97986603, "epoch": 16.238575111319427, "grad_norm": 3.093930721282959, "learning_rate": 8.073287444232081e-06, "loss": 0.08646732, "memory(GiB)": 13.7, "step": 34645, "train_speed(iter/s)": 1.531796 }, { "acc": 0.97523527, "epoch": 16.24091867822826, "grad_norm": 5.6073150634765625, "learning_rate": 8.072675980705563e-06, "loss": 0.08213782, "memory(GiB)": 13.7, "step": 34650, "train_speed(iter/s)": 1.531796 }, { "acc": 0.96307535, "epoch": 16.2432622451371, "grad_norm": 0.11568526178598404, "learning_rate": 8.072064443334027e-06, "loss": 0.17952338, "memory(GiB)": 13.7, "step": 34655, "train_speed(iter/s)": 1.531805 }, { "acc": 0.97279768, "epoch": 16.245605812045934, "grad_norm": 2.581451416015625, "learning_rate": 8.071452832132173e-06, "loss": 0.09615995, "memory(GiB)": 13.7, "step": 34660, "train_speed(iter/s)": 1.53182 }, { "acc": 0.9875, "epoch": 16.247949378954768, "grad_norm": 3.00968861579895, "learning_rate": 8.070841147114701e-06, "loss": 0.07186719, "memory(GiB)": 13.7, "step": 34665, "train_speed(iter/s)": 1.531816 }, { "acc": 0.98623514, "epoch": 16.250292945863606, "grad_norm": 3.991983652114868, "learning_rate": 8.070229388296316e-06, "loss": 0.12245051, "memory(GiB)": 13.7, "step": 34670, "train_speed(iter/s)": 1.531826 }, { "acc": 0.97479172, "epoch": 16.25263651277244, "grad_norm": 0.3477858304977417, "learning_rate": 8.069617555691723e-06, "loss": 0.11160216, "memory(GiB)": 13.7, "step": 34675, "train_speed(iter/s)": 1.53184 }, { "acc": 0.96425591, "epoch": 16.254980079681275, "grad_norm": 3.903667688369751, "learning_rate": 8.069005649315622e-06, "loss": 0.09824252, "memory(GiB)": 13.7, "step": 34680, "train_speed(iter/s)": 1.531857 }, { "acc": 0.96999464, "epoch": 16.25732364659011, "grad_norm": 6.2154316902160645, "learning_rate": 8.06839366918273e-06, "loss": 0.09383334, "memory(GiB)": 13.7, "step": 34685, "train_speed(iter/s)": 1.531862 }, { "acc": 0.97875004, "epoch": 16.259667213498947, "grad_norm": 26.226667404174805, "learning_rate": 8.06778161530775e-06, "loss": 0.09971874, "memory(GiB)": 13.7, "step": 34690, "train_speed(iter/s)": 1.531853 }, { "acc": 0.96314125, "epoch": 16.26201078040778, "grad_norm": 5.8086442947387695, "learning_rate": 8.0671694877054e-06, "loss": 0.16742373, "memory(GiB)": 13.7, "step": 34695, "train_speed(iter/s)": 1.531867 }, { "acc": 0.97886372, "epoch": 16.264354347316615, "grad_norm": 0.25450900197029114, "learning_rate": 8.066557286390388e-06, "loss": 0.08713342, "memory(GiB)": 13.7, "step": 34700, "train_speed(iter/s)": 1.531873 }, { "acc": 0.96271076, "epoch": 16.26669791422545, "grad_norm": 9.526504516601562, "learning_rate": 8.065945011377431e-06, "loss": 0.17640471, "memory(GiB)": 13.7, "step": 34705, "train_speed(iter/s)": 1.531877 }, { "acc": 0.98979168, "epoch": 16.269041481134288, "grad_norm": 0.566741943359375, "learning_rate": 8.06533266268125e-06, "loss": 0.05406693, "memory(GiB)": 13.7, "step": 34710, "train_speed(iter/s)": 1.531889 }, { "acc": 0.97580357, "epoch": 16.271385048043122, "grad_norm": 1.1149466037750244, "learning_rate": 8.064720240316558e-06, "loss": 0.08551843, "memory(GiB)": 13.7, "step": 34715, "train_speed(iter/s)": 1.531902 }, { "acc": 0.98277779, "epoch": 16.273728614951956, "grad_norm": 2.101120710372925, "learning_rate": 8.064107744298078e-06, "loss": 0.05610034, "memory(GiB)": 13.7, "step": 34720, "train_speed(iter/s)": 1.531917 }, { "acc": 0.98073864, "epoch": 16.27607218186079, "grad_norm": 1.0943995714187622, "learning_rate": 8.063495174640534e-06, "loss": 0.09122717, "memory(GiB)": 13.7, "step": 34725, "train_speed(iter/s)": 1.531918 }, { "acc": 0.98803034, "epoch": 16.27841574876963, "grad_norm": 1.2171236276626587, "learning_rate": 8.062882531358648e-06, "loss": 0.03140669, "memory(GiB)": 13.7, "step": 34730, "train_speed(iter/s)": 1.531918 }, { "acc": 0.95636902, "epoch": 16.280759315678463, "grad_norm": 9.654317855834961, "learning_rate": 8.062269814467147e-06, "loss": 0.23106489, "memory(GiB)": 13.7, "step": 34735, "train_speed(iter/s)": 1.531923 }, { "acc": 0.97928028, "epoch": 16.283102882587297, "grad_norm": 2.596174716949463, "learning_rate": 8.061657023980758e-06, "loss": 0.11024221, "memory(GiB)": 13.7, "step": 34740, "train_speed(iter/s)": 1.531918 }, { "acc": 0.97270832, "epoch": 16.285446449496135, "grad_norm": 2.9898226261138916, "learning_rate": 8.061044159914211e-06, "loss": 0.08499751, "memory(GiB)": 13.7, "step": 34745, "train_speed(iter/s)": 1.53192 }, { "acc": 0.98699169, "epoch": 16.28779001640497, "grad_norm": 0.9710070490837097, "learning_rate": 8.060431222282238e-06, "loss": 0.07041248, "memory(GiB)": 13.7, "step": 34750, "train_speed(iter/s)": 1.531903 }, { "acc": 0.96618156, "epoch": 16.290133583313803, "grad_norm": 7.867416858673096, "learning_rate": 8.05981821109957e-06, "loss": 0.10407305, "memory(GiB)": 13.7, "step": 34755, "train_speed(iter/s)": 1.531906 }, { "acc": 0.97833328, "epoch": 16.292477150222638, "grad_norm": 7.787805080413818, "learning_rate": 8.05920512638094e-06, "loss": 0.05342805, "memory(GiB)": 13.7, "step": 34760, "train_speed(iter/s)": 1.531921 }, { "acc": 0.99548607, "epoch": 16.294820717131476, "grad_norm": 2.7566916942596436, "learning_rate": 8.058591968141091e-06, "loss": 0.03007041, "memory(GiB)": 13.7, "step": 34765, "train_speed(iter/s)": 1.531926 }, { "acc": 0.98021412, "epoch": 16.29716428404031, "grad_norm": 2.745738983154297, "learning_rate": 8.057978736394759e-06, "loss": 0.0791591, "memory(GiB)": 13.7, "step": 34770, "train_speed(iter/s)": 1.531934 }, { "acc": 0.97569447, "epoch": 16.299507850949144, "grad_norm": 1.6634888648986816, "learning_rate": 8.057365431156681e-06, "loss": 0.06045004, "memory(GiB)": 13.7, "step": 34775, "train_speed(iter/s)": 1.53194 }, { "acc": 0.96947498, "epoch": 16.30185141785798, "grad_norm": 3.8359298706054688, "learning_rate": 8.0567520524416e-06, "loss": 0.07792896, "memory(GiB)": 13.7, "step": 34780, "train_speed(iter/s)": 1.531926 }, { "acc": 0.97816162, "epoch": 16.304194984766816, "grad_norm": 6.892028331756592, "learning_rate": 8.05613860026426e-06, "loss": 0.07337924, "memory(GiB)": 13.7, "step": 34785, "train_speed(iter/s)": 1.531929 }, { "acc": 0.97015877, "epoch": 16.30653855167565, "grad_norm": 2.1045830249786377, "learning_rate": 8.055525074639406e-06, "loss": 0.10307596, "memory(GiB)": 13.7, "step": 34790, "train_speed(iter/s)": 1.531935 }, { "acc": 0.98380947, "epoch": 16.308882118584485, "grad_norm": 3.0263030529022217, "learning_rate": 8.054911475581786e-06, "loss": 0.09678466, "memory(GiB)": 13.7, "step": 34795, "train_speed(iter/s)": 1.531932 }, { "acc": 0.98328648, "epoch": 16.31122568549332, "grad_norm": 5.64033317565918, "learning_rate": 8.054297803106151e-06, "loss": 0.09938544, "memory(GiB)": 13.7, "step": 34800, "train_speed(iter/s)": 1.531935 }, { "acc": 0.97894344, "epoch": 16.313569252402157, "grad_norm": 3.726297378540039, "learning_rate": 8.053684057227249e-06, "loss": 0.08372502, "memory(GiB)": 13.7, "step": 34805, "train_speed(iter/s)": 1.531942 }, { "acc": 0.95887833, "epoch": 16.31591281931099, "grad_norm": 7.772969722747803, "learning_rate": 8.053070237959831e-06, "loss": 0.15498127, "memory(GiB)": 13.7, "step": 34810, "train_speed(iter/s)": 1.531949 }, { "acc": 0.98005209, "epoch": 16.318256386219826, "grad_norm": 11.221092224121094, "learning_rate": 8.052456345318654e-06, "loss": 0.07300025, "memory(GiB)": 13.7, "step": 34815, "train_speed(iter/s)": 1.531949 }, { "acc": 0.990625, "epoch": 16.32059995312866, "grad_norm": 0.2462470382452011, "learning_rate": 8.051842379318472e-06, "loss": 0.03148722, "memory(GiB)": 13.7, "step": 34820, "train_speed(iter/s)": 1.531952 }, { "acc": 0.98224201, "epoch": 16.322943520037498, "grad_norm": 5.621937274932861, "learning_rate": 8.051228339974044e-06, "loss": 0.05362893, "memory(GiB)": 13.7, "step": 34825, "train_speed(iter/s)": 1.531954 }, { "acc": 0.98009806, "epoch": 16.325287086946332, "grad_norm": 12.37425422668457, "learning_rate": 8.05061422730013e-06, "loss": 0.14240534, "memory(GiB)": 13.7, "step": 34830, "train_speed(iter/s)": 1.531945 }, { "acc": 0.98479176, "epoch": 16.327630653855167, "grad_norm": 2.214702844619751, "learning_rate": 8.050000041311488e-06, "loss": 0.07351223, "memory(GiB)": 13.7, "step": 34835, "train_speed(iter/s)": 1.531943 }, { "acc": 0.9760416, "epoch": 16.329974220764004, "grad_norm": 4.346316814422607, "learning_rate": 8.049385782022886e-06, "loss": 0.06448334, "memory(GiB)": 13.7, "step": 34840, "train_speed(iter/s)": 1.531942 }, { "acc": 0.98319626, "epoch": 16.33231778767284, "grad_norm": 0.3965654671192169, "learning_rate": 8.048771449449086e-06, "loss": 0.0494491, "memory(GiB)": 13.7, "step": 34845, "train_speed(iter/s)": 1.531951 }, { "acc": 0.9838542, "epoch": 16.334661354581673, "grad_norm": 0.38386017084121704, "learning_rate": 8.048157043604852e-06, "loss": 0.03410218, "memory(GiB)": 13.7, "step": 34850, "train_speed(iter/s)": 1.531948 }, { "acc": 0.96937504, "epoch": 16.337004921490507, "grad_norm": 9.895987510681152, "learning_rate": 8.047542564504959e-06, "loss": 0.12636852, "memory(GiB)": 13.7, "step": 34855, "train_speed(iter/s)": 1.53195 }, { "acc": 0.97229166, "epoch": 16.339348488399345, "grad_norm": 4.188347816467285, "learning_rate": 8.046928012164171e-06, "loss": 0.06126649, "memory(GiB)": 13.7, "step": 34860, "train_speed(iter/s)": 1.531957 }, { "acc": 0.962887, "epoch": 16.34169205530818, "grad_norm": 7.106667518615723, "learning_rate": 8.04631338659726e-06, "loss": 0.12954087, "memory(GiB)": 13.7, "step": 34865, "train_speed(iter/s)": 1.531968 }, { "acc": 0.9802084, "epoch": 16.344035622217014, "grad_norm": 3.5806989669799805, "learning_rate": 8.045698687819003e-06, "loss": 0.08832431, "memory(GiB)": 13.7, "step": 34870, "train_speed(iter/s)": 1.531976 }, { "acc": 0.96904306, "epoch": 16.346379189125848, "grad_norm": 44.04075622558594, "learning_rate": 8.045083915844176e-06, "loss": 0.11823225, "memory(GiB)": 13.7, "step": 34875, "train_speed(iter/s)": 1.531972 }, { "acc": 0.9916666, "epoch": 16.348722756034686, "grad_norm": 0.7351693511009216, "learning_rate": 8.044469070687553e-06, "loss": 0.03023941, "memory(GiB)": 13.7, "step": 34880, "train_speed(iter/s)": 1.53197 }, { "acc": 0.97806234, "epoch": 16.35106632294352, "grad_norm": 3.1903016567230225, "learning_rate": 8.04385415236391e-06, "loss": 0.11275424, "memory(GiB)": 13.7, "step": 34885, "train_speed(iter/s)": 1.531986 }, { "acc": 0.97735357, "epoch": 16.353409889852355, "grad_norm": 4.678144931793213, "learning_rate": 8.043239160888036e-06, "loss": 0.09708518, "memory(GiB)": 13.7, "step": 34890, "train_speed(iter/s)": 1.531991 }, { "acc": 0.97338657, "epoch": 16.35575345676119, "grad_norm": 8.86921215057373, "learning_rate": 8.042624096274706e-06, "loss": 0.2064487, "memory(GiB)": 13.7, "step": 34895, "train_speed(iter/s)": 1.532 }, { "acc": 0.9874053, "epoch": 16.358097023670027, "grad_norm": 0.6638758182525635, "learning_rate": 8.042008958538708e-06, "loss": 0.03949039, "memory(GiB)": 13.7, "step": 34900, "train_speed(iter/s)": 1.531999 }, { "acc": 0.98145838, "epoch": 16.36044059057886, "grad_norm": 1.8082985877990723, "learning_rate": 8.041393747694825e-06, "loss": 0.06179828, "memory(GiB)": 13.7, "step": 34905, "train_speed(iter/s)": 1.532001 }, { "acc": 0.9879261, "epoch": 16.362784157487695, "grad_norm": 4.011989116668701, "learning_rate": 8.04077846375785e-06, "loss": 0.07228891, "memory(GiB)": 13.7, "step": 34910, "train_speed(iter/s)": 1.532015 }, { "acc": 0.97281799, "epoch": 16.365127724396533, "grad_norm": 0.9222736954689026, "learning_rate": 8.040163106742565e-06, "loss": 0.09242408, "memory(GiB)": 13.7, "step": 34915, "train_speed(iter/s)": 1.532022 }, { "acc": 0.98298616, "epoch": 16.367471291305367, "grad_norm": 6.324649333953857, "learning_rate": 8.039547676663768e-06, "loss": 0.071302, "memory(GiB)": 13.7, "step": 34920, "train_speed(iter/s)": 1.532021 }, { "acc": 0.97663708, "epoch": 16.369814858214202, "grad_norm": 5.364848613739014, "learning_rate": 8.038932173536247e-06, "loss": 0.14724468, "memory(GiB)": 13.7, "step": 34925, "train_speed(iter/s)": 1.532013 }, { "acc": 0.99159718, "epoch": 16.372158425123036, "grad_norm": 1.5358221530914307, "learning_rate": 8.0383165973748e-06, "loss": 0.0422272, "memory(GiB)": 13.7, "step": 34930, "train_speed(iter/s)": 1.532015 }, { "acc": 0.98604164, "epoch": 16.374501992031874, "grad_norm": 3.978801727294922, "learning_rate": 8.03770094819422e-06, "loss": 0.08129095, "memory(GiB)": 13.7, "step": 34935, "train_speed(iter/s)": 1.532021 }, { "acc": 0.97710323, "epoch": 16.37684555894071, "grad_norm": 1.6030185222625732, "learning_rate": 8.03708522600931e-06, "loss": 0.1227114, "memory(GiB)": 13.7, "step": 34940, "train_speed(iter/s)": 1.532024 }, { "acc": 0.98104172, "epoch": 16.379189125849543, "grad_norm": 0.6914746165275574, "learning_rate": 8.036469430834864e-06, "loss": 0.06752967, "memory(GiB)": 13.7, "step": 34945, "train_speed(iter/s)": 1.532026 }, { "acc": 0.98500004, "epoch": 16.381532692758377, "grad_norm": 6.601855278015137, "learning_rate": 8.035853562685688e-06, "loss": 0.07182784, "memory(GiB)": 13.7, "step": 34950, "train_speed(iter/s)": 1.532019 }, { "acc": 0.9821969, "epoch": 16.383876259667215, "grad_norm": 0.08963797986507416, "learning_rate": 8.035237621576585e-06, "loss": 0.04501331, "memory(GiB)": 13.7, "step": 34955, "train_speed(iter/s)": 1.532034 }, { "acc": 0.97289562, "epoch": 16.38621982657605, "grad_norm": 69.96292114257812, "learning_rate": 8.034621607522359e-06, "loss": 0.14330118, "memory(GiB)": 13.7, "step": 34960, "train_speed(iter/s)": 1.532036 }, { "acc": 0.96747103, "epoch": 16.388563393484883, "grad_norm": 4.258331775665283, "learning_rate": 8.034005520537818e-06, "loss": 0.17915525, "memory(GiB)": 13.7, "step": 34965, "train_speed(iter/s)": 1.532048 }, { "acc": 0.98840275, "epoch": 16.390906960393718, "grad_norm": 2.084095001220703, "learning_rate": 8.03338936063777e-06, "loss": 0.06941739, "memory(GiB)": 13.7, "step": 34970, "train_speed(iter/s)": 1.532066 }, { "acc": 0.96676474, "epoch": 16.393250527302555, "grad_norm": 8.555922508239746, "learning_rate": 8.032773127837024e-06, "loss": 0.10221965, "memory(GiB)": 13.7, "step": 34975, "train_speed(iter/s)": 1.532076 }, { "acc": 0.978125, "epoch": 16.39559409421139, "grad_norm": 2.9936418533325195, "learning_rate": 8.032156822150396e-06, "loss": 0.08180554, "memory(GiB)": 13.7, "step": 34980, "train_speed(iter/s)": 1.532096 }, { "acc": 0.98650255, "epoch": 16.397937661120224, "grad_norm": 5.297770977020264, "learning_rate": 8.031540443592696e-06, "loss": 0.05211118, "memory(GiB)": 13.7, "step": 34985, "train_speed(iter/s)": 1.532101 }, { "acc": 0.96919651, "epoch": 16.400281228029062, "grad_norm": 2.208808183670044, "learning_rate": 8.030923992178742e-06, "loss": 0.10088782, "memory(GiB)": 13.7, "step": 34990, "train_speed(iter/s)": 1.532105 }, { "acc": 0.9833334, "epoch": 16.402624794937896, "grad_norm": 5.240561485290527, "learning_rate": 8.030307467923352e-06, "loss": 0.05983002, "memory(GiB)": 13.7, "step": 34995, "train_speed(iter/s)": 1.532119 }, { "acc": 0.97210312, "epoch": 16.40496836184673, "grad_norm": 7.664764881134033, "learning_rate": 8.029690870841342e-06, "loss": 0.07132442, "memory(GiB)": 13.7, "step": 35000, "train_speed(iter/s)": 1.532134 }, { "acc": 0.9698801, "epoch": 16.407311928755565, "grad_norm": 5.622228622436523, "learning_rate": 8.029074200947538e-06, "loss": 0.0921481, "memory(GiB)": 13.7, "step": 35005, "train_speed(iter/s)": 1.532148 }, { "acc": 0.97998514, "epoch": 16.409655495664403, "grad_norm": 4.195547580718994, "learning_rate": 8.028457458256758e-06, "loss": 0.06166968, "memory(GiB)": 13.7, "step": 35010, "train_speed(iter/s)": 1.532154 }, { "acc": 0.98467264, "epoch": 16.411999062573237, "grad_norm": 3.9161810874938965, "learning_rate": 8.027840642783828e-06, "loss": 0.05655249, "memory(GiB)": 13.7, "step": 35015, "train_speed(iter/s)": 1.532152 }, { "acc": 0.98138351, "epoch": 16.41434262948207, "grad_norm": 5.018801689147949, "learning_rate": 8.027223754543576e-06, "loss": 0.10532413, "memory(GiB)": 13.7, "step": 35020, "train_speed(iter/s)": 1.532158 }, { "acc": 0.97794647, "epoch": 16.416686196390906, "grad_norm": 1.951858401298523, "learning_rate": 8.026606793550828e-06, "loss": 0.08683214, "memory(GiB)": 13.7, "step": 35025, "train_speed(iter/s)": 1.532158 }, { "acc": 0.9907671, "epoch": 16.419029763299744, "grad_norm": 1.2734246253967285, "learning_rate": 8.025989759820415e-06, "loss": 0.06364947, "memory(GiB)": 13.7, "step": 35030, "train_speed(iter/s)": 1.532155 }, { "acc": 0.98487597, "epoch": 16.421373330208578, "grad_norm": 5.182840824127197, "learning_rate": 8.025372653367166e-06, "loss": 0.06611816, "memory(GiB)": 13.7, "step": 35035, "train_speed(iter/s)": 1.53215 }, { "acc": 0.98539257, "epoch": 16.423716897117412, "grad_norm": 5.1471123695373535, "learning_rate": 8.024755474205915e-06, "loss": 0.14435601, "memory(GiB)": 13.7, "step": 35040, "train_speed(iter/s)": 1.532162 }, { "acc": 0.97852678, "epoch": 16.426060464026246, "grad_norm": 5.322487831115723, "learning_rate": 8.0241382223515e-06, "loss": 0.07889651, "memory(GiB)": 13.7, "step": 35045, "train_speed(iter/s)": 1.532177 }, { "acc": 0.98065739, "epoch": 16.428404030935084, "grad_norm": 5.1847052574157715, "learning_rate": 8.023520897818754e-06, "loss": 0.09787485, "memory(GiB)": 13.7, "step": 35050, "train_speed(iter/s)": 1.532188 }, { "acc": 0.97937508, "epoch": 16.43074759784392, "grad_norm": 6.405287265777588, "learning_rate": 8.022903500622519e-06, "loss": 0.09620503, "memory(GiB)": 13.7, "step": 35055, "train_speed(iter/s)": 1.532198 }, { "acc": 0.97150002, "epoch": 16.433091164752753, "grad_norm": 2.868586301803589, "learning_rate": 8.022286030777631e-06, "loss": 0.07667208, "memory(GiB)": 13.7, "step": 35060, "train_speed(iter/s)": 1.53221 }, { "acc": 0.98408737, "epoch": 16.435434731661587, "grad_norm": 3.040635108947754, "learning_rate": 8.021668488298935e-06, "loss": 0.07180868, "memory(GiB)": 13.7, "step": 35065, "train_speed(iter/s)": 1.532208 }, { "acc": 0.97602062, "epoch": 16.437778298570425, "grad_norm": 6.807173728942871, "learning_rate": 8.021050873201275e-06, "loss": 0.08237298, "memory(GiB)": 13.7, "step": 35070, "train_speed(iter/s)": 1.532208 }, { "acc": 0.97160444, "epoch": 16.44012186547926, "grad_norm": 4.2510294914245605, "learning_rate": 8.020433185499494e-06, "loss": 0.11003877, "memory(GiB)": 13.7, "step": 35075, "train_speed(iter/s)": 1.5322 }, { "acc": 0.98862181, "epoch": 16.442465432388094, "grad_norm": 4.019846439361572, "learning_rate": 8.01981542520844e-06, "loss": 0.03453643, "memory(GiB)": 13.7, "step": 35080, "train_speed(iter/s)": 1.532177 }, { "acc": 0.98644371, "epoch": 16.44480899929693, "grad_norm": 1.434194803237915, "learning_rate": 8.019197592342964e-06, "loss": 0.07541372, "memory(GiB)": 13.7, "step": 35085, "train_speed(iter/s)": 1.532194 }, { "acc": 0.96738091, "epoch": 16.447152566205766, "grad_norm": 34.942710876464844, "learning_rate": 8.018579686917915e-06, "loss": 0.14553543, "memory(GiB)": 13.7, "step": 35090, "train_speed(iter/s)": 1.532198 }, { "acc": 0.98028851, "epoch": 16.4494961331146, "grad_norm": 0.30385124683380127, "learning_rate": 8.017961708948144e-06, "loss": 0.07380773, "memory(GiB)": 13.7, "step": 35095, "train_speed(iter/s)": 1.532203 }, { "acc": 0.98290176, "epoch": 16.451839700023434, "grad_norm": 4.1501922607421875, "learning_rate": 8.017343658448508e-06, "loss": 0.09901873, "memory(GiB)": 13.7, "step": 35100, "train_speed(iter/s)": 1.532203 }, { "acc": 0.99541664, "epoch": 16.454183266932272, "grad_norm": 0.7939175963401794, "learning_rate": 8.01672553543386e-06, "loss": 0.03149386, "memory(GiB)": 13.7, "step": 35105, "train_speed(iter/s)": 1.532216 }, { "acc": 0.96894341, "epoch": 16.456526833841107, "grad_norm": 8.235493659973145, "learning_rate": 8.01610733991906e-06, "loss": 0.11987712, "memory(GiB)": 13.7, "step": 35110, "train_speed(iter/s)": 1.53222 }, { "acc": 0.978125, "epoch": 16.45887040074994, "grad_norm": 4.769999027252197, "learning_rate": 8.015489071918967e-06, "loss": 0.04766852, "memory(GiB)": 13.7, "step": 35115, "train_speed(iter/s)": 1.532224 }, { "acc": 0.97303028, "epoch": 16.461213967658775, "grad_norm": 12.895623207092285, "learning_rate": 8.014870731448442e-06, "loss": 0.10433078, "memory(GiB)": 13.7, "step": 35120, "train_speed(iter/s)": 1.532235 }, { "acc": 0.96988087, "epoch": 16.463557534567613, "grad_norm": 5.252911567687988, "learning_rate": 8.014252318522346e-06, "loss": 0.15689198, "memory(GiB)": 13.7, "step": 35125, "train_speed(iter/s)": 1.532241 }, { "acc": 0.988447, "epoch": 16.465901101476447, "grad_norm": 1.6535123586654663, "learning_rate": 8.013633833155547e-06, "loss": 0.04026385, "memory(GiB)": 13.7, "step": 35130, "train_speed(iter/s)": 1.532253 }, { "acc": 0.98041668, "epoch": 16.46824466838528, "grad_norm": 4.164154529571533, "learning_rate": 8.01301527536291e-06, "loss": 0.09735398, "memory(GiB)": 13.7, "step": 35135, "train_speed(iter/s)": 1.532256 }, { "acc": 0.9957386, "epoch": 16.470588235294116, "grad_norm": 5.358633995056152, "learning_rate": 8.012396645159299e-06, "loss": 0.03924795, "memory(GiB)": 13.7, "step": 35140, "train_speed(iter/s)": 1.532249 }, { "acc": 0.98467264, "epoch": 16.472931802202954, "grad_norm": 2.656055212020874, "learning_rate": 8.011777942559592e-06, "loss": 0.05586308, "memory(GiB)": 13.7, "step": 35145, "train_speed(iter/s)": 1.532246 }, { "acc": 0.98427086, "epoch": 16.475275369111788, "grad_norm": 8.171103477478027, "learning_rate": 8.011159167578653e-06, "loss": 0.07075492, "memory(GiB)": 13.7, "step": 35150, "train_speed(iter/s)": 1.532259 }, { "acc": 0.97928028, "epoch": 16.477618936020622, "grad_norm": 5.744789123535156, "learning_rate": 8.01054032023136e-06, "loss": 0.10419962, "memory(GiB)": 13.7, "step": 35155, "train_speed(iter/s)": 1.532264 }, { "acc": 0.97961197, "epoch": 16.47996250292946, "grad_norm": 6.258573532104492, "learning_rate": 8.009921400532587e-06, "loss": 0.10745838, "memory(GiB)": 13.7, "step": 35160, "train_speed(iter/s)": 1.532273 }, { "acc": 0.97559528, "epoch": 16.482306069838295, "grad_norm": 24.430028915405273, "learning_rate": 8.009302408497207e-06, "loss": 0.08136934, "memory(GiB)": 13.7, "step": 35165, "train_speed(iter/s)": 1.53227 }, { "acc": 0.98277779, "epoch": 16.48464963674713, "grad_norm": 3.260998249053955, "learning_rate": 8.008683344140105e-06, "loss": 0.0719774, "memory(GiB)": 13.7, "step": 35170, "train_speed(iter/s)": 1.532267 }, { "acc": 0.98312502, "epoch": 16.486993203655963, "grad_norm": 2.7261288166046143, "learning_rate": 8.008064207476157e-06, "loss": 0.07260978, "memory(GiB)": 13.7, "step": 35175, "train_speed(iter/s)": 1.532265 }, { "acc": 0.96800594, "epoch": 16.4893367705648, "grad_norm": 4.764906883239746, "learning_rate": 8.007444998520246e-06, "loss": 0.10420324, "memory(GiB)": 13.7, "step": 35180, "train_speed(iter/s)": 1.532273 }, { "acc": 0.97583923, "epoch": 16.491680337473635, "grad_norm": 18.55512809753418, "learning_rate": 8.006825717287257e-06, "loss": 0.09495393, "memory(GiB)": 13.7, "step": 35185, "train_speed(iter/s)": 1.532272 }, { "acc": 0.99360065, "epoch": 16.49402390438247, "grad_norm": 2.0655741691589355, "learning_rate": 8.006206363792073e-06, "loss": 0.06220855, "memory(GiB)": 13.7, "step": 35190, "train_speed(iter/s)": 1.532269 }, { "acc": 0.97312508, "epoch": 16.496367471291304, "grad_norm": 4.994667053222656, "learning_rate": 8.005586938049582e-06, "loss": 0.13440282, "memory(GiB)": 13.7, "step": 35195, "train_speed(iter/s)": 1.532274 }, { "acc": 0.98103008, "epoch": 16.498711038200142, "grad_norm": 3.5243208408355713, "learning_rate": 8.004967440074674e-06, "loss": 0.08749567, "memory(GiB)": 13.7, "step": 35200, "train_speed(iter/s)": 1.532275 }, { "acc": 0.97476187, "epoch": 16.501054605108976, "grad_norm": 7.271252155303955, "learning_rate": 8.00434786988224e-06, "loss": 0.15746318, "memory(GiB)": 13.7, "step": 35205, "train_speed(iter/s)": 1.532268 }, { "acc": 0.97785721, "epoch": 16.50339817201781, "grad_norm": 3.7026185989379883, "learning_rate": 8.00372822748717e-06, "loss": 0.15137563, "memory(GiB)": 13.7, "step": 35210, "train_speed(iter/s)": 1.53228 }, { "acc": 0.97391424, "epoch": 16.505741738926645, "grad_norm": 6.5398969650268555, "learning_rate": 8.00310851290436e-06, "loss": 0.09418654, "memory(GiB)": 13.7, "step": 35215, "train_speed(iter/s)": 1.532288 }, { "acc": 0.96784725, "epoch": 16.508085305835483, "grad_norm": 6.084871768951416, "learning_rate": 8.002488726148709e-06, "loss": 0.12708561, "memory(GiB)": 13.7, "step": 35220, "train_speed(iter/s)": 1.532282 }, { "acc": 0.98835316, "epoch": 16.510428872744317, "grad_norm": 3.616025447845459, "learning_rate": 8.001868867235108e-06, "loss": 0.05159742, "memory(GiB)": 13.7, "step": 35225, "train_speed(iter/s)": 1.532287 }, { "acc": 0.97875004, "epoch": 16.51277243965315, "grad_norm": 0.8054064512252808, "learning_rate": 8.001248936178459e-06, "loss": 0.05105112, "memory(GiB)": 13.7, "step": 35230, "train_speed(iter/s)": 1.532284 }, { "acc": 0.97050591, "epoch": 16.51511600656199, "grad_norm": 8.339361190795898, "learning_rate": 8.000628932993667e-06, "loss": 0.12506626, "memory(GiB)": 13.7, "step": 35235, "train_speed(iter/s)": 1.532297 }, { "acc": 0.97452383, "epoch": 16.517459573470823, "grad_norm": 1.5667774677276611, "learning_rate": 8.00000885769563e-06, "loss": 0.07534127, "memory(GiB)": 13.7, "step": 35240, "train_speed(iter/s)": 1.532306 }, { "acc": 0.99392357, "epoch": 16.519803140379658, "grad_norm": 2.894395112991333, "learning_rate": 7.999388710299253e-06, "loss": 0.03976356, "memory(GiB)": 13.7, "step": 35245, "train_speed(iter/s)": 1.532314 }, { "acc": 0.98286705, "epoch": 16.522146707288492, "grad_norm": 2.5548620223999023, "learning_rate": 7.998768490819444e-06, "loss": 0.04476077, "memory(GiB)": 13.7, "step": 35250, "train_speed(iter/s)": 1.532317 }, { "acc": 0.97079449, "epoch": 16.52449027419733, "grad_norm": 1.0312659740447998, "learning_rate": 7.998148199271113e-06, "loss": 0.0941815, "memory(GiB)": 13.7, "step": 35255, "train_speed(iter/s)": 1.532332 }, { "acc": 0.97987175, "epoch": 16.526833841106164, "grad_norm": 1.4452205896377563, "learning_rate": 7.997527835669165e-06, "loss": 0.0513028, "memory(GiB)": 13.7, "step": 35260, "train_speed(iter/s)": 1.532342 }, { "acc": 0.97819443, "epoch": 16.529177408015, "grad_norm": 4.4618072509765625, "learning_rate": 7.996907400028515e-06, "loss": 0.06886231, "memory(GiB)": 13.7, "step": 35265, "train_speed(iter/s)": 1.53234 }, { "acc": 0.98770828, "epoch": 16.531520974923833, "grad_norm": 3.5289053916931152, "learning_rate": 7.996286892364074e-06, "loss": 0.03658156, "memory(GiB)": 13.7, "step": 35270, "train_speed(iter/s)": 1.532357 }, { "acc": 0.9770834, "epoch": 16.53386454183267, "grad_norm": 2.8762059211730957, "learning_rate": 7.995666312690758e-06, "loss": 0.10737225, "memory(GiB)": 13.7, "step": 35275, "train_speed(iter/s)": 1.532361 }, { "acc": 0.97706852, "epoch": 16.536208108741505, "grad_norm": 188.52064514160156, "learning_rate": 7.995045661023484e-06, "loss": 0.18281097, "memory(GiB)": 13.7, "step": 35280, "train_speed(iter/s)": 1.532368 }, { "acc": 0.97041664, "epoch": 16.53855167565034, "grad_norm": 3.247145175933838, "learning_rate": 7.994424937377171e-06, "loss": 0.13118281, "memory(GiB)": 13.7, "step": 35285, "train_speed(iter/s)": 1.532378 }, { "acc": 0.98090277, "epoch": 16.540895242559174, "grad_norm": 3.09822154045105, "learning_rate": 7.993804141766736e-06, "loss": 0.11625531, "memory(GiB)": 13.7, "step": 35290, "train_speed(iter/s)": 1.532382 }, { "acc": 0.98217258, "epoch": 16.54323880946801, "grad_norm": 1.673768162727356, "learning_rate": 7.993183274207106e-06, "loss": 0.0677546, "memory(GiB)": 13.7, "step": 35295, "train_speed(iter/s)": 1.532374 }, { "acc": 0.98675594, "epoch": 16.545582376376846, "grad_norm": 4.437146186828613, "learning_rate": 7.9925623347132e-06, "loss": 0.04035621, "memory(GiB)": 13.7, "step": 35300, "train_speed(iter/s)": 1.53237 }, { "acc": 0.98356943, "epoch": 16.54792594328568, "grad_norm": 2.8247621059417725, "learning_rate": 7.991941323299946e-06, "loss": 0.05776565, "memory(GiB)": 13.7, "step": 35305, "train_speed(iter/s)": 1.532383 }, { "acc": 0.97256947, "epoch": 16.550269510194518, "grad_norm": 4.091460704803467, "learning_rate": 7.991320239982271e-06, "loss": 0.12085392, "memory(GiB)": 13.7, "step": 35310, "train_speed(iter/s)": 1.532408 }, { "acc": 0.96875, "epoch": 16.552613077103352, "grad_norm": 5.366138458251953, "learning_rate": 7.990699084775101e-06, "loss": 0.12746943, "memory(GiB)": 13.7, "step": 35315, "train_speed(iter/s)": 1.532413 }, { "acc": 0.9682333, "epoch": 16.554956644012186, "grad_norm": 18.673107147216797, "learning_rate": 7.99007785769337e-06, "loss": 0.14531726, "memory(GiB)": 13.7, "step": 35320, "train_speed(iter/s)": 1.532423 }, { "acc": 0.9606945, "epoch": 16.55730021092102, "grad_norm": 7.875671863555908, "learning_rate": 7.989456558752009e-06, "loss": 0.18880277, "memory(GiB)": 13.7, "step": 35325, "train_speed(iter/s)": 1.532442 }, { "acc": 0.97649622, "epoch": 16.55964377782986, "grad_norm": 6.136626243591309, "learning_rate": 7.988835187965952e-06, "loss": 0.10275282, "memory(GiB)": 13.7, "step": 35330, "train_speed(iter/s)": 1.532433 }, { "acc": 0.97832108, "epoch": 16.561987344738693, "grad_norm": 2.131412982940674, "learning_rate": 7.988213745350133e-06, "loss": 0.11154644, "memory(GiB)": 13.7, "step": 35335, "train_speed(iter/s)": 1.532435 }, { "acc": 0.98125, "epoch": 16.564330911647527, "grad_norm": 5.523720741271973, "learning_rate": 7.987592230919492e-06, "loss": 0.08600205, "memory(GiB)": 13.7, "step": 35340, "train_speed(iter/s)": 1.532435 }, { "acc": 0.97255955, "epoch": 16.56667447855636, "grad_norm": 4.297797203063965, "learning_rate": 7.986970644688966e-06, "loss": 0.18960133, "memory(GiB)": 13.7, "step": 35345, "train_speed(iter/s)": 1.532439 }, { "acc": 0.97861471, "epoch": 16.5690180454652, "grad_norm": 3.518578052520752, "learning_rate": 7.9863489866735e-06, "loss": 0.10830436, "memory(GiB)": 13.7, "step": 35350, "train_speed(iter/s)": 1.532431 }, { "acc": 0.97355595, "epoch": 16.571361612374034, "grad_norm": 1.1864817142486572, "learning_rate": 7.98572725688803e-06, "loss": 0.16144605, "memory(GiB)": 13.7, "step": 35355, "train_speed(iter/s)": 1.53243 }, { "acc": 0.97950754, "epoch": 16.573705179282868, "grad_norm": 8.48898983001709, "learning_rate": 7.985105455347505e-06, "loss": 0.11059434, "memory(GiB)": 13.7, "step": 35360, "train_speed(iter/s)": 1.532441 }, { "acc": 0.97741528, "epoch": 16.576048746191702, "grad_norm": 5.004195690155029, "learning_rate": 7.984483582066872e-06, "loss": 0.10578766, "memory(GiB)": 13.7, "step": 35365, "train_speed(iter/s)": 1.532451 }, { "acc": 0.99312496, "epoch": 16.57839231310054, "grad_norm": 1.8343095779418945, "learning_rate": 7.983861637061074e-06, "loss": 0.05572152, "memory(GiB)": 13.7, "step": 35370, "train_speed(iter/s)": 1.53245 }, { "acc": 0.9780302, "epoch": 16.580735880009374, "grad_norm": 4.747739791870117, "learning_rate": 7.983239620345065e-06, "loss": 0.14163268, "memory(GiB)": 13.7, "step": 35375, "train_speed(iter/s)": 1.532457 }, { "acc": 0.97629843, "epoch": 16.58307944691821, "grad_norm": 7.46933650970459, "learning_rate": 7.982617531933794e-06, "loss": 0.101877, "memory(GiB)": 13.7, "step": 35380, "train_speed(iter/s)": 1.532469 }, { "acc": 0.97954445, "epoch": 16.585423013827043, "grad_norm": 6.4496541023254395, "learning_rate": 7.981995371842216e-06, "loss": 0.07608482, "memory(GiB)": 13.7, "step": 35385, "train_speed(iter/s)": 1.532476 }, { "acc": 0.97378979, "epoch": 16.58776658073588, "grad_norm": 6.165863513946533, "learning_rate": 7.981373140085283e-06, "loss": 0.16872029, "memory(GiB)": 13.7, "step": 35390, "train_speed(iter/s)": 1.532475 }, { "acc": 0.9884531, "epoch": 16.590110147644715, "grad_norm": 1.7521435022354126, "learning_rate": 7.980750836677954e-06, "loss": 0.08053248, "memory(GiB)": 13.7, "step": 35395, "train_speed(iter/s)": 1.532473 }, { "acc": 0.97814579, "epoch": 16.59245371455355, "grad_norm": 0.22869326174259186, "learning_rate": 7.980128461635182e-06, "loss": 0.10599223, "memory(GiB)": 13.7, "step": 35400, "train_speed(iter/s)": 1.532456 }, { "acc": 0.99025307, "epoch": 16.594797281462387, "grad_norm": 6.6049299240112305, "learning_rate": 7.979506014971934e-06, "loss": 0.04770004, "memory(GiB)": 13.7, "step": 35405, "train_speed(iter/s)": 1.532463 }, { "acc": 0.98154755, "epoch": 16.59714084837122, "grad_norm": 9.723374366760254, "learning_rate": 7.978883496703169e-06, "loss": 0.16904502, "memory(GiB)": 13.7, "step": 35410, "train_speed(iter/s)": 1.532472 }, { "acc": 0.96980648, "epoch": 16.599484415280056, "grad_norm": 3.609415054321289, "learning_rate": 7.978260906843846e-06, "loss": 0.11089768, "memory(GiB)": 13.7, "step": 35415, "train_speed(iter/s)": 1.532465 }, { "acc": 0.98265915, "epoch": 16.60182798218889, "grad_norm": 3.4580743312835693, "learning_rate": 7.977638245408937e-06, "loss": 0.07519261, "memory(GiB)": 13.7, "step": 35420, "train_speed(iter/s)": 1.532476 }, { "acc": 0.98855114, "epoch": 16.604171549097728, "grad_norm": 1.2541736364364624, "learning_rate": 7.977015512413402e-06, "loss": 0.07463406, "memory(GiB)": 13.7, "step": 35425, "train_speed(iter/s)": 1.532485 }, { "acc": 0.98363094, "epoch": 16.606515116006562, "grad_norm": 8.79566764831543, "learning_rate": 7.976392707872216e-06, "loss": 0.08396838, "memory(GiB)": 13.7, "step": 35430, "train_speed(iter/s)": 1.532485 }, { "acc": 0.9760416, "epoch": 16.608858682915397, "grad_norm": 3.0786900520324707, "learning_rate": 7.975769831800343e-06, "loss": 0.0797037, "memory(GiB)": 13.7, "step": 35435, "train_speed(iter/s)": 1.532495 }, { "acc": 0.98120613, "epoch": 16.61120224982423, "grad_norm": 0.017010170966386795, "learning_rate": 7.975146884212759e-06, "loss": 0.06926434, "memory(GiB)": 13.7, "step": 35440, "train_speed(iter/s)": 1.532495 }, { "acc": 0.98531246, "epoch": 16.61354581673307, "grad_norm": 3.430758476257324, "learning_rate": 7.974523865124437e-06, "loss": 0.06472116, "memory(GiB)": 13.7, "step": 35445, "train_speed(iter/s)": 1.532509 }, { "acc": 0.9811553, "epoch": 16.615889383641903, "grad_norm": 2.486961603164673, "learning_rate": 7.97390077455035e-06, "loss": 0.04021353, "memory(GiB)": 13.7, "step": 35450, "train_speed(iter/s)": 1.532512 }, { "acc": 0.97498512, "epoch": 16.618232950550738, "grad_norm": 7.9825520515441895, "learning_rate": 7.973277612505478e-06, "loss": 0.10229661, "memory(GiB)": 13.7, "step": 35455, "train_speed(iter/s)": 1.532504 }, { "acc": 0.98291283, "epoch": 16.620576517459572, "grad_norm": 6.7812299728393555, "learning_rate": 7.972654379004799e-06, "loss": 0.11086361, "memory(GiB)": 13.7, "step": 35460, "train_speed(iter/s)": 1.532522 }, { "acc": 0.97290668, "epoch": 16.62292008436841, "grad_norm": 9.793073654174805, "learning_rate": 7.972031074063291e-06, "loss": 0.10757631, "memory(GiB)": 13.7, "step": 35465, "train_speed(iter/s)": 1.532538 }, { "acc": 0.9635416, "epoch": 16.625263651277244, "grad_norm": 8.34163761138916, "learning_rate": 7.97140769769594e-06, "loss": 0.10269568, "memory(GiB)": 13.7, "step": 35470, "train_speed(iter/s)": 1.53254 }, { "acc": 0.98291664, "epoch": 16.62760721818608, "grad_norm": 1.9033751487731934, "learning_rate": 7.970784249917728e-06, "loss": 0.10773228, "memory(GiB)": 13.7, "step": 35475, "train_speed(iter/s)": 1.532549 }, { "acc": 0.97722759, "epoch": 16.629950785094913, "grad_norm": 0.12108192592859268, "learning_rate": 7.97016073074364e-06, "loss": 0.05928668, "memory(GiB)": 13.7, "step": 35480, "train_speed(iter/s)": 1.532553 }, { "acc": 0.98163376, "epoch": 16.63229435200375, "grad_norm": 9.05606460571289, "learning_rate": 7.969537140188664e-06, "loss": 0.15081133, "memory(GiB)": 13.7, "step": 35485, "train_speed(iter/s)": 1.532547 }, { "acc": 0.97939491, "epoch": 16.634637918912585, "grad_norm": 1.973299503326416, "learning_rate": 7.968913478267792e-06, "loss": 0.13511832, "memory(GiB)": 13.7, "step": 35490, "train_speed(iter/s)": 1.532543 }, { "acc": 0.96249819, "epoch": 16.63698148582142, "grad_norm": 5.400034427642822, "learning_rate": 7.968289744996007e-06, "loss": 0.11766989, "memory(GiB)": 13.7, "step": 35495, "train_speed(iter/s)": 1.532547 }, { "acc": 0.96958332, "epoch": 16.639325052730257, "grad_norm": 4.241340637207031, "learning_rate": 7.96766594038831e-06, "loss": 0.10319529, "memory(GiB)": 13.7, "step": 35500, "train_speed(iter/s)": 1.532556 }, { "acc": 0.97313786, "epoch": 16.64166861963909, "grad_norm": 5.644686222076416, "learning_rate": 7.967042064459692e-06, "loss": 0.16701614, "memory(GiB)": 13.7, "step": 35505, "train_speed(iter/s)": 1.532562 }, { "acc": 0.94645834, "epoch": 16.644012186547926, "grad_norm": 8.880767822265625, "learning_rate": 7.966418117225146e-06, "loss": 0.17490424, "memory(GiB)": 13.7, "step": 35510, "train_speed(iter/s)": 1.532577 }, { "acc": 0.9890564, "epoch": 16.64635575345676, "grad_norm": 1.5813815593719482, "learning_rate": 7.965794098699675e-06, "loss": 0.08278126, "memory(GiB)": 13.7, "step": 35515, "train_speed(iter/s)": 1.53258 }, { "acc": 0.97541666, "epoch": 16.648699320365598, "grad_norm": 2.5582754611968994, "learning_rate": 7.965170008898273e-06, "loss": 0.09734708, "memory(GiB)": 13.7, "step": 35520, "train_speed(iter/s)": 1.532589 }, { "acc": 0.98062506, "epoch": 16.651042887274432, "grad_norm": 4.725190162658691, "learning_rate": 7.964545847835946e-06, "loss": 0.04447821, "memory(GiB)": 13.7, "step": 35525, "train_speed(iter/s)": 1.532595 }, { "acc": 0.96807537, "epoch": 16.653386454183266, "grad_norm": 6.707442760467529, "learning_rate": 7.963921615527695e-06, "loss": 0.11005366, "memory(GiB)": 13.7, "step": 35530, "train_speed(iter/s)": 1.532603 }, { "acc": 0.98145828, "epoch": 16.6557300210921, "grad_norm": 5.327627182006836, "learning_rate": 7.963297311988522e-06, "loss": 0.07961759, "memory(GiB)": 13.7, "step": 35535, "train_speed(iter/s)": 1.532614 }, { "acc": 0.97666664, "epoch": 16.65807358800094, "grad_norm": 4.002551555633545, "learning_rate": 7.962672937233434e-06, "loss": 0.06802129, "memory(GiB)": 13.7, "step": 35540, "train_speed(iter/s)": 1.532621 }, { "acc": 0.99236107, "epoch": 16.660417154909773, "grad_norm": 3.7706942558288574, "learning_rate": 7.962048491277441e-06, "loss": 0.02545122, "memory(GiB)": 13.7, "step": 35545, "train_speed(iter/s)": 1.532632 }, { "acc": 0.97153282, "epoch": 16.662760721818607, "grad_norm": 8.280501365661621, "learning_rate": 7.961423974135555e-06, "loss": 0.10155823, "memory(GiB)": 13.7, "step": 35550, "train_speed(iter/s)": 1.532634 }, { "acc": 0.96823368, "epoch": 16.66510428872744, "grad_norm": 10.159722328186035, "learning_rate": 7.96079938582278e-06, "loss": 0.24711671, "memory(GiB)": 13.7, "step": 35555, "train_speed(iter/s)": 1.532636 }, { "acc": 0.98710318, "epoch": 16.66744785563628, "grad_norm": 4.329361438751221, "learning_rate": 7.960174726354135e-06, "loss": 0.0843548, "memory(GiB)": 13.7, "step": 35560, "train_speed(iter/s)": 1.532627 }, { "acc": 0.97512341, "epoch": 16.669791422545114, "grad_norm": 4.691922664642334, "learning_rate": 7.959549995744631e-06, "loss": 0.14348261, "memory(GiB)": 13.7, "step": 35565, "train_speed(iter/s)": 1.532632 }, { "acc": 0.9677084, "epoch": 16.672134989453948, "grad_norm": 4.334455490112305, "learning_rate": 7.958925194009291e-06, "loss": 0.07709563, "memory(GiB)": 13.7, "step": 35570, "train_speed(iter/s)": 1.532642 }, { "acc": 0.96696424, "epoch": 16.674478556362786, "grad_norm": 6.245400428771973, "learning_rate": 7.958300321163123e-06, "loss": 0.1584662, "memory(GiB)": 13.7, "step": 35575, "train_speed(iter/s)": 1.532655 }, { "acc": 0.9664732, "epoch": 16.67682212327162, "grad_norm": 0.08470942825078964, "learning_rate": 7.957675377221156e-06, "loss": 0.18790958, "memory(GiB)": 13.7, "step": 35580, "train_speed(iter/s)": 1.532666 }, { "acc": 0.98298607, "epoch": 16.679165690180454, "grad_norm": 2.419219493865967, "learning_rate": 7.957050362198407e-06, "loss": 0.03995481, "memory(GiB)": 13.7, "step": 35585, "train_speed(iter/s)": 1.532677 }, { "acc": 0.9815773, "epoch": 16.68150925708929, "grad_norm": 0.5601147413253784, "learning_rate": 7.956425276109899e-06, "loss": 0.08070636, "memory(GiB)": 13.7, "step": 35590, "train_speed(iter/s)": 1.532693 }, { "acc": 0.9828125, "epoch": 16.683852823998127, "grad_norm": 4.2353668212890625, "learning_rate": 7.95580011897066e-06, "loss": 0.09296238, "memory(GiB)": 13.7, "step": 35595, "train_speed(iter/s)": 1.532686 }, { "acc": 0.9733429, "epoch": 16.68619639090696, "grad_norm": 5.610008716583252, "learning_rate": 7.955174890795714e-06, "loss": 0.07199053, "memory(GiB)": 13.7, "step": 35600, "train_speed(iter/s)": 1.5327 }, { "acc": 0.98432541, "epoch": 16.688539957815795, "grad_norm": 0.1064317524433136, "learning_rate": 7.95454959160009e-06, "loss": 0.06348489, "memory(GiB)": 13.7, "step": 35605, "train_speed(iter/s)": 1.532719 }, { "acc": 0.97071953, "epoch": 16.69088352472463, "grad_norm": 6.391347408294678, "learning_rate": 7.95392422139882e-06, "loss": 0.09155229, "memory(GiB)": 13.7, "step": 35610, "train_speed(iter/s)": 1.53272 }, { "acc": 0.97416668, "epoch": 16.693227091633467, "grad_norm": 1.7902684211730957, "learning_rate": 7.953298780206933e-06, "loss": 0.1167271, "memory(GiB)": 13.7, "step": 35615, "train_speed(iter/s)": 1.532717 }, { "acc": 0.98288689, "epoch": 16.6955706585423, "grad_norm": 3.2603442668914795, "learning_rate": 7.952673268039465e-06, "loss": 0.05113852, "memory(GiB)": 13.7, "step": 35620, "train_speed(iter/s)": 1.532729 }, { "acc": 0.98340282, "epoch": 16.697914225451136, "grad_norm": 3.2955024242401123, "learning_rate": 7.95204768491145e-06, "loss": 0.06958045, "memory(GiB)": 13.7, "step": 35625, "train_speed(iter/s)": 1.532738 }, { "acc": 0.98113098, "epoch": 16.70025779235997, "grad_norm": 17.094083786010742, "learning_rate": 7.951422030837927e-06, "loss": 0.12505715, "memory(GiB)": 13.7, "step": 35630, "train_speed(iter/s)": 1.532746 }, { "acc": 0.9802084, "epoch": 16.702601359268808, "grad_norm": 0.1382574886083603, "learning_rate": 7.950796305833932e-06, "loss": 0.08518472, "memory(GiB)": 13.7, "step": 35635, "train_speed(iter/s)": 1.532752 }, { "acc": 0.98027782, "epoch": 16.704944926177642, "grad_norm": 7.018346786499023, "learning_rate": 7.950170509914505e-06, "loss": 0.12406702, "memory(GiB)": 13.7, "step": 35640, "train_speed(iter/s)": 1.532764 }, { "acc": 0.98155937, "epoch": 16.707288493086477, "grad_norm": 8.213397979736328, "learning_rate": 7.949544643094689e-06, "loss": 0.09877553, "memory(GiB)": 13.7, "step": 35645, "train_speed(iter/s)": 1.53277 }, { "acc": 0.96276779, "epoch": 16.709632059995315, "grad_norm": 4.444516181945801, "learning_rate": 7.94891870538953e-06, "loss": 0.18825016, "memory(GiB)": 13.7, "step": 35650, "train_speed(iter/s)": 1.532777 }, { "acc": 0.97898989, "epoch": 16.71197562690415, "grad_norm": 0.7834565043449402, "learning_rate": 7.948292696814071e-06, "loss": 0.08199028, "memory(GiB)": 13.7, "step": 35655, "train_speed(iter/s)": 1.532764 }, { "acc": 0.97549133, "epoch": 16.714319193812983, "grad_norm": 6.287900447845459, "learning_rate": 7.94766661738336e-06, "loss": 0.11104882, "memory(GiB)": 13.7, "step": 35660, "train_speed(iter/s)": 1.532767 }, { "acc": 0.97469692, "epoch": 16.716662760721817, "grad_norm": 1.710225224494934, "learning_rate": 7.947040467112444e-06, "loss": 0.1324239, "memory(GiB)": 13.7, "step": 35665, "train_speed(iter/s)": 1.532778 }, { "acc": 0.991572, "epoch": 16.719006327630655, "grad_norm": 0.0038920463994145393, "learning_rate": 7.946414246016378e-06, "loss": 0.03817669, "memory(GiB)": 13.7, "step": 35670, "train_speed(iter/s)": 1.532775 }, { "acc": 0.98125, "epoch": 16.72134989453949, "grad_norm": 5.3193159103393555, "learning_rate": 7.945787954110211e-06, "loss": 0.10274659, "memory(GiB)": 13.7, "step": 35675, "train_speed(iter/s)": 1.532783 }, { "acc": 0.97229166, "epoch": 16.723693461448324, "grad_norm": 0.01421032939106226, "learning_rate": 7.945161591408998e-06, "loss": 0.06507086, "memory(GiB)": 13.7, "step": 35680, "train_speed(iter/s)": 1.532799 }, { "acc": 0.96833334, "epoch": 16.726037028357158, "grad_norm": 5.756342887878418, "learning_rate": 7.944535157927793e-06, "loss": 0.15555251, "memory(GiB)": 13.7, "step": 35685, "train_speed(iter/s)": 1.532809 }, { "acc": 0.97559528, "epoch": 16.728380595265996, "grad_norm": 1.959757924079895, "learning_rate": 7.943908653681653e-06, "loss": 0.10536926, "memory(GiB)": 13.7, "step": 35690, "train_speed(iter/s)": 1.532815 }, { "acc": 0.97879696, "epoch": 16.73072416217483, "grad_norm": 3.2217602729797363, "learning_rate": 7.943282078685642e-06, "loss": 0.14738424, "memory(GiB)": 13.7, "step": 35695, "train_speed(iter/s)": 1.532819 }, { "acc": 0.97420654, "epoch": 16.733067729083665, "grad_norm": 4.893171787261963, "learning_rate": 7.942655432954816e-06, "loss": 0.09523474, "memory(GiB)": 13.7, "step": 35700, "train_speed(iter/s)": 1.532843 }, { "acc": 0.98026724, "epoch": 16.7354112959925, "grad_norm": 3.275390148162842, "learning_rate": 7.94202871650424e-06, "loss": 0.0958672, "memory(GiB)": 13.7, "step": 35705, "train_speed(iter/s)": 1.532854 }, { "acc": 0.97592258, "epoch": 16.737754862901337, "grad_norm": 8.523584365844727, "learning_rate": 7.941401929348977e-06, "loss": 0.1145591, "memory(GiB)": 13.7, "step": 35710, "train_speed(iter/s)": 1.532859 }, { "acc": 0.99375, "epoch": 16.74009842981017, "grad_norm": 3.1424572467803955, "learning_rate": 7.940775071504092e-06, "loss": 0.06737127, "memory(GiB)": 13.7, "step": 35715, "train_speed(iter/s)": 1.532861 }, { "acc": 0.98815479, "epoch": 16.742441996719005, "grad_norm": 2.1922824382781982, "learning_rate": 7.940148142984655e-06, "loss": 0.07594672, "memory(GiB)": 13.7, "step": 35720, "train_speed(iter/s)": 1.532868 }, { "acc": 0.97145834, "epoch": 16.744785563627843, "grad_norm": 6.710118293762207, "learning_rate": 7.939521143805733e-06, "loss": 0.10902635, "memory(GiB)": 13.7, "step": 35725, "train_speed(iter/s)": 1.532889 }, { "acc": 0.98276787, "epoch": 16.747129130536678, "grad_norm": 6.4376959800720215, "learning_rate": 7.9388940739824e-06, "loss": 0.09880375, "memory(GiB)": 13.7, "step": 35730, "train_speed(iter/s)": 1.532892 }, { "acc": 0.97235126, "epoch": 16.749472697445512, "grad_norm": 5.289013385772705, "learning_rate": 7.938266933529728e-06, "loss": 0.12778, "memory(GiB)": 13.7, "step": 35735, "train_speed(iter/s)": 1.532906 }, { "acc": 0.98884802, "epoch": 16.751816264354346, "grad_norm": 1.525795578956604, "learning_rate": 7.937639722462787e-06, "loss": 0.04144527, "memory(GiB)": 13.7, "step": 35740, "train_speed(iter/s)": 1.532911 }, { "acc": 0.96991072, "epoch": 16.754159831263184, "grad_norm": 2.9950456619262695, "learning_rate": 7.937012440796657e-06, "loss": 0.1150681, "memory(GiB)": 13.7, "step": 35745, "train_speed(iter/s)": 1.532913 }, { "acc": 0.96148767, "epoch": 16.75650339817202, "grad_norm": 2.8091580867767334, "learning_rate": 7.936385088546415e-06, "loss": 0.17316905, "memory(GiB)": 13.7, "step": 35750, "train_speed(iter/s)": 1.532925 }, { "acc": 0.96808033, "epoch": 16.758846965080853, "grad_norm": 1.1971954107284546, "learning_rate": 7.935757665727143e-06, "loss": 0.13758618, "memory(GiB)": 13.7, "step": 35755, "train_speed(iter/s)": 1.532924 }, { "acc": 0.98705359, "epoch": 16.761190531989687, "grad_norm": 1.3316234350204468, "learning_rate": 7.935130172353917e-06, "loss": 0.09219545, "memory(GiB)": 13.7, "step": 35760, "train_speed(iter/s)": 1.532926 }, { "acc": 0.97527781, "epoch": 16.763534098898525, "grad_norm": 10.45249080657959, "learning_rate": 7.934502608441824e-06, "loss": 0.15791507, "memory(GiB)": 13.7, "step": 35765, "train_speed(iter/s)": 1.53294 }, { "acc": 0.98819447, "epoch": 16.76587766580736, "grad_norm": 0.025453219190239906, "learning_rate": 7.933874974005947e-06, "loss": 0.04580038, "memory(GiB)": 13.7, "step": 35770, "train_speed(iter/s)": 1.532946 }, { "acc": 0.9890625, "epoch": 16.768221232716193, "grad_norm": 3.136298179626465, "learning_rate": 7.933247269061373e-06, "loss": 0.05192198, "memory(GiB)": 13.7, "step": 35775, "train_speed(iter/s)": 1.532941 }, { "acc": 0.96989584, "epoch": 16.770564799625028, "grad_norm": 7.618168830871582, "learning_rate": 7.932619493623186e-06, "loss": 0.11178998, "memory(GiB)": 13.7, "step": 35780, "train_speed(iter/s)": 1.532954 }, { "acc": 0.97770834, "epoch": 16.772908366533866, "grad_norm": 4.045332431793213, "learning_rate": 7.931991647706486e-06, "loss": 0.07489202, "memory(GiB)": 13.7, "step": 35785, "train_speed(iter/s)": 1.532948 }, { "acc": 0.97221355, "epoch": 16.7752519334427, "grad_norm": 8.67349624633789, "learning_rate": 7.931363731326351e-06, "loss": 0.11703181, "memory(GiB)": 13.7, "step": 35790, "train_speed(iter/s)": 1.532949 }, { "acc": 0.9822917, "epoch": 16.777595500351534, "grad_norm": 2.1465446949005127, "learning_rate": 7.930735744497884e-06, "loss": 0.08135052, "memory(GiB)": 13.7, "step": 35795, "train_speed(iter/s)": 1.532946 }, { "acc": 0.97726192, "epoch": 16.779939067260372, "grad_norm": 6.085187911987305, "learning_rate": 7.930107687236175e-06, "loss": 0.11498624, "memory(GiB)": 13.7, "step": 35800, "train_speed(iter/s)": 1.532956 }, { "acc": 0.97603626, "epoch": 16.782282634169206, "grad_norm": 4.83567476272583, "learning_rate": 7.929479559556321e-06, "loss": 0.069815, "memory(GiB)": 13.7, "step": 35805, "train_speed(iter/s)": 1.532956 }, { "acc": 0.97539225, "epoch": 16.78462620107804, "grad_norm": 3.2639365196228027, "learning_rate": 7.928851361473422e-06, "loss": 0.09744298, "memory(GiB)": 13.7, "step": 35810, "train_speed(iter/s)": 1.532971 }, { "acc": 0.98520832, "epoch": 16.786969767986875, "grad_norm": 1.1717393398284912, "learning_rate": 7.928223093002577e-06, "loss": 0.04413209, "memory(GiB)": 13.7, "step": 35815, "train_speed(iter/s)": 1.532984 }, { "acc": 0.97145834, "epoch": 16.789313334895713, "grad_norm": 7.555881023406982, "learning_rate": 7.927594754158887e-06, "loss": 0.10253577, "memory(GiB)": 13.7, "step": 35820, "train_speed(iter/s)": 1.532998 }, { "acc": 0.98019028, "epoch": 16.791656901804547, "grad_norm": 0.1609218418598175, "learning_rate": 7.926966344957455e-06, "loss": 0.116908, "memory(GiB)": 13.7, "step": 35825, "train_speed(iter/s)": 1.533 }, { "acc": 0.9828373, "epoch": 16.79400046871338, "grad_norm": 2.524811029434204, "learning_rate": 7.926337865413386e-06, "loss": 0.08218517, "memory(GiB)": 13.7, "step": 35830, "train_speed(iter/s)": 1.533014 }, { "acc": 0.97225704, "epoch": 16.796344035622216, "grad_norm": 1.915582537651062, "learning_rate": 7.925709315541788e-06, "loss": 0.15580226, "memory(GiB)": 13.7, "step": 35835, "train_speed(iter/s)": 1.533012 }, { "acc": 0.97583332, "epoch": 16.798687602531054, "grad_norm": 3.0930583477020264, "learning_rate": 7.925080695357768e-06, "loss": 0.06645154, "memory(GiB)": 13.7, "step": 35840, "train_speed(iter/s)": 1.533017 }, { "acc": 0.96918049, "epoch": 16.801031169439888, "grad_norm": 5.675683498382568, "learning_rate": 7.924452004876434e-06, "loss": 0.14020287, "memory(GiB)": 13.7, "step": 35845, "train_speed(iter/s)": 1.533018 }, { "acc": 0.97878475, "epoch": 16.803374736348722, "grad_norm": 8.631660461425781, "learning_rate": 7.923823244112901e-06, "loss": 0.08529074, "memory(GiB)": 13.7, "step": 35850, "train_speed(iter/s)": 1.533019 }, { "acc": 0.97788696, "epoch": 16.805718303257557, "grad_norm": 0.1411089450120926, "learning_rate": 7.923194413082282e-06, "loss": 0.08132619, "memory(GiB)": 13.7, "step": 35855, "train_speed(iter/s)": 1.533023 }, { "acc": 0.98125, "epoch": 16.808061870166394, "grad_norm": 4.787132263183594, "learning_rate": 7.92256551179969e-06, "loss": 0.06462113, "memory(GiB)": 13.7, "step": 35860, "train_speed(iter/s)": 1.533023 }, { "acc": 0.9827446, "epoch": 16.81040543707523, "grad_norm": 1.0030122995376587, "learning_rate": 7.921936540280243e-06, "loss": 0.09936593, "memory(GiB)": 13.7, "step": 35865, "train_speed(iter/s)": 1.533026 }, { "acc": 0.9875, "epoch": 16.812749003984063, "grad_norm": 3.1097829341888428, "learning_rate": 7.92130749853906e-06, "loss": 0.07029154, "memory(GiB)": 13.7, "step": 35870, "train_speed(iter/s)": 1.53303 }, { "acc": 0.97791672, "epoch": 16.815092570892897, "grad_norm": 8.049542427062988, "learning_rate": 7.920678386591258e-06, "loss": 0.06275548, "memory(GiB)": 13.7, "step": 35875, "train_speed(iter/s)": 1.533029 }, { "acc": 0.98071423, "epoch": 16.817436137801735, "grad_norm": 5.054859161376953, "learning_rate": 7.920049204451962e-06, "loss": 0.07416279, "memory(GiB)": 13.7, "step": 35880, "train_speed(iter/s)": 1.533025 }, { "acc": 0.97750711, "epoch": 16.81977970471057, "grad_norm": 3.9470386505126953, "learning_rate": 7.919419952136296e-06, "loss": 0.08006167, "memory(GiB)": 13.7, "step": 35885, "train_speed(iter/s)": 1.533036 }, { "acc": 0.98883934, "epoch": 16.822123271619404, "grad_norm": 1.1556322574615479, "learning_rate": 7.918790629659384e-06, "loss": 0.06173989, "memory(GiB)": 13.7, "step": 35890, "train_speed(iter/s)": 1.533043 }, { "acc": 0.98415184, "epoch": 16.82446683852824, "grad_norm": 1.988755702972412, "learning_rate": 7.918161237036351e-06, "loss": 0.08492954, "memory(GiB)": 13.7, "step": 35895, "train_speed(iter/s)": 1.533036 }, { "acc": 0.96851187, "epoch": 16.826810405437076, "grad_norm": 9.478392601013184, "learning_rate": 7.917531774282329e-06, "loss": 0.07736337, "memory(GiB)": 13.7, "step": 35900, "train_speed(iter/s)": 1.533052 }, { "acc": 0.97625008, "epoch": 16.82915397234591, "grad_norm": 4.057054042816162, "learning_rate": 7.916902241412444e-06, "loss": 0.09073439, "memory(GiB)": 13.7, "step": 35905, "train_speed(iter/s)": 1.533055 }, { "acc": 0.97006559, "epoch": 16.831497539254745, "grad_norm": 4.921230316162109, "learning_rate": 7.916272638441832e-06, "loss": 0.12321506, "memory(GiB)": 13.7, "step": 35910, "train_speed(iter/s)": 1.533064 }, { "acc": 0.9838541, "epoch": 16.833841106163582, "grad_norm": 3.19881534576416, "learning_rate": 7.915642965385624e-06, "loss": 0.04669952, "memory(GiB)": 13.7, "step": 35915, "train_speed(iter/s)": 1.533072 }, { "acc": 0.99077387, "epoch": 16.836184673072417, "grad_norm": 4.729652404785156, "learning_rate": 7.915013222258955e-06, "loss": 0.05716009, "memory(GiB)": 13.7, "step": 35920, "train_speed(iter/s)": 1.533072 }, { "acc": 0.99504414, "epoch": 16.83852823998125, "grad_norm": 0.09382839500904083, "learning_rate": 7.914383409076966e-06, "loss": 0.04378916, "memory(GiB)": 13.7, "step": 35925, "train_speed(iter/s)": 1.53308 }, { "acc": 0.98883924, "epoch": 16.840871806890085, "grad_norm": 1.3340178728103638, "learning_rate": 7.913753525854791e-06, "loss": 0.03740684, "memory(GiB)": 13.7, "step": 35930, "train_speed(iter/s)": 1.533079 }, { "acc": 0.9730979, "epoch": 16.843215373798923, "grad_norm": 7.133692264556885, "learning_rate": 7.913123572607571e-06, "loss": 0.14775345, "memory(GiB)": 13.7, "step": 35935, "train_speed(iter/s)": 1.533076 }, { "acc": 0.97905083, "epoch": 16.845558940707758, "grad_norm": 5.115228176116943, "learning_rate": 7.912493549350452e-06, "loss": 0.08947406, "memory(GiB)": 13.7, "step": 35940, "train_speed(iter/s)": 1.533076 }, { "acc": 0.99250984, "epoch": 16.847902507616592, "grad_norm": 2.9524712562561035, "learning_rate": 7.911863456098573e-06, "loss": 0.03578143, "memory(GiB)": 13.7, "step": 35945, "train_speed(iter/s)": 1.53309 }, { "acc": 0.98312502, "epoch": 16.850246074525426, "grad_norm": 3.717971086502075, "learning_rate": 7.91123329286708e-06, "loss": 0.06117755, "memory(GiB)": 13.7, "step": 35950, "train_speed(iter/s)": 1.5331 }, { "acc": 0.98185101, "epoch": 16.852589641434264, "grad_norm": 3.8024256229400635, "learning_rate": 7.910603059671123e-06, "loss": 0.0968851, "memory(GiB)": 13.7, "step": 35955, "train_speed(iter/s)": 1.533114 }, { "acc": 0.97564983, "epoch": 16.8549332083431, "grad_norm": 3.5577504634857178, "learning_rate": 7.909972756525846e-06, "loss": 0.14046025, "memory(GiB)": 13.7, "step": 35960, "train_speed(iter/s)": 1.533114 }, { "acc": 0.96277771, "epoch": 16.857276775251933, "grad_norm": 8.828560829162598, "learning_rate": 7.909342383446405e-06, "loss": 0.18157393, "memory(GiB)": 13.7, "step": 35965, "train_speed(iter/s)": 1.533115 }, { "acc": 0.9856945, "epoch": 16.859620342160767, "grad_norm": 2.663782835006714, "learning_rate": 7.90871194044795e-06, "loss": 0.06054291, "memory(GiB)": 13.7, "step": 35970, "train_speed(iter/s)": 1.533115 }, { "acc": 0.97744789, "epoch": 16.861963909069605, "grad_norm": 3.097435235977173, "learning_rate": 7.908081427545633e-06, "loss": 0.0862952, "memory(GiB)": 13.7, "step": 35975, "train_speed(iter/s)": 1.533116 }, { "acc": 0.98049679, "epoch": 16.86430747597844, "grad_norm": 3.5450599193573, "learning_rate": 7.90745084475461e-06, "loss": 0.08360231, "memory(GiB)": 13.7, "step": 35980, "train_speed(iter/s)": 1.533116 }, { "acc": 0.98482466, "epoch": 16.866651042887273, "grad_norm": 4.40536642074585, "learning_rate": 7.906820192090039e-06, "loss": 0.08365293, "memory(GiB)": 13.7, "step": 35985, "train_speed(iter/s)": 1.533117 }, { "acc": 0.99020834, "epoch": 16.86899460979611, "grad_norm": 0.9686586856842041, "learning_rate": 7.906189469567079e-06, "loss": 0.0565886, "memory(GiB)": 13.7, "step": 35990, "train_speed(iter/s)": 1.533122 }, { "acc": 0.98540916, "epoch": 16.871338176704946, "grad_norm": 2.5475451946258545, "learning_rate": 7.90555867720089e-06, "loss": 0.08730716, "memory(GiB)": 13.7, "step": 35995, "train_speed(iter/s)": 1.533135 }, { "acc": 0.98184528, "epoch": 16.87368174361378, "grad_norm": 3.7842094898223877, "learning_rate": 7.904927815006632e-06, "loss": 0.07878768, "memory(GiB)": 13.7, "step": 36000, "train_speed(iter/s)": 1.533132 }, { "acc": 0.9904315, "epoch": 16.876025310522614, "grad_norm": 0.0074408939108252525, "learning_rate": 7.904296882999473e-06, "loss": 0.05652111, "memory(GiB)": 13.7, "step": 36005, "train_speed(iter/s)": 1.533129 }, { "acc": 0.9875, "epoch": 16.878368877431452, "grad_norm": 1.6558523178100586, "learning_rate": 7.903665881194578e-06, "loss": 0.03191003, "memory(GiB)": 13.7, "step": 36010, "train_speed(iter/s)": 1.533133 }, { "acc": 0.96559038, "epoch": 16.880712444340286, "grad_norm": 6.0886030197143555, "learning_rate": 7.903034809607112e-06, "loss": 0.15208009, "memory(GiB)": 13.7, "step": 36015, "train_speed(iter/s)": 1.533137 }, { "acc": 0.98418655, "epoch": 16.88305601124912, "grad_norm": 5.741796016693115, "learning_rate": 7.902403668252243e-06, "loss": 0.05785445, "memory(GiB)": 13.7, "step": 36020, "train_speed(iter/s)": 1.53314 }, { "acc": 0.97349663, "epoch": 16.885399578157955, "grad_norm": 6.077870845794678, "learning_rate": 7.901772457145145e-06, "loss": 0.14333905, "memory(GiB)": 13.7, "step": 36025, "train_speed(iter/s)": 1.533137 }, { "acc": 0.97738094, "epoch": 16.887743145066793, "grad_norm": 4.576150417327881, "learning_rate": 7.901141176300989e-06, "loss": 0.10221322, "memory(GiB)": 13.7, "step": 36030, "train_speed(iter/s)": 1.53314 }, { "acc": 0.9791666, "epoch": 16.890086711975627, "grad_norm": 21.791942596435547, "learning_rate": 7.900509825734947e-06, "loss": 0.10086778, "memory(GiB)": 13.7, "step": 36035, "train_speed(iter/s)": 1.533143 }, { "acc": 0.99129467, "epoch": 16.89243027888446, "grad_norm": 0.0030408096499741077, "learning_rate": 7.899878405462197e-06, "loss": 0.04616286, "memory(GiB)": 13.7, "step": 36040, "train_speed(iter/s)": 1.533138 }, { "acc": 0.97749996, "epoch": 16.894773845793296, "grad_norm": 7.35791015625, "learning_rate": 7.899246915497915e-06, "loss": 0.11802245, "memory(GiB)": 13.7, "step": 36045, "train_speed(iter/s)": 1.533147 }, { "acc": 0.97352734, "epoch": 16.897117412702134, "grad_norm": 0.3922514319419861, "learning_rate": 7.898615355857281e-06, "loss": 0.08693894, "memory(GiB)": 13.7, "step": 36050, "train_speed(iter/s)": 1.533152 }, { "acc": 0.98416672, "epoch": 16.899460979610968, "grad_norm": 3.836782455444336, "learning_rate": 7.897983726555475e-06, "loss": 0.06464253, "memory(GiB)": 13.7, "step": 36055, "train_speed(iter/s)": 1.533155 }, { "acc": 0.97848215, "epoch": 16.901804546519802, "grad_norm": 3.8518128395080566, "learning_rate": 7.897352027607677e-06, "loss": 0.14556942, "memory(GiB)": 13.7, "step": 36060, "train_speed(iter/s)": 1.533151 }, { "acc": 0.97881947, "epoch": 16.90414811342864, "grad_norm": 4.22980260848999, "learning_rate": 7.896720259029075e-06, "loss": 0.05891604, "memory(GiB)": 13.7, "step": 36065, "train_speed(iter/s)": 1.533151 }, { "acc": 0.98071766, "epoch": 16.906491680337474, "grad_norm": 37.66700744628906, "learning_rate": 7.896088420834855e-06, "loss": 0.08516099, "memory(GiB)": 13.7, "step": 36070, "train_speed(iter/s)": 1.533159 }, { "acc": 0.990625, "epoch": 16.90883524724631, "grad_norm": 5.218933582305908, "learning_rate": 7.895456513040198e-06, "loss": 0.06609436, "memory(GiB)": 13.7, "step": 36075, "train_speed(iter/s)": 1.533169 }, { "acc": 0.98041668, "epoch": 16.911178814155143, "grad_norm": 4.345597267150879, "learning_rate": 7.894824535660299e-06, "loss": 0.11052419, "memory(GiB)": 13.7, "step": 36080, "train_speed(iter/s)": 1.533161 }, { "acc": 0.97562504, "epoch": 16.91352238106398, "grad_norm": 3.3969569206237793, "learning_rate": 7.894192488710346e-06, "loss": 0.14961203, "memory(GiB)": 13.7, "step": 36085, "train_speed(iter/s)": 1.533165 }, { "acc": 0.97902775, "epoch": 16.915865947972815, "grad_norm": 5.930027008056641, "learning_rate": 7.893560372205532e-06, "loss": 0.07121695, "memory(GiB)": 13.7, "step": 36090, "train_speed(iter/s)": 1.533175 }, { "acc": 0.99266939, "epoch": 16.91820951488165, "grad_norm": 5.9827775955200195, "learning_rate": 7.892928186161052e-06, "loss": 0.04315202, "memory(GiB)": 13.7, "step": 36095, "train_speed(iter/s)": 1.533184 }, { "acc": 0.986269, "epoch": 16.920553081790484, "grad_norm": 0.32057005167007446, "learning_rate": 7.8922959305921e-06, "loss": 0.07815324, "memory(GiB)": 13.7, "step": 36100, "train_speed(iter/s)": 1.533183 }, { "acc": 0.96844549, "epoch": 16.92289664869932, "grad_norm": 2.3237597942352295, "learning_rate": 7.891663605513876e-06, "loss": 0.10082639, "memory(GiB)": 13.7, "step": 36105, "train_speed(iter/s)": 1.533201 }, { "acc": 0.9800808, "epoch": 16.925240215608156, "grad_norm": 0.04061134159564972, "learning_rate": 7.891031210941574e-06, "loss": 0.06700748, "memory(GiB)": 13.7, "step": 36110, "train_speed(iter/s)": 1.533216 }, { "acc": 0.98666668, "epoch": 16.92758378251699, "grad_norm": 2.814460515975952, "learning_rate": 7.890398746890401e-06, "loss": 0.06033329, "memory(GiB)": 13.7, "step": 36115, "train_speed(iter/s)": 1.533224 }, { "acc": 0.9822917, "epoch": 16.929927349425824, "grad_norm": 4.456396579742432, "learning_rate": 7.889766213375554e-06, "loss": 0.08145905, "memory(GiB)": 13.7, "step": 36120, "train_speed(iter/s)": 1.533228 }, { "acc": 0.98498545, "epoch": 16.932270916334662, "grad_norm": 2.206465005874634, "learning_rate": 7.88913361041224e-06, "loss": 0.06812952, "memory(GiB)": 13.7, "step": 36125, "train_speed(iter/s)": 1.533239 }, { "acc": 0.97558413, "epoch": 16.934614483243497, "grad_norm": 0.9010413289070129, "learning_rate": 7.888500938015664e-06, "loss": 0.09990651, "memory(GiB)": 13.7, "step": 36130, "train_speed(iter/s)": 1.533247 }, { "acc": 0.97997017, "epoch": 16.93695805015233, "grad_norm": 4.23579740524292, "learning_rate": 7.887868196201034e-06, "loss": 0.14425721, "memory(GiB)": 13.7, "step": 36135, "train_speed(iter/s)": 1.533256 }, { "acc": 0.98365536, "epoch": 16.93930161706117, "grad_norm": 0.3726617395877838, "learning_rate": 7.887235384983554e-06, "loss": 0.03622161, "memory(GiB)": 13.7, "step": 36140, "train_speed(iter/s)": 1.533255 }, { "acc": 0.98203373, "epoch": 16.941645183970003, "grad_norm": 9.800973892211914, "learning_rate": 7.886602504378444e-06, "loss": 0.08120265, "memory(GiB)": 13.7, "step": 36145, "train_speed(iter/s)": 1.533264 }, { "acc": 0.96828127, "epoch": 16.943988750878837, "grad_norm": 4.8265061378479, "learning_rate": 7.885969554400912e-06, "loss": 0.12636697, "memory(GiB)": 13.7, "step": 36150, "train_speed(iter/s)": 1.533265 }, { "acc": 0.98530741, "epoch": 16.94633231778767, "grad_norm": 1.911143183708191, "learning_rate": 7.885336535066167e-06, "loss": 0.10204377, "memory(GiB)": 13.7, "step": 36155, "train_speed(iter/s)": 1.53326 }, { "acc": 0.97433043, "epoch": 16.94867588469651, "grad_norm": 2.8966825008392334, "learning_rate": 7.884703446389432e-06, "loss": 0.12375576, "memory(GiB)": 13.7, "step": 36160, "train_speed(iter/s)": 1.533266 }, { "acc": 0.97791672, "epoch": 16.951019451605344, "grad_norm": 2.4714486598968506, "learning_rate": 7.88407028838592e-06, "loss": 0.08395389, "memory(GiB)": 13.7, "step": 36165, "train_speed(iter/s)": 1.533269 }, { "acc": 0.98971977, "epoch": 16.953363018514178, "grad_norm": 1.817192554473877, "learning_rate": 7.883437061070853e-06, "loss": 0.04903303, "memory(GiB)": 13.7, "step": 36170, "train_speed(iter/s)": 1.533256 }, { "acc": 0.98159723, "epoch": 16.955706585423012, "grad_norm": 8.265588760375977, "learning_rate": 7.882803764459449e-06, "loss": 0.11641978, "memory(GiB)": 13.7, "step": 36175, "train_speed(iter/s)": 1.533267 }, { "acc": 0.98520222, "epoch": 16.95805015233185, "grad_norm": 4.279609203338623, "learning_rate": 7.882170398566932e-06, "loss": 0.05818248, "memory(GiB)": 13.7, "step": 36180, "train_speed(iter/s)": 1.533272 }, { "acc": 0.98534727, "epoch": 16.960393719240685, "grad_norm": 1.8899781703948975, "learning_rate": 7.881536963408526e-06, "loss": 0.07295398, "memory(GiB)": 13.7, "step": 36185, "train_speed(iter/s)": 1.533284 }, { "acc": 0.97292166, "epoch": 16.96273728614952, "grad_norm": 4.466211318969727, "learning_rate": 7.880903458999456e-06, "loss": 0.12783523, "memory(GiB)": 13.7, "step": 36190, "train_speed(iter/s)": 1.533298 }, { "acc": 0.98164263, "epoch": 16.965080853058353, "grad_norm": 0.02893855981528759, "learning_rate": 7.88026988535495e-06, "loss": 0.09201987, "memory(GiB)": 13.7, "step": 36195, "train_speed(iter/s)": 1.533302 }, { "acc": 0.99027786, "epoch": 16.96742441996719, "grad_norm": 7.033449649810791, "learning_rate": 7.879636242490235e-06, "loss": 0.04661134, "memory(GiB)": 13.7, "step": 36200, "train_speed(iter/s)": 1.533308 }, { "acc": 0.94458332, "epoch": 16.969767986876025, "grad_norm": 4.53453254699707, "learning_rate": 7.879002530420545e-06, "loss": 0.22326279, "memory(GiB)": 13.7, "step": 36205, "train_speed(iter/s)": 1.533304 }, { "acc": 0.9822916, "epoch": 16.97211155378486, "grad_norm": 6.713918209075928, "learning_rate": 7.878368749161112e-06, "loss": 0.0500286, "memory(GiB)": 13.7, "step": 36210, "train_speed(iter/s)": 1.5333 }, { "acc": 0.96897812, "epoch": 16.974455120693698, "grad_norm": 7.9330620765686035, "learning_rate": 7.877734898727167e-06, "loss": 0.16537824, "memory(GiB)": 13.7, "step": 36215, "train_speed(iter/s)": 1.533305 }, { "acc": 0.9942709, "epoch": 16.976798687602532, "grad_norm": 11.888998985290527, "learning_rate": 7.877100979133949e-06, "loss": 0.0196278, "memory(GiB)": 13.7, "step": 36220, "train_speed(iter/s)": 1.533318 }, { "acc": 0.98392859, "epoch": 16.979142254511366, "grad_norm": 1.3867685794830322, "learning_rate": 7.876466990396692e-06, "loss": 0.13958914, "memory(GiB)": 13.7, "step": 36225, "train_speed(iter/s)": 1.533327 }, { "acc": 0.98078327, "epoch": 16.9814858214202, "grad_norm": 8.625897407531738, "learning_rate": 7.875832932530638e-06, "loss": 0.10686779, "memory(GiB)": 13.7, "step": 36230, "train_speed(iter/s)": 1.533335 }, { "acc": 0.99150562, "epoch": 16.98382938832904, "grad_norm": 4.99009370803833, "learning_rate": 7.875198805551025e-06, "loss": 0.04931847, "memory(GiB)": 13.7, "step": 36235, "train_speed(iter/s)": 1.533333 }, { "acc": 0.97035933, "epoch": 16.986172955237873, "grad_norm": 5.9666900634765625, "learning_rate": 7.874564609473098e-06, "loss": 0.11148092, "memory(GiB)": 13.7, "step": 36240, "train_speed(iter/s)": 1.533341 }, { "acc": 0.98708344, "epoch": 16.988516522146707, "grad_norm": 1.289044737815857, "learning_rate": 7.873930344312101e-06, "loss": 0.06152084, "memory(GiB)": 13.7, "step": 36245, "train_speed(iter/s)": 1.533349 }, { "acc": 0.98574409, "epoch": 16.99086008905554, "grad_norm": 1.2899456024169922, "learning_rate": 7.873296010083277e-06, "loss": 0.0654797, "memory(GiB)": 13.7, "step": 36250, "train_speed(iter/s)": 1.533343 }, { "acc": 0.98187504, "epoch": 16.99320365596438, "grad_norm": 3.325972080230713, "learning_rate": 7.872661606801874e-06, "loss": 0.07536955, "memory(GiB)": 13.7, "step": 36255, "train_speed(iter/s)": 1.533346 }, { "acc": 0.99049683, "epoch": 16.995547222873213, "grad_norm": 1.0463041067123413, "learning_rate": 7.872027134483144e-06, "loss": 0.04413128, "memory(GiB)": 13.7, "step": 36260, "train_speed(iter/s)": 1.533354 }, { "acc": 0.97770834, "epoch": 16.997890789782048, "grad_norm": 6.18942928314209, "learning_rate": 7.871392593142335e-06, "loss": 0.16129121, "memory(GiB)": 13.7, "step": 36265, "train_speed(iter/s)": 1.533355 }, { "acc": 0.97291126, "epoch": 17.000234356690882, "grad_norm": 0.8540231585502625, "learning_rate": 7.870757982794697e-06, "loss": 0.11203153, "memory(GiB)": 13.7, "step": 36270, "train_speed(iter/s)": 1.533323 }, { "acc": 0.96974211, "epoch": 17.00257792359972, "grad_norm": 2.3303332328796387, "learning_rate": 7.870123303455488e-06, "loss": 0.17973212, "memory(GiB)": 13.7, "step": 36275, "train_speed(iter/s)": 1.533331 }, { "acc": 0.98029766, "epoch": 17.004921490508554, "grad_norm": 2.25529408454895, "learning_rate": 7.869488555139964e-06, "loss": 0.10176187, "memory(GiB)": 13.7, "step": 36280, "train_speed(iter/s)": 1.533324 }, { "acc": 0.97495537, "epoch": 17.00726505741739, "grad_norm": 6.908610820770264, "learning_rate": 7.868853737863379e-06, "loss": 0.11511608, "memory(GiB)": 13.7, "step": 36285, "train_speed(iter/s)": 1.53334 }, { "acc": 0.9927084, "epoch": 17.009608624326223, "grad_norm": 0.05517285317182541, "learning_rate": 7.868218851640994e-06, "loss": 0.02895798, "memory(GiB)": 13.7, "step": 36290, "train_speed(iter/s)": 1.533342 }, { "acc": 0.9661541, "epoch": 17.01195219123506, "grad_norm": 8.365413665771484, "learning_rate": 7.867583896488068e-06, "loss": 0.10145452, "memory(GiB)": 13.7, "step": 36295, "train_speed(iter/s)": 1.533347 }, { "acc": 0.98399448, "epoch": 17.014295758143895, "grad_norm": 4.053845405578613, "learning_rate": 7.866948872419865e-06, "loss": 0.10187023, "memory(GiB)": 13.7, "step": 36300, "train_speed(iter/s)": 1.533355 }, { "acc": 0.98708344, "epoch": 17.01663932505273, "grad_norm": 5.700409412384033, "learning_rate": 7.86631377945165e-06, "loss": 0.05719513, "memory(GiB)": 13.7, "step": 36305, "train_speed(iter/s)": 1.533363 }, { "acc": 0.99502974, "epoch": 17.018982891961567, "grad_norm": 0.4537149965763092, "learning_rate": 7.865678617598683e-06, "loss": 0.02490089, "memory(GiB)": 13.7, "step": 36310, "train_speed(iter/s)": 1.533378 }, { "acc": 0.98604164, "epoch": 17.0213264588704, "grad_norm": 1.0095772743225098, "learning_rate": 7.865043386876237e-06, "loss": 0.04437749, "memory(GiB)": 13.7, "step": 36315, "train_speed(iter/s)": 1.533387 }, { "acc": 0.98222218, "epoch": 17.023670025779236, "grad_norm": 4.261697292327881, "learning_rate": 7.864408087299578e-06, "loss": 0.06383472, "memory(GiB)": 13.7, "step": 36320, "train_speed(iter/s)": 1.533391 }, { "acc": 0.97687502, "epoch": 17.02601359268807, "grad_norm": 3.1666526794433594, "learning_rate": 7.863772718883977e-06, "loss": 0.08182243, "memory(GiB)": 13.7, "step": 36325, "train_speed(iter/s)": 1.533391 }, { "acc": 0.97184801, "epoch": 17.028357159596908, "grad_norm": 5.694510459899902, "learning_rate": 7.863137281644708e-06, "loss": 0.15111942, "memory(GiB)": 13.7, "step": 36330, "train_speed(iter/s)": 1.533401 }, { "acc": 0.97835226, "epoch": 17.030700726505742, "grad_norm": 8.936201095581055, "learning_rate": 7.862501775597042e-06, "loss": 0.09234042, "memory(GiB)": 13.7, "step": 36335, "train_speed(iter/s)": 1.533414 }, { "acc": 0.96898575, "epoch": 17.033044293414576, "grad_norm": 4.665838241577148, "learning_rate": 7.861866200756257e-06, "loss": 0.15238432, "memory(GiB)": 13.7, "step": 36340, "train_speed(iter/s)": 1.533411 }, { "acc": 0.97027779, "epoch": 17.03538786032341, "grad_norm": 4.396013259887695, "learning_rate": 7.861230557137626e-06, "loss": 0.11902974, "memory(GiB)": 13.7, "step": 36345, "train_speed(iter/s)": 1.533426 }, { "acc": 0.97697296, "epoch": 17.03773142723225, "grad_norm": 2.7158596515655518, "learning_rate": 7.860594844756434e-06, "loss": 0.10102797, "memory(GiB)": 13.7, "step": 36350, "train_speed(iter/s)": 1.533428 }, { "acc": 0.97153282, "epoch": 17.040074994141083, "grad_norm": 4.417887210845947, "learning_rate": 7.859959063627955e-06, "loss": 0.16443679, "memory(GiB)": 13.7, "step": 36355, "train_speed(iter/s)": 1.533439 }, { "acc": 0.9822916, "epoch": 17.042418561049917, "grad_norm": 6.962798118591309, "learning_rate": 7.859323213767478e-06, "loss": 0.12589509, "memory(GiB)": 13.7, "step": 36360, "train_speed(iter/s)": 1.533443 }, { "acc": 0.96883116, "epoch": 17.04476212795875, "grad_norm": 11.68781566619873, "learning_rate": 7.85868729519028e-06, "loss": 0.09779803, "memory(GiB)": 13.7, "step": 36365, "train_speed(iter/s)": 1.533443 }, { "acc": 0.97474861, "epoch": 17.04710569486759, "grad_norm": 0.969927966594696, "learning_rate": 7.858051307911648e-06, "loss": 0.05160404, "memory(GiB)": 13.7, "step": 36370, "train_speed(iter/s)": 1.533453 }, { "acc": 0.97024059, "epoch": 17.049449261776424, "grad_norm": 3.4573349952697754, "learning_rate": 7.857415251946873e-06, "loss": 0.25096049, "memory(GiB)": 13.7, "step": 36375, "train_speed(iter/s)": 1.533471 }, { "acc": 0.98098221, "epoch": 17.051792828685258, "grad_norm": 1.9375791549682617, "learning_rate": 7.856779127311239e-06, "loss": 0.05336004, "memory(GiB)": 13.7, "step": 36380, "train_speed(iter/s)": 1.533478 }, { "acc": 0.96065979, "epoch": 17.054136395594096, "grad_norm": 10.855822563171387, "learning_rate": 7.856142934020042e-06, "loss": 0.23119593, "memory(GiB)": 13.7, "step": 36385, "train_speed(iter/s)": 1.533488 }, { "acc": 0.96970234, "epoch": 17.05647996250293, "grad_norm": 8.338834762573242, "learning_rate": 7.855506672088566e-06, "loss": 0.10712298, "memory(GiB)": 13.7, "step": 36390, "train_speed(iter/s)": 1.533496 }, { "acc": 0.99345646, "epoch": 17.058823529411764, "grad_norm": 2.037623405456543, "learning_rate": 7.854870341532112e-06, "loss": 0.05928203, "memory(GiB)": 13.7, "step": 36395, "train_speed(iter/s)": 1.533503 }, { "acc": 0.97479172, "epoch": 17.0611670963206, "grad_norm": 6.090406894683838, "learning_rate": 7.85423394236597e-06, "loss": 0.10052772, "memory(GiB)": 13.7, "step": 36400, "train_speed(iter/s)": 1.53351 }, { "acc": 0.9958333, "epoch": 17.063510663229437, "grad_norm": 1.7286845445632935, "learning_rate": 7.853597474605441e-06, "loss": 0.03710491, "memory(GiB)": 13.7, "step": 36405, "train_speed(iter/s)": 1.533513 }, { "acc": 0.98445568, "epoch": 17.06585423013827, "grad_norm": 4.555636405944824, "learning_rate": 7.85296093826582e-06, "loss": 0.07954409, "memory(GiB)": 13.7, "step": 36410, "train_speed(iter/s)": 1.533511 }, { "acc": 0.9864583, "epoch": 17.068197797047105, "grad_norm": 1.6225249767303467, "learning_rate": 7.852324333362412e-06, "loss": 0.0773887, "memory(GiB)": 13.7, "step": 36415, "train_speed(iter/s)": 1.533513 }, { "acc": 0.98062496, "epoch": 17.07054136395594, "grad_norm": 4.212890625, "learning_rate": 7.851687659910515e-06, "loss": 0.09342333, "memory(GiB)": 13.7, "step": 36420, "train_speed(iter/s)": 1.533521 }, { "acc": 0.98529758, "epoch": 17.072884930864777, "grad_norm": 3.0763938426971436, "learning_rate": 7.851050917925434e-06, "loss": 0.07090946, "memory(GiB)": 13.7, "step": 36425, "train_speed(iter/s)": 1.533517 }, { "acc": 0.98321428, "epoch": 17.07522849777361, "grad_norm": 5.200169086456299, "learning_rate": 7.850414107422472e-06, "loss": 0.07193787, "memory(GiB)": 13.7, "step": 36430, "train_speed(iter/s)": 1.533524 }, { "acc": 0.98170137, "epoch": 17.077572064682446, "grad_norm": 7.3150177001953125, "learning_rate": 7.849777228416938e-06, "loss": 0.07640095, "memory(GiB)": 13.7, "step": 36435, "train_speed(iter/s)": 1.533533 }, { "acc": 0.9746727, "epoch": 17.07991563159128, "grad_norm": 5.118531227111816, "learning_rate": 7.84914028092414e-06, "loss": 0.09533256, "memory(GiB)": 13.7, "step": 36440, "train_speed(iter/s)": 1.533536 }, { "acc": 0.98621254, "epoch": 17.082259198500118, "grad_norm": 10.518465995788574, "learning_rate": 7.848503264959388e-06, "loss": 0.06654158, "memory(GiB)": 13.7, "step": 36445, "train_speed(iter/s)": 1.533545 }, { "acc": 0.97479172, "epoch": 17.084602765408953, "grad_norm": 4.431353569030762, "learning_rate": 7.847866180537995e-06, "loss": 0.10578951, "memory(GiB)": 13.7, "step": 36450, "train_speed(iter/s)": 1.533553 }, { "acc": 0.97263889, "epoch": 17.086946332317787, "grad_norm": 3.0191493034362793, "learning_rate": 7.847229027675275e-06, "loss": 0.11343509, "memory(GiB)": 13.7, "step": 36455, "train_speed(iter/s)": 1.533558 }, { "acc": 0.98343754, "epoch": 17.089289899226625, "grad_norm": 2.404935121536255, "learning_rate": 7.846591806386537e-06, "loss": 0.06765382, "memory(GiB)": 13.7, "step": 36460, "train_speed(iter/s)": 1.533566 }, { "acc": 0.98237181, "epoch": 17.09163346613546, "grad_norm": 1.4772872924804688, "learning_rate": 7.845954516687106e-06, "loss": 0.06819587, "memory(GiB)": 13.7, "step": 36465, "train_speed(iter/s)": 1.533563 }, { "acc": 0.99621716, "epoch": 17.093977033044293, "grad_norm": 0.3842954635620117, "learning_rate": 7.845317158592294e-06, "loss": 0.04123809, "memory(GiB)": 13.7, "step": 36470, "train_speed(iter/s)": 1.533565 }, { "acc": 0.9786459, "epoch": 17.096320599953128, "grad_norm": 3.4904282093048096, "learning_rate": 7.844679732117424e-06, "loss": 0.06794125, "memory(GiB)": 13.7, "step": 36475, "train_speed(iter/s)": 1.53357 }, { "acc": 0.96383934, "epoch": 17.098664166861965, "grad_norm": 0.33721715211868286, "learning_rate": 7.844042237277818e-06, "loss": 0.12366333, "memory(GiB)": 13.7, "step": 36480, "train_speed(iter/s)": 1.533573 }, { "acc": 0.97520828, "epoch": 17.1010077337708, "grad_norm": 4.774847030639648, "learning_rate": 7.843404674088797e-06, "loss": 0.09628257, "memory(GiB)": 13.7, "step": 36485, "train_speed(iter/s)": 1.533577 }, { "acc": 0.98630209, "epoch": 17.103351300679634, "grad_norm": 5.4301605224609375, "learning_rate": 7.842767042565688e-06, "loss": 0.05681844, "memory(GiB)": 13.7, "step": 36490, "train_speed(iter/s)": 1.533575 }, { "acc": 0.98470955, "epoch": 17.10569486758847, "grad_norm": 2.040748357772827, "learning_rate": 7.842129342723818e-06, "loss": 0.05968511, "memory(GiB)": 13.7, "step": 36495, "train_speed(iter/s)": 1.533592 }, { "acc": 0.99359379, "epoch": 17.108038434497306, "grad_norm": 1.8131500482559204, "learning_rate": 7.841491574578513e-06, "loss": 0.06664367, "memory(GiB)": 13.7, "step": 36500, "train_speed(iter/s)": 1.533605 }, { "acc": 0.9802083, "epoch": 17.11038200140614, "grad_norm": 4.677987098693848, "learning_rate": 7.840853738145103e-06, "loss": 0.07254127, "memory(GiB)": 13.7, "step": 36505, "train_speed(iter/s)": 1.533611 }, { "acc": 0.97322922, "epoch": 17.112725568314975, "grad_norm": 2.3598415851593018, "learning_rate": 7.840215833438922e-06, "loss": 0.06269262, "memory(GiB)": 13.7, "step": 36510, "train_speed(iter/s)": 1.533615 }, { "acc": 0.96196423, "epoch": 17.11506913522381, "grad_norm": 0.10661441087722778, "learning_rate": 7.8395778604753e-06, "loss": 0.08449332, "memory(GiB)": 13.7, "step": 36515, "train_speed(iter/s)": 1.533611 }, { "acc": 0.98895836, "epoch": 17.117412702132647, "grad_norm": 0.003197911661118269, "learning_rate": 7.838939819269576e-06, "loss": 0.03824397, "memory(GiB)": 13.7, "step": 36520, "train_speed(iter/s)": 1.533622 }, { "acc": 0.97791672, "epoch": 17.11975626904148, "grad_norm": 2.559556484222412, "learning_rate": 7.838301709837081e-06, "loss": 0.06448027, "memory(GiB)": 13.7, "step": 36525, "train_speed(iter/s)": 1.533637 }, { "acc": 0.98663692, "epoch": 17.122099835950316, "grad_norm": 2.103079080581665, "learning_rate": 7.837663532193157e-06, "loss": 0.09457604, "memory(GiB)": 13.7, "step": 36530, "train_speed(iter/s)": 1.533638 }, { "acc": 0.99319782, "epoch": 17.12444340285915, "grad_norm": 3.25818133354187, "learning_rate": 7.837025286353142e-06, "loss": 0.0422098, "memory(GiB)": 13.7, "step": 36535, "train_speed(iter/s)": 1.533638 }, { "acc": 0.98091354, "epoch": 17.126786969767988, "grad_norm": 5.060121536254883, "learning_rate": 7.836386972332376e-06, "loss": 0.06924216, "memory(GiB)": 13.7, "step": 36540, "train_speed(iter/s)": 1.53364 }, { "acc": 0.98874998, "epoch": 17.129130536676822, "grad_norm": 1.397549033164978, "learning_rate": 7.835748590146208e-06, "loss": 0.0700236, "memory(GiB)": 13.7, "step": 36545, "train_speed(iter/s)": 1.533647 }, { "acc": 0.98037777, "epoch": 17.131474103585656, "grad_norm": 9.25574779510498, "learning_rate": 7.835110139809975e-06, "loss": 0.07658542, "memory(GiB)": 13.7, "step": 36550, "train_speed(iter/s)": 1.533654 }, { "acc": 0.98813372, "epoch": 17.133817670494494, "grad_norm": 2.290071487426758, "learning_rate": 7.834471621339028e-06, "loss": 0.06844252, "memory(GiB)": 13.7, "step": 36555, "train_speed(iter/s)": 1.533664 }, { "acc": 0.97798767, "epoch": 17.13616123740333, "grad_norm": 8.037941932678223, "learning_rate": 7.833833034748713e-06, "loss": 0.12147045, "memory(GiB)": 13.7, "step": 36560, "train_speed(iter/s)": 1.533667 }, { "acc": 0.96450758, "epoch": 17.138504804312163, "grad_norm": 3.5433409214019775, "learning_rate": 7.833194380054381e-06, "loss": 0.14736418, "memory(GiB)": 13.7, "step": 36565, "train_speed(iter/s)": 1.533667 }, { "acc": 0.96277781, "epoch": 17.140848371220997, "grad_norm": 3.5489659309387207, "learning_rate": 7.832555657271382e-06, "loss": 0.1456607, "memory(GiB)": 13.7, "step": 36570, "train_speed(iter/s)": 1.533671 }, { "acc": 0.97008934, "epoch": 17.143191938129835, "grad_norm": 7.7176947593688965, "learning_rate": 7.83191686641507e-06, "loss": 0.1351564, "memory(GiB)": 13.7, "step": 36575, "train_speed(iter/s)": 1.533672 }, { "acc": 0.98976192, "epoch": 17.14553550503867, "grad_norm": 0.05569196864962578, "learning_rate": 7.831278007500796e-06, "loss": 0.02369495, "memory(GiB)": 13.7, "step": 36580, "train_speed(iter/s)": 1.533677 }, { "acc": 0.990625, "epoch": 17.147879071947504, "grad_norm": 3.6302497386932373, "learning_rate": 7.83063908054392e-06, "loss": 0.06107222, "memory(GiB)": 13.7, "step": 36585, "train_speed(iter/s)": 1.533677 }, { "acc": 0.98197174, "epoch": 17.150222638856338, "grad_norm": 8.071208000183105, "learning_rate": 7.830000085559799e-06, "loss": 0.06045947, "memory(GiB)": 13.7, "step": 36590, "train_speed(iter/s)": 1.533682 }, { "acc": 0.98879547, "epoch": 17.152566205765176, "grad_norm": 2.3422181606292725, "learning_rate": 7.82936102256379e-06, "loss": 0.03469208, "memory(GiB)": 13.7, "step": 36595, "train_speed(iter/s)": 1.533702 }, { "acc": 0.96919641, "epoch": 17.15490977267401, "grad_norm": 6.195121765136719, "learning_rate": 7.828721891571255e-06, "loss": 0.15260893, "memory(GiB)": 13.7, "step": 36600, "train_speed(iter/s)": 1.533711 }, { "acc": 0.98133011, "epoch": 17.157253339582844, "grad_norm": 0.23321297764778137, "learning_rate": 7.828082692597558e-06, "loss": 0.08755993, "memory(GiB)": 13.7, "step": 36605, "train_speed(iter/s)": 1.533712 }, { "acc": 0.99177074, "epoch": 17.15959690649168, "grad_norm": 2.1838748455047607, "learning_rate": 7.827443425658063e-06, "loss": 0.01918983, "memory(GiB)": 13.7, "step": 36610, "train_speed(iter/s)": 1.533716 }, { "acc": 0.97430553, "epoch": 17.161940473400517, "grad_norm": 6.8647284507751465, "learning_rate": 7.826804090768137e-06, "loss": 0.08491077, "memory(GiB)": 13.7, "step": 36615, "train_speed(iter/s)": 1.533719 }, { "acc": 0.96071434, "epoch": 17.16428404030935, "grad_norm": 7.21769905090332, "learning_rate": 7.826164687943142e-06, "loss": 0.10903856, "memory(GiB)": 13.7, "step": 36620, "train_speed(iter/s)": 1.53372 }, { "acc": 0.9802084, "epoch": 17.166627607218185, "grad_norm": 5.182687759399414, "learning_rate": 7.825525217198453e-06, "loss": 0.0546307, "memory(GiB)": 13.7, "step": 36625, "train_speed(iter/s)": 1.533729 }, { "acc": 0.98260422, "epoch": 17.168971174127023, "grad_norm": 4.055380821228027, "learning_rate": 7.824885678549438e-06, "loss": 0.07870427, "memory(GiB)": 13.7, "step": 36630, "train_speed(iter/s)": 1.533743 }, { "acc": 0.95342264, "epoch": 17.171314741035857, "grad_norm": 5.598762512207031, "learning_rate": 7.824246072011473e-06, "loss": 0.12496653, "memory(GiB)": 13.7, "step": 36635, "train_speed(iter/s)": 1.533748 }, { "acc": 0.98967266, "epoch": 17.17365830794469, "grad_norm": 8.224461555480957, "learning_rate": 7.823606397599924e-06, "loss": 0.10613234, "memory(GiB)": 13.7, "step": 36640, "train_speed(iter/s)": 1.533743 }, { "acc": 0.98161459, "epoch": 17.176001874853526, "grad_norm": 2.2915163040161133, "learning_rate": 7.822966655330175e-06, "loss": 0.0687135, "memory(GiB)": 13.7, "step": 36645, "train_speed(iter/s)": 1.533749 }, { "acc": 0.98673611, "epoch": 17.178345441762364, "grad_norm": 4.259636878967285, "learning_rate": 7.8223268452176e-06, "loss": 0.06101758, "memory(GiB)": 13.7, "step": 36650, "train_speed(iter/s)": 1.533757 }, { "acc": 0.98916664, "epoch": 17.180689008671198, "grad_norm": 6.166407585144043, "learning_rate": 7.821686967277577e-06, "loss": 0.03422043, "memory(GiB)": 13.7, "step": 36655, "train_speed(iter/s)": 1.533766 }, { "acc": 0.98145294, "epoch": 17.183032575580032, "grad_norm": 3.5566089153289795, "learning_rate": 7.821047021525488e-06, "loss": 0.07577894, "memory(GiB)": 13.7, "step": 36660, "train_speed(iter/s)": 1.533773 }, { "acc": 0.99376898, "epoch": 17.185376142488867, "grad_norm": 21.41510581970215, "learning_rate": 7.820407007976716e-06, "loss": 0.04896655, "memory(GiB)": 13.7, "step": 36665, "train_speed(iter/s)": 1.53378 }, { "acc": 0.95687504, "epoch": 17.187719709397705, "grad_norm": 5.254188537597656, "learning_rate": 7.819766926646642e-06, "loss": 0.15478022, "memory(GiB)": 13.7, "step": 36670, "train_speed(iter/s)": 1.533784 }, { "acc": 0.97894344, "epoch": 17.19006327630654, "grad_norm": 8.147838592529297, "learning_rate": 7.819126777550652e-06, "loss": 0.09108656, "memory(GiB)": 13.7, "step": 36675, "train_speed(iter/s)": 1.533782 }, { "acc": 0.996875, "epoch": 17.192406843215373, "grad_norm": 3.328397750854492, "learning_rate": 7.818486560704139e-06, "loss": 0.03041034, "memory(GiB)": 13.7, "step": 36680, "train_speed(iter/s)": 1.533783 }, { "acc": 0.98993053, "epoch": 17.194750410124207, "grad_norm": 1.9624463319778442, "learning_rate": 7.817846276122484e-06, "loss": 0.05170863, "memory(GiB)": 13.7, "step": 36685, "train_speed(iter/s)": 1.533791 }, { "acc": 0.97687492, "epoch": 17.197093977033045, "grad_norm": 4.3546576499938965, "learning_rate": 7.81720592382108e-06, "loss": 0.08312746, "memory(GiB)": 13.7, "step": 36690, "train_speed(iter/s)": 1.533798 }, { "acc": 0.98463516, "epoch": 17.19943754394188, "grad_norm": 4.432351589202881, "learning_rate": 7.81656550381532e-06, "loss": 0.05623054, "memory(GiB)": 13.7, "step": 36695, "train_speed(iter/s)": 1.533808 }, { "acc": 0.97582226, "epoch": 17.201781110850714, "grad_norm": 10.896602630615234, "learning_rate": 7.815925016120595e-06, "loss": 0.11551578, "memory(GiB)": 13.7, "step": 36700, "train_speed(iter/s)": 1.533813 }, { "acc": 0.9802083, "epoch": 17.204124677759552, "grad_norm": 6.819037914276123, "learning_rate": 7.815284460752302e-06, "loss": 0.10703601, "memory(GiB)": 13.7, "step": 36705, "train_speed(iter/s)": 1.533812 }, { "acc": 0.97666664, "epoch": 17.206468244668386, "grad_norm": 3.138235569000244, "learning_rate": 7.81464383772584e-06, "loss": 0.05789471, "memory(GiB)": 13.7, "step": 36710, "train_speed(iter/s)": 1.533815 }, { "acc": 0.9807292, "epoch": 17.20881181157722, "grad_norm": 5.551456451416016, "learning_rate": 7.814003147056605e-06, "loss": 0.10223496, "memory(GiB)": 13.7, "step": 36715, "train_speed(iter/s)": 1.533822 }, { "acc": 0.98721762, "epoch": 17.211155378486055, "grad_norm": 3.7130963802337646, "learning_rate": 7.813362388759996e-06, "loss": 0.04693194, "memory(GiB)": 13.7, "step": 36720, "train_speed(iter/s)": 1.533823 }, { "acc": 0.98049488, "epoch": 17.213498945394893, "grad_norm": 5.325390338897705, "learning_rate": 7.812721562851418e-06, "loss": 0.10334969, "memory(GiB)": 13.7, "step": 36725, "train_speed(iter/s)": 1.533832 }, { "acc": 0.97049427, "epoch": 17.215842512303727, "grad_norm": 5.230839729309082, "learning_rate": 7.81208066934627e-06, "loss": 0.16107392, "memory(GiB)": 13.7, "step": 36730, "train_speed(iter/s)": 1.533833 }, { "acc": 0.9668601, "epoch": 17.21818607921256, "grad_norm": 1.4690626859664917, "learning_rate": 7.81143970825996e-06, "loss": 0.09112247, "memory(GiB)": 13.7, "step": 36735, "train_speed(iter/s)": 1.533847 }, { "acc": 0.97361107, "epoch": 17.220529646121395, "grad_norm": 4.0960893630981445, "learning_rate": 7.810798679607896e-06, "loss": 0.11164124, "memory(GiB)": 13.7, "step": 36740, "train_speed(iter/s)": 1.533841 }, { "acc": 0.97048769, "epoch": 17.222873213030233, "grad_norm": 9.663206100463867, "learning_rate": 7.810157583405483e-06, "loss": 0.13079534, "memory(GiB)": 13.7, "step": 36745, "train_speed(iter/s)": 1.533854 }, { "acc": 0.97609377, "epoch": 17.225216779939068, "grad_norm": 5.08193826675415, "learning_rate": 7.809516419668133e-06, "loss": 0.10237417, "memory(GiB)": 13.7, "step": 36750, "train_speed(iter/s)": 1.533859 }, { "acc": 0.97889881, "epoch": 17.227560346847902, "grad_norm": 0.627861738204956, "learning_rate": 7.808875188411255e-06, "loss": 0.08761433, "memory(GiB)": 13.7, "step": 36755, "train_speed(iter/s)": 1.533864 }, { "acc": 0.98113098, "epoch": 17.229903913756736, "grad_norm": 0.25985896587371826, "learning_rate": 7.808233889650266e-06, "loss": 0.11929238, "memory(GiB)": 13.7, "step": 36760, "train_speed(iter/s)": 1.53387 }, { "acc": 0.9764286, "epoch": 17.232247480665574, "grad_norm": 4.837906360626221, "learning_rate": 7.807592523400575e-06, "loss": 0.07916595, "memory(GiB)": 13.7, "step": 36765, "train_speed(iter/s)": 1.533872 }, { "acc": 0.97736111, "epoch": 17.23459104757441, "grad_norm": 2.6601016521453857, "learning_rate": 7.806951089677605e-06, "loss": 0.086999, "memory(GiB)": 13.7, "step": 36770, "train_speed(iter/s)": 1.533878 }, { "acc": 0.99450893, "epoch": 17.236934614483243, "grad_norm": 3.19808030128479, "learning_rate": 7.806309588496768e-06, "loss": 0.02682664, "memory(GiB)": 13.7, "step": 36775, "train_speed(iter/s)": 1.533886 }, { "acc": 0.96826639, "epoch": 17.239278181392077, "grad_norm": 8.227811813354492, "learning_rate": 7.80566801987349e-06, "loss": 0.10957505, "memory(GiB)": 13.7, "step": 36780, "train_speed(iter/s)": 1.533899 }, { "acc": 0.98532734, "epoch": 17.241621748300915, "grad_norm": 3.829224109649658, "learning_rate": 7.805026383823185e-06, "loss": 0.06347018, "memory(GiB)": 13.7, "step": 36785, "train_speed(iter/s)": 1.533916 }, { "acc": 0.97642365, "epoch": 17.24396531520975, "grad_norm": 4.326068878173828, "learning_rate": 7.80438468036128e-06, "loss": 0.1083842, "memory(GiB)": 13.7, "step": 36790, "train_speed(iter/s)": 1.53392 }, { "acc": 0.96111107, "epoch": 17.246308882118583, "grad_norm": 7.295716285705566, "learning_rate": 7.803742909503199e-06, "loss": 0.15773166, "memory(GiB)": 13.7, "step": 36795, "train_speed(iter/s)": 1.533924 }, { "acc": 0.97979164, "epoch": 17.24865244902742, "grad_norm": 8.9661283493042, "learning_rate": 7.80310107126437e-06, "loss": 0.070964, "memory(GiB)": 13.7, "step": 36800, "train_speed(iter/s)": 1.533938 }, { "acc": 0.9901042, "epoch": 17.250996015936256, "grad_norm": 1.8174734115600586, "learning_rate": 7.802459165660215e-06, "loss": 0.042412, "memory(GiB)": 13.7, "step": 36805, "train_speed(iter/s)": 1.53394 }, { "acc": 0.97658768, "epoch": 17.25333958284509, "grad_norm": 5.811248779296875, "learning_rate": 7.801817192706169e-06, "loss": 0.1145806, "memory(GiB)": 13.7, "step": 36810, "train_speed(iter/s)": 1.533948 }, { "acc": 0.98250008, "epoch": 17.255683149753924, "grad_norm": 5.0708746910095215, "learning_rate": 7.80117515241766e-06, "loss": 0.0836661, "memory(GiB)": 13.7, "step": 36815, "train_speed(iter/s)": 1.533951 }, { "acc": 0.97719698, "epoch": 17.258026716662762, "grad_norm": 6.4233856201171875, "learning_rate": 7.800533044810123e-06, "loss": 0.09586003, "memory(GiB)": 13.7, "step": 36820, "train_speed(iter/s)": 1.533958 }, { "acc": 0.98705359, "epoch": 17.260370283571596, "grad_norm": 2.9909510612487793, "learning_rate": 7.799890869898989e-06, "loss": 0.04189612, "memory(GiB)": 13.7, "step": 36825, "train_speed(iter/s)": 1.533969 }, { "acc": 0.98681545, "epoch": 17.26271385048043, "grad_norm": 2.8767354488372803, "learning_rate": 7.799248627699694e-06, "loss": 0.03562597, "memory(GiB)": 13.7, "step": 36830, "train_speed(iter/s)": 1.533971 }, { "acc": 0.96011362, "epoch": 17.265057417389265, "grad_norm": 0.544966995716095, "learning_rate": 7.798606318227678e-06, "loss": 0.16576729, "memory(GiB)": 13.7, "step": 36835, "train_speed(iter/s)": 1.533972 }, { "acc": 0.97666664, "epoch": 17.267400984298103, "grad_norm": 4.066108226776123, "learning_rate": 7.797963941498379e-06, "loss": 0.10344324, "memory(GiB)": 13.7, "step": 36840, "train_speed(iter/s)": 1.533973 }, { "acc": 0.98795643, "epoch": 17.269744551206937, "grad_norm": 0.9287334680557251, "learning_rate": 7.797321497527234e-06, "loss": 0.06025714, "memory(GiB)": 13.7, "step": 36845, "train_speed(iter/s)": 1.53398 }, { "acc": 0.97631941, "epoch": 17.27208811811577, "grad_norm": 46.86530685424805, "learning_rate": 7.796678986329692e-06, "loss": 0.09124165, "memory(GiB)": 13.7, "step": 36850, "train_speed(iter/s)": 1.533979 }, { "acc": 0.96049557, "epoch": 17.274431685024606, "grad_norm": 6.067826747894287, "learning_rate": 7.796036407921192e-06, "loss": 0.1845201, "memory(GiB)": 13.7, "step": 36855, "train_speed(iter/s)": 1.533973 }, { "acc": 0.97872028, "epoch": 17.276775251933444, "grad_norm": 0.2425948828458786, "learning_rate": 7.79539376231718e-06, "loss": 0.10118605, "memory(GiB)": 13.7, "step": 36860, "train_speed(iter/s)": 1.533984 }, { "acc": 0.97666664, "epoch": 17.279118818842278, "grad_norm": 5.842190265655518, "learning_rate": 7.794751049533106e-06, "loss": 0.11820567, "memory(GiB)": 13.7, "step": 36865, "train_speed(iter/s)": 1.53399 }, { "acc": 0.98551464, "epoch": 17.281462385751112, "grad_norm": 3.630617618560791, "learning_rate": 7.794108269584418e-06, "loss": 0.02896893, "memory(GiB)": 13.7, "step": 36870, "train_speed(iter/s)": 1.533993 }, { "acc": 0.988447, "epoch": 17.28380595265995, "grad_norm": 4.851346015930176, "learning_rate": 7.793465422486561e-06, "loss": 0.06511693, "memory(GiB)": 13.7, "step": 36875, "train_speed(iter/s)": 1.534012 }, { "acc": 0.97875004, "epoch": 17.286149519568784, "grad_norm": 5.21736478805542, "learning_rate": 7.792822508254993e-06, "loss": 0.0886314, "memory(GiB)": 13.7, "step": 36880, "train_speed(iter/s)": 1.534016 }, { "acc": 0.96699409, "epoch": 17.28849308647762, "grad_norm": 17.49964141845703, "learning_rate": 7.792179526905163e-06, "loss": 0.22258265, "memory(GiB)": 13.7, "step": 36885, "train_speed(iter/s)": 1.53402 }, { "acc": 0.98086605, "epoch": 17.290836653386453, "grad_norm": 4.895810127258301, "learning_rate": 7.79153647845253e-06, "loss": 0.12727959, "memory(GiB)": 13.7, "step": 36890, "train_speed(iter/s)": 1.534018 }, { "acc": 0.9851326, "epoch": 17.29318022029529, "grad_norm": 5.829428195953369, "learning_rate": 7.79089336291255e-06, "loss": 0.08031427, "memory(GiB)": 13.7, "step": 36895, "train_speed(iter/s)": 1.534015 }, { "acc": 0.98217258, "epoch": 17.295523787204125, "grad_norm": 1.9510364532470703, "learning_rate": 7.790250180300682e-06, "loss": 0.05333279, "memory(GiB)": 13.7, "step": 36900, "train_speed(iter/s)": 1.534028 }, { "acc": 0.97901783, "epoch": 17.29786735411296, "grad_norm": 5.040942668914795, "learning_rate": 7.789606930632382e-06, "loss": 0.05947641, "memory(GiB)": 13.7, "step": 36905, "train_speed(iter/s)": 1.534041 }, { "acc": 0.97645836, "epoch": 17.300210921021794, "grad_norm": 5.743114471435547, "learning_rate": 7.788963613923115e-06, "loss": 0.16106497, "memory(GiB)": 13.7, "step": 36910, "train_speed(iter/s)": 1.53405 }, { "acc": 0.98152771, "epoch": 17.30255448793063, "grad_norm": 3.4418911933898926, "learning_rate": 7.788320230188345e-06, "loss": 0.10038862, "memory(GiB)": 13.7, "step": 36915, "train_speed(iter/s)": 1.534061 }, { "acc": 0.98848925, "epoch": 17.304898054839466, "grad_norm": 1.4961172342300415, "learning_rate": 7.787676779443532e-06, "loss": 0.05229988, "memory(GiB)": 13.7, "step": 36920, "train_speed(iter/s)": 1.534067 }, { "acc": 0.97419643, "epoch": 17.3072416217483, "grad_norm": 7.177262783050537, "learning_rate": 7.787033261704148e-06, "loss": 0.08747609, "memory(GiB)": 13.7, "step": 36925, "train_speed(iter/s)": 1.534071 }, { "acc": 0.98601198, "epoch": 17.309585188657135, "grad_norm": 3.6374475955963135, "learning_rate": 7.78638967698566e-06, "loss": 0.09367324, "memory(GiB)": 13.7, "step": 36930, "train_speed(iter/s)": 1.534067 }, { "acc": 0.97090054, "epoch": 17.311928755565972, "grad_norm": 2.917543649673462, "learning_rate": 7.785746025303535e-06, "loss": 0.07661224, "memory(GiB)": 13.7, "step": 36935, "train_speed(iter/s)": 1.534072 }, { "acc": 0.98842258, "epoch": 17.314272322474807, "grad_norm": 3.7023913860321045, "learning_rate": 7.785102306673247e-06, "loss": 0.03324715, "memory(GiB)": 13.7, "step": 36940, "train_speed(iter/s)": 1.53408 }, { "acc": 0.9760417, "epoch": 17.31661588938364, "grad_norm": 0.013913463801145554, "learning_rate": 7.784458521110266e-06, "loss": 0.13801299, "memory(GiB)": 13.7, "step": 36945, "train_speed(iter/s)": 1.534085 }, { "acc": 0.98552084, "epoch": 17.318959456292475, "grad_norm": 2.757448434829712, "learning_rate": 7.78381466863007e-06, "loss": 0.04311509, "memory(GiB)": 13.7, "step": 36950, "train_speed(iter/s)": 1.534091 }, { "acc": 0.98842258, "epoch": 17.321303023201313, "grad_norm": 3.975048542022705, "learning_rate": 7.783170749248131e-06, "loss": 0.05180219, "memory(GiB)": 13.7, "step": 36955, "train_speed(iter/s)": 1.534083 }, { "acc": 0.99181547, "epoch": 17.323646590110148, "grad_norm": 3.196315050125122, "learning_rate": 7.782526762979931e-06, "loss": 0.04161101, "memory(GiB)": 13.7, "step": 36960, "train_speed(iter/s)": 1.534086 }, { "acc": 0.98017111, "epoch": 17.325990157018982, "grad_norm": 48.586055755615234, "learning_rate": 7.781882709840949e-06, "loss": 0.11260347, "memory(GiB)": 13.7, "step": 36965, "train_speed(iter/s)": 1.534098 }, { "acc": 0.96672344, "epoch": 17.32833372392782, "grad_norm": 8.750056266784668, "learning_rate": 7.78123858984666e-06, "loss": 0.10053657, "memory(GiB)": 13.7, "step": 36970, "train_speed(iter/s)": 1.534106 }, { "acc": 0.99305553, "epoch": 17.330677290836654, "grad_norm": 1.2494215965270996, "learning_rate": 7.780594403012552e-06, "loss": 0.02210047, "memory(GiB)": 13.7, "step": 36975, "train_speed(iter/s)": 1.534112 }, { "acc": 0.9577877, "epoch": 17.33302085774549, "grad_norm": 2.4746339321136475, "learning_rate": 7.77995014935411e-06, "loss": 0.10004016, "memory(GiB)": 13.7, "step": 36980, "train_speed(iter/s)": 1.534113 }, { "acc": 0.96276722, "epoch": 17.335364424654323, "grad_norm": 6.124101638793945, "learning_rate": 7.779305828886815e-06, "loss": 0.14028647, "memory(GiB)": 13.7, "step": 36985, "train_speed(iter/s)": 1.534117 }, { "acc": 0.97061329, "epoch": 17.33770799156316, "grad_norm": 1.9075746536254883, "learning_rate": 7.778661441626157e-06, "loss": 0.15057853, "memory(GiB)": 13.7, "step": 36990, "train_speed(iter/s)": 1.534122 }, { "acc": 0.97603245, "epoch": 17.340051558471995, "grad_norm": 2.96899151802063, "learning_rate": 7.778016987587627e-06, "loss": 0.06192309, "memory(GiB)": 13.7, "step": 36995, "train_speed(iter/s)": 1.534125 }, { "acc": 0.9638628, "epoch": 17.34239512538083, "grad_norm": 5.931206226348877, "learning_rate": 7.777372466786712e-06, "loss": 0.14222398, "memory(GiB)": 13.7, "step": 37000, "train_speed(iter/s)": 1.534118 }, { "acc": 0.98883934, "epoch": 17.344738692289663, "grad_norm": 4.192101001739502, "learning_rate": 7.776727879238905e-06, "loss": 0.10055778, "memory(GiB)": 13.7, "step": 37005, "train_speed(iter/s)": 1.534129 }, { "acc": 0.97975035, "epoch": 17.3470822591985, "grad_norm": 48.31745529174805, "learning_rate": 7.776083224959703e-06, "loss": 0.12807406, "memory(GiB)": 13.7, "step": 37010, "train_speed(iter/s)": 1.534131 }, { "acc": 0.98187504, "epoch": 17.349425826107336, "grad_norm": 4.610866069793701, "learning_rate": 7.775438503964597e-06, "loss": 0.08639978, "memory(GiB)": 13.7, "step": 37015, "train_speed(iter/s)": 1.534141 }, { "acc": 0.97979164, "epoch": 17.35176939301617, "grad_norm": 10.913679122924805, "learning_rate": 7.774793716269086e-06, "loss": 0.07403242, "memory(GiB)": 13.7, "step": 37020, "train_speed(iter/s)": 1.534147 }, { "acc": 0.98708324, "epoch": 17.354112959925004, "grad_norm": 0.018106741830706596, "learning_rate": 7.774148861888667e-06, "loss": 0.05819801, "memory(GiB)": 13.7, "step": 37025, "train_speed(iter/s)": 1.53415 }, { "acc": 0.98473959, "epoch": 17.356456526833842, "grad_norm": 0.007103531621396542, "learning_rate": 7.773503940838843e-06, "loss": 0.09115894, "memory(GiB)": 13.7, "step": 37030, "train_speed(iter/s)": 1.534159 }, { "acc": 0.97996111, "epoch": 17.358800093742676, "grad_norm": 3.852881908416748, "learning_rate": 7.772858953135115e-06, "loss": 0.09881052, "memory(GiB)": 13.7, "step": 37035, "train_speed(iter/s)": 1.534167 }, { "acc": 0.99520226, "epoch": 17.36114366065151, "grad_norm": 2.786980390548706, "learning_rate": 7.772213898792985e-06, "loss": 0.03398961, "memory(GiB)": 13.7, "step": 37040, "train_speed(iter/s)": 1.53417 }, { "acc": 0.98243465, "epoch": 17.36348722756035, "grad_norm": 3.7574687004089355, "learning_rate": 7.771568777827958e-06, "loss": 0.12310767, "memory(GiB)": 13.7, "step": 37045, "train_speed(iter/s)": 1.534173 }, { "acc": 0.97041664, "epoch": 17.365830794469183, "grad_norm": 9.731449127197266, "learning_rate": 7.77092359025554e-06, "loss": 0.09607028, "memory(GiB)": 13.7, "step": 37050, "train_speed(iter/s)": 1.534159 }, { "acc": 0.98913689, "epoch": 17.368174361378017, "grad_norm": 5.260018348693848, "learning_rate": 7.770278336091244e-06, "loss": 0.07961012, "memory(GiB)": 13.7, "step": 37055, "train_speed(iter/s)": 1.534179 }, { "acc": 0.98062506, "epoch": 17.37051792828685, "grad_norm": 0.005252489820122719, "learning_rate": 7.769633015350573e-06, "loss": 0.05316679, "memory(GiB)": 13.7, "step": 37060, "train_speed(iter/s)": 1.534192 }, { "acc": 0.9772665, "epoch": 17.37286149519569, "grad_norm": 5.351593971252441, "learning_rate": 7.768987628049042e-06, "loss": 0.04782456, "memory(GiB)": 13.7, "step": 37065, "train_speed(iter/s)": 1.534196 }, { "acc": 0.97833328, "epoch": 17.375205062104524, "grad_norm": 3.5425777435302734, "learning_rate": 7.768342174202166e-06, "loss": 0.07830227, "memory(GiB)": 13.7, "step": 37070, "train_speed(iter/s)": 1.534205 }, { "acc": 0.98413687, "epoch": 17.377548629013358, "grad_norm": 3.5834405422210693, "learning_rate": 7.767696653825456e-06, "loss": 0.06730439, "memory(GiB)": 13.7, "step": 37075, "train_speed(iter/s)": 1.534212 }, { "acc": 0.98989086, "epoch": 17.379892195922192, "grad_norm": 0.4175112247467041, "learning_rate": 7.767051066934429e-06, "loss": 0.04828562, "memory(GiB)": 13.7, "step": 37080, "train_speed(iter/s)": 1.53421 }, { "acc": 0.98680553, "epoch": 17.38223576283103, "grad_norm": 5.735934257507324, "learning_rate": 7.766405413544603e-06, "loss": 0.09086683, "memory(GiB)": 13.7, "step": 37085, "train_speed(iter/s)": 1.534223 }, { "acc": 0.97944031, "epoch": 17.384579329739864, "grad_norm": 3.6124980449676514, "learning_rate": 7.765759693671499e-06, "loss": 0.12935823, "memory(GiB)": 13.7, "step": 37090, "train_speed(iter/s)": 1.534227 }, { "acc": 0.96615534, "epoch": 17.3869228966487, "grad_norm": 6.222967624664307, "learning_rate": 7.765113907330634e-06, "loss": 0.12183721, "memory(GiB)": 13.7, "step": 37095, "train_speed(iter/s)": 1.534233 }, { "acc": 0.99250002, "epoch": 17.389266463557533, "grad_norm": 3.6829168796539307, "learning_rate": 7.764468054537535e-06, "loss": 0.02341507, "memory(GiB)": 13.7, "step": 37100, "train_speed(iter/s)": 1.534247 }, { "acc": 0.98727684, "epoch": 17.39161003046637, "grad_norm": 2.7601730823516846, "learning_rate": 7.763822135307723e-06, "loss": 0.07384921, "memory(GiB)": 13.7, "step": 37105, "train_speed(iter/s)": 1.534253 }, { "acc": 0.96104164, "epoch": 17.393953597375205, "grad_norm": 7.424562454223633, "learning_rate": 7.763176149656727e-06, "loss": 0.19807506, "memory(GiB)": 13.7, "step": 37110, "train_speed(iter/s)": 1.534266 }, { "acc": 0.99623013, "epoch": 17.39629716428404, "grad_norm": 0.030620744451880455, "learning_rate": 7.76253009760007e-06, "loss": 0.04493611, "memory(GiB)": 13.7, "step": 37115, "train_speed(iter/s)": 1.534278 }, { "acc": 0.9827282, "epoch": 17.398640731192877, "grad_norm": 0.008462505415081978, "learning_rate": 7.761883979153285e-06, "loss": 0.10786026, "memory(GiB)": 13.7, "step": 37120, "train_speed(iter/s)": 1.534292 }, { "acc": 0.97020836, "epoch": 17.40098429810171, "grad_norm": 4.5258941650390625, "learning_rate": 7.761237794331901e-06, "loss": 0.10550792, "memory(GiB)": 13.7, "step": 37125, "train_speed(iter/s)": 1.534309 }, { "acc": 0.97368279, "epoch": 17.403327865010546, "grad_norm": 10.258184432983398, "learning_rate": 7.76059154315145e-06, "loss": 0.17023975, "memory(GiB)": 13.7, "step": 37130, "train_speed(iter/s)": 1.534324 }, { "acc": 0.98476191, "epoch": 17.40567143191938, "grad_norm": 0.07245240360498428, "learning_rate": 7.759945225627466e-06, "loss": 0.07215014, "memory(GiB)": 13.7, "step": 37135, "train_speed(iter/s)": 1.53432 }, { "acc": 0.96590281, "epoch": 17.408014998828218, "grad_norm": 6.336733341217041, "learning_rate": 7.759298841775485e-06, "loss": 0.06817125, "memory(GiB)": 13.7, "step": 37140, "train_speed(iter/s)": 1.534327 }, { "acc": 0.96782742, "epoch": 17.410358565737052, "grad_norm": 6.3298726081848145, "learning_rate": 7.75865239161104e-06, "loss": 0.15239024, "memory(GiB)": 13.7, "step": 37145, "train_speed(iter/s)": 1.534331 }, { "acc": 0.99285717, "epoch": 17.412702132645887, "grad_norm": 0.5499712228775024, "learning_rate": 7.758005875149677e-06, "loss": 0.0508117, "memory(GiB)": 13.7, "step": 37150, "train_speed(iter/s)": 1.534325 }, { "acc": 0.97645836, "epoch": 17.41504569955472, "grad_norm": 1.3604633808135986, "learning_rate": 7.75735929240693e-06, "loss": 0.06938286, "memory(GiB)": 13.7, "step": 37155, "train_speed(iter/s)": 1.534324 }, { "acc": 0.96687498, "epoch": 17.41738926646356, "grad_norm": 49.30060958862305, "learning_rate": 7.75671264339834e-06, "loss": 0.11988368, "memory(GiB)": 13.7, "step": 37160, "train_speed(iter/s)": 1.534338 }, { "acc": 0.98312502, "epoch": 17.419732833372393, "grad_norm": 5.285721302032471, "learning_rate": 7.756065928139458e-06, "loss": 0.05982491, "memory(GiB)": 13.7, "step": 37165, "train_speed(iter/s)": 1.534338 }, { "acc": 0.97520294, "epoch": 17.422076400281227, "grad_norm": 8.018071174621582, "learning_rate": 7.755419146645819e-06, "loss": 0.11721823, "memory(GiB)": 13.7, "step": 37170, "train_speed(iter/s)": 1.534338 }, { "acc": 0.97156258, "epoch": 17.42441996719006, "grad_norm": 4.564333438873291, "learning_rate": 7.754772298932978e-06, "loss": 0.12251437, "memory(GiB)": 13.7, "step": 37175, "train_speed(iter/s)": 1.534354 }, { "acc": 0.98125, "epoch": 17.4267635340989, "grad_norm": 32.879459381103516, "learning_rate": 7.754125385016477e-06, "loss": 0.06837987, "memory(GiB)": 13.7, "step": 37180, "train_speed(iter/s)": 1.534363 }, { "acc": 0.9744792, "epoch": 17.429107101007734, "grad_norm": 4.0318498611450195, "learning_rate": 7.75347840491187e-06, "loss": 0.10660298, "memory(GiB)": 13.7, "step": 37185, "train_speed(iter/s)": 1.534366 }, { "acc": 0.98481064, "epoch": 17.431450667916568, "grad_norm": 2.4704275131225586, "learning_rate": 7.752831358634707e-06, "loss": 0.06402721, "memory(GiB)": 13.7, "step": 37190, "train_speed(iter/s)": 1.534371 }, { "acc": 0.98874998, "epoch": 17.433794234825406, "grad_norm": 5.149723052978516, "learning_rate": 7.752184246200537e-06, "loss": 0.07213037, "memory(GiB)": 13.7, "step": 37195, "train_speed(iter/s)": 1.534377 }, { "acc": 0.99459391, "epoch": 17.43613780173424, "grad_norm": 4.683570861816406, "learning_rate": 7.75153706762492e-06, "loss": 0.02761568, "memory(GiB)": 13.7, "step": 37200, "train_speed(iter/s)": 1.534382 }, { "acc": 0.98202343, "epoch": 17.438481368643075, "grad_norm": 0.032947465777397156, "learning_rate": 7.750889822923407e-06, "loss": 0.07656716, "memory(GiB)": 13.7, "step": 37205, "train_speed(iter/s)": 1.534385 }, { "acc": 0.95946426, "epoch": 17.44082493555191, "grad_norm": 9.077463150024414, "learning_rate": 7.750242512111562e-06, "loss": 0.21309569, "memory(GiB)": 13.7, "step": 37210, "train_speed(iter/s)": 1.534391 }, { "acc": 0.9749054, "epoch": 17.443168502460747, "grad_norm": 1.0132337808609009, "learning_rate": 7.74959513520494e-06, "loss": 0.11535518, "memory(GiB)": 13.7, "step": 37215, "train_speed(iter/s)": 1.534385 }, { "acc": 0.9885417, "epoch": 17.44551206936958, "grad_norm": 9.154582023620605, "learning_rate": 7.7489476922191e-06, "loss": 0.07081789, "memory(GiB)": 13.7, "step": 37220, "train_speed(iter/s)": 1.534383 }, { "acc": 0.97445393, "epoch": 17.447855636278415, "grad_norm": 7.766025066375732, "learning_rate": 7.748300183169611e-06, "loss": 0.07401212, "memory(GiB)": 13.7, "step": 37225, "train_speed(iter/s)": 1.534391 }, { "acc": 0.98043156, "epoch": 17.45019920318725, "grad_norm": 4.256999492645264, "learning_rate": 7.74765260807203e-06, "loss": 0.11732376, "memory(GiB)": 13.7, "step": 37230, "train_speed(iter/s)": 1.534399 }, { "acc": 0.98291664, "epoch": 17.452542770096088, "grad_norm": 4.184788227081299, "learning_rate": 7.747004966941924e-06, "loss": 0.07226887, "memory(GiB)": 13.7, "step": 37235, "train_speed(iter/s)": 1.534399 }, { "acc": 0.98430634, "epoch": 17.454886337004922, "grad_norm": 0.02522932179272175, "learning_rate": 7.746357259794863e-06, "loss": 0.07098194, "memory(GiB)": 13.7, "step": 37240, "train_speed(iter/s)": 1.5344 }, { "acc": 0.98806343, "epoch": 17.457229903913756, "grad_norm": 1.432226538658142, "learning_rate": 7.745709486646416e-06, "loss": 0.06579128, "memory(GiB)": 13.7, "step": 37245, "train_speed(iter/s)": 1.5344 }, { "acc": 0.9609623, "epoch": 17.45957347082259, "grad_norm": 8.283358573913574, "learning_rate": 7.74506164751215e-06, "loss": 0.17467632, "memory(GiB)": 13.7, "step": 37250, "train_speed(iter/s)": 1.534414 }, { "acc": 0.95806618, "epoch": 17.46191703773143, "grad_norm": 5.2877702713012695, "learning_rate": 7.744413742407638e-06, "loss": 0.11231278, "memory(GiB)": 13.7, "step": 37255, "train_speed(iter/s)": 1.534424 }, { "acc": 0.98753719, "epoch": 17.464260604640263, "grad_norm": 7.216772079467773, "learning_rate": 7.743765771348456e-06, "loss": 0.07574144, "memory(GiB)": 13.7, "step": 37260, "train_speed(iter/s)": 1.534431 }, { "acc": 0.98116665, "epoch": 17.466604171549097, "grad_norm": 1.4822883605957031, "learning_rate": 7.743117734350176e-06, "loss": 0.12566493, "memory(GiB)": 13.7, "step": 37265, "train_speed(iter/s)": 1.534426 }, { "acc": 0.96794109, "epoch": 17.46894773845793, "grad_norm": 23.344085693359375, "learning_rate": 7.742469631428374e-06, "loss": 0.17402067, "memory(GiB)": 13.7, "step": 37270, "train_speed(iter/s)": 1.534438 }, { "acc": 0.97893429, "epoch": 17.47129130536677, "grad_norm": 3.636943817138672, "learning_rate": 7.741821462598633e-06, "loss": 0.14136143, "memory(GiB)": 13.7, "step": 37275, "train_speed(iter/s)": 1.53445 }, { "acc": 0.96306553, "epoch": 17.473634872275603, "grad_norm": 5.455361366271973, "learning_rate": 7.741173227876528e-06, "loss": 0.14075731, "memory(GiB)": 13.7, "step": 37280, "train_speed(iter/s)": 1.53447 }, { "acc": 0.97562504, "epoch": 17.475978439184438, "grad_norm": 4.218660831451416, "learning_rate": 7.740524927277644e-06, "loss": 0.14758922, "memory(GiB)": 13.7, "step": 37285, "train_speed(iter/s)": 1.534468 }, { "acc": 0.98415184, "epoch": 17.478322006093276, "grad_norm": 0.014823820441961288, "learning_rate": 7.739876560817561e-06, "loss": 0.07607531, "memory(GiB)": 13.7, "step": 37290, "train_speed(iter/s)": 1.534481 }, { "acc": 0.9833334, "epoch": 17.48066557300211, "grad_norm": 5.026787757873535, "learning_rate": 7.739228128511866e-06, "loss": 0.09301116, "memory(GiB)": 13.7, "step": 37295, "train_speed(iter/s)": 1.53449 }, { "acc": 0.98581305, "epoch": 17.483009139910944, "grad_norm": 6.472017765045166, "learning_rate": 7.738579630376144e-06, "loss": 0.11277692, "memory(GiB)": 13.7, "step": 37300, "train_speed(iter/s)": 1.5345 }, { "acc": 0.97937498, "epoch": 17.48535270681978, "grad_norm": 35.99127960205078, "learning_rate": 7.737931066425983e-06, "loss": 0.08918966, "memory(GiB)": 13.7, "step": 37305, "train_speed(iter/s)": 1.534497 }, { "acc": 0.98125, "epoch": 17.487696273728616, "grad_norm": 8.048243522644043, "learning_rate": 7.737282436676974e-06, "loss": 0.04211743, "memory(GiB)": 13.7, "step": 37310, "train_speed(iter/s)": 1.534502 }, { "acc": 0.98947916, "epoch": 17.49003984063745, "grad_norm": 1.426300287246704, "learning_rate": 7.736633741144703e-06, "loss": 0.11593149, "memory(GiB)": 13.7, "step": 37315, "train_speed(iter/s)": 1.534514 }, { "acc": 0.98491669, "epoch": 17.492383407546285, "grad_norm": 6.451032638549805, "learning_rate": 7.735984979844767e-06, "loss": 0.03482496, "memory(GiB)": 13.7, "step": 37320, "train_speed(iter/s)": 1.534522 }, { "acc": 0.96099167, "epoch": 17.49472697445512, "grad_norm": 5.148991107940674, "learning_rate": 7.735336152792762e-06, "loss": 0.13916671, "memory(GiB)": 13.7, "step": 37325, "train_speed(iter/s)": 1.534526 }, { "acc": 0.97739582, "epoch": 17.497070541363957, "grad_norm": 4.641429901123047, "learning_rate": 7.734687260004276e-06, "loss": 0.09477889, "memory(GiB)": 13.7, "step": 37330, "train_speed(iter/s)": 1.534527 }, { "acc": 0.98052082, "epoch": 17.49941410827279, "grad_norm": 7.099184513092041, "learning_rate": 7.734038301494913e-06, "loss": 0.05339388, "memory(GiB)": 13.7, "step": 37335, "train_speed(iter/s)": 1.534521 }, { "acc": 0.98407192, "epoch": 17.501757675181626, "grad_norm": 3.295595407485962, "learning_rate": 7.733389277280268e-06, "loss": 0.08495561, "memory(GiB)": 13.7, "step": 37340, "train_speed(iter/s)": 1.534523 }, { "acc": 0.98794641, "epoch": 17.50410124209046, "grad_norm": 4.244952201843262, "learning_rate": 7.732740187375945e-06, "loss": 0.07360197, "memory(GiB)": 13.7, "step": 37345, "train_speed(iter/s)": 1.534524 }, { "acc": 0.9875, "epoch": 17.506444808999298, "grad_norm": 5.717555999755859, "learning_rate": 7.732091031797543e-06, "loss": 0.06693483, "memory(GiB)": 13.7, "step": 37350, "train_speed(iter/s)": 1.534528 }, { "acc": 0.98149681, "epoch": 17.508788375908132, "grad_norm": 6.667624473571777, "learning_rate": 7.731441810560666e-06, "loss": 0.08091879, "memory(GiB)": 13.7, "step": 37355, "train_speed(iter/s)": 1.534528 }, { "acc": 0.98197918, "epoch": 17.511131942816967, "grad_norm": 1.4740079641342163, "learning_rate": 7.730792523680922e-06, "loss": 0.07447101, "memory(GiB)": 13.7, "step": 37360, "train_speed(iter/s)": 1.534538 }, { "acc": 0.98481064, "epoch": 17.513475509725804, "grad_norm": 3.6732990741729736, "learning_rate": 7.730143171173914e-06, "loss": 0.05427115, "memory(GiB)": 13.7, "step": 37365, "train_speed(iter/s)": 1.534546 }, { "acc": 0.97011595, "epoch": 17.51581907663464, "grad_norm": 6.038825988769531, "learning_rate": 7.729493753055252e-06, "loss": 0.14970558, "memory(GiB)": 13.7, "step": 37370, "train_speed(iter/s)": 1.534546 }, { "acc": 0.99375, "epoch": 17.518162643543473, "grad_norm": 4.403307914733887, "learning_rate": 7.728844269340544e-06, "loss": 0.03437909, "memory(GiB)": 13.7, "step": 37375, "train_speed(iter/s)": 1.534549 }, { "acc": 0.9885417, "epoch": 17.520506210452307, "grad_norm": 2.512415647506714, "learning_rate": 7.728194720045404e-06, "loss": 0.0245415, "memory(GiB)": 13.7, "step": 37380, "train_speed(iter/s)": 1.534564 }, { "acc": 0.97911701, "epoch": 17.522849777361145, "grad_norm": 1.8465971946716309, "learning_rate": 7.727545105185444e-06, "loss": 0.08186601, "memory(GiB)": 13.7, "step": 37385, "train_speed(iter/s)": 1.53457 }, { "acc": 0.97647047, "epoch": 17.52519334426998, "grad_norm": 9.188017845153809, "learning_rate": 7.72689542477628e-06, "loss": 0.09680879, "memory(GiB)": 13.7, "step": 37390, "train_speed(iter/s)": 1.534577 }, { "acc": 0.97979164, "epoch": 17.527536911178814, "grad_norm": 3.6505982875823975, "learning_rate": 7.726245678833524e-06, "loss": 0.06030573, "memory(GiB)": 13.7, "step": 37395, "train_speed(iter/s)": 1.534591 }, { "acc": 0.98410711, "epoch": 17.529880478087648, "grad_norm": 6.208308696746826, "learning_rate": 7.725595867372801e-06, "loss": 0.05452303, "memory(GiB)": 13.7, "step": 37400, "train_speed(iter/s)": 1.53459 }, { "acc": 0.98462753, "epoch": 17.532224044996486, "grad_norm": 7.210578441619873, "learning_rate": 7.724945990409724e-06, "loss": 0.06263915, "memory(GiB)": 13.7, "step": 37405, "train_speed(iter/s)": 1.534586 }, { "acc": 0.98149796, "epoch": 17.53456761190532, "grad_norm": 2.86458683013916, "learning_rate": 7.724296047959914e-06, "loss": 0.0776418, "memory(GiB)": 13.7, "step": 37410, "train_speed(iter/s)": 1.534587 }, { "acc": 0.97036705, "epoch": 17.536911178814155, "grad_norm": 8.206602096557617, "learning_rate": 7.723646040038998e-06, "loss": 0.10872614, "memory(GiB)": 13.7, "step": 37415, "train_speed(iter/s)": 1.53459 }, { "acc": 0.98708334, "epoch": 17.53925474572299, "grad_norm": 0.3747938573360443, "learning_rate": 7.722995966662595e-06, "loss": 0.04372542, "memory(GiB)": 13.7, "step": 37420, "train_speed(iter/s)": 1.534594 }, { "acc": 0.99341345, "epoch": 17.541598312631827, "grad_norm": 4.760228633880615, "learning_rate": 7.722345827846333e-06, "loss": 0.0480215, "memory(GiB)": 13.7, "step": 37425, "train_speed(iter/s)": 1.534605 }, { "acc": 0.97508888, "epoch": 17.54394187954066, "grad_norm": 9.156645774841309, "learning_rate": 7.721695623605841e-06, "loss": 0.10153826, "memory(GiB)": 13.7, "step": 37430, "train_speed(iter/s)": 1.534605 }, { "acc": 0.97570515, "epoch": 17.546285446449495, "grad_norm": 31.515567779541016, "learning_rate": 7.721045353956748e-06, "loss": 0.13882247, "memory(GiB)": 13.7, "step": 37435, "train_speed(iter/s)": 1.534602 }, { "acc": 0.97296991, "epoch": 17.54862901335833, "grad_norm": 4.472476959228516, "learning_rate": 7.720395018914679e-06, "loss": 0.13481606, "memory(GiB)": 13.7, "step": 37440, "train_speed(iter/s)": 1.534619 }, { "acc": 0.97937508, "epoch": 17.550972580267167, "grad_norm": 10.537446975708008, "learning_rate": 7.719744618495271e-06, "loss": 0.07148421, "memory(GiB)": 13.7, "step": 37445, "train_speed(iter/s)": 1.534637 }, { "acc": 0.98701, "epoch": 17.553316147176, "grad_norm": 3.994903564453125, "learning_rate": 7.719094152714156e-06, "loss": 0.03924712, "memory(GiB)": 13.7, "step": 37450, "train_speed(iter/s)": 1.534646 }, { "acc": 0.98794641, "epoch": 17.555659714084836, "grad_norm": 3.989243984222412, "learning_rate": 7.718443621586968e-06, "loss": 0.07977363, "memory(GiB)": 13.7, "step": 37455, "train_speed(iter/s)": 1.534648 }, { "acc": 0.97666664, "epoch": 17.558003280993674, "grad_norm": 5.505475997924805, "learning_rate": 7.717793025129343e-06, "loss": 0.11308067, "memory(GiB)": 13.7, "step": 37460, "train_speed(iter/s)": 1.534667 }, { "acc": 0.98258934, "epoch": 17.560346847902508, "grad_norm": 5.132164001464844, "learning_rate": 7.717142363356923e-06, "loss": 0.09175708, "memory(GiB)": 13.7, "step": 37465, "train_speed(iter/s)": 1.534673 }, { "acc": 0.98187494, "epoch": 17.562690414811343, "grad_norm": 1.040297269821167, "learning_rate": 7.716491636285345e-06, "loss": 0.06712749, "memory(GiB)": 13.7, "step": 37470, "train_speed(iter/s)": 1.534688 }, { "acc": 0.98923607, "epoch": 17.565033981720177, "grad_norm": 0.08016317337751389, "learning_rate": 7.71584084393025e-06, "loss": 0.04543812, "memory(GiB)": 13.7, "step": 37475, "train_speed(iter/s)": 1.534693 }, { "acc": 0.9854166, "epoch": 17.567377548629015, "grad_norm": 4.907040596008301, "learning_rate": 7.715189986307282e-06, "loss": 0.08507096, "memory(GiB)": 13.7, "step": 37480, "train_speed(iter/s)": 1.534696 }, { "acc": 0.9864584, "epoch": 17.56972111553785, "grad_norm": 3.911343574523926, "learning_rate": 7.714539063432084e-06, "loss": 0.0307396, "memory(GiB)": 13.7, "step": 37485, "train_speed(iter/s)": 1.534707 }, { "acc": 0.98447914, "epoch": 17.572064682446683, "grad_norm": 1.8334392309188843, "learning_rate": 7.713888075320306e-06, "loss": 0.09231293, "memory(GiB)": 13.7, "step": 37490, "train_speed(iter/s)": 1.534706 }, { "acc": 0.97833328, "epoch": 17.574408249355518, "grad_norm": 0.21990437805652618, "learning_rate": 7.71323702198759e-06, "loss": 0.06742886, "memory(GiB)": 13.7, "step": 37495, "train_speed(iter/s)": 1.534714 }, { "acc": 0.97642365, "epoch": 17.576751816264355, "grad_norm": 4.246755599975586, "learning_rate": 7.712585903449589e-06, "loss": 0.10013943, "memory(GiB)": 13.7, "step": 37500, "train_speed(iter/s)": 1.53472 }, { "acc": 0.97431622, "epoch": 17.57909538317319, "grad_norm": 11.828009605407715, "learning_rate": 7.711934719721953e-06, "loss": 0.06846334, "memory(GiB)": 13.7, "step": 37505, "train_speed(iter/s)": 1.534707 }, { "acc": 0.9731348, "epoch": 17.581438950082024, "grad_norm": 5.309834957122803, "learning_rate": 7.711283470820335e-06, "loss": 0.11913863, "memory(GiB)": 13.7, "step": 37510, "train_speed(iter/s)": 1.534716 }, { "acc": 0.98050594, "epoch": 17.58378251699086, "grad_norm": 0.955780029296875, "learning_rate": 7.710632156760385e-06, "loss": 0.09893589, "memory(GiB)": 13.7, "step": 37515, "train_speed(iter/s)": 1.534722 }, { "acc": 0.9845211, "epoch": 17.586126083899696, "grad_norm": 33.547462463378906, "learning_rate": 7.709980777557766e-06, "loss": 0.05135771, "memory(GiB)": 13.7, "step": 37520, "train_speed(iter/s)": 1.534714 }, { "acc": 0.97661705, "epoch": 17.58846965080853, "grad_norm": 2.232051134109497, "learning_rate": 7.709329333228127e-06, "loss": 0.06385527, "memory(GiB)": 13.7, "step": 37525, "train_speed(iter/s)": 1.534722 }, { "acc": 0.9760417, "epoch": 17.590813217717365, "grad_norm": 7.311918258666992, "learning_rate": 7.708677823787131e-06, "loss": 0.10935199, "memory(GiB)": 13.7, "step": 37530, "train_speed(iter/s)": 1.534721 }, { "acc": 0.975947, "epoch": 17.593156784626203, "grad_norm": 3.396273136138916, "learning_rate": 7.708026249250437e-06, "loss": 0.05871311, "memory(GiB)": 13.7, "step": 37535, "train_speed(iter/s)": 1.534735 }, { "acc": 0.984375, "epoch": 17.595500351535037, "grad_norm": 4.481067657470703, "learning_rate": 7.707374609633707e-06, "loss": 0.04845558, "memory(GiB)": 13.7, "step": 37540, "train_speed(iter/s)": 1.534732 }, { "acc": 0.98584328, "epoch": 17.59784391844387, "grad_norm": 2.4501407146453857, "learning_rate": 7.706722904952605e-06, "loss": 0.03598076, "memory(GiB)": 13.7, "step": 37545, "train_speed(iter/s)": 1.534743 }, { "acc": 0.96864996, "epoch": 17.600187485352706, "grad_norm": 26.69933319091797, "learning_rate": 7.706071135222794e-06, "loss": 0.11235811, "memory(GiB)": 13.7, "step": 37550, "train_speed(iter/s)": 1.534747 }, { "acc": 0.98123512, "epoch": 17.602531052261543, "grad_norm": 3.0288708209991455, "learning_rate": 7.705419300459942e-06, "loss": 0.07913085, "memory(GiB)": 13.7, "step": 37555, "train_speed(iter/s)": 1.534748 }, { "acc": 0.9741621, "epoch": 17.604874619170378, "grad_norm": 0.967641294002533, "learning_rate": 7.704767400679714e-06, "loss": 0.12156411, "memory(GiB)": 13.7, "step": 37560, "train_speed(iter/s)": 1.534758 }, { "acc": 0.97840271, "epoch": 17.607218186079212, "grad_norm": 7.744964122772217, "learning_rate": 7.704115435897784e-06, "loss": 0.06665698, "memory(GiB)": 13.7, "step": 37565, "train_speed(iter/s)": 1.534766 }, { "acc": 0.98500004, "epoch": 17.609561752988046, "grad_norm": 8.297747611999512, "learning_rate": 7.703463406129822e-06, "loss": 0.09423531, "memory(GiB)": 13.7, "step": 37570, "train_speed(iter/s)": 1.534771 }, { "acc": 0.98205357, "epoch": 17.611905319896884, "grad_norm": 4.849624156951904, "learning_rate": 7.702811311391499e-06, "loss": 0.07332001, "memory(GiB)": 13.7, "step": 37575, "train_speed(iter/s)": 1.53477 }, { "acc": 0.97633934, "epoch": 17.61424888680572, "grad_norm": 5.303658485412598, "learning_rate": 7.70215915169849e-06, "loss": 0.0678806, "memory(GiB)": 13.7, "step": 37580, "train_speed(iter/s)": 1.534793 }, { "acc": 0.98736115, "epoch": 17.616592453714553, "grad_norm": 3.7404298782348633, "learning_rate": 7.701506927066468e-06, "loss": 0.10720544, "memory(GiB)": 13.7, "step": 37585, "train_speed(iter/s)": 1.534793 }, { "acc": 0.97166662, "epoch": 17.618936020623387, "grad_norm": 3.8018429279327393, "learning_rate": 7.700854637511117e-06, "loss": 0.13812648, "memory(GiB)": 13.7, "step": 37590, "train_speed(iter/s)": 1.534804 }, { "acc": 0.9880209, "epoch": 17.621279587532225, "grad_norm": 2.0824077129364014, "learning_rate": 7.700202283048108e-06, "loss": 0.05079593, "memory(GiB)": 13.7, "step": 37595, "train_speed(iter/s)": 1.534816 }, { "acc": 0.98594465, "epoch": 17.62362315444106, "grad_norm": 5.364997863769531, "learning_rate": 7.699549863693127e-06, "loss": 0.09251562, "memory(GiB)": 13.7, "step": 37600, "train_speed(iter/s)": 1.534835 }, { "acc": 0.99437504, "epoch": 17.625966721349894, "grad_norm": 7.254963397979736, "learning_rate": 7.698897379461857e-06, "loss": 0.06458151, "memory(GiB)": 13.7, "step": 37605, "train_speed(iter/s)": 1.534843 }, { "acc": 0.99686604, "epoch": 17.62831028825873, "grad_norm": 3.0229170322418213, "learning_rate": 7.698244830369975e-06, "loss": 0.0818212, "memory(GiB)": 13.7, "step": 37610, "train_speed(iter/s)": 1.534848 }, { "acc": 0.97990532, "epoch": 17.630653855167566, "grad_norm": 6.632075309753418, "learning_rate": 7.697592216433171e-06, "loss": 0.08563082, "memory(GiB)": 13.7, "step": 37615, "train_speed(iter/s)": 1.534858 }, { "acc": 0.95923615, "epoch": 17.6329974220764, "grad_norm": 16.611330032348633, "learning_rate": 7.696939537667132e-06, "loss": 0.13779881, "memory(GiB)": 13.7, "step": 37620, "train_speed(iter/s)": 1.534877 }, { "acc": 0.9864583, "epoch": 17.635340988985234, "grad_norm": 4.141943454742432, "learning_rate": 7.696286794087543e-06, "loss": 0.06982803, "memory(GiB)": 13.7, "step": 37625, "train_speed(iter/s)": 1.534876 }, { "acc": 0.9875988, "epoch": 17.637684555894072, "grad_norm": 4.206977844238281, "learning_rate": 7.695633985710098e-06, "loss": 0.06628863, "memory(GiB)": 13.7, "step": 37630, "train_speed(iter/s)": 1.534881 }, { "acc": 0.98800507, "epoch": 17.640028122802907, "grad_norm": 1.3243626356124878, "learning_rate": 7.694981112550485e-06, "loss": 0.11453395, "memory(GiB)": 13.7, "step": 37635, "train_speed(iter/s)": 1.534886 }, { "acc": 0.97748013, "epoch": 17.64237168971174, "grad_norm": 6.6976847648620605, "learning_rate": 7.694328174624399e-06, "loss": 0.11700994, "memory(GiB)": 13.7, "step": 37640, "train_speed(iter/s)": 1.534898 }, { "acc": 0.97668648, "epoch": 17.644715256620575, "grad_norm": 6.597141265869141, "learning_rate": 7.693675171947533e-06, "loss": 0.11553144, "memory(GiB)": 13.7, "step": 37645, "train_speed(iter/s)": 1.534896 }, { "acc": 0.97156248, "epoch": 17.647058823529413, "grad_norm": 6.47575569152832, "learning_rate": 7.693022104535583e-06, "loss": 0.13043756, "memory(GiB)": 13.7, "step": 37650, "train_speed(iter/s)": 1.534903 }, { "acc": 0.98883934, "epoch": 17.649402390438247, "grad_norm": 2.238950252532959, "learning_rate": 7.692368972404249e-06, "loss": 0.05006774, "memory(GiB)": 13.7, "step": 37655, "train_speed(iter/s)": 1.53491 }, { "acc": 0.98279667, "epoch": 17.65174595734708, "grad_norm": 2.9814796447753906, "learning_rate": 7.691715775569225e-06, "loss": 0.13347197, "memory(GiB)": 13.7, "step": 37660, "train_speed(iter/s)": 1.534915 }, { "acc": 0.97223034, "epoch": 17.654089524255916, "grad_norm": 2.182830810546875, "learning_rate": 7.691062514046218e-06, "loss": 0.09759848, "memory(GiB)": 13.7, "step": 37665, "train_speed(iter/s)": 1.534926 }, { "acc": 0.97468138, "epoch": 17.656433091164754, "grad_norm": 7.431822776794434, "learning_rate": 7.690409187850928e-06, "loss": 0.12188535, "memory(GiB)": 13.7, "step": 37670, "train_speed(iter/s)": 1.534922 }, { "acc": 0.97549982, "epoch": 17.658776658073588, "grad_norm": 8.538534164428711, "learning_rate": 7.689755796999056e-06, "loss": 0.14513006, "memory(GiB)": 13.7, "step": 37675, "train_speed(iter/s)": 1.534927 }, { "acc": 0.96488094, "epoch": 17.661120224982422, "grad_norm": 32.28731155395508, "learning_rate": 7.689102341506311e-06, "loss": 0.17745554, "memory(GiB)": 13.7, "step": 37680, "train_speed(iter/s)": 1.534929 }, { "acc": 0.97889881, "epoch": 17.66346379189126, "grad_norm": 4.7333550453186035, "learning_rate": 7.688448821388398e-06, "loss": 0.04666101, "memory(GiB)": 13.7, "step": 37685, "train_speed(iter/s)": 1.534934 }, { "acc": 0.98093748, "epoch": 17.665807358800095, "grad_norm": 2.290804386138916, "learning_rate": 7.687795236661025e-06, "loss": 0.08509257, "memory(GiB)": 13.7, "step": 37690, "train_speed(iter/s)": 1.534938 }, { "acc": 0.98041668, "epoch": 17.66815092570893, "grad_norm": 0.8582624197006226, "learning_rate": 7.687141587339903e-06, "loss": 0.04033631, "memory(GiB)": 13.7, "step": 37695, "train_speed(iter/s)": 1.534936 }, { "acc": 0.97590237, "epoch": 17.670494492617763, "grad_norm": 6.6632609367370605, "learning_rate": 7.686487873440746e-06, "loss": 0.09428669, "memory(GiB)": 13.7, "step": 37700, "train_speed(iter/s)": 1.534936 }, { "acc": 0.9733532, "epoch": 17.6728380595266, "grad_norm": 1.5068162679672241, "learning_rate": 7.685834094979263e-06, "loss": 0.09136868, "memory(GiB)": 13.7, "step": 37705, "train_speed(iter/s)": 1.534939 }, { "acc": 0.96196432, "epoch": 17.675181626435435, "grad_norm": 4.857964992523193, "learning_rate": 7.68518025197117e-06, "loss": 0.1824375, "memory(GiB)": 13.7, "step": 37710, "train_speed(iter/s)": 1.534931 }, { "acc": 0.98660717, "epoch": 17.67752519334427, "grad_norm": 1.5961878299713135, "learning_rate": 7.684526344432182e-06, "loss": 0.03295195, "memory(GiB)": 13.7, "step": 37715, "train_speed(iter/s)": 1.534939 }, { "acc": 0.9833333, "epoch": 17.679868760253104, "grad_norm": 0.2264135181903839, "learning_rate": 7.68387237237802e-06, "loss": 0.04096903, "memory(GiB)": 13.7, "step": 37720, "train_speed(iter/s)": 1.534947 }, { "acc": 0.95113096, "epoch": 17.682212327161942, "grad_norm": 6.323017120361328, "learning_rate": 7.683218335824403e-06, "loss": 0.16700497, "memory(GiB)": 13.7, "step": 37725, "train_speed(iter/s)": 1.534955 }, { "acc": 0.97842579, "epoch": 17.684555894070776, "grad_norm": 7.046759605407715, "learning_rate": 7.68256423478705e-06, "loss": 0.15472121, "memory(GiB)": 13.7, "step": 37730, "train_speed(iter/s)": 1.534955 }, { "acc": 0.96300507, "epoch": 17.68689946097961, "grad_norm": 17.37908935546875, "learning_rate": 7.681910069281683e-06, "loss": 0.10039411, "memory(GiB)": 13.7, "step": 37735, "train_speed(iter/s)": 1.534963 }, { "acc": 0.98264828, "epoch": 17.689243027888445, "grad_norm": 2.3101611137390137, "learning_rate": 7.681255839324028e-06, "loss": 0.06925151, "memory(GiB)": 13.7, "step": 37740, "train_speed(iter/s)": 1.534968 }, { "acc": 0.9773098, "epoch": 17.691586594797283, "grad_norm": 6.509002685546875, "learning_rate": 7.68060154492981e-06, "loss": 0.10780602, "memory(GiB)": 13.7, "step": 37745, "train_speed(iter/s)": 1.534974 }, { "acc": 0.97383013, "epoch": 17.693930161706117, "grad_norm": 4.46195125579834, "learning_rate": 7.679947186114754e-06, "loss": 0.09075309, "memory(GiB)": 13.7, "step": 37750, "train_speed(iter/s)": 1.53498 }, { "acc": 0.98125, "epoch": 17.69627372861495, "grad_norm": 2.144335985183716, "learning_rate": 7.67929276289459e-06, "loss": 0.108799, "memory(GiB)": 13.7, "step": 37755, "train_speed(iter/s)": 1.534986 }, { "acc": 0.97366076, "epoch": 17.698617295523785, "grad_norm": 1.4354437589645386, "learning_rate": 7.67863827528505e-06, "loss": 0.12230058, "memory(GiB)": 13.7, "step": 37760, "train_speed(iter/s)": 1.53499 }, { "acc": 0.97453375, "epoch": 17.700960862432623, "grad_norm": 5.8193359375, "learning_rate": 7.677983723301864e-06, "loss": 0.10600402, "memory(GiB)": 13.7, "step": 37765, "train_speed(iter/s)": 1.535 }, { "acc": 0.97240534, "epoch": 17.703304429341458, "grad_norm": 6.859181880950928, "learning_rate": 7.677329106960767e-06, "loss": 0.10322353, "memory(GiB)": 13.7, "step": 37770, "train_speed(iter/s)": 1.535011 }, { "acc": 0.98108587, "epoch": 17.705647996250292, "grad_norm": 1.0835856199264526, "learning_rate": 7.676674426277492e-06, "loss": 0.07069833, "memory(GiB)": 13.7, "step": 37775, "train_speed(iter/s)": 1.535013 }, { "acc": 0.98674107, "epoch": 17.70799156315913, "grad_norm": 4.014217376708984, "learning_rate": 7.676019681267772e-06, "loss": 0.08951526, "memory(GiB)": 13.7, "step": 37780, "train_speed(iter/s)": 1.535007 }, { "acc": 0.9744792, "epoch": 17.710335130067964, "grad_norm": 1.820339322090149, "learning_rate": 7.675364871947352e-06, "loss": 0.13354853, "memory(GiB)": 13.7, "step": 37785, "train_speed(iter/s)": 1.535017 }, { "acc": 0.97565203, "epoch": 17.7126786969768, "grad_norm": 9.530699729919434, "learning_rate": 7.674709998331969e-06, "loss": 0.13799741, "memory(GiB)": 13.7, "step": 37790, "train_speed(iter/s)": 1.535024 }, { "acc": 0.97648029, "epoch": 17.715022263885633, "grad_norm": 6.442919731140137, "learning_rate": 7.674055060437361e-06, "loss": 0.0972231, "memory(GiB)": 13.7, "step": 37795, "train_speed(iter/s)": 1.535032 }, { "acc": 0.97924681, "epoch": 17.71736583079447, "grad_norm": 5.581208229064941, "learning_rate": 7.673400058279272e-06, "loss": 0.0779024, "memory(GiB)": 13.7, "step": 37800, "train_speed(iter/s)": 1.535036 }, { "acc": 0.97250004, "epoch": 17.719709397703305, "grad_norm": 4.389518737792969, "learning_rate": 7.672744991873447e-06, "loss": 0.08886298, "memory(GiB)": 13.7, "step": 37805, "train_speed(iter/s)": 1.535048 }, { "acc": 0.96329498, "epoch": 17.72205296461214, "grad_norm": 1.4963958263397217, "learning_rate": 7.672089861235634e-06, "loss": 0.2194103, "memory(GiB)": 13.7, "step": 37810, "train_speed(iter/s)": 1.535043 }, { "acc": 0.98761368, "epoch": 17.724396531520973, "grad_norm": 2.488312005996704, "learning_rate": 7.671434666381574e-06, "loss": 0.07115281, "memory(GiB)": 13.7, "step": 37815, "train_speed(iter/s)": 1.535042 }, { "acc": 0.97553034, "epoch": 17.72674009842981, "grad_norm": 0.3465915322303772, "learning_rate": 7.670779407327023e-06, "loss": 0.09517413, "memory(GiB)": 13.7, "step": 37820, "train_speed(iter/s)": 1.535053 }, { "acc": 0.98205814, "epoch": 17.729083665338646, "grad_norm": 8.232309341430664, "learning_rate": 7.670124084087724e-06, "loss": 0.04328565, "memory(GiB)": 13.7, "step": 37825, "train_speed(iter/s)": 1.535057 }, { "acc": 0.98918562, "epoch": 17.73142723224748, "grad_norm": 3.631330966949463, "learning_rate": 7.669468696679435e-06, "loss": 0.07903773, "memory(GiB)": 13.7, "step": 37830, "train_speed(iter/s)": 1.535067 }, { "acc": 0.98007936, "epoch": 17.733770799156314, "grad_norm": 2.598890781402588, "learning_rate": 7.668813245117905e-06, "loss": 0.08841605, "memory(GiB)": 13.7, "step": 37835, "train_speed(iter/s)": 1.535076 }, { "acc": 0.98988094, "epoch": 17.736114366065152, "grad_norm": 4.201727390289307, "learning_rate": 7.668157729418892e-06, "loss": 0.07312611, "memory(GiB)": 13.7, "step": 37840, "train_speed(iter/s)": 1.535084 }, { "acc": 0.98030033, "epoch": 17.738457932973986, "grad_norm": 2.0649373531341553, "learning_rate": 7.66750214959815e-06, "loss": 0.07582165, "memory(GiB)": 13.7, "step": 37845, "train_speed(iter/s)": 1.535082 }, { "acc": 0.98239584, "epoch": 17.74080149988282, "grad_norm": 3.802133798599243, "learning_rate": 7.666846505671438e-06, "loss": 0.05850552, "memory(GiB)": 13.7, "step": 37850, "train_speed(iter/s)": 1.53508 }, { "acc": 0.99080811, "epoch": 17.74314506679166, "grad_norm": 0.7193723917007446, "learning_rate": 7.666190797654518e-06, "loss": 0.04617092, "memory(GiB)": 13.7, "step": 37855, "train_speed(iter/s)": 1.535088 }, { "acc": 0.96092262, "epoch": 17.745488633700493, "grad_norm": 12.21337890625, "learning_rate": 7.665535025563146e-06, "loss": 0.14318405, "memory(GiB)": 13.7, "step": 37860, "train_speed(iter/s)": 1.535095 }, { "acc": 0.98422165, "epoch": 17.747832200609327, "grad_norm": 1.036535382270813, "learning_rate": 7.664879189413089e-06, "loss": 0.08382745, "memory(GiB)": 13.7, "step": 37865, "train_speed(iter/s)": 1.53511 }, { "acc": 0.98604164, "epoch": 17.75017576751816, "grad_norm": 0.5396066308021545, "learning_rate": 7.664223289220108e-06, "loss": 0.04276529, "memory(GiB)": 13.7, "step": 37870, "train_speed(iter/s)": 1.535115 }, { "acc": 0.96374998, "epoch": 17.752519334427, "grad_norm": 6.37434720993042, "learning_rate": 7.663567324999971e-06, "loss": 0.11528616, "memory(GiB)": 13.7, "step": 37875, "train_speed(iter/s)": 1.535119 }, { "acc": 0.96104164, "epoch": 17.754862901335834, "grad_norm": 2.877375841140747, "learning_rate": 7.662911296768446e-06, "loss": 0.13427963, "memory(GiB)": 13.7, "step": 37880, "train_speed(iter/s)": 1.535122 }, { "acc": 0.98338747, "epoch": 17.757206468244668, "grad_norm": 3.9818673133850098, "learning_rate": 7.662255204541297e-06, "loss": 0.07714742, "memory(GiB)": 13.7, "step": 37885, "train_speed(iter/s)": 1.535127 }, { "acc": 0.98152781, "epoch": 17.759550035153502, "grad_norm": 2.3596770763397217, "learning_rate": 7.661599048334299e-06, "loss": 0.04219429, "memory(GiB)": 13.7, "step": 37890, "train_speed(iter/s)": 1.535135 }, { "acc": 0.98190975, "epoch": 17.76189360206234, "grad_norm": 1.15103018283844, "learning_rate": 7.660942828163223e-06, "loss": 0.06582032, "memory(GiB)": 13.7, "step": 37895, "train_speed(iter/s)": 1.535139 }, { "acc": 0.98217258, "epoch": 17.764237168971174, "grad_norm": 0.021777154877781868, "learning_rate": 7.660286544043841e-06, "loss": 0.06088868, "memory(GiB)": 13.7, "step": 37900, "train_speed(iter/s)": 1.535146 }, { "acc": 0.98187504, "epoch": 17.76658073588001, "grad_norm": 1.1320641040802002, "learning_rate": 7.659630195991929e-06, "loss": 0.05812613, "memory(GiB)": 13.7, "step": 37905, "train_speed(iter/s)": 1.535154 }, { "acc": 0.97872028, "epoch": 17.768924302788843, "grad_norm": 5.205904960632324, "learning_rate": 7.65897378402326e-06, "loss": 0.08059558, "memory(GiB)": 13.7, "step": 37910, "train_speed(iter/s)": 1.535149 }, { "acc": 0.97516413, "epoch": 17.77126786969768, "grad_norm": 6.8897528648376465, "learning_rate": 7.65831730815362e-06, "loss": 0.07167109, "memory(GiB)": 13.7, "step": 37915, "train_speed(iter/s)": 1.535155 }, { "acc": 0.97701645, "epoch": 17.773611436606515, "grad_norm": 5.428462982177734, "learning_rate": 7.65766076839878e-06, "loss": 0.14336618, "memory(GiB)": 13.7, "step": 37920, "train_speed(iter/s)": 1.535162 }, { "acc": 0.98165178, "epoch": 17.77595500351535, "grad_norm": 1.3602244853973389, "learning_rate": 7.657004164774526e-06, "loss": 0.06286885, "memory(GiB)": 13.7, "step": 37925, "train_speed(iter/s)": 1.535171 }, { "acc": 0.98410797, "epoch": 17.778298570424184, "grad_norm": 3.452604055404663, "learning_rate": 7.656347497296638e-06, "loss": 0.08240267, "memory(GiB)": 13.7, "step": 37930, "train_speed(iter/s)": 1.53517 }, { "acc": 0.9625, "epoch": 17.78064213733302, "grad_norm": 3.2192623615264893, "learning_rate": 7.655690765980903e-06, "loss": 0.17536726, "memory(GiB)": 13.7, "step": 37935, "train_speed(iter/s)": 1.535177 }, { "acc": 0.98263893, "epoch": 17.782985704241856, "grad_norm": 3.8912558555603027, "learning_rate": 7.655033970843103e-06, "loss": 0.05791853, "memory(GiB)": 13.7, "step": 37940, "train_speed(iter/s)": 1.535193 }, { "acc": 0.97379417, "epoch": 17.78532927115069, "grad_norm": 7.256872177124023, "learning_rate": 7.654377111899029e-06, "loss": 0.09773653, "memory(GiB)": 13.7, "step": 37945, "train_speed(iter/s)": 1.535208 }, { "acc": 0.98569756, "epoch": 17.787672838059528, "grad_norm": 4.618334770202637, "learning_rate": 7.653720189164467e-06, "loss": 0.0629557, "memory(GiB)": 13.7, "step": 37950, "train_speed(iter/s)": 1.535221 }, { "acc": 0.9885416, "epoch": 17.790016404968362, "grad_norm": 0.13967151939868927, "learning_rate": 7.653063202655206e-06, "loss": 0.04680761, "memory(GiB)": 13.7, "step": 37955, "train_speed(iter/s)": 1.535214 }, { "acc": 0.98175602, "epoch": 17.792359971877197, "grad_norm": 5.6579484939575195, "learning_rate": 7.652406152387044e-06, "loss": 0.07785496, "memory(GiB)": 13.7, "step": 37960, "train_speed(iter/s)": 1.535219 }, { "acc": 0.9828125, "epoch": 17.79470353878603, "grad_norm": 7.029277324676514, "learning_rate": 7.651749038375767e-06, "loss": 0.06381475, "memory(GiB)": 13.7, "step": 37965, "train_speed(iter/s)": 1.535228 }, { "acc": 0.98421135, "epoch": 17.79704710569487, "grad_norm": 2.4378886222839355, "learning_rate": 7.651091860637175e-06, "loss": 0.07048356, "memory(GiB)": 13.7, "step": 37970, "train_speed(iter/s)": 1.535226 }, { "acc": 0.9885416, "epoch": 17.799390672603703, "grad_norm": 1.0408451557159424, "learning_rate": 7.650434619187061e-06, "loss": 0.06322752, "memory(GiB)": 13.7, "step": 37975, "train_speed(iter/s)": 1.535226 }, { "acc": 0.9777977, "epoch": 17.801734239512538, "grad_norm": 5.9699530601501465, "learning_rate": 7.649777314041227e-06, "loss": 0.15454222, "memory(GiB)": 13.7, "step": 37980, "train_speed(iter/s)": 1.535227 }, { "acc": 0.96731606, "epoch": 17.804077806421372, "grad_norm": 0.6930651664733887, "learning_rate": 7.649119945215468e-06, "loss": 0.07322695, "memory(GiB)": 13.7, "step": 37985, "train_speed(iter/s)": 1.535235 }, { "acc": 0.975947, "epoch": 17.80642137333021, "grad_norm": 7.648016452789307, "learning_rate": 7.648462512725588e-06, "loss": 0.0819536, "memory(GiB)": 13.7, "step": 37990, "train_speed(iter/s)": 1.535239 }, { "acc": 0.9916666, "epoch": 17.808764940239044, "grad_norm": 0.47569912672042847, "learning_rate": 7.647805016587387e-06, "loss": 0.04139763, "memory(GiB)": 13.7, "step": 37995, "train_speed(iter/s)": 1.535243 }, { "acc": 0.98520832, "epoch": 17.81110850714788, "grad_norm": 2.1605336666107178, "learning_rate": 7.647147456816673e-06, "loss": 0.08085816, "memory(GiB)": 13.7, "step": 38000, "train_speed(iter/s)": 1.535245 }, { "acc": 0.99312496, "epoch": 17.813452074056713, "grad_norm": 2.1127023696899414, "learning_rate": 7.646489833429246e-06, "loss": 0.02306123, "memory(GiB)": 13.7, "step": 38005, "train_speed(iter/s)": 1.535236 }, { "acc": 0.97937498, "epoch": 17.81579564096555, "grad_norm": 0.8852338790893555, "learning_rate": 7.645832146440917e-06, "loss": 0.0567427, "memory(GiB)": 13.7, "step": 38010, "train_speed(iter/s)": 1.535247 }, { "acc": 0.97270832, "epoch": 17.818139207874385, "grad_norm": 25.99073600769043, "learning_rate": 7.645174395867495e-06, "loss": 0.15186992, "memory(GiB)": 13.7, "step": 38015, "train_speed(iter/s)": 1.535247 }, { "acc": 0.97300596, "epoch": 17.82048277478322, "grad_norm": 8.939611434936523, "learning_rate": 7.644516581724788e-06, "loss": 0.13525147, "memory(GiB)": 13.7, "step": 38020, "train_speed(iter/s)": 1.535237 }, { "acc": 0.98426476, "epoch": 17.822826341692057, "grad_norm": 3.974318265914917, "learning_rate": 7.643858704028608e-06, "loss": 0.05491017, "memory(GiB)": 13.7, "step": 38025, "train_speed(iter/s)": 1.535241 }, { "acc": 0.98175602, "epoch": 17.82516990860089, "grad_norm": 0.9944915175437927, "learning_rate": 7.64320076279477e-06, "loss": 0.03526923, "memory(GiB)": 13.7, "step": 38030, "train_speed(iter/s)": 1.535247 }, { "acc": 0.98377972, "epoch": 17.827513475509726, "grad_norm": 5.540640830993652, "learning_rate": 7.642542758039088e-06, "loss": 0.08523614, "memory(GiB)": 13.7, "step": 38035, "train_speed(iter/s)": 1.535248 }, { "acc": 0.98603086, "epoch": 17.82985704241856, "grad_norm": 3.8900504112243652, "learning_rate": 7.641884689777378e-06, "loss": 0.09276919, "memory(GiB)": 13.7, "step": 38040, "train_speed(iter/s)": 1.535266 }, { "acc": 0.96996536, "epoch": 17.832200609327398, "grad_norm": 9.215253829956055, "learning_rate": 7.641226558025457e-06, "loss": 0.13764293, "memory(GiB)": 13.7, "step": 38045, "train_speed(iter/s)": 1.53527 }, { "acc": 0.97547617, "epoch": 17.834544176236232, "grad_norm": 1.959996223449707, "learning_rate": 7.640568362799143e-06, "loss": 0.08145777, "memory(GiB)": 13.7, "step": 38050, "train_speed(iter/s)": 1.535264 }, { "acc": 0.9784565, "epoch": 17.836887743145066, "grad_norm": 3.8929409980773926, "learning_rate": 7.63991010411426e-06, "loss": 0.05928183, "memory(GiB)": 13.7, "step": 38055, "train_speed(iter/s)": 1.53527 }, { "acc": 0.98470831, "epoch": 17.8392313100539, "grad_norm": 3.1781814098358154, "learning_rate": 7.63925178198663e-06, "loss": 0.04845426, "memory(GiB)": 13.7, "step": 38060, "train_speed(iter/s)": 1.53528 }, { "acc": 0.98666668, "epoch": 17.84157487696274, "grad_norm": 0.7992779016494751, "learning_rate": 7.638593396432078e-06, "loss": 0.03021273, "memory(GiB)": 13.7, "step": 38065, "train_speed(iter/s)": 1.535274 }, { "acc": 0.97833328, "epoch": 17.843918443871573, "grad_norm": 4.206332683563232, "learning_rate": 7.637934947466426e-06, "loss": 0.06115079, "memory(GiB)": 13.7, "step": 38070, "train_speed(iter/s)": 1.535275 }, { "acc": 0.97875004, "epoch": 17.846262010780407, "grad_norm": 5.614558696746826, "learning_rate": 7.6372764351055e-06, "loss": 0.07560512, "memory(GiB)": 13.7, "step": 38075, "train_speed(iter/s)": 1.535282 }, { "acc": 0.990625, "epoch": 17.84860557768924, "grad_norm": 3.905543327331543, "learning_rate": 7.636617859365136e-06, "loss": 0.04673477, "memory(GiB)": 13.7, "step": 38080, "train_speed(iter/s)": 1.535279 }, { "acc": 0.98258934, "epoch": 17.85094914459808, "grad_norm": 4.449185848236084, "learning_rate": 7.635959220261154e-06, "loss": 0.04680634, "memory(GiB)": 13.7, "step": 38085, "train_speed(iter/s)": 1.535276 }, { "acc": 0.97502975, "epoch": 17.853292711506914, "grad_norm": 6.162206649780273, "learning_rate": 7.635300517809394e-06, "loss": 0.0560438, "memory(GiB)": 13.7, "step": 38090, "train_speed(iter/s)": 1.53528 }, { "acc": 0.99020834, "epoch": 17.855636278415748, "grad_norm": 1.8535581827163696, "learning_rate": 7.634641752025683e-06, "loss": 0.05977639, "memory(GiB)": 13.7, "step": 38095, "train_speed(iter/s)": 1.535279 }, { "acc": 0.98051548, "epoch": 17.857979845324586, "grad_norm": 5.512385845184326, "learning_rate": 7.63398292292586e-06, "loss": 0.09675518, "memory(GiB)": 13.7, "step": 38100, "train_speed(iter/s)": 1.535279 }, { "acc": 0.98716345, "epoch": 17.86032341223342, "grad_norm": 4.844620227813721, "learning_rate": 7.633324030525759e-06, "loss": 0.04525409, "memory(GiB)": 13.7, "step": 38105, "train_speed(iter/s)": 1.535278 }, { "acc": 0.9810813, "epoch": 17.862666979142254, "grad_norm": 2.5532829761505127, "learning_rate": 7.632665074841217e-06, "loss": 0.08075761, "memory(GiB)": 13.7, "step": 38110, "train_speed(iter/s)": 1.535288 }, { "acc": 0.9763195, "epoch": 17.86501054605109, "grad_norm": 1.6397583484649658, "learning_rate": 7.632006055888073e-06, "loss": 0.07494027, "memory(GiB)": 13.7, "step": 38115, "train_speed(iter/s)": 1.535296 }, { "acc": 0.97830296, "epoch": 17.867354112959926, "grad_norm": 5.459688186645508, "learning_rate": 7.631346973682169e-06, "loss": 0.07352849, "memory(GiB)": 13.7, "step": 38120, "train_speed(iter/s)": 1.535291 }, { "acc": 0.9844965, "epoch": 17.86969767986876, "grad_norm": 3.4127955436706543, "learning_rate": 7.630687828239349e-06, "loss": 0.08700911, "memory(GiB)": 13.7, "step": 38125, "train_speed(iter/s)": 1.535302 }, { "acc": 0.97889614, "epoch": 17.872041246777595, "grad_norm": 3.1072070598602295, "learning_rate": 7.630028619575453e-06, "loss": 0.0932124, "memory(GiB)": 13.7, "step": 38130, "train_speed(iter/s)": 1.535324 }, { "acc": 0.97196426, "epoch": 17.87438481368643, "grad_norm": 4.7403883934021, "learning_rate": 7.629369347706328e-06, "loss": 0.06165397, "memory(GiB)": 13.7, "step": 38135, "train_speed(iter/s)": 1.535329 }, { "acc": 0.973631, "epoch": 17.876728380595267, "grad_norm": 7.16245698928833, "learning_rate": 7.62871001264782e-06, "loss": 0.11568047, "memory(GiB)": 13.7, "step": 38140, "train_speed(iter/s)": 1.535329 }, { "acc": 0.9786459, "epoch": 17.8790719475041, "grad_norm": 5.700305938720703, "learning_rate": 7.628050614415778e-06, "loss": 0.0654999, "memory(GiB)": 13.7, "step": 38145, "train_speed(iter/s)": 1.535326 }, { "acc": 0.98508577, "epoch": 17.881415514412936, "grad_norm": 7.794160842895508, "learning_rate": 7.627391153026049e-06, "loss": 0.12330999, "memory(GiB)": 13.7, "step": 38150, "train_speed(iter/s)": 1.535328 }, { "acc": 0.95916119, "epoch": 17.88375908132177, "grad_norm": 4.543595314025879, "learning_rate": 7.62673162849449e-06, "loss": 0.12596984, "memory(GiB)": 13.7, "step": 38155, "train_speed(iter/s)": 1.535334 }, { "acc": 0.98705931, "epoch": 17.886102648230608, "grad_norm": 3.948857069015503, "learning_rate": 7.626072040836948e-06, "loss": 0.03667598, "memory(GiB)": 13.7, "step": 38160, "train_speed(iter/s)": 1.535337 }, { "acc": 0.98395834, "epoch": 17.888446215139442, "grad_norm": 2.2522075176239014, "learning_rate": 7.625412390069282e-06, "loss": 0.04362845, "memory(GiB)": 13.7, "step": 38165, "train_speed(iter/s)": 1.535346 }, { "acc": 0.97250004, "epoch": 17.890789782048277, "grad_norm": 3.912127733230591, "learning_rate": 7.6247526762073445e-06, "loss": 0.10593276, "memory(GiB)": 13.7, "step": 38170, "train_speed(iter/s)": 1.535355 }, { "acc": 0.9765625, "epoch": 17.893133348957114, "grad_norm": 6.1729207038879395, "learning_rate": 7.624092899266993e-06, "loss": 0.11073997, "memory(GiB)": 13.7, "step": 38175, "train_speed(iter/s)": 1.535355 }, { "acc": 0.97208328, "epoch": 17.89547691586595, "grad_norm": 10.452836036682129, "learning_rate": 7.623433059264091e-06, "loss": 0.11156375, "memory(GiB)": 13.7, "step": 38180, "train_speed(iter/s)": 1.535364 }, { "acc": 0.98083334, "epoch": 17.897820482774783, "grad_norm": 5.317126750946045, "learning_rate": 7.622773156214491e-06, "loss": 0.04637665, "memory(GiB)": 13.7, "step": 38185, "train_speed(iter/s)": 1.535365 }, { "acc": 0.97410402, "epoch": 17.900164049683617, "grad_norm": 1.930441975593567, "learning_rate": 7.622113190134062e-06, "loss": 0.07785422, "memory(GiB)": 13.7, "step": 38190, "train_speed(iter/s)": 1.535377 }, { "acc": 0.98363094, "epoch": 17.902507616592455, "grad_norm": 3.9517064094543457, "learning_rate": 7.621453161038664e-06, "loss": 0.06948822, "memory(GiB)": 13.7, "step": 38195, "train_speed(iter/s)": 1.535385 }, { "acc": 0.984375, "epoch": 17.90485118350129, "grad_norm": 1.7179354429244995, "learning_rate": 7.620793068944162e-06, "loss": 0.0277551, "memory(GiB)": 13.7, "step": 38200, "train_speed(iter/s)": 1.535396 }, { "acc": 0.97634802, "epoch": 17.907194750410124, "grad_norm": 6.332882881164551, "learning_rate": 7.620132913866423e-06, "loss": 0.12021188, "memory(GiB)": 13.7, "step": 38205, "train_speed(iter/s)": 1.5354 }, { "acc": 0.97636318, "epoch": 17.909538317318958, "grad_norm": 1.965808391571045, "learning_rate": 7.619472695821316e-06, "loss": 0.085538, "memory(GiB)": 13.7, "step": 38210, "train_speed(iter/s)": 1.535395 }, { "acc": 0.98967266, "epoch": 17.911881884227796, "grad_norm": 1.4356706142425537, "learning_rate": 7.618812414824708e-06, "loss": 0.06195787, "memory(GiB)": 13.7, "step": 38215, "train_speed(iter/s)": 1.535397 }, { "acc": 0.95828371, "epoch": 17.91422545113663, "grad_norm": 7.573505878448486, "learning_rate": 7.6181520708924705e-06, "loss": 0.13576719, "memory(GiB)": 13.7, "step": 38220, "train_speed(iter/s)": 1.535396 }, { "acc": 0.98184519, "epoch": 17.916569018045465, "grad_norm": 2.201509475708008, "learning_rate": 7.617491664040477e-06, "loss": 0.05732772, "memory(GiB)": 13.7, "step": 38225, "train_speed(iter/s)": 1.535412 }, { "acc": 0.97840281, "epoch": 17.9189125849543, "grad_norm": 6.311418533325195, "learning_rate": 7.6168311942846055e-06, "loss": 0.08522505, "memory(GiB)": 13.7, "step": 38230, "train_speed(iter/s)": 1.535415 }, { "acc": 0.98395824, "epoch": 17.921256151863137, "grad_norm": 0.10719043016433716, "learning_rate": 7.616170661640723e-06, "loss": 0.03363802, "memory(GiB)": 13.7, "step": 38235, "train_speed(iter/s)": 1.535416 }, { "acc": 0.98521366, "epoch": 17.92359971877197, "grad_norm": 3.6966753005981445, "learning_rate": 7.615510066124711e-06, "loss": 0.09696653, "memory(GiB)": 13.7, "step": 38240, "train_speed(iter/s)": 1.53542 }, { "acc": 0.98128834, "epoch": 17.925943285680805, "grad_norm": 0.4057108461856842, "learning_rate": 7.61484940775245e-06, "loss": 0.05761142, "memory(GiB)": 13.7, "step": 38245, "train_speed(iter/s)": 1.535425 }, { "acc": 0.9794445, "epoch": 17.92828685258964, "grad_norm": 3.054086208343506, "learning_rate": 7.614188686539819e-06, "loss": 0.07023275, "memory(GiB)": 13.7, "step": 38250, "train_speed(iter/s)": 1.535423 }, { "acc": 0.98812504, "epoch": 17.930630419498478, "grad_norm": 4.124317646026611, "learning_rate": 7.613527902502696e-06, "loss": 0.04490935, "memory(GiB)": 13.7, "step": 38255, "train_speed(iter/s)": 1.535425 }, { "acc": 0.97644348, "epoch": 17.932973986407312, "grad_norm": 5.324812889099121, "learning_rate": 7.612867055656967e-06, "loss": 0.08374387, "memory(GiB)": 13.7, "step": 38260, "train_speed(iter/s)": 1.535429 }, { "acc": 0.9697916, "epoch": 17.935317553316146, "grad_norm": 3.4500648975372314, "learning_rate": 7.6122061460185185e-06, "loss": 0.15070617, "memory(GiB)": 13.7, "step": 38265, "train_speed(iter/s)": 1.535433 }, { "acc": 0.98874998, "epoch": 17.937661120224984, "grad_norm": 0.011937066912651062, "learning_rate": 7.611545173603231e-06, "loss": 0.04777854, "memory(GiB)": 13.7, "step": 38270, "train_speed(iter/s)": 1.535428 }, { "acc": 0.97037411, "epoch": 17.94000468713382, "grad_norm": 6.252926349639893, "learning_rate": 7.610884138426998e-06, "loss": 0.15854363, "memory(GiB)": 13.7, "step": 38275, "train_speed(iter/s)": 1.53544 }, { "acc": 0.9895833, "epoch": 17.942348254042653, "grad_norm": 5.990843772888184, "learning_rate": 7.610223040505705e-06, "loss": 0.02671025, "memory(GiB)": 13.7, "step": 38280, "train_speed(iter/s)": 1.535448 }, { "acc": 0.97531567, "epoch": 17.944691820951487, "grad_norm": 5.673445701599121, "learning_rate": 7.609561879855243e-06, "loss": 0.12588296, "memory(GiB)": 13.7, "step": 38285, "train_speed(iter/s)": 1.535455 }, { "acc": 0.97041664, "epoch": 17.947035387860325, "grad_norm": 7.917540550231934, "learning_rate": 7.608900656491508e-06, "loss": 0.13473306, "memory(GiB)": 13.7, "step": 38290, "train_speed(iter/s)": 1.535456 }, { "acc": 0.98417664, "epoch": 17.94937895476916, "grad_norm": 7.896938323974609, "learning_rate": 7.608239370430388e-06, "loss": 0.04532405, "memory(GiB)": 13.7, "step": 38295, "train_speed(iter/s)": 1.535457 }, { "acc": 0.97824907, "epoch": 17.951722521677993, "grad_norm": 2.932447671890259, "learning_rate": 7.607578021687779e-06, "loss": 0.09552911, "memory(GiB)": 13.7, "step": 38300, "train_speed(iter/s)": 1.535468 }, { "acc": 0.98467264, "epoch": 17.954066088586828, "grad_norm": 2.075315475463867, "learning_rate": 7.606916610279583e-06, "loss": 0.05341598, "memory(GiB)": 13.7, "step": 38305, "train_speed(iter/s)": 1.535474 }, { "acc": 0.98309975, "epoch": 17.956409655495666, "grad_norm": 2.343998908996582, "learning_rate": 7.606255136221692e-06, "loss": 0.05547755, "memory(GiB)": 13.7, "step": 38310, "train_speed(iter/s)": 1.53548 }, { "acc": 0.9817708, "epoch": 17.9587532224045, "grad_norm": 7.183366298675537, "learning_rate": 7.605593599530009e-06, "loss": 0.10642676, "memory(GiB)": 13.7, "step": 38315, "train_speed(iter/s)": 1.535483 }, { "acc": 0.96826267, "epoch": 17.961096789313334, "grad_norm": 5.663060188293457, "learning_rate": 7.604932000220435e-06, "loss": 0.12283156, "memory(GiB)": 13.7, "step": 38320, "train_speed(iter/s)": 1.535484 }, { "acc": 0.97777786, "epoch": 17.96344035622217, "grad_norm": 14.494388580322266, "learning_rate": 7.60427033830887e-06, "loss": 0.0831479, "memory(GiB)": 13.7, "step": 38325, "train_speed(iter/s)": 1.535494 }, { "acc": 0.98840828, "epoch": 17.965783923131006, "grad_norm": 1.3711254596710205, "learning_rate": 7.6036086138112205e-06, "loss": 0.05026495, "memory(GiB)": 13.7, "step": 38330, "train_speed(iter/s)": 1.5355 }, { "acc": 0.97561951, "epoch": 17.96812749003984, "grad_norm": 6.174076080322266, "learning_rate": 7.602946826743393e-06, "loss": 0.08611223, "memory(GiB)": 13.7, "step": 38335, "train_speed(iter/s)": 1.535498 }, { "acc": 0.96540184, "epoch": 17.970471056948675, "grad_norm": 12.470495223999023, "learning_rate": 7.602284977121293e-06, "loss": 0.14752779, "memory(GiB)": 13.7, "step": 38340, "train_speed(iter/s)": 1.535505 }, { "acc": 0.99057541, "epoch": 17.972814623857513, "grad_norm": 4.110170364379883, "learning_rate": 7.601623064960829e-06, "loss": 0.05845197, "memory(GiB)": 13.7, "step": 38345, "train_speed(iter/s)": 1.535516 }, { "acc": 0.97458334, "epoch": 17.975158190766347, "grad_norm": 0.8045268058776855, "learning_rate": 7.600961090277913e-06, "loss": 0.08201234, "memory(GiB)": 13.7, "step": 38350, "train_speed(iter/s)": 1.535526 }, { "acc": 0.96687498, "epoch": 17.97750175767518, "grad_norm": 4.058822154998779, "learning_rate": 7.6002990530884545e-06, "loss": 0.15357611, "memory(GiB)": 13.7, "step": 38355, "train_speed(iter/s)": 1.535534 }, { "acc": 0.9857481, "epoch": 17.979845324584016, "grad_norm": 4.319520950317383, "learning_rate": 7.5996369534083696e-06, "loss": 0.06720862, "memory(GiB)": 13.7, "step": 38360, "train_speed(iter/s)": 1.535538 }, { "acc": 0.98033333, "epoch": 17.982188891492854, "grad_norm": 8.05185317993164, "learning_rate": 7.5989747912535705e-06, "loss": 0.07336743, "memory(GiB)": 13.7, "step": 38365, "train_speed(iter/s)": 1.535535 }, { "acc": 0.95739088, "epoch": 17.984532458401688, "grad_norm": 3.40791392326355, "learning_rate": 7.598312566639973e-06, "loss": 0.13623695, "memory(GiB)": 13.7, "step": 38370, "train_speed(iter/s)": 1.535547 }, { "acc": 0.97833328, "epoch": 17.986876025310522, "grad_norm": 1.7772513628005981, "learning_rate": 7.597650279583495e-06, "loss": 0.05869673, "memory(GiB)": 13.7, "step": 38375, "train_speed(iter/s)": 1.535567 }, { "acc": 0.97362061, "epoch": 17.989219592219357, "grad_norm": 4.924524784088135, "learning_rate": 7.5969879301000595e-06, "loss": 0.0778687, "memory(GiB)": 13.7, "step": 38380, "train_speed(iter/s)": 1.53556 }, { "acc": 0.98068829, "epoch": 17.991563159128194, "grad_norm": 2.491671085357666, "learning_rate": 7.596325518205582e-06, "loss": 0.074025, "memory(GiB)": 13.7, "step": 38385, "train_speed(iter/s)": 1.535564 }, { "acc": 0.98874998, "epoch": 17.99390672603703, "grad_norm": 0.423026978969574, "learning_rate": 7.595663043915987e-06, "loss": 0.02926854, "memory(GiB)": 13.7, "step": 38390, "train_speed(iter/s)": 1.535568 }, { "acc": 0.99624996, "epoch": 17.996250292945863, "grad_norm": 0.2502867877483368, "learning_rate": 7.5950005072471985e-06, "loss": 0.03698732, "memory(GiB)": 13.7, "step": 38395, "train_speed(iter/s)": 1.535582 }, { "acc": 0.98347759, "epoch": 17.998593859854697, "grad_norm": 3.438701868057251, "learning_rate": 7.594337908215142e-06, "loss": 0.10191091, "memory(GiB)": 13.7, "step": 38400, "train_speed(iter/s)": 1.535589 }, { "acc": 0.97770824, "epoch": 18.000937426763535, "grad_norm": 5.655213832855225, "learning_rate": 7.5936752468357446e-06, "loss": 0.10823485, "memory(GiB)": 13.7, "step": 38405, "train_speed(iter/s)": 1.535553 }, { "acc": 0.96486111, "epoch": 18.00328099367237, "grad_norm": 9.07214641571045, "learning_rate": 7.59301252312493e-06, "loss": 0.11900253, "memory(GiB)": 13.7, "step": 38410, "train_speed(iter/s)": 1.535552 }, { "acc": 0.97171135, "epoch": 18.005624560581204, "grad_norm": 6.288357257843018, "learning_rate": 7.592349737098634e-06, "loss": 0.15544937, "memory(GiB)": 13.7, "step": 38415, "train_speed(iter/s)": 1.535554 }, { "acc": 0.99201756, "epoch": 18.00796812749004, "grad_norm": 1.3973205089569092, "learning_rate": 7.591686888772787e-06, "loss": 0.07389852, "memory(GiB)": 13.7, "step": 38420, "train_speed(iter/s)": 1.535551 }, { "acc": 0.98829861, "epoch": 18.010311694398876, "grad_norm": 1.5329866409301758, "learning_rate": 7.5910239781633175e-06, "loss": 0.06510607, "memory(GiB)": 13.7, "step": 38425, "train_speed(iter/s)": 1.535556 }, { "acc": 0.99113102, "epoch": 18.01265526130771, "grad_norm": 0.015539413318037987, "learning_rate": 7.590361005286162e-06, "loss": 0.01874422, "memory(GiB)": 13.7, "step": 38430, "train_speed(iter/s)": 1.535558 }, { "acc": 0.97524805, "epoch": 18.014998828216545, "grad_norm": 5.416409969329834, "learning_rate": 7.589697970157256e-06, "loss": 0.07884163, "memory(GiB)": 13.7, "step": 38435, "train_speed(iter/s)": 1.535559 }, { "acc": 0.9864584, "epoch": 18.017342395125382, "grad_norm": 8.24371337890625, "learning_rate": 7.589034872792538e-06, "loss": 0.08626778, "memory(GiB)": 13.7, "step": 38440, "train_speed(iter/s)": 1.53556 }, { "acc": 0.99020834, "epoch": 18.019685962034217, "grad_norm": 1.3477349281311035, "learning_rate": 7.588371713207945e-06, "loss": 0.0170938, "memory(GiB)": 13.7, "step": 38445, "train_speed(iter/s)": 1.535568 }, { "acc": 0.98005686, "epoch": 18.02202952894305, "grad_norm": 4.85585880279541, "learning_rate": 7.587708491419419e-06, "loss": 0.05199057, "memory(GiB)": 13.7, "step": 38450, "train_speed(iter/s)": 1.535581 }, { "acc": 0.98166676, "epoch": 18.024373095851885, "grad_norm": 3.421785593032837, "learning_rate": 7.587045207442899e-06, "loss": 0.08404914, "memory(GiB)": 13.7, "step": 38455, "train_speed(iter/s)": 1.535586 }, { "acc": 0.98481064, "epoch": 18.026716662760723, "grad_norm": 5.826757431030273, "learning_rate": 7.5863818612943305e-06, "loss": 0.07117828, "memory(GiB)": 13.7, "step": 38460, "train_speed(iter/s)": 1.535597 }, { "acc": 0.98031826, "epoch": 18.029060229669557, "grad_norm": 3.1478378772735596, "learning_rate": 7.585718452989655e-06, "loss": 0.05042905, "memory(GiB)": 13.7, "step": 38465, "train_speed(iter/s)": 1.535602 }, { "acc": 0.96836309, "epoch": 18.03140379657839, "grad_norm": 3.947930335998535, "learning_rate": 7.585054982544825e-06, "loss": 0.09314975, "memory(GiB)": 13.7, "step": 38470, "train_speed(iter/s)": 1.535603 }, { "acc": 0.9895834, "epoch": 18.033747363487226, "grad_norm": 0.0074615925550460815, "learning_rate": 7.584391449975782e-06, "loss": 0.04883514, "memory(GiB)": 13.7, "step": 38475, "train_speed(iter/s)": 1.535603 }, { "acc": 0.98604174, "epoch": 18.036090930396064, "grad_norm": 7.1402506828308105, "learning_rate": 7.583727855298477e-06, "loss": 0.08205584, "memory(GiB)": 13.7, "step": 38480, "train_speed(iter/s)": 1.535603 }, { "acc": 0.9988636, "epoch": 18.0384344973049, "grad_norm": 0.2774534523487091, "learning_rate": 7.583064198528861e-06, "loss": 0.02710355, "memory(GiB)": 13.7, "step": 38485, "train_speed(iter/s)": 1.535593 }, { "acc": 0.9753891, "epoch": 18.040778064213733, "grad_norm": 2.3659112453460693, "learning_rate": 7.5824004796828875e-06, "loss": 0.13721979, "memory(GiB)": 13.7, "step": 38490, "train_speed(iter/s)": 1.535596 }, { "acc": 0.98812504, "epoch": 18.043121631122567, "grad_norm": 1.5410526990890503, "learning_rate": 7.581736698776509e-06, "loss": 0.05041575, "memory(GiB)": 13.7, "step": 38495, "train_speed(iter/s)": 1.535592 }, { "acc": 0.9864584, "epoch": 18.045465198031405, "grad_norm": 4.518026351928711, "learning_rate": 7.581072855825679e-06, "loss": 0.04022778, "memory(GiB)": 13.7, "step": 38500, "train_speed(iter/s)": 1.535595 }, { "acc": 0.98827457, "epoch": 18.04780876494024, "grad_norm": 31.147809982299805, "learning_rate": 7.580408950846355e-06, "loss": 0.06592074, "memory(GiB)": 13.7, "step": 38505, "train_speed(iter/s)": 1.535598 }, { "acc": 0.978125, "epoch": 18.050152331849073, "grad_norm": 6.332118988037109, "learning_rate": 7.579744983854494e-06, "loss": 0.16264064, "memory(GiB)": 13.7, "step": 38510, "train_speed(iter/s)": 1.535594 }, { "acc": 0.98080359, "epoch": 18.05249589875791, "grad_norm": 7.29715633392334, "learning_rate": 7.579080954866061e-06, "loss": 0.05995395, "memory(GiB)": 13.7, "step": 38515, "train_speed(iter/s)": 1.535594 }, { "acc": 0.97562504, "epoch": 18.054839465666745, "grad_norm": 5.34802770614624, "learning_rate": 7.578416863897009e-06, "loss": 0.10685878, "memory(GiB)": 13.7, "step": 38520, "train_speed(iter/s)": 1.535594 }, { "acc": 0.9869791, "epoch": 18.05718303257558, "grad_norm": 1.1460633277893066, "learning_rate": 7.577752710963309e-06, "loss": 0.0657456, "memory(GiB)": 13.7, "step": 38525, "train_speed(iter/s)": 1.535595 }, { "acc": 0.97061014, "epoch": 18.059526599484414, "grad_norm": 8.29509449005127, "learning_rate": 7.577088496080917e-06, "loss": 0.10505055, "memory(GiB)": 13.7, "step": 38530, "train_speed(iter/s)": 1.535596 }, { "acc": 0.9765625, "epoch": 18.061870166393252, "grad_norm": 0.05370489880442619, "learning_rate": 7.576424219265803e-06, "loss": 0.08523514, "memory(GiB)": 13.7, "step": 38535, "train_speed(iter/s)": 1.535602 }, { "acc": 0.97037678, "epoch": 18.064213733302086, "grad_norm": 6.585987091064453, "learning_rate": 7.575759880533935e-06, "loss": 0.11588508, "memory(GiB)": 13.7, "step": 38540, "train_speed(iter/s)": 1.535597 }, { "acc": 0.98117561, "epoch": 18.06655730021092, "grad_norm": 7.010157108306885, "learning_rate": 7.5750954799012785e-06, "loss": 0.08383093, "memory(GiB)": 13.7, "step": 38545, "train_speed(iter/s)": 1.535601 }, { "acc": 0.9894887, "epoch": 18.068900867119755, "grad_norm": 2.9909722805023193, "learning_rate": 7.574431017383807e-06, "loss": 0.0501242, "memory(GiB)": 13.7, "step": 38550, "train_speed(iter/s)": 1.535609 }, { "acc": 0.98625002, "epoch": 18.071244434028593, "grad_norm": 0.005787095054984093, "learning_rate": 7.5737664929974866e-06, "loss": 0.0270885, "memory(GiB)": 13.7, "step": 38555, "train_speed(iter/s)": 1.535616 }, { "acc": 0.97713795, "epoch": 18.073588000937427, "grad_norm": 6.040523529052734, "learning_rate": 7.573101906758296e-06, "loss": 0.13447666, "memory(GiB)": 13.7, "step": 38560, "train_speed(iter/s)": 1.535619 }, { "acc": 0.98592262, "epoch": 18.07593156784626, "grad_norm": 5.135154724121094, "learning_rate": 7.572437258682206e-06, "loss": 0.04258744, "memory(GiB)": 13.7, "step": 38565, "train_speed(iter/s)": 1.53562 }, { "acc": 0.99153519, "epoch": 18.078275134755096, "grad_norm": 6.179248809814453, "learning_rate": 7.571772548785195e-06, "loss": 0.02751351, "memory(GiB)": 13.7, "step": 38570, "train_speed(iter/s)": 1.535634 }, { "acc": 0.97885418, "epoch": 18.080618701663933, "grad_norm": 12.24325180053711, "learning_rate": 7.571107777083238e-06, "loss": 0.11985824, "memory(GiB)": 13.7, "step": 38575, "train_speed(iter/s)": 1.535638 }, { "acc": 0.98714733, "epoch": 18.082962268572768, "grad_norm": 2.8769571781158447, "learning_rate": 7.570442943592314e-06, "loss": 0.06311937, "memory(GiB)": 13.7, "step": 38580, "train_speed(iter/s)": 1.53564 }, { "acc": 0.98815117, "epoch": 18.085305835481602, "grad_norm": 4.7725677490234375, "learning_rate": 7.5697780483284065e-06, "loss": 0.07171944, "memory(GiB)": 13.7, "step": 38585, "train_speed(iter/s)": 1.535646 }, { "acc": 0.98425598, "epoch": 18.08764940239044, "grad_norm": 5.0922675132751465, "learning_rate": 7.569113091307496e-06, "loss": 0.07300102, "memory(GiB)": 13.7, "step": 38590, "train_speed(iter/s)": 1.535638 }, { "acc": 0.98758011, "epoch": 18.089992969299274, "grad_norm": 4.483227729797363, "learning_rate": 7.568448072545563e-06, "loss": 0.05798728, "memory(GiB)": 13.7, "step": 38595, "train_speed(iter/s)": 1.535635 }, { "acc": 0.98094692, "epoch": 18.09233653620811, "grad_norm": 3.680588722229004, "learning_rate": 7.567782992058598e-06, "loss": 0.14661797, "memory(GiB)": 13.7, "step": 38600, "train_speed(iter/s)": 1.535645 }, { "acc": 0.97236004, "epoch": 18.094680103116943, "grad_norm": 3.523519992828369, "learning_rate": 7.567117849862582e-06, "loss": 0.0991109, "memory(GiB)": 13.7, "step": 38605, "train_speed(iter/s)": 1.53565 }, { "acc": 0.98667612, "epoch": 18.09702367002578, "grad_norm": 2.517672538757324, "learning_rate": 7.5664526459735055e-06, "loss": 0.03139145, "memory(GiB)": 13.7, "step": 38610, "train_speed(iter/s)": 1.535663 }, { "acc": 0.98938932, "epoch": 18.099367236934615, "grad_norm": 0.12156159430742264, "learning_rate": 7.565787380407356e-06, "loss": 0.05079471, "memory(GiB)": 13.7, "step": 38615, "train_speed(iter/s)": 1.535669 }, { "acc": 0.98803024, "epoch": 18.10171080384345, "grad_norm": 3.7551488876342773, "learning_rate": 7.565122053180125e-06, "loss": 0.063614, "memory(GiB)": 13.7, "step": 38620, "train_speed(iter/s)": 1.535669 }, { "acc": 0.9624053, "epoch": 18.104054370752284, "grad_norm": 5.099353313446045, "learning_rate": 7.564456664307806e-06, "loss": 0.12302248, "memory(GiB)": 13.7, "step": 38625, "train_speed(iter/s)": 1.535673 }, { "acc": 0.97424679, "epoch": 18.10639793766112, "grad_norm": 5.7784600257873535, "learning_rate": 7.563791213806393e-06, "loss": 0.12907267, "memory(GiB)": 13.7, "step": 38630, "train_speed(iter/s)": 1.535674 }, { "acc": 0.98631401, "epoch": 18.108741504569956, "grad_norm": 1.130905032157898, "learning_rate": 7.5631257016918815e-06, "loss": 0.08228199, "memory(GiB)": 13.7, "step": 38635, "train_speed(iter/s)": 1.535666 }, { "acc": 0.98107643, "epoch": 18.11108507147879, "grad_norm": 4.0805535316467285, "learning_rate": 7.562460127980264e-06, "loss": 0.07091803, "memory(GiB)": 13.7, "step": 38640, "train_speed(iter/s)": 1.535662 }, { "acc": 0.98883934, "epoch": 18.113428638387624, "grad_norm": 2.6564455032348633, "learning_rate": 7.561794492687543e-06, "loss": 0.06044933, "memory(GiB)": 13.7, "step": 38645, "train_speed(iter/s)": 1.535659 }, { "acc": 0.9666667, "epoch": 18.115772205296462, "grad_norm": 6.919055461883545, "learning_rate": 7.561128795829718e-06, "loss": 0.11793776, "memory(GiB)": 13.7, "step": 38650, "train_speed(iter/s)": 1.535658 }, { "acc": 0.97516098, "epoch": 18.118115772205297, "grad_norm": 6.017663955688477, "learning_rate": 7.5604630374227875e-06, "loss": 0.11416717, "memory(GiB)": 13.7, "step": 38655, "train_speed(iter/s)": 1.535658 }, { "acc": 0.96841507, "epoch": 18.12045933911413, "grad_norm": 9.532275199890137, "learning_rate": 7.559797217482756e-06, "loss": 0.16515845, "memory(GiB)": 13.7, "step": 38660, "train_speed(iter/s)": 1.535666 }, { "acc": 0.98447914, "epoch": 18.12280290602297, "grad_norm": 4.616134166717529, "learning_rate": 7.5591313360256265e-06, "loss": 0.04386631, "memory(GiB)": 13.7, "step": 38665, "train_speed(iter/s)": 1.535667 }, { "acc": 0.98166666, "epoch": 18.125146472931803, "grad_norm": 5.100863456726074, "learning_rate": 7.558465393067407e-06, "loss": 0.04699835, "memory(GiB)": 13.7, "step": 38670, "train_speed(iter/s)": 1.535672 }, { "acc": 0.97862558, "epoch": 18.127490039840637, "grad_norm": 1.8206242322921753, "learning_rate": 7.557799388624104e-06, "loss": 0.08119783, "memory(GiB)": 13.7, "step": 38675, "train_speed(iter/s)": 1.535676 }, { "acc": 0.9712534, "epoch": 18.12983360674947, "grad_norm": 6.884395122528076, "learning_rate": 7.557133322711722e-06, "loss": 0.20977926, "memory(GiB)": 13.7, "step": 38680, "train_speed(iter/s)": 1.535684 }, { "acc": 0.9895833, "epoch": 18.13217717365831, "grad_norm": 1.5207388401031494, "learning_rate": 7.556467195346276e-06, "loss": 0.04508827, "memory(GiB)": 13.7, "step": 38685, "train_speed(iter/s)": 1.535695 }, { "acc": 0.97904758, "epoch": 18.134520740567144, "grad_norm": 6.767370223999023, "learning_rate": 7.555801006543775e-06, "loss": 0.08832321, "memory(GiB)": 13.7, "step": 38690, "train_speed(iter/s)": 1.5357 }, { "acc": 0.97650051, "epoch": 18.136864307475978, "grad_norm": 2.201331615447998, "learning_rate": 7.555134756320234e-06, "loss": 0.1036572, "memory(GiB)": 13.7, "step": 38695, "train_speed(iter/s)": 1.535714 }, { "acc": 0.97466297, "epoch": 18.139207874384812, "grad_norm": 2.142622709274292, "learning_rate": 7.554468444691664e-06, "loss": 0.14146724, "memory(GiB)": 13.7, "step": 38700, "train_speed(iter/s)": 1.535717 }, { "acc": 0.9839819, "epoch": 18.14155144129365, "grad_norm": 2.9804723262786865, "learning_rate": 7.553802071674083e-06, "loss": 0.09751965, "memory(GiB)": 13.7, "step": 38705, "train_speed(iter/s)": 1.535722 }, { "acc": 0.97847223, "epoch": 18.143895008202485, "grad_norm": 4.658580303192139, "learning_rate": 7.5531356372835105e-06, "loss": 0.05620707, "memory(GiB)": 13.7, "step": 38710, "train_speed(iter/s)": 1.535725 }, { "acc": 0.97597218, "epoch": 18.14623857511132, "grad_norm": 1.1283905506134033, "learning_rate": 7.552469141535963e-06, "loss": 0.05852718, "memory(GiB)": 13.7, "step": 38715, "train_speed(iter/s)": 1.535726 }, { "acc": 0.98046083, "epoch": 18.148582142020153, "grad_norm": 4.436609745025635, "learning_rate": 7.551802584447461e-06, "loss": 0.13446401, "memory(GiB)": 13.7, "step": 38720, "train_speed(iter/s)": 1.535728 }, { "acc": 0.97531738, "epoch": 18.15092570892899, "grad_norm": 4.498964786529541, "learning_rate": 7.551135966034024e-06, "loss": 0.11756129, "memory(GiB)": 13.7, "step": 38725, "train_speed(iter/s)": 1.535743 }, { "acc": 0.99404764, "epoch": 18.153269275837825, "grad_norm": 1.428224802017212, "learning_rate": 7.550469286311682e-06, "loss": 0.02994386, "memory(GiB)": 13.7, "step": 38730, "train_speed(iter/s)": 1.535754 }, { "acc": 0.98886366, "epoch": 18.15561284274666, "grad_norm": 1.4108370542526245, "learning_rate": 7.5498025452964526e-06, "loss": 0.04012217, "memory(GiB)": 13.7, "step": 38735, "train_speed(iter/s)": 1.535756 }, { "acc": 0.96989594, "epoch": 18.157956409655494, "grad_norm": 7.1732940673828125, "learning_rate": 7.5491357430043665e-06, "loss": 0.09937738, "memory(GiB)": 13.7, "step": 38740, "train_speed(iter/s)": 1.535767 }, { "acc": 0.99693184, "epoch": 18.160299976564332, "grad_norm": 0.5526961088180542, "learning_rate": 7.548468879451449e-06, "loss": 0.02973197, "memory(GiB)": 13.7, "step": 38745, "train_speed(iter/s)": 1.535782 }, { "acc": 0.97895288, "epoch": 18.162643543473166, "grad_norm": 5.151626110076904, "learning_rate": 7.547801954653732e-06, "loss": 0.06992309, "memory(GiB)": 13.7, "step": 38750, "train_speed(iter/s)": 1.535791 }, { "acc": 0.95895834, "epoch": 18.164987110382, "grad_norm": 6.9224982261657715, "learning_rate": 7.547134968627242e-06, "loss": 0.07703663, "memory(GiB)": 13.7, "step": 38755, "train_speed(iter/s)": 1.5358 }, { "acc": 0.98061008, "epoch": 18.16733067729084, "grad_norm": 2.33901309967041, "learning_rate": 7.546467921388016e-06, "loss": 0.09630916, "memory(GiB)": 13.7, "step": 38760, "train_speed(iter/s)": 1.535807 }, { "acc": 0.9905303, "epoch": 18.169674244199673, "grad_norm": 8.177766799926758, "learning_rate": 7.545800812952084e-06, "loss": 0.03008274, "memory(GiB)": 13.7, "step": 38765, "train_speed(iter/s)": 1.535817 }, { "acc": 0.98490086, "epoch": 18.172017811108507, "grad_norm": 1.7281993627548218, "learning_rate": 7.545133643335483e-06, "loss": 0.08624469, "memory(GiB)": 13.7, "step": 38770, "train_speed(iter/s)": 1.53582 }, { "acc": 0.97408733, "epoch": 18.17436137801734, "grad_norm": 5.125514984130859, "learning_rate": 7.544466412554251e-06, "loss": 0.10714228, "memory(GiB)": 13.7, "step": 38775, "train_speed(iter/s)": 1.535839 }, { "acc": 0.98093748, "epoch": 18.17670494492618, "grad_norm": 3.3960371017456055, "learning_rate": 7.543799120624421e-06, "loss": 0.07465391, "memory(GiB)": 13.7, "step": 38780, "train_speed(iter/s)": 1.535842 }, { "acc": 0.97874994, "epoch": 18.179048511835013, "grad_norm": 2.917428731918335, "learning_rate": 7.543131767562036e-06, "loss": 0.14089159, "memory(GiB)": 13.7, "step": 38785, "train_speed(iter/s)": 1.535838 }, { "acc": 0.9859375, "epoch": 18.181392078743848, "grad_norm": 8.83505916595459, "learning_rate": 7.5424643533831375e-06, "loss": 0.04932514, "memory(GiB)": 13.7, "step": 38790, "train_speed(iter/s)": 1.535846 }, { "acc": 0.96736603, "epoch": 18.183735645652682, "grad_norm": 1.2256816625595093, "learning_rate": 7.541796878103768e-06, "loss": 0.12669256, "memory(GiB)": 13.7, "step": 38795, "train_speed(iter/s)": 1.535856 }, { "acc": 0.97609377, "epoch": 18.18607921256152, "grad_norm": 3.067063093185425, "learning_rate": 7.541129341739966e-06, "loss": 0.07845721, "memory(GiB)": 13.7, "step": 38800, "train_speed(iter/s)": 1.535867 }, { "acc": 0.97865534, "epoch": 18.188422779470354, "grad_norm": 2.8472352027893066, "learning_rate": 7.540461744307784e-06, "loss": 0.07516832, "memory(GiB)": 13.7, "step": 38805, "train_speed(iter/s)": 1.535869 }, { "acc": 0.98246527, "epoch": 18.19076634637919, "grad_norm": 4.954750061035156, "learning_rate": 7.5397940858232676e-06, "loss": 0.07274979, "memory(GiB)": 13.7, "step": 38810, "train_speed(iter/s)": 1.535861 }, { "acc": 0.98160706, "epoch": 18.193109913288023, "grad_norm": 2.9430835247039795, "learning_rate": 7.539126366302461e-06, "loss": 0.03955667, "memory(GiB)": 13.7, "step": 38815, "train_speed(iter/s)": 1.53586 }, { "acc": 0.9604167, "epoch": 18.19545348019686, "grad_norm": 6.883880615234375, "learning_rate": 7.538458585761419e-06, "loss": 0.16910627, "memory(GiB)": 13.7, "step": 38820, "train_speed(iter/s)": 1.535876 }, { "acc": 0.9921648, "epoch": 18.197797047105695, "grad_norm": 3.2255859375, "learning_rate": 7.5377907442161894e-06, "loss": 0.06621898, "memory(GiB)": 13.7, "step": 38825, "train_speed(iter/s)": 1.535877 }, { "acc": 0.98260422, "epoch": 18.20014061401453, "grad_norm": 5.290131092071533, "learning_rate": 7.537122841682826e-06, "loss": 0.05989318, "memory(GiB)": 13.7, "step": 38830, "train_speed(iter/s)": 1.53588 }, { "acc": 0.99018192, "epoch": 18.202484180923367, "grad_norm": 3.744908094406128, "learning_rate": 7.536454878177382e-06, "loss": 0.0654878, "memory(GiB)": 13.7, "step": 38835, "train_speed(iter/s)": 1.535888 }, { "acc": 0.9915905, "epoch": 18.2048277478322, "grad_norm": 2.2641584873199463, "learning_rate": 7.535786853715916e-06, "loss": 0.03994135, "memory(GiB)": 13.7, "step": 38840, "train_speed(iter/s)": 1.535894 }, { "acc": 0.99294357, "epoch": 18.207171314741036, "grad_norm": 2.310871124267578, "learning_rate": 7.53511876831448e-06, "loss": 0.06979393, "memory(GiB)": 13.7, "step": 38845, "train_speed(iter/s)": 1.535912 }, { "acc": 0.98291664, "epoch": 18.20951488164987, "grad_norm": 5.750491142272949, "learning_rate": 7.534450621989139e-06, "loss": 0.11099169, "memory(GiB)": 13.7, "step": 38850, "train_speed(iter/s)": 1.535913 }, { "acc": 0.9895834, "epoch": 18.211858448558708, "grad_norm": 3.6298153400421143, "learning_rate": 7.533782414755947e-06, "loss": 0.05059708, "memory(GiB)": 13.7, "step": 38855, "train_speed(iter/s)": 1.535918 }, { "acc": 0.98584003, "epoch": 18.214202015467542, "grad_norm": 1.7112922668457031, "learning_rate": 7.5331141466309694e-06, "loss": 0.07371311, "memory(GiB)": 13.7, "step": 38860, "train_speed(iter/s)": 1.535917 }, { "acc": 0.98896828, "epoch": 18.216545582376376, "grad_norm": 3.796132802963257, "learning_rate": 7.5324458176302685e-06, "loss": 0.07172413, "memory(GiB)": 13.7, "step": 38865, "train_speed(iter/s)": 1.535926 }, { "acc": 0.97870922, "epoch": 18.21888914928521, "grad_norm": 1.679707407951355, "learning_rate": 7.531777427769907e-06, "loss": 0.1037277, "memory(GiB)": 13.7, "step": 38870, "train_speed(iter/s)": 1.535927 }, { "acc": 0.9848959, "epoch": 18.22123271619405, "grad_norm": 0.19181254506111145, "learning_rate": 7.531108977065953e-06, "loss": 0.08193344, "memory(GiB)": 13.7, "step": 38875, "train_speed(iter/s)": 1.535936 }, { "acc": 0.97333336, "epoch": 18.223576283102883, "grad_norm": 3.813844680786133, "learning_rate": 7.530440465534473e-06, "loss": 0.06272002, "memory(GiB)": 13.7, "step": 38880, "train_speed(iter/s)": 1.535941 }, { "acc": 0.99458332, "epoch": 18.225919850011717, "grad_norm": 3.3823933601379395, "learning_rate": 7.529771893191536e-06, "loss": 0.01935508, "memory(GiB)": 13.7, "step": 38885, "train_speed(iter/s)": 1.535957 }, { "acc": 0.9760417, "epoch": 18.22826341692055, "grad_norm": 3.3963470458984375, "learning_rate": 7.529103260053211e-06, "loss": 0.08885962, "memory(GiB)": 13.7, "step": 38890, "train_speed(iter/s)": 1.53597 }, { "acc": 0.98000431, "epoch": 18.23060698382939, "grad_norm": 1.9282772541046143, "learning_rate": 7.528434566135571e-06, "loss": 0.07676688, "memory(GiB)": 13.7, "step": 38895, "train_speed(iter/s)": 1.535977 }, { "acc": 0.971875, "epoch": 18.232950550738224, "grad_norm": 1.5559513568878174, "learning_rate": 7.527765811454689e-06, "loss": 0.11976424, "memory(GiB)": 13.7, "step": 38900, "train_speed(iter/s)": 1.535989 }, { "acc": 0.9907177, "epoch": 18.235294117647058, "grad_norm": 3.8188729286193848, "learning_rate": 7.527096996026639e-06, "loss": 0.03967465, "memory(GiB)": 13.7, "step": 38905, "train_speed(iter/s)": 1.535988 }, { "acc": 0.97633934, "epoch": 18.237637684555892, "grad_norm": 5.957972526550293, "learning_rate": 7.5264281198674995e-06, "loss": 0.06276439, "memory(GiB)": 13.7, "step": 38910, "train_speed(iter/s)": 1.535999 }, { "acc": 0.9791666, "epoch": 18.23998125146473, "grad_norm": 11.210142135620117, "learning_rate": 7.5257591829933456e-06, "loss": 0.06366515, "memory(GiB)": 13.7, "step": 38915, "train_speed(iter/s)": 1.535991 }, { "acc": 0.98544722, "epoch": 18.242324818373564, "grad_norm": 4.898776531219482, "learning_rate": 7.525090185420256e-06, "loss": 0.08269824, "memory(GiB)": 13.7, "step": 38920, "train_speed(iter/s)": 1.535989 }, { "acc": 0.97774839, "epoch": 18.2446683852824, "grad_norm": 4.574108600616455, "learning_rate": 7.524421127164313e-06, "loss": 0.09854342, "memory(GiB)": 13.7, "step": 38925, "train_speed(iter/s)": 1.536004 }, { "acc": 0.9864583, "epoch": 18.247011952191237, "grad_norm": 3.8337457180023193, "learning_rate": 7.523752008241597e-06, "loss": 0.03846571, "memory(GiB)": 13.7, "step": 38930, "train_speed(iter/s)": 1.536 }, { "acc": 0.98494053, "epoch": 18.24935551910007, "grad_norm": 2.384568691253662, "learning_rate": 7.523082828668192e-06, "loss": 0.03741097, "memory(GiB)": 13.7, "step": 38935, "train_speed(iter/s)": 1.536009 }, { "acc": 0.97917662, "epoch": 18.251699086008905, "grad_norm": 2.762622594833374, "learning_rate": 7.5224135884601844e-06, "loss": 0.13680699, "memory(GiB)": 13.7, "step": 38940, "train_speed(iter/s)": 1.536017 }, { "acc": 0.98083334, "epoch": 18.25404265291774, "grad_norm": 1.2445554733276367, "learning_rate": 7.52174428763366e-06, "loss": 0.06416149, "memory(GiB)": 13.7, "step": 38945, "train_speed(iter/s)": 1.536025 }, { "acc": 0.96951389, "epoch": 18.256386219826577, "grad_norm": 5.709082126617432, "learning_rate": 7.521074926204706e-06, "loss": 0.10432153, "memory(GiB)": 13.7, "step": 38950, "train_speed(iter/s)": 1.536034 }, { "acc": 0.98500004, "epoch": 18.25872978673541, "grad_norm": 4.5877909660339355, "learning_rate": 7.520405504189409e-06, "loss": 0.06569206, "memory(GiB)": 13.7, "step": 38955, "train_speed(iter/s)": 1.536042 }, { "acc": 0.97986107, "epoch": 18.261073353644246, "grad_norm": 5.512242794036865, "learning_rate": 7.519736021603864e-06, "loss": 0.13043249, "memory(GiB)": 13.7, "step": 38960, "train_speed(iter/s)": 1.536037 }, { "acc": 0.97248564, "epoch": 18.26341692055308, "grad_norm": 1.532379388809204, "learning_rate": 7.519066478464161e-06, "loss": 0.16219883, "memory(GiB)": 13.7, "step": 38965, "train_speed(iter/s)": 1.536041 }, { "acc": 0.97360115, "epoch": 18.265760487461918, "grad_norm": 3.6719131469726562, "learning_rate": 7.518396874786393e-06, "loss": 0.10364224, "memory(GiB)": 13.7, "step": 38970, "train_speed(iter/s)": 1.53604 }, { "acc": 0.99535713, "epoch": 18.268104054370752, "grad_norm": 2.378060817718506, "learning_rate": 7.517727210586656e-06, "loss": 0.04495873, "memory(GiB)": 13.7, "step": 38975, "train_speed(iter/s)": 1.536048 }, { "acc": 0.98054371, "epoch": 18.270447621279587, "grad_norm": 2.8212730884552, "learning_rate": 7.517057485881048e-06, "loss": 0.07657938, "memory(GiB)": 13.7, "step": 38980, "train_speed(iter/s)": 1.536054 }, { "acc": 0.98291664, "epoch": 18.27279118818842, "grad_norm": 1.0636755228042603, "learning_rate": 7.516387700685667e-06, "loss": 0.05063383, "memory(GiB)": 13.7, "step": 38985, "train_speed(iter/s)": 1.536065 }, { "acc": 0.9763195, "epoch": 18.27513475509726, "grad_norm": 2.866364002227783, "learning_rate": 7.515717855016609e-06, "loss": 0.09813662, "memory(GiB)": 13.7, "step": 38990, "train_speed(iter/s)": 1.536061 }, { "acc": 0.98633928, "epoch": 18.277478322006093, "grad_norm": 1.8132389783859253, "learning_rate": 7.515047948889977e-06, "loss": 0.10309441, "memory(GiB)": 13.7, "step": 38995, "train_speed(iter/s)": 1.536061 }, { "acc": 0.98812504, "epoch": 18.279821888914928, "grad_norm": 7.964395046234131, "learning_rate": 7.514377982321872e-06, "loss": 0.05250862, "memory(GiB)": 13.7, "step": 39000, "train_speed(iter/s)": 1.536055 }, { "acc": 0.96340179, "epoch": 18.282165455823765, "grad_norm": 6.105987548828125, "learning_rate": 7.5137079553284005e-06, "loss": 0.20631714, "memory(GiB)": 13.7, "step": 39005, "train_speed(iter/s)": 1.536056 }, { "acc": 0.98852711, "epoch": 18.2845090227326, "grad_norm": 1.3834980726242065, "learning_rate": 7.513037867925667e-06, "loss": 0.05881121, "memory(GiB)": 13.7, "step": 39010, "train_speed(iter/s)": 1.536051 }, { "acc": 0.99020834, "epoch": 18.286852589641434, "grad_norm": 1.2270166873931885, "learning_rate": 7.5123677201297765e-06, "loss": 0.0647253, "memory(GiB)": 13.7, "step": 39015, "train_speed(iter/s)": 1.536046 }, { "acc": 0.97894344, "epoch": 18.28919615655027, "grad_norm": 6.6221022605896, "learning_rate": 7.5116975119568385e-06, "loss": 0.07295473, "memory(GiB)": 13.7, "step": 39020, "train_speed(iter/s)": 1.536057 }, { "acc": 0.9692749, "epoch": 18.291539723459106, "grad_norm": 39.12453842163086, "learning_rate": 7.511027243422961e-06, "loss": 0.13771673, "memory(GiB)": 13.7, "step": 39025, "train_speed(iter/s)": 1.53607 }, { "acc": 0.97904768, "epoch": 18.29388329036794, "grad_norm": 2.366989850997925, "learning_rate": 7.510356914544258e-06, "loss": 0.07819798, "memory(GiB)": 13.7, "step": 39030, "train_speed(iter/s)": 1.536069 }, { "acc": 0.96610832, "epoch": 18.296226857276775, "grad_norm": 7.6116790771484375, "learning_rate": 7.509686525336839e-06, "loss": 0.14523324, "memory(GiB)": 13.7, "step": 39035, "train_speed(iter/s)": 1.536065 }, { "acc": 0.98145294, "epoch": 18.29857042418561, "grad_norm": 6.4063262939453125, "learning_rate": 7.509016075816819e-06, "loss": 0.06692939, "memory(GiB)": 13.7, "step": 39040, "train_speed(iter/s)": 1.53607 }, { "acc": 0.97875004, "epoch": 18.300913991094447, "grad_norm": 3.739447832107544, "learning_rate": 7.508345566000314e-06, "loss": 0.0966283, "memory(GiB)": 13.7, "step": 39045, "train_speed(iter/s)": 1.536071 }, { "acc": 0.98520222, "epoch": 18.30325755800328, "grad_norm": 0.09121723473072052, "learning_rate": 7.507674995903442e-06, "loss": 0.07288395, "memory(GiB)": 13.7, "step": 39050, "train_speed(iter/s)": 1.536069 }, { "acc": 0.9791666, "epoch": 18.305601124912116, "grad_norm": 5.635673522949219, "learning_rate": 7.507004365542319e-06, "loss": 0.0455714, "memory(GiB)": 13.7, "step": 39055, "train_speed(iter/s)": 1.536072 }, { "acc": 0.99258928, "epoch": 18.30794469182095, "grad_norm": 10.088748931884766, "learning_rate": 7.506333674933065e-06, "loss": 0.09591553, "memory(GiB)": 13.7, "step": 39060, "train_speed(iter/s)": 1.536069 }, { "acc": 0.9788064, "epoch": 18.310288258729788, "grad_norm": 5.962656021118164, "learning_rate": 7.505662924091801e-06, "loss": 0.07651753, "memory(GiB)": 13.7, "step": 39065, "train_speed(iter/s)": 1.536071 }, { "acc": 0.97421207, "epoch": 18.312631825638622, "grad_norm": 5.594402313232422, "learning_rate": 7.504992113034654e-06, "loss": 0.18772843, "memory(GiB)": 13.7, "step": 39070, "train_speed(iter/s)": 1.536074 }, { "acc": 0.97963753, "epoch": 18.314975392547456, "grad_norm": 6.025982856750488, "learning_rate": 7.50432124177774e-06, "loss": 0.10505929, "memory(GiB)": 13.7, "step": 39075, "train_speed(iter/s)": 1.536078 }, { "acc": 0.97063494, "epoch": 18.317318959456294, "grad_norm": 2.9655752182006836, "learning_rate": 7.503650310337193e-06, "loss": 0.10431046, "memory(GiB)": 13.7, "step": 39080, "train_speed(iter/s)": 1.536091 }, { "acc": 0.99125004, "epoch": 18.31966252636513, "grad_norm": 1.0665644407272339, "learning_rate": 7.502979318729131e-06, "loss": 0.07543038, "memory(GiB)": 13.7, "step": 39085, "train_speed(iter/s)": 1.536089 }, { "acc": 0.97766752, "epoch": 18.322006093273963, "grad_norm": 3.0680058002471924, "learning_rate": 7.502308266969692e-06, "loss": 0.08255078, "memory(GiB)": 13.7, "step": 39090, "train_speed(iter/s)": 1.536096 }, { "acc": 0.9828125, "epoch": 18.324349660182797, "grad_norm": 4.13092041015625, "learning_rate": 7.501637155075e-06, "loss": 0.06493421, "memory(GiB)": 13.7, "step": 39095, "train_speed(iter/s)": 1.536093 }, { "acc": 0.96800594, "epoch": 18.326693227091635, "grad_norm": 7.429699897766113, "learning_rate": 7.500965983061188e-06, "loss": 0.10978435, "memory(GiB)": 13.7, "step": 39100, "train_speed(iter/s)": 1.536102 }, { "acc": 0.98145828, "epoch": 18.32903679400047, "grad_norm": 2.8196725845336914, "learning_rate": 7.500294750944388e-06, "loss": 0.03660375, "memory(GiB)": 13.7, "step": 39105, "train_speed(iter/s)": 1.536107 }, { "acc": 0.97621107, "epoch": 18.331380360909304, "grad_norm": 97.13693237304688, "learning_rate": 7.499623458740735e-06, "loss": 0.09816626, "memory(GiB)": 13.7, "step": 39110, "train_speed(iter/s)": 1.536109 }, { "acc": 0.98062496, "epoch": 18.333723927818138, "grad_norm": 5.429427146911621, "learning_rate": 7.498952106466363e-06, "loss": 0.11000354, "memory(GiB)": 13.7, "step": 39115, "train_speed(iter/s)": 1.536115 }, { "acc": 0.96885424, "epoch": 18.336067494726976, "grad_norm": 3.2807602882385254, "learning_rate": 7.498280694137411e-06, "loss": 0.1236774, "memory(GiB)": 13.7, "step": 39120, "train_speed(iter/s)": 1.536118 }, { "acc": 0.97887716, "epoch": 18.33841106163581, "grad_norm": 3.790517568588257, "learning_rate": 7.497609221770017e-06, "loss": 0.08843579, "memory(GiB)": 13.7, "step": 39125, "train_speed(iter/s)": 1.536128 }, { "acc": 0.98013391, "epoch": 18.340754628544644, "grad_norm": 4.38581657409668, "learning_rate": 7.496937689380321e-06, "loss": 0.07727112, "memory(GiB)": 13.7, "step": 39130, "train_speed(iter/s)": 1.536132 }, { "acc": 0.98614044, "epoch": 18.34309819545348, "grad_norm": 0.5262790322303772, "learning_rate": 7.496266096984466e-06, "loss": 0.04525485, "memory(GiB)": 13.7, "step": 39135, "train_speed(iter/s)": 1.536129 }, { "acc": 0.97770834, "epoch": 18.345441762362316, "grad_norm": 4.638786315917969, "learning_rate": 7.495594444598591e-06, "loss": 0.08184786, "memory(GiB)": 13.7, "step": 39140, "train_speed(iter/s)": 1.536126 }, { "acc": 0.97645836, "epoch": 18.34778532927115, "grad_norm": 3.7717583179473877, "learning_rate": 7.494922732238843e-06, "loss": 0.08466737, "memory(GiB)": 13.7, "step": 39145, "train_speed(iter/s)": 1.536123 }, { "acc": 0.9739584, "epoch": 18.350128896179985, "grad_norm": 5.805050849914551, "learning_rate": 7.494250959921368e-06, "loss": 0.08115819, "memory(GiB)": 13.7, "step": 39150, "train_speed(iter/s)": 1.536114 }, { "acc": 0.98842258, "epoch": 18.352472463088823, "grad_norm": 0.83841872215271, "learning_rate": 7.493579127662313e-06, "loss": 0.05140912, "memory(GiB)": 13.7, "step": 39155, "train_speed(iter/s)": 1.536131 }, { "acc": 0.98311958, "epoch": 18.354816029997657, "grad_norm": 5.596710205078125, "learning_rate": 7.492907235477824e-06, "loss": 0.05306809, "memory(GiB)": 13.7, "step": 39160, "train_speed(iter/s)": 1.536126 }, { "acc": 0.96592274, "epoch": 18.35715959690649, "grad_norm": 6.741189002990723, "learning_rate": 7.492235283384055e-06, "loss": 0.1762419, "memory(GiB)": 13.7, "step": 39165, "train_speed(iter/s)": 1.536126 }, { "acc": 0.97696428, "epoch": 18.359503163815326, "grad_norm": 0.039084386080503464, "learning_rate": 7.491563271397155e-06, "loss": 0.04824115, "memory(GiB)": 13.7, "step": 39170, "train_speed(iter/s)": 1.536126 }, { "acc": 0.98083324, "epoch": 18.361846730724164, "grad_norm": 3.2393295764923096, "learning_rate": 7.490891199533279e-06, "loss": 0.05953065, "memory(GiB)": 13.7, "step": 39175, "train_speed(iter/s)": 1.536134 }, { "acc": 0.98808422, "epoch": 18.364190297632998, "grad_norm": 0.6595824956893921, "learning_rate": 7.490219067808581e-06, "loss": 0.05359508, "memory(GiB)": 13.7, "step": 39180, "train_speed(iter/s)": 1.536138 }, { "acc": 0.98028841, "epoch": 18.366533864541832, "grad_norm": 3.318401575088501, "learning_rate": 7.489546876239213e-06, "loss": 0.06884841, "memory(GiB)": 13.7, "step": 39185, "train_speed(iter/s)": 1.536144 }, { "acc": 0.98988094, "epoch": 18.368877431450667, "grad_norm": 0.016843099147081375, "learning_rate": 7.4888746248413365e-06, "loss": 0.06294959, "memory(GiB)": 13.7, "step": 39190, "train_speed(iter/s)": 1.536145 }, { "acc": 0.98458328, "epoch": 18.371220998359505, "grad_norm": 2.3644940853118896, "learning_rate": 7.488202313631109e-06, "loss": 0.05661754, "memory(GiB)": 13.7, "step": 39195, "train_speed(iter/s)": 1.536152 }, { "acc": 0.98656101, "epoch": 18.37356456526834, "grad_norm": 0.0022584230173379183, "learning_rate": 7.4875299426246905e-06, "loss": 0.04301597, "memory(GiB)": 13.7, "step": 39200, "train_speed(iter/s)": 1.536154 }, { "acc": 0.97710323, "epoch": 18.375908132177173, "grad_norm": 5.826771259307861, "learning_rate": 7.486857511838242e-06, "loss": 0.07853062, "memory(GiB)": 13.7, "step": 39205, "train_speed(iter/s)": 1.536162 }, { "acc": 0.98982372, "epoch": 18.378251699086007, "grad_norm": 11.925888061523438, "learning_rate": 7.4861850212879296e-06, "loss": 0.06060607, "memory(GiB)": 13.7, "step": 39210, "train_speed(iter/s)": 1.536164 }, { "acc": 0.98604164, "epoch": 18.380595265994845, "grad_norm": 3.072258472442627, "learning_rate": 7.485512470989914e-06, "loss": 0.05945807, "memory(GiB)": 13.7, "step": 39215, "train_speed(iter/s)": 1.53618 }, { "acc": 0.9854166, "epoch": 18.38293883290368, "grad_norm": 2.6773154735565186, "learning_rate": 7.484839860960361e-06, "loss": 0.03786904, "memory(GiB)": 13.7, "step": 39220, "train_speed(iter/s)": 1.536178 }, { "acc": 0.97636166, "epoch": 18.385282399812514, "grad_norm": 3.120227098464966, "learning_rate": 7.4841671912154415e-06, "loss": 0.08429847, "memory(GiB)": 13.7, "step": 39225, "train_speed(iter/s)": 1.536184 }, { "acc": 0.98313065, "epoch": 18.387625966721348, "grad_norm": 1.4406965970993042, "learning_rate": 7.483494461771322e-06, "loss": 0.07808389, "memory(GiB)": 13.7, "step": 39230, "train_speed(iter/s)": 1.536176 }, { "acc": 0.97863092, "epoch": 18.389969533630186, "grad_norm": 0.011792746372520924, "learning_rate": 7.4828216726441715e-06, "loss": 0.05179054, "memory(GiB)": 13.7, "step": 39235, "train_speed(iter/s)": 1.536177 }, { "acc": 0.97106686, "epoch": 18.39231310053902, "grad_norm": 2.2078311443328857, "learning_rate": 7.482148823850165e-06, "loss": 0.12136793, "memory(GiB)": 13.7, "step": 39240, "train_speed(iter/s)": 1.536176 }, { "acc": 0.98572922, "epoch": 18.394656667447855, "grad_norm": 0.23491834104061127, "learning_rate": 7.481475915405472e-06, "loss": 0.0712375, "memory(GiB)": 13.7, "step": 39245, "train_speed(iter/s)": 1.536177 }, { "acc": 0.98969698, "epoch": 18.397000234356693, "grad_norm": 3.485262155532837, "learning_rate": 7.480802947326269e-06, "loss": 0.06286795, "memory(GiB)": 13.7, "step": 39250, "train_speed(iter/s)": 1.536181 }, { "acc": 0.9958334, "epoch": 18.399343801265527, "grad_norm": 1.32212495803833, "learning_rate": 7.480129919628732e-06, "loss": 0.02465006, "memory(GiB)": 13.7, "step": 39255, "train_speed(iter/s)": 1.536171 }, { "acc": 0.992099, "epoch": 18.40168736817436, "grad_norm": 0.013011145405471325, "learning_rate": 7.479456832329039e-06, "loss": 0.0520045, "memory(GiB)": 13.7, "step": 39260, "train_speed(iter/s)": 1.536156 }, { "acc": 0.97171135, "epoch": 18.404030935083195, "grad_norm": 5.717893123626709, "learning_rate": 7.4787836854433644e-06, "loss": 0.07108943, "memory(GiB)": 13.7, "step": 39265, "train_speed(iter/s)": 1.536161 }, { "acc": 0.96799335, "epoch": 18.406374501992033, "grad_norm": 9.19418716430664, "learning_rate": 7.478110478987894e-06, "loss": 0.14918905, "memory(GiB)": 13.7, "step": 39270, "train_speed(iter/s)": 1.536163 }, { "acc": 0.9833334, "epoch": 18.408718068900868, "grad_norm": 4.284626483917236, "learning_rate": 7.477437212978807e-06, "loss": 0.08441904, "memory(GiB)": 13.7, "step": 39275, "train_speed(iter/s)": 1.536176 }, { "acc": 0.97431545, "epoch": 18.411061635809702, "grad_norm": 6.5126166343688965, "learning_rate": 7.476763887432288e-06, "loss": 0.10360961, "memory(GiB)": 13.7, "step": 39280, "train_speed(iter/s)": 1.536183 }, { "acc": 0.98260422, "epoch": 18.413405202718536, "grad_norm": 3.2424163818359375, "learning_rate": 7.476090502364519e-06, "loss": 0.11336914, "memory(GiB)": 13.7, "step": 39285, "train_speed(iter/s)": 1.536193 }, { "acc": 0.98937492, "epoch": 18.415748769627374, "grad_norm": 4.070226669311523, "learning_rate": 7.475417057791687e-06, "loss": 0.03606453, "memory(GiB)": 13.7, "step": 39290, "train_speed(iter/s)": 1.536191 }, { "acc": 0.97927074, "epoch": 18.41809233653621, "grad_norm": 5.1839823722839355, "learning_rate": 7.4747435537299795e-06, "loss": 0.0797587, "memory(GiB)": 13.7, "step": 39295, "train_speed(iter/s)": 1.536198 }, { "acc": 0.98746109, "epoch": 18.420435903445043, "grad_norm": 4.652709484100342, "learning_rate": 7.474069990195588e-06, "loss": 0.06447909, "memory(GiB)": 13.7, "step": 39300, "train_speed(iter/s)": 1.536198 }, { "acc": 0.98145828, "epoch": 18.422779470353877, "grad_norm": 2.5417747497558594, "learning_rate": 7.473396367204699e-06, "loss": 0.05747286, "memory(GiB)": 13.7, "step": 39305, "train_speed(iter/s)": 1.536207 }, { "acc": 0.98210936, "epoch": 18.425123037262715, "grad_norm": 3.9241137504577637, "learning_rate": 7.472722684773504e-06, "loss": 0.07027551, "memory(GiB)": 13.7, "step": 39310, "train_speed(iter/s)": 1.536214 }, { "acc": 0.98187504, "epoch": 18.42746660417155, "grad_norm": 5.360142707824707, "learning_rate": 7.472048942918198e-06, "loss": 0.06644216, "memory(GiB)": 13.7, "step": 39315, "train_speed(iter/s)": 1.53622 }, { "acc": 0.97437496, "epoch": 18.429810171080383, "grad_norm": 2.5650346279144287, "learning_rate": 7.471375141654976e-06, "loss": 0.06383691, "memory(GiB)": 13.7, "step": 39320, "train_speed(iter/s)": 1.536231 }, { "acc": 0.9793993, "epoch": 18.43215373798922, "grad_norm": 14.210306167602539, "learning_rate": 7.470701281000032e-06, "loss": 0.08515707, "memory(GiB)": 13.7, "step": 39325, "train_speed(iter/s)": 1.53623 }, { "acc": 0.9729166, "epoch": 18.434497304898056, "grad_norm": 4.082875728607178, "learning_rate": 7.470027360969565e-06, "loss": 0.12486727, "memory(GiB)": 13.7, "step": 39330, "train_speed(iter/s)": 1.53624 }, { "acc": 0.98356647, "epoch": 18.43684087180689, "grad_norm": 1.9864397048950195, "learning_rate": 7.469353381579771e-06, "loss": 0.06424438, "memory(GiB)": 13.7, "step": 39335, "train_speed(iter/s)": 1.536254 }, { "acc": 0.99437504, "epoch": 18.439184438715724, "grad_norm": 0.009154386818408966, "learning_rate": 7.468679342846858e-06, "loss": 0.01507301, "memory(GiB)": 13.7, "step": 39340, "train_speed(iter/s)": 1.536254 }, { "acc": 0.971875, "epoch": 18.441528005624562, "grad_norm": 5.894538879394531, "learning_rate": 7.4680052447870176e-06, "loss": 0.12718606, "memory(GiB)": 13.7, "step": 39345, "train_speed(iter/s)": 1.53626 }, { "acc": 0.97770834, "epoch": 18.443871572533396, "grad_norm": 6.844383239746094, "learning_rate": 7.4673310874164584e-06, "loss": 0.10250114, "memory(GiB)": 13.7, "step": 39350, "train_speed(iter/s)": 1.536248 }, { "acc": 0.96555061, "epoch": 18.44621513944223, "grad_norm": 8.589375495910645, "learning_rate": 7.466656870751386e-06, "loss": 0.10208623, "memory(GiB)": 13.7, "step": 39355, "train_speed(iter/s)": 1.536244 }, { "acc": 0.97614584, "epoch": 18.448558706351065, "grad_norm": 3.8782386779785156, "learning_rate": 7.465982594808001e-06, "loss": 0.04551774, "memory(GiB)": 13.7, "step": 39360, "train_speed(iter/s)": 1.536258 }, { "acc": 0.96789265, "epoch": 18.450902273259903, "grad_norm": 6.057136535644531, "learning_rate": 7.4653082596025165e-06, "loss": 0.15373442, "memory(GiB)": 13.7, "step": 39365, "train_speed(iter/s)": 1.536257 }, { "acc": 0.9772768, "epoch": 18.453245840168737, "grad_norm": 7.6848464012146, "learning_rate": 7.464633865151139e-06, "loss": 0.12261493, "memory(GiB)": 13.7, "step": 39370, "train_speed(iter/s)": 1.536261 }, { "acc": 0.98145838, "epoch": 18.45558940707757, "grad_norm": 1.5310258865356445, "learning_rate": 7.463959411470077e-06, "loss": 0.08035927, "memory(GiB)": 13.7, "step": 39375, "train_speed(iter/s)": 1.536265 }, { "acc": 0.9864583, "epoch": 18.457932973986406, "grad_norm": 1.4201459884643555, "learning_rate": 7.463284898575546e-06, "loss": 0.05846235, "memory(GiB)": 13.7, "step": 39380, "train_speed(iter/s)": 1.536273 }, { "acc": 0.97857151, "epoch": 18.460276540895244, "grad_norm": 7.11893367767334, "learning_rate": 7.462610326483756e-06, "loss": 0.13910111, "memory(GiB)": 13.7, "step": 39385, "train_speed(iter/s)": 1.536273 }, { "acc": 0.98738098, "epoch": 18.462620107804078, "grad_norm": 2.3531253337860107, "learning_rate": 7.4619356952109214e-06, "loss": 0.07314485, "memory(GiB)": 13.7, "step": 39390, "train_speed(iter/s)": 1.536281 }, { "acc": 0.97989578, "epoch": 18.464963674712912, "grad_norm": 0.08109800517559052, "learning_rate": 7.46126100477326e-06, "loss": 0.05580344, "memory(GiB)": 13.7, "step": 39395, "train_speed(iter/s)": 1.536292 }, { "acc": 0.9808897, "epoch": 18.467307241621747, "grad_norm": 17.208126068115234, "learning_rate": 7.460586255186989e-06, "loss": 0.05960895, "memory(GiB)": 13.7, "step": 39400, "train_speed(iter/s)": 1.536287 }, { "acc": 0.98571434, "epoch": 18.469650808530584, "grad_norm": 5.064919471740723, "learning_rate": 7.459911446468325e-06, "loss": 0.03022653, "memory(GiB)": 13.7, "step": 39405, "train_speed(iter/s)": 1.536289 }, { "acc": 0.98812504, "epoch": 18.47199437543942, "grad_norm": 0.02313172072172165, "learning_rate": 7.459236578633491e-06, "loss": 0.02005252, "memory(GiB)": 13.7, "step": 39410, "train_speed(iter/s)": 1.536293 }, { "acc": 0.98069439, "epoch": 18.474337942348253, "grad_norm": 3.9881179332733154, "learning_rate": 7.458561651698707e-06, "loss": 0.0748737, "memory(GiB)": 13.7, "step": 39415, "train_speed(iter/s)": 1.536307 }, { "acc": 0.99470739, "epoch": 18.47668150925709, "grad_norm": 2.246593713760376, "learning_rate": 7.457886665680195e-06, "loss": 0.04260272, "memory(GiB)": 13.7, "step": 39420, "train_speed(iter/s)": 1.536308 }, { "acc": 0.97842255, "epoch": 18.479025076165925, "grad_norm": 4.783764839172363, "learning_rate": 7.457211620594182e-06, "loss": 0.07898713, "memory(GiB)": 13.7, "step": 39425, "train_speed(iter/s)": 1.536318 }, { "acc": 0.97855654, "epoch": 18.48136864307476, "grad_norm": 3.9372127056121826, "learning_rate": 7.456536516456892e-06, "loss": 0.05994433, "memory(GiB)": 13.7, "step": 39430, "train_speed(iter/s)": 1.536314 }, { "acc": 0.97413692, "epoch": 18.483712209983594, "grad_norm": 0.0022473738063126802, "learning_rate": 7.455861353284552e-06, "loss": 0.08120625, "memory(GiB)": 13.7, "step": 39435, "train_speed(iter/s)": 1.536315 }, { "acc": 0.97957792, "epoch": 18.48605577689243, "grad_norm": 5.586756229400635, "learning_rate": 7.455186131093394e-06, "loss": 0.08158601, "memory(GiB)": 13.7, "step": 39440, "train_speed(iter/s)": 1.53632 }, { "acc": 0.98529758, "epoch": 18.488399343801266, "grad_norm": 2.291916847229004, "learning_rate": 7.454510849899643e-06, "loss": 0.07449966, "memory(GiB)": 13.7, "step": 39445, "train_speed(iter/s)": 1.53632 }, { "acc": 0.99231606, "epoch": 18.4907429107101, "grad_norm": 2.1868581771850586, "learning_rate": 7.4538355097195355e-06, "loss": 0.05557078, "memory(GiB)": 13.7, "step": 39450, "train_speed(iter/s)": 1.536322 }, { "acc": 0.98907194, "epoch": 18.493086477618935, "grad_norm": 2.2013022899627686, "learning_rate": 7.453160110569301e-06, "loss": 0.03118991, "memory(GiB)": 13.7, "step": 39455, "train_speed(iter/s)": 1.536331 }, { "acc": 0.97666664, "epoch": 18.495430044527772, "grad_norm": 1.2732841968536377, "learning_rate": 7.452484652465175e-06, "loss": 0.09001223, "memory(GiB)": 13.7, "step": 39460, "train_speed(iter/s)": 1.536342 }, { "acc": 0.97240524, "epoch": 18.497773611436607, "grad_norm": 3.700845241546631, "learning_rate": 7.451809135423394e-06, "loss": 0.09779233, "memory(GiB)": 13.7, "step": 39465, "train_speed(iter/s)": 1.536338 }, { "acc": 0.99380684, "epoch": 18.50011717834544, "grad_norm": 1.1366078853607178, "learning_rate": 7.451133559460194e-06, "loss": 0.0126203, "memory(GiB)": 13.7, "step": 39470, "train_speed(iter/s)": 1.536337 }, { "acc": 0.9791667, "epoch": 18.502460745254275, "grad_norm": 0.42877820134162903, "learning_rate": 7.450457924591814e-06, "loss": 0.06770453, "memory(GiB)": 13.7, "step": 39475, "train_speed(iter/s)": 1.536342 }, { "acc": 0.98145828, "epoch": 18.504804312163113, "grad_norm": 3.290922164916992, "learning_rate": 7.449782230834494e-06, "loss": 0.11552606, "memory(GiB)": 13.7, "step": 39480, "train_speed(iter/s)": 1.536343 }, { "acc": 0.97333336, "epoch": 18.507147879071947, "grad_norm": 5.8210883140563965, "learning_rate": 7.449106478204475e-06, "loss": 0.15690407, "memory(GiB)": 13.7, "step": 39485, "train_speed(iter/s)": 1.536357 }, { "acc": 0.98265457, "epoch": 18.509491445980782, "grad_norm": 3.313915252685547, "learning_rate": 7.4484306667180025e-06, "loss": 0.14176877, "memory(GiB)": 13.7, "step": 39490, "train_speed(iter/s)": 1.536356 }, { "acc": 0.971875, "epoch": 18.51183501288962, "grad_norm": 10.819796562194824, "learning_rate": 7.447754796391319e-06, "loss": 0.1119645, "memory(GiB)": 13.7, "step": 39495, "train_speed(iter/s)": 1.536357 }, { "acc": 0.97290173, "epoch": 18.514178579798454, "grad_norm": 4.584946632385254, "learning_rate": 7.447078867240668e-06, "loss": 0.07754326, "memory(GiB)": 13.7, "step": 39500, "train_speed(iter/s)": 1.536368 }, { "acc": 0.97885418, "epoch": 18.51652214670729, "grad_norm": 2.8598899841308594, "learning_rate": 7.4464028792823e-06, "loss": 0.10805266, "memory(GiB)": 13.7, "step": 39505, "train_speed(iter/s)": 1.536368 }, { "acc": 0.98766775, "epoch": 18.518865713616123, "grad_norm": 0.29671743512153625, "learning_rate": 7.4457268325324615e-06, "loss": 0.081701, "memory(GiB)": 13.7, "step": 39510, "train_speed(iter/s)": 1.53637 }, { "acc": 0.98653851, "epoch": 18.52120928052496, "grad_norm": 3.103245735168457, "learning_rate": 7.4450507270074045e-06, "loss": 0.07910444, "memory(GiB)": 13.7, "step": 39515, "train_speed(iter/s)": 1.536369 }, { "acc": 0.98806534, "epoch": 18.523552847433795, "grad_norm": 3.482024908065796, "learning_rate": 7.444374562723378e-06, "loss": 0.04920518, "memory(GiB)": 13.7, "step": 39520, "train_speed(iter/s)": 1.536372 }, { "acc": 0.96874456, "epoch": 18.52589641434263, "grad_norm": 1.232838749885559, "learning_rate": 7.443698339696636e-06, "loss": 0.10389066, "memory(GiB)": 13.7, "step": 39525, "train_speed(iter/s)": 1.536368 }, { "acc": 0.98220482, "epoch": 18.528239981251463, "grad_norm": 3.8544352054595947, "learning_rate": 7.443022057943432e-06, "loss": 0.05268759, "memory(GiB)": 13.7, "step": 39530, "train_speed(iter/s)": 1.536374 }, { "acc": 0.99092264, "epoch": 18.5305835481603, "grad_norm": 3.4536378383636475, "learning_rate": 7.4423457174800225e-06, "loss": 0.03326679, "memory(GiB)": 13.7, "step": 39535, "train_speed(iter/s)": 1.536376 }, { "acc": 0.98416672, "epoch": 18.532927115069135, "grad_norm": 1.6869335174560547, "learning_rate": 7.441669318322664e-06, "loss": 0.08651806, "memory(GiB)": 13.7, "step": 39540, "train_speed(iter/s)": 1.536379 }, { "acc": 0.99017868, "epoch": 18.53527068197797, "grad_norm": 2.1874732971191406, "learning_rate": 7.4409928604876135e-06, "loss": 0.06285414, "memory(GiB)": 13.7, "step": 39545, "train_speed(iter/s)": 1.536384 }, { "acc": 0.99092264, "epoch": 18.537614248886804, "grad_norm": 0.004385252948850393, "learning_rate": 7.440316343991134e-06, "loss": 0.03525239, "memory(GiB)": 13.7, "step": 39550, "train_speed(iter/s)": 1.536388 }, { "acc": 0.99333334, "epoch": 18.539957815795642, "grad_norm": 1.5327577590942383, "learning_rate": 7.439639768849486e-06, "loss": 0.0513783, "memory(GiB)": 13.7, "step": 39555, "train_speed(iter/s)": 1.536392 }, { "acc": 0.98532982, "epoch": 18.542301382704476, "grad_norm": 5.968042373657227, "learning_rate": 7.438963135078929e-06, "loss": 0.07785786, "memory(GiB)": 13.7, "step": 39560, "train_speed(iter/s)": 1.5364 }, { "acc": 0.97986107, "epoch": 18.54464494961331, "grad_norm": 5.74136209487915, "learning_rate": 7.43828644269573e-06, "loss": 0.05853725, "memory(GiB)": 13.7, "step": 39565, "train_speed(iter/s)": 1.536394 }, { "acc": 0.98232145, "epoch": 18.54698851652215, "grad_norm": 54.33329772949219, "learning_rate": 7.437609691716155e-06, "loss": 0.0862025, "memory(GiB)": 13.7, "step": 39570, "train_speed(iter/s)": 1.536398 }, { "acc": 0.98475275, "epoch": 18.549332083430983, "grad_norm": 5.631634712219238, "learning_rate": 7.436932882156467e-06, "loss": 0.04727546, "memory(GiB)": 13.7, "step": 39575, "train_speed(iter/s)": 1.536401 }, { "acc": 0.96308708, "epoch": 18.551675650339817, "grad_norm": 3.5785255432128906, "learning_rate": 7.436256014032939e-06, "loss": 0.13839676, "memory(GiB)": 13.7, "step": 39580, "train_speed(iter/s)": 1.5364 }, { "acc": 0.98341951, "epoch": 18.55401921724865, "grad_norm": 6.874142169952393, "learning_rate": 7.435579087361836e-06, "loss": 0.08663979, "memory(GiB)": 13.7, "step": 39585, "train_speed(iter/s)": 1.5364 }, { "acc": 0.97898674, "epoch": 18.55636278415749, "grad_norm": 2.912325143814087, "learning_rate": 7.434902102159434e-06, "loss": 0.07277823, "memory(GiB)": 13.7, "step": 39590, "train_speed(iter/s)": 1.536399 }, { "acc": 0.97580042, "epoch": 18.558706351066323, "grad_norm": 7.059202194213867, "learning_rate": 7.434225058442002e-06, "loss": 0.09230103, "memory(GiB)": 13.7, "step": 39595, "train_speed(iter/s)": 1.536404 }, { "acc": 0.98377972, "epoch": 18.561049917975158, "grad_norm": 2.2553727626800537, "learning_rate": 7.433547956225815e-06, "loss": 0.0715506, "memory(GiB)": 13.7, "step": 39600, "train_speed(iter/s)": 1.536415 }, { "acc": 0.99250002, "epoch": 18.563393484883992, "grad_norm": 4.411019802093506, "learning_rate": 7.432870795527148e-06, "loss": 0.03182369, "memory(GiB)": 13.7, "step": 39605, "train_speed(iter/s)": 1.536412 }, { "acc": 0.96196423, "epoch": 18.56573705179283, "grad_norm": 4.308533668518066, "learning_rate": 7.432193576362279e-06, "loss": 0.11466131, "memory(GiB)": 13.7, "step": 39610, "train_speed(iter/s)": 1.536412 }, { "acc": 0.9802083, "epoch": 18.568080618701664, "grad_norm": 2.427659034729004, "learning_rate": 7.431516298747486e-06, "loss": 0.12164264, "memory(GiB)": 13.7, "step": 39615, "train_speed(iter/s)": 1.536415 }, { "acc": 0.99333334, "epoch": 18.5704241856105, "grad_norm": 4.060636520385742, "learning_rate": 7.430838962699046e-06, "loss": 0.06240392, "memory(GiB)": 13.7, "step": 39620, "train_speed(iter/s)": 1.536417 }, { "acc": 0.98062496, "epoch": 18.572767752519333, "grad_norm": 3.59041428565979, "learning_rate": 7.4301615682332415e-06, "loss": 0.0717665, "memory(GiB)": 13.7, "step": 39625, "train_speed(iter/s)": 1.536426 }, { "acc": 0.98059158, "epoch": 18.57511131942817, "grad_norm": 5.850937843322754, "learning_rate": 7.429484115366354e-06, "loss": 0.13376683, "memory(GiB)": 13.7, "step": 39630, "train_speed(iter/s)": 1.53643 }, { "acc": 0.97488098, "epoch": 18.577454886337005, "grad_norm": 6.772769451141357, "learning_rate": 7.428806604114672e-06, "loss": 0.1135025, "memory(GiB)": 13.7, "step": 39635, "train_speed(iter/s)": 1.536439 }, { "acc": 0.98125, "epoch": 18.57979845324584, "grad_norm": 3.390279769897461, "learning_rate": 7.4281290344944734e-06, "loss": 0.0580689, "memory(GiB)": 13.7, "step": 39640, "train_speed(iter/s)": 1.536436 }, { "acc": 0.97666664, "epoch": 18.582142020154677, "grad_norm": 0.00970834493637085, "learning_rate": 7.427451406522049e-06, "loss": 0.07976251, "memory(GiB)": 13.7, "step": 39645, "train_speed(iter/s)": 1.536449 }, { "acc": 0.98506947, "epoch": 18.58448558706351, "grad_norm": 5.836367607116699, "learning_rate": 7.426773720213687e-06, "loss": 0.03205371, "memory(GiB)": 13.7, "step": 39650, "train_speed(iter/s)": 1.536465 }, { "acc": 0.97518425, "epoch": 18.586829153972346, "grad_norm": 9.586454391479492, "learning_rate": 7.426095975585675e-06, "loss": 0.15482147, "memory(GiB)": 13.7, "step": 39655, "train_speed(iter/s)": 1.536462 }, { "acc": 0.96161709, "epoch": 18.58917272088118, "grad_norm": 3.971303939819336, "learning_rate": 7.425418172654304e-06, "loss": 0.19640338, "memory(GiB)": 13.7, "step": 39660, "train_speed(iter/s)": 1.536473 }, { "acc": 0.98395834, "epoch": 18.591516287790018, "grad_norm": 0.9227941632270813, "learning_rate": 7.424740311435867e-06, "loss": 0.06813066, "memory(GiB)": 13.7, "step": 39665, "train_speed(iter/s)": 1.536479 }, { "acc": 0.97852678, "epoch": 18.593859854698852, "grad_norm": 4.9768757820129395, "learning_rate": 7.424062391946657e-06, "loss": 0.05310698, "memory(GiB)": 13.7, "step": 39670, "train_speed(iter/s)": 1.536486 }, { "acc": 0.98500004, "epoch": 18.596203421607687, "grad_norm": 5.4585957527160645, "learning_rate": 7.423384414202967e-06, "loss": 0.05098234, "memory(GiB)": 13.7, "step": 39675, "train_speed(iter/s)": 1.536491 }, { "acc": 0.97781258, "epoch": 18.59854698851652, "grad_norm": 4.529214859008789, "learning_rate": 7.4227063782210995e-06, "loss": 0.0520356, "memory(GiB)": 13.7, "step": 39680, "train_speed(iter/s)": 1.536502 }, { "acc": 0.98500004, "epoch": 18.60089055542536, "grad_norm": 9.163029670715332, "learning_rate": 7.422028284017347e-06, "loss": 0.06763071, "memory(GiB)": 13.7, "step": 39685, "train_speed(iter/s)": 1.536505 }, { "acc": 0.97803469, "epoch": 18.603234122334193, "grad_norm": 30.196828842163086, "learning_rate": 7.42135013160801e-06, "loss": 0.17229447, "memory(GiB)": 13.7, "step": 39690, "train_speed(iter/s)": 1.536502 }, { "acc": 0.98953876, "epoch": 18.605577689243027, "grad_norm": 2.116868495941162, "learning_rate": 7.420671921009387e-06, "loss": 0.04193171, "memory(GiB)": 13.7, "step": 39695, "train_speed(iter/s)": 1.536498 }, { "acc": 0.97436008, "epoch": 18.60792125615186, "grad_norm": 48.613983154296875, "learning_rate": 7.419993652237784e-06, "loss": 0.12541108, "memory(GiB)": 13.7, "step": 39700, "train_speed(iter/s)": 1.536513 }, { "acc": 0.97133923, "epoch": 18.6102648230607, "grad_norm": 6.726711273193359, "learning_rate": 7.4193153253095015e-06, "loss": 0.09109312, "memory(GiB)": 13.7, "step": 39705, "train_speed(iter/s)": 1.536521 }, { "acc": 0.9916666, "epoch": 18.612608389969534, "grad_norm": 3.601355791091919, "learning_rate": 7.4186369402408455e-06, "loss": 0.02480926, "memory(GiB)": 13.7, "step": 39710, "train_speed(iter/s)": 1.536509 }, { "acc": 0.98605118, "epoch": 18.614951956878368, "grad_norm": 2.081918954849243, "learning_rate": 7.417958497048119e-06, "loss": 0.12002263, "memory(GiB)": 13.7, "step": 39715, "train_speed(iter/s)": 1.536508 }, { "acc": 0.98693905, "epoch": 18.617295523787202, "grad_norm": 2.860628843307495, "learning_rate": 7.417279995747636e-06, "loss": 0.05235046, "memory(GiB)": 13.7, "step": 39720, "train_speed(iter/s)": 1.536512 }, { "acc": 0.9836607, "epoch": 18.61963909069604, "grad_norm": 4.305168151855469, "learning_rate": 7.4166014363557e-06, "loss": 0.10081917, "memory(GiB)": 13.7, "step": 39725, "train_speed(iter/s)": 1.53651 }, { "acc": 0.97416668, "epoch": 18.621982657604875, "grad_norm": 23.35110092163086, "learning_rate": 7.415922818888622e-06, "loss": 0.14885056, "memory(GiB)": 13.7, "step": 39730, "train_speed(iter/s)": 1.536512 }, { "acc": 0.98187504, "epoch": 18.62432622451371, "grad_norm": 0.07279488444328308, "learning_rate": 7.415244143362715e-06, "loss": 0.06041226, "memory(GiB)": 13.7, "step": 39735, "train_speed(iter/s)": 1.536521 }, { "acc": 0.98604164, "epoch": 18.626669791422547, "grad_norm": 0.03789448365569115, "learning_rate": 7.414565409794293e-06, "loss": 0.0702401, "memory(GiB)": 13.7, "step": 39740, "train_speed(iter/s)": 1.53652 }, { "acc": 0.99437504, "epoch": 18.62901335833138, "grad_norm": 4.998600482940674, "learning_rate": 7.413886618199669e-06, "loss": 0.02912607, "memory(GiB)": 13.7, "step": 39745, "train_speed(iter/s)": 1.536533 }, { "acc": 0.9572917, "epoch": 18.631356925240215, "grad_norm": 7.574821472167969, "learning_rate": 7.413207768595159e-06, "loss": 0.14611037, "memory(GiB)": 13.7, "step": 39750, "train_speed(iter/s)": 1.536532 }, { "acc": 0.97349701, "epoch": 18.63370049214905, "grad_norm": 9.47705078125, "learning_rate": 7.412528860997081e-06, "loss": 0.09704272, "memory(GiB)": 13.7, "step": 39755, "train_speed(iter/s)": 1.536541 }, { "acc": 0.9822916, "epoch": 18.636044059057888, "grad_norm": 3.4040420055389404, "learning_rate": 7.411849895421753e-06, "loss": 0.06603535, "memory(GiB)": 13.7, "step": 39760, "train_speed(iter/s)": 1.536552 }, { "acc": 0.9734375, "epoch": 18.638387625966722, "grad_norm": 1.801494836807251, "learning_rate": 7.411170871885496e-06, "loss": 0.07534893, "memory(GiB)": 13.7, "step": 39765, "train_speed(iter/s)": 1.536553 }, { "acc": 0.9874671, "epoch": 18.640731192875556, "grad_norm": 3.4906020164489746, "learning_rate": 7.410491790404629e-06, "loss": 0.0467992, "memory(GiB)": 13.7, "step": 39770, "train_speed(iter/s)": 1.536551 }, { "acc": 0.96848793, "epoch": 18.64307475978439, "grad_norm": 1.0676209926605225, "learning_rate": 7.409812650995479e-06, "loss": 0.13010788, "memory(GiB)": 13.7, "step": 39775, "train_speed(iter/s)": 1.536551 }, { "acc": 0.99570141, "epoch": 18.64541832669323, "grad_norm": 2.8087949752807617, "learning_rate": 7.409133453674368e-06, "loss": 0.04342465, "memory(GiB)": 13.7, "step": 39780, "train_speed(iter/s)": 1.536554 }, { "acc": 0.98912163, "epoch": 18.647761893602063, "grad_norm": 1.4191722869873047, "learning_rate": 7.408454198457621e-06, "loss": 0.04493489, "memory(GiB)": 13.7, "step": 39785, "train_speed(iter/s)": 1.536558 }, { "acc": 0.97666664, "epoch": 18.650105460510897, "grad_norm": 5.270795822143555, "learning_rate": 7.407774885361565e-06, "loss": 0.04713538, "memory(GiB)": 13.7, "step": 39790, "train_speed(iter/s)": 1.536554 }, { "acc": 0.98122225, "epoch": 18.65244902741973, "grad_norm": 9.763978958129883, "learning_rate": 7.407095514402531e-06, "loss": 0.09533892, "memory(GiB)": 13.7, "step": 39795, "train_speed(iter/s)": 1.536554 }, { "acc": 0.97218752, "epoch": 18.65479259432857, "grad_norm": 8.25709056854248, "learning_rate": 7.406416085596846e-06, "loss": 0.1095899, "memory(GiB)": 13.7, "step": 39800, "train_speed(iter/s)": 1.536558 }, { "acc": 0.98791666, "epoch": 18.657136161237403, "grad_norm": 5.800651550292969, "learning_rate": 7.405736598960843e-06, "loss": 0.05803259, "memory(GiB)": 13.7, "step": 39805, "train_speed(iter/s)": 1.53656 }, { "acc": 0.98222942, "epoch": 18.659479728146238, "grad_norm": 7.034228801727295, "learning_rate": 7.405057054510854e-06, "loss": 0.08162774, "memory(GiB)": 13.7, "step": 39810, "train_speed(iter/s)": 1.536564 }, { "acc": 0.98187504, "epoch": 18.661823295055076, "grad_norm": 3.4076881408691406, "learning_rate": 7.404377452263211e-06, "loss": 0.05700886, "memory(GiB)": 13.7, "step": 39815, "train_speed(iter/s)": 1.536563 }, { "acc": 0.97496204, "epoch": 18.66416686196391, "grad_norm": 8.828615188598633, "learning_rate": 7.403697792234253e-06, "loss": 0.11015062, "memory(GiB)": 13.7, "step": 39820, "train_speed(iter/s)": 1.536572 }, { "acc": 0.9739584, "epoch": 18.666510428872744, "grad_norm": 3.2752299308776855, "learning_rate": 7.403018074440316e-06, "loss": 0.07537304, "memory(GiB)": 13.7, "step": 39825, "train_speed(iter/s)": 1.536581 }, { "acc": 0.97198324, "epoch": 18.66885399578158, "grad_norm": 7.790975570678711, "learning_rate": 7.402338298897736e-06, "loss": 0.13917663, "memory(GiB)": 13.7, "step": 39830, "train_speed(iter/s)": 1.536589 }, { "acc": 0.98505421, "epoch": 18.671197562690416, "grad_norm": 2.2430286407470703, "learning_rate": 7.401658465622854e-06, "loss": 0.05010242, "memory(GiB)": 13.7, "step": 39835, "train_speed(iter/s)": 1.5366 }, { "acc": 0.97972755, "epoch": 18.67354112959925, "grad_norm": 4.401370525360107, "learning_rate": 7.400978574632011e-06, "loss": 0.05519992, "memory(GiB)": 13.7, "step": 39840, "train_speed(iter/s)": 1.536606 }, { "acc": 0.97002983, "epoch": 18.675884696508085, "grad_norm": 4.689797878265381, "learning_rate": 7.40029862594155e-06, "loss": 0.07899734, "memory(GiB)": 13.7, "step": 39845, "train_speed(iter/s)": 1.536608 }, { "acc": 0.97062502, "epoch": 18.67822826341692, "grad_norm": 8.060094833374023, "learning_rate": 7.399618619567813e-06, "loss": 0.09184864, "memory(GiB)": 13.7, "step": 39850, "train_speed(iter/s)": 1.536608 }, { "acc": 0.97854338, "epoch": 18.680571830325757, "grad_norm": 1.0888899564743042, "learning_rate": 7.398938555527147e-06, "loss": 0.06643032, "memory(GiB)": 13.7, "step": 39855, "train_speed(iter/s)": 1.536607 }, { "acc": 0.96986847, "epoch": 18.68291539723459, "grad_norm": 6.479733943939209, "learning_rate": 7.3982584338358964e-06, "loss": 0.09381988, "memory(GiB)": 13.7, "step": 39860, "train_speed(iter/s)": 1.536613 }, { "acc": 0.96937504, "epoch": 18.685258964143426, "grad_norm": 5.617061138153076, "learning_rate": 7.397578254510411e-06, "loss": 0.15443292, "memory(GiB)": 13.7, "step": 39865, "train_speed(iter/s)": 1.536611 }, { "acc": 0.97163372, "epoch": 18.68760253105226, "grad_norm": 3.6530277729034424, "learning_rate": 7.396898017567041e-06, "loss": 0.11692905, "memory(GiB)": 13.7, "step": 39870, "train_speed(iter/s)": 1.536624 }, { "acc": 0.98151035, "epoch": 18.689946097961098, "grad_norm": 3.815140962600708, "learning_rate": 7.3962177230221334e-06, "loss": 0.09375904, "memory(GiB)": 13.7, "step": 39875, "train_speed(iter/s)": 1.536631 }, { "acc": 0.99008923, "epoch": 18.692289664869932, "grad_norm": 14.668928146362305, "learning_rate": 7.395537370892043e-06, "loss": 0.06938835, "memory(GiB)": 13.7, "step": 39880, "train_speed(iter/s)": 1.536636 }, { "acc": 0.98978624, "epoch": 18.694633231778766, "grad_norm": 6.532168388366699, "learning_rate": 7.3948569611931245e-06, "loss": 0.02723145, "memory(GiB)": 13.7, "step": 39885, "train_speed(iter/s)": 1.536644 }, { "acc": 0.97373104, "epoch": 18.6969767986876, "grad_norm": 6.5886993408203125, "learning_rate": 7.394176493941726e-06, "loss": 0.09253126, "memory(GiB)": 13.7, "step": 39890, "train_speed(iter/s)": 1.536646 }, { "acc": 0.98698864, "epoch": 18.69932036559644, "grad_norm": 4.268543243408203, "learning_rate": 7.393495969154212e-06, "loss": 0.04563822, "memory(GiB)": 13.7, "step": 39895, "train_speed(iter/s)": 1.536652 }, { "acc": 0.98602428, "epoch": 18.701663932505273, "grad_norm": 3.99056339263916, "learning_rate": 7.392815386846936e-06, "loss": 0.0446847, "memory(GiB)": 13.7, "step": 39900, "train_speed(iter/s)": 1.536662 }, { "acc": 0.9736187, "epoch": 18.704007499414107, "grad_norm": 2.1311872005462646, "learning_rate": 7.392134747036256e-06, "loss": 0.12183733, "memory(GiB)": 13.7, "step": 39905, "train_speed(iter/s)": 1.536667 }, { "acc": 0.98648815, "epoch": 18.706351066322945, "grad_norm": 0.15319274365901947, "learning_rate": 7.391454049738535e-06, "loss": 0.05100193, "memory(GiB)": 13.7, "step": 39910, "train_speed(iter/s)": 1.536668 }, { "acc": 0.98916664, "epoch": 18.70869463323178, "grad_norm": 3.394315719604492, "learning_rate": 7.390773294970133e-06, "loss": 0.02964945, "memory(GiB)": 13.7, "step": 39915, "train_speed(iter/s)": 1.536677 }, { "acc": 0.9770505, "epoch": 18.711038200140614, "grad_norm": 0.460126668214798, "learning_rate": 7.3900924827474145e-06, "loss": 0.0883171, "memory(GiB)": 13.7, "step": 39920, "train_speed(iter/s)": 1.53668 }, { "acc": 0.98467264, "epoch": 18.713381767049448, "grad_norm": 3.2115049362182617, "learning_rate": 7.3894116130867425e-06, "loss": 0.07185626, "memory(GiB)": 13.7, "step": 39925, "train_speed(iter/s)": 1.536671 }, { "acc": 0.97035713, "epoch": 18.715725333958286, "grad_norm": 7.754462718963623, "learning_rate": 7.388730686004483e-06, "loss": 0.13866285, "memory(GiB)": 13.7, "step": 39930, "train_speed(iter/s)": 1.536674 }, { "acc": 0.98954544, "epoch": 18.71806890086712, "grad_norm": 4.246372699737549, "learning_rate": 7.388049701517005e-06, "loss": 0.0377062, "memory(GiB)": 13.7, "step": 39935, "train_speed(iter/s)": 1.536675 }, { "acc": 0.96300602, "epoch": 18.720412467775954, "grad_norm": 5.498582363128662, "learning_rate": 7.387368659640674e-06, "loss": 0.10344577, "memory(GiB)": 13.7, "step": 39940, "train_speed(iter/s)": 1.536663 }, { "acc": 0.9856349, "epoch": 18.72275603468479, "grad_norm": 4.843034744262695, "learning_rate": 7.386687560391863e-06, "loss": 0.07523879, "memory(GiB)": 13.7, "step": 39945, "train_speed(iter/s)": 1.536663 }, { "acc": 0.9895833, "epoch": 18.725099601593627, "grad_norm": 4.980465888977051, "learning_rate": 7.386006403786944e-06, "loss": 0.04705892, "memory(GiB)": 13.7, "step": 39950, "train_speed(iter/s)": 1.536663 }, { "acc": 0.9871726, "epoch": 18.72744316850246, "grad_norm": 1.1825780868530273, "learning_rate": 7.385325189842286e-06, "loss": 0.09183912, "memory(GiB)": 13.7, "step": 39955, "train_speed(iter/s)": 1.536664 }, { "acc": 0.97458334, "epoch": 18.729786735411295, "grad_norm": 0.3823763132095337, "learning_rate": 7.384643918574266e-06, "loss": 0.09795882, "memory(GiB)": 13.7, "step": 39960, "train_speed(iter/s)": 1.536675 }, { "acc": 0.98288689, "epoch": 18.73213030232013, "grad_norm": 0.45142096281051636, "learning_rate": 7.383962589999257e-06, "loss": 0.06032417, "memory(GiB)": 13.7, "step": 39965, "train_speed(iter/s)": 1.536676 }, { "acc": 0.98639269, "epoch": 18.734473869228967, "grad_norm": 6.1322174072265625, "learning_rate": 7.383281204133641e-06, "loss": 0.09641654, "memory(GiB)": 13.7, "step": 39970, "train_speed(iter/s)": 1.536681 }, { "acc": 0.9655303, "epoch": 18.7368174361378, "grad_norm": 2.8205723762512207, "learning_rate": 7.38259976099379e-06, "loss": 0.1751851, "memory(GiB)": 13.7, "step": 39975, "train_speed(iter/s)": 1.536695 }, { "acc": 0.98287773, "epoch": 18.739161003046636, "grad_norm": 0.015722591429948807, "learning_rate": 7.381918260596087e-06, "loss": 0.05458595, "memory(GiB)": 13.7, "step": 39980, "train_speed(iter/s)": 1.53669 }, { "acc": 0.99283323, "epoch": 18.741504569955474, "grad_norm": 1.5012096166610718, "learning_rate": 7.3812367029569135e-06, "loss": 0.0420033, "memory(GiB)": 13.7, "step": 39985, "train_speed(iter/s)": 1.536694 }, { "acc": 0.978125, "epoch": 18.743848136864308, "grad_norm": 3.5711004734039307, "learning_rate": 7.380555088092652e-06, "loss": 0.08378823, "memory(GiB)": 13.7, "step": 39990, "train_speed(iter/s)": 1.536705 }, { "acc": 0.9833333, "epoch": 18.746191703773142, "grad_norm": 5.988626956939697, "learning_rate": 7.379873416019683e-06, "loss": 0.05964949, "memory(GiB)": 13.7, "step": 39995, "train_speed(iter/s)": 1.536709 }, { "acc": 0.97946434, "epoch": 18.748535270681977, "grad_norm": 4.175483703613281, "learning_rate": 7.379191686754393e-06, "loss": 0.05295863, "memory(GiB)": 13.7, "step": 40000, "train_speed(iter/s)": 1.536713 }, { "epoch": 18.748535270681977, "eval_acc": 0.7736224455385445, "eval_loss": 1.1022847890853882, "eval_runtime": 143.7617, "eval_samples_per_second": 56.121, "eval_steps_per_second": 7.019, "step": 40000 }, { "acc": 0.98117275, "epoch": 18.750878837590815, "grad_norm": 0.030877193436026573, "learning_rate": 7.378509900313172e-06, "loss": 0.12076373, "memory(GiB)": 13.7, "step": 40005, "train_speed(iter/s)": 1.526544 }, { "acc": 0.9801137, "epoch": 18.75322240449965, "grad_norm": 99.74201202392578, "learning_rate": 7.377828056712404e-06, "loss": 0.08041299, "memory(GiB)": 13.7, "step": 40010, "train_speed(iter/s)": 1.526556 }, { "acc": 0.97937498, "epoch": 18.755565971408483, "grad_norm": 3.068788528442383, "learning_rate": 7.377146155968481e-06, "loss": 0.19129782, "memory(GiB)": 13.7, "step": 40015, "train_speed(iter/s)": 1.526568 }, { "acc": 0.9739584, "epoch": 18.757909538317318, "grad_norm": 3.668531656265259, "learning_rate": 7.376464198097791e-06, "loss": 0.13206273, "memory(GiB)": 13.7, "step": 40020, "train_speed(iter/s)": 1.526565 }, { "acc": 0.98937502, "epoch": 18.760253105226155, "grad_norm": 3.229795217514038, "learning_rate": 7.375782183116729e-06, "loss": 0.0335568, "memory(GiB)": 13.7, "step": 40025, "train_speed(iter/s)": 1.526573 }, { "acc": 0.99070511, "epoch": 18.76259667213499, "grad_norm": 5.673612117767334, "learning_rate": 7.375100111041685e-06, "loss": 0.08314914, "memory(GiB)": 13.7, "step": 40030, "train_speed(iter/s)": 1.526582 }, { "acc": 0.97920008, "epoch": 18.764940239043824, "grad_norm": 13.416814804077148, "learning_rate": 7.374417981889056e-06, "loss": 0.14879736, "memory(GiB)": 13.7, "step": 40035, "train_speed(iter/s)": 1.526588 }, { "acc": 0.98016949, "epoch": 18.76728380595266, "grad_norm": 2.3649539947509766, "learning_rate": 7.373735795675237e-06, "loss": 0.09691454, "memory(GiB)": 13.7, "step": 40040, "train_speed(iter/s)": 1.526585 }, { "acc": 0.98937502, "epoch": 18.769627372861496, "grad_norm": 2.4656922817230225, "learning_rate": 7.373053552416628e-06, "loss": 0.14824898, "memory(GiB)": 13.7, "step": 40045, "train_speed(iter/s)": 1.526592 }, { "acc": 0.98167, "epoch": 18.77197093977033, "grad_norm": 0.7902591228485107, "learning_rate": 7.372371252129624e-06, "loss": 0.08136961, "memory(GiB)": 13.7, "step": 40050, "train_speed(iter/s)": 1.526597 }, { "acc": 0.97429161, "epoch": 18.774314506679165, "grad_norm": 3.8871805667877197, "learning_rate": 7.37168889483063e-06, "loss": 0.0503819, "memory(GiB)": 13.7, "step": 40055, "train_speed(iter/s)": 1.526595 }, { "acc": 0.99702387, "epoch": 18.776658073588003, "grad_norm": 1.6958192586898804, "learning_rate": 7.3710064805360414e-06, "loss": 0.02311594, "memory(GiB)": 13.7, "step": 40060, "train_speed(iter/s)": 1.526603 }, { "acc": 0.98031654, "epoch": 18.779001640496837, "grad_norm": 2.709693193435669, "learning_rate": 7.3703240092622665e-06, "loss": 0.12380285, "memory(GiB)": 13.7, "step": 40065, "train_speed(iter/s)": 1.526614 }, { "acc": 0.9744791, "epoch": 18.78134520740567, "grad_norm": 2.5868794918060303, "learning_rate": 7.369641481025708e-06, "loss": 0.07966954, "memory(GiB)": 13.7, "step": 40070, "train_speed(iter/s)": 1.526605 }, { "acc": 0.97665825, "epoch": 18.783688774314506, "grad_norm": 8.027547836303711, "learning_rate": 7.368958895842771e-06, "loss": 0.0875958, "memory(GiB)": 13.7, "step": 40075, "train_speed(iter/s)": 1.52661 }, { "acc": 0.99177656, "epoch": 18.786032341223343, "grad_norm": 2.8554162979125977, "learning_rate": 7.368276253729865e-06, "loss": 0.0506133, "memory(GiB)": 13.7, "step": 40080, "train_speed(iter/s)": 1.526611 }, { "acc": 0.9802084, "epoch": 18.788375908132178, "grad_norm": 5.774555206298828, "learning_rate": 7.367593554703393e-06, "loss": 0.07719389, "memory(GiB)": 13.7, "step": 40085, "train_speed(iter/s)": 1.526627 }, { "acc": 0.96395836, "epoch": 18.790719475041012, "grad_norm": 3.9308295249938965, "learning_rate": 7.366910798779772e-06, "loss": 0.16993822, "memory(GiB)": 13.7, "step": 40090, "train_speed(iter/s)": 1.526639 }, { "acc": 0.99423618, "epoch": 18.793063041949846, "grad_norm": 0.10772363096475601, "learning_rate": 7.3662279859754085e-06, "loss": 0.02657937, "memory(GiB)": 13.7, "step": 40095, "train_speed(iter/s)": 1.526644 }, { "acc": 0.9895834, "epoch": 18.795406608858684, "grad_norm": 6.263554096221924, "learning_rate": 7.365545116306716e-06, "loss": 0.050718, "memory(GiB)": 13.7, "step": 40100, "train_speed(iter/s)": 1.526652 }, { "acc": 0.9817709, "epoch": 18.79775017576752, "grad_norm": 7.014347076416016, "learning_rate": 7.364862189790108e-06, "loss": 0.09958082, "memory(GiB)": 13.7, "step": 40105, "train_speed(iter/s)": 1.526652 }, { "acc": 0.97456303, "epoch": 18.800093742676353, "grad_norm": 4.801268100738525, "learning_rate": 7.364179206442e-06, "loss": 0.11006119, "memory(GiB)": 13.7, "step": 40110, "train_speed(iter/s)": 1.52666 }, { "acc": 0.9739109, "epoch": 18.802437309585187, "grad_norm": 21.894855499267578, "learning_rate": 7.363496166278811e-06, "loss": 0.09660743, "memory(GiB)": 13.7, "step": 40115, "train_speed(iter/s)": 1.526667 }, { "acc": 0.98037138, "epoch": 18.804780876494025, "grad_norm": 5.363104820251465, "learning_rate": 7.362813069316956e-06, "loss": 0.05630692, "memory(GiB)": 13.7, "step": 40120, "train_speed(iter/s)": 1.526669 }, { "acc": 0.9776042, "epoch": 18.80712444340286, "grad_norm": 4.286142826080322, "learning_rate": 7.362129915572856e-06, "loss": 0.06825456, "memory(GiB)": 13.7, "step": 40125, "train_speed(iter/s)": 1.526672 }, { "acc": 0.98953123, "epoch": 18.809468010311694, "grad_norm": 2.0667717456817627, "learning_rate": 7.361446705062933e-06, "loss": 0.03381889, "memory(GiB)": 13.7, "step": 40130, "train_speed(iter/s)": 1.52667 }, { "acc": 0.98786316, "epoch": 18.81181157722053, "grad_norm": 3.7157366275787354, "learning_rate": 7.360763437803605e-06, "loss": 0.09178311, "memory(GiB)": 13.7, "step": 40135, "train_speed(iter/s)": 1.526677 }, { "acc": 0.98592262, "epoch": 18.814155144129366, "grad_norm": 5.376692771911621, "learning_rate": 7.3600801138113e-06, "loss": 0.06123707, "memory(GiB)": 13.7, "step": 40140, "train_speed(iter/s)": 1.52668 }, { "acc": 0.9822916, "epoch": 18.8164987110382, "grad_norm": 4.4178924560546875, "learning_rate": 7.359396733102439e-06, "loss": 0.04293005, "memory(GiB)": 13.7, "step": 40145, "train_speed(iter/s)": 1.526688 }, { "acc": 0.98187504, "epoch": 18.818842277947034, "grad_norm": 1.8086555004119873, "learning_rate": 7.358713295693451e-06, "loss": 0.06450881, "memory(GiB)": 13.7, "step": 40150, "train_speed(iter/s)": 1.526694 }, { "acc": 0.98946428, "epoch": 18.821185844855872, "grad_norm": 3.475438356399536, "learning_rate": 7.358029801600762e-06, "loss": 0.07671177, "memory(GiB)": 13.7, "step": 40155, "train_speed(iter/s)": 1.526694 }, { "acc": 0.97796879, "epoch": 18.823529411764707, "grad_norm": 6.55661153793335, "learning_rate": 7.3573462508408036e-06, "loss": 0.09744269, "memory(GiB)": 13.7, "step": 40160, "train_speed(iter/s)": 1.52669 }, { "acc": 0.98113098, "epoch": 18.82587297867354, "grad_norm": 1.2695903778076172, "learning_rate": 7.3566626434300025e-06, "loss": 0.07820166, "memory(GiB)": 13.7, "step": 40165, "train_speed(iter/s)": 1.526689 }, { "acc": 0.97458334, "epoch": 18.828216545582375, "grad_norm": 1.3080207109451294, "learning_rate": 7.355978979384792e-06, "loss": 0.08783114, "memory(GiB)": 13.7, "step": 40170, "train_speed(iter/s)": 1.526698 }, { "acc": 0.97619047, "epoch": 18.830560112491213, "grad_norm": 2.141996383666992, "learning_rate": 7.355295258721605e-06, "loss": 0.08784499, "memory(GiB)": 13.7, "step": 40175, "train_speed(iter/s)": 1.526703 }, { "acc": 0.98044872, "epoch": 18.832903679400047, "grad_norm": 2.3660011291503906, "learning_rate": 7.354611481456878e-06, "loss": 0.04803261, "memory(GiB)": 13.7, "step": 40180, "train_speed(iter/s)": 1.526716 }, { "acc": 0.98254471, "epoch": 18.83524724630888, "grad_norm": 3.8641366958618164, "learning_rate": 7.353927647607043e-06, "loss": 0.07966187, "memory(GiB)": 13.7, "step": 40185, "train_speed(iter/s)": 1.526718 }, { "acc": 0.98278904, "epoch": 18.837590813217716, "grad_norm": 3.8444337844848633, "learning_rate": 7.35324375718854e-06, "loss": 0.08056399, "memory(GiB)": 13.7, "step": 40190, "train_speed(iter/s)": 1.526714 }, { "acc": 0.98297157, "epoch": 18.839934380126554, "grad_norm": 0.9348568916320801, "learning_rate": 7.3525598102178054e-06, "loss": 0.0402468, "memory(GiB)": 13.7, "step": 40195, "train_speed(iter/s)": 1.526723 }, { "acc": 0.96383934, "epoch": 18.842277947035388, "grad_norm": 6.20135498046875, "learning_rate": 7.351875806711282e-06, "loss": 0.0933562, "memory(GiB)": 13.7, "step": 40200, "train_speed(iter/s)": 1.526721 }, { "acc": 0.98708334, "epoch": 18.844621513944222, "grad_norm": 0.17762374877929688, "learning_rate": 7.3511917466854076e-06, "loss": 0.03807399, "memory(GiB)": 13.7, "step": 40205, "train_speed(iter/s)": 1.526733 }, { "acc": 0.9791666, "epoch": 18.846965080853057, "grad_norm": 2.0099587440490723, "learning_rate": 7.350507630156626e-06, "loss": 0.08499048, "memory(GiB)": 13.7, "step": 40210, "train_speed(iter/s)": 1.526749 }, { "acc": 0.96758928, "epoch": 18.849308647761895, "grad_norm": 4.168842315673828, "learning_rate": 7.349823457141381e-06, "loss": 0.06765369, "memory(GiB)": 13.7, "step": 40215, "train_speed(iter/s)": 1.526754 }, { "acc": 0.97997589, "epoch": 18.85165221467073, "grad_norm": 4.080784797668457, "learning_rate": 7.349139227656119e-06, "loss": 0.07582618, "memory(GiB)": 13.7, "step": 40220, "train_speed(iter/s)": 1.526755 }, { "acc": 0.9796875, "epoch": 18.853995781579563, "grad_norm": 0.8129721283912659, "learning_rate": 7.348454941717287e-06, "loss": 0.04974619, "memory(GiB)": 13.7, "step": 40225, "train_speed(iter/s)": 1.526767 }, { "acc": 0.97517853, "epoch": 18.8563393484884, "grad_norm": 3.1519463062286377, "learning_rate": 7.3477705993413305e-06, "loss": 0.10136441, "memory(GiB)": 13.7, "step": 40230, "train_speed(iter/s)": 1.526762 }, { "acc": 0.95812502, "epoch": 18.858682915397235, "grad_norm": 6.334719657897949, "learning_rate": 7.3470862005447e-06, "loss": 0.20348201, "memory(GiB)": 13.7, "step": 40235, "train_speed(iter/s)": 1.526762 }, { "acc": 0.98531742, "epoch": 18.86102648230607, "grad_norm": 4.294219970703125, "learning_rate": 7.346401745343847e-06, "loss": 0.07305245, "memory(GiB)": 13.7, "step": 40240, "train_speed(iter/s)": 1.526757 }, { "acc": 0.96416664, "epoch": 18.863370049214904, "grad_norm": 3.3651437759399414, "learning_rate": 7.345717233755222e-06, "loss": 0.12932894, "memory(GiB)": 13.7, "step": 40245, "train_speed(iter/s)": 1.526769 }, { "acc": 0.98649483, "epoch": 18.86571361612374, "grad_norm": 1.5011563301086426, "learning_rate": 7.3450326657952794e-06, "loss": 0.0546337, "memory(GiB)": 13.7, "step": 40250, "train_speed(iter/s)": 1.526775 }, { "acc": 0.98331299, "epoch": 18.868057183032576, "grad_norm": 6.535890579223633, "learning_rate": 7.3443480414804754e-06, "loss": 0.07005705, "memory(GiB)": 13.7, "step": 40255, "train_speed(iter/s)": 1.526786 }, { "acc": 0.98710852, "epoch": 18.87040074994141, "grad_norm": 1.5882388353347778, "learning_rate": 7.343663360827264e-06, "loss": 0.02274484, "memory(GiB)": 13.7, "step": 40260, "train_speed(iter/s)": 1.52679 }, { "acc": 0.96479168, "epoch": 18.872744316850245, "grad_norm": 6.369162082672119, "learning_rate": 7.342978623852103e-06, "loss": 0.13723614, "memory(GiB)": 13.7, "step": 40265, "train_speed(iter/s)": 1.526786 }, { "acc": 0.97452383, "epoch": 18.875087883759083, "grad_norm": 4.5597381591796875, "learning_rate": 7.342293830571451e-06, "loss": 0.13018019, "memory(GiB)": 13.7, "step": 40270, "train_speed(iter/s)": 1.526792 }, { "acc": 0.99094696, "epoch": 18.877431450667917, "grad_norm": 2.048421621322632, "learning_rate": 7.34160898100177e-06, "loss": 0.03384556, "memory(GiB)": 13.7, "step": 40275, "train_speed(iter/s)": 1.526797 }, { "acc": 0.990625, "epoch": 18.87977501757675, "grad_norm": 3.904531240463257, "learning_rate": 7.340924075159519e-06, "loss": 0.04672147, "memory(GiB)": 13.7, "step": 40280, "train_speed(iter/s)": 1.526807 }, { "acc": 0.9818922, "epoch": 18.882118584485585, "grad_norm": 2.2368736267089844, "learning_rate": 7.340239113061163e-06, "loss": 0.06022965, "memory(GiB)": 13.7, "step": 40285, "train_speed(iter/s)": 1.52681 }, { "acc": 0.9877841, "epoch": 18.884462151394423, "grad_norm": 1.600461483001709, "learning_rate": 7.339554094723165e-06, "loss": 0.05524451, "memory(GiB)": 13.7, "step": 40290, "train_speed(iter/s)": 1.52682 }, { "acc": 0.99395838, "epoch": 18.886805718303258, "grad_norm": 2.9753100872039795, "learning_rate": 7.338869020161991e-06, "loss": 0.03237049, "memory(GiB)": 13.7, "step": 40295, "train_speed(iter/s)": 1.526824 }, { "acc": 0.9833334, "epoch": 18.889149285212092, "grad_norm": 2.419471502304077, "learning_rate": 7.338183889394107e-06, "loss": 0.14495053, "memory(GiB)": 13.7, "step": 40300, "train_speed(iter/s)": 1.526832 }, { "acc": 0.97911701, "epoch": 18.89149285212093, "grad_norm": 3.119689702987671, "learning_rate": 7.337498702435983e-06, "loss": 0.08490863, "memory(GiB)": 13.7, "step": 40305, "train_speed(iter/s)": 1.526834 }, { "acc": 0.97458334, "epoch": 18.893836419029764, "grad_norm": 3.5415990352630615, "learning_rate": 7.336813459304089e-06, "loss": 0.07708046, "memory(GiB)": 13.7, "step": 40310, "train_speed(iter/s)": 1.526841 }, { "acc": 0.9879261, "epoch": 18.8961799859386, "grad_norm": 4.822373867034912, "learning_rate": 7.336128160014891e-06, "loss": 0.04183065, "memory(GiB)": 13.7, "step": 40315, "train_speed(iter/s)": 1.526841 }, { "acc": 0.98135414, "epoch": 18.898523552847433, "grad_norm": 2.6464951038360596, "learning_rate": 7.335442804584868e-06, "loss": 0.09018583, "memory(GiB)": 13.7, "step": 40320, "train_speed(iter/s)": 1.526844 }, { "acc": 0.9822916, "epoch": 18.90086711975627, "grad_norm": 8.362930297851562, "learning_rate": 7.33475739303049e-06, "loss": 0.07764103, "memory(GiB)": 13.7, "step": 40325, "train_speed(iter/s)": 1.52685 }, { "acc": 0.9864584, "epoch": 18.903210686665105, "grad_norm": 3.6461987495422363, "learning_rate": 7.334071925368236e-06, "loss": 0.0602828, "memory(GiB)": 13.7, "step": 40330, "train_speed(iter/s)": 1.526858 }, { "acc": 0.96613102, "epoch": 18.90555425357394, "grad_norm": 3.3578407764434814, "learning_rate": 7.333386401614576e-06, "loss": 0.10205396, "memory(GiB)": 13.7, "step": 40335, "train_speed(iter/s)": 1.526858 }, { "acc": 0.99051466, "epoch": 18.907897820482773, "grad_norm": 3.2330305576324463, "learning_rate": 7.332700821785992e-06, "loss": 0.03478987, "memory(GiB)": 13.7, "step": 40340, "train_speed(iter/s)": 1.526864 }, { "acc": 0.98708334, "epoch": 18.91024138739161, "grad_norm": 4.075772762298584, "learning_rate": 7.332015185898961e-06, "loss": 0.05257331, "memory(GiB)": 13.7, "step": 40345, "train_speed(iter/s)": 1.526864 }, { "acc": 0.98968754, "epoch": 18.912584954300446, "grad_norm": 3.9825384616851807, "learning_rate": 7.331329493969968e-06, "loss": 0.02788137, "memory(GiB)": 13.7, "step": 40350, "train_speed(iter/s)": 1.526883 }, { "acc": 0.96967258, "epoch": 18.91492852120928, "grad_norm": 7.342205047607422, "learning_rate": 7.330643746015485e-06, "loss": 0.17391553, "memory(GiB)": 13.7, "step": 40355, "train_speed(iter/s)": 1.526903 }, { "acc": 0.98828869, "epoch": 18.917272088118114, "grad_norm": 6.365355968475342, "learning_rate": 7.329957942052007e-06, "loss": 0.06594635, "memory(GiB)": 13.7, "step": 40360, "train_speed(iter/s)": 1.526904 }, { "acc": 0.98206844, "epoch": 18.919615655026952, "grad_norm": 3.293994665145874, "learning_rate": 7.329272082096012e-06, "loss": 0.06071708, "memory(GiB)": 13.7, "step": 40365, "train_speed(iter/s)": 1.526905 }, { "acc": 0.97258015, "epoch": 18.921959221935786, "grad_norm": 8.806295394897461, "learning_rate": 7.328586166163986e-06, "loss": 0.10173039, "memory(GiB)": 13.7, "step": 40370, "train_speed(iter/s)": 1.526912 }, { "acc": 0.99144344, "epoch": 18.92430278884462, "grad_norm": 2.3341102600097656, "learning_rate": 7.327900194272417e-06, "loss": 0.04160963, "memory(GiB)": 13.7, "step": 40375, "train_speed(iter/s)": 1.526913 }, { "acc": 0.97766094, "epoch": 18.926646355753455, "grad_norm": 1.2229965925216675, "learning_rate": 7.327214166437793e-06, "loss": 0.0847551, "memory(GiB)": 13.7, "step": 40380, "train_speed(iter/s)": 1.526913 }, { "acc": 0.97841721, "epoch": 18.928989922662293, "grad_norm": 14.714266777038574, "learning_rate": 7.326528082676604e-06, "loss": 0.10114052, "memory(GiB)": 13.7, "step": 40385, "train_speed(iter/s)": 1.526921 }, { "acc": 0.98430805, "epoch": 18.931333489571127, "grad_norm": 0.9778875112533569, "learning_rate": 7.325841943005343e-06, "loss": 0.05445737, "memory(GiB)": 13.7, "step": 40390, "train_speed(iter/s)": 1.526931 }, { "acc": 0.98326397, "epoch": 18.93367705647996, "grad_norm": 0.07789541035890579, "learning_rate": 7.3251557474405015e-06, "loss": 0.04218949, "memory(GiB)": 13.7, "step": 40395, "train_speed(iter/s)": 1.526945 }, { "acc": 0.98104973, "epoch": 18.9360206233888, "grad_norm": 3.1067144870758057, "learning_rate": 7.324469495998569e-06, "loss": 0.08706341, "memory(GiB)": 13.7, "step": 40400, "train_speed(iter/s)": 1.526954 }, { "acc": 0.97989578, "epoch": 18.938364190297634, "grad_norm": 3.170701503753662, "learning_rate": 7.323783188696047e-06, "loss": 0.06398263, "memory(GiB)": 13.7, "step": 40405, "train_speed(iter/s)": 1.526957 }, { "acc": 0.99249992, "epoch": 18.940707757206468, "grad_norm": 0.45712000131607056, "learning_rate": 7.3230968255494305e-06, "loss": 0.03448865, "memory(GiB)": 13.7, "step": 40410, "train_speed(iter/s)": 1.526962 }, { "acc": 0.98159723, "epoch": 18.943051324115302, "grad_norm": 2.4363291263580322, "learning_rate": 7.322410406575217e-06, "loss": 0.06046237, "memory(GiB)": 13.7, "step": 40415, "train_speed(iter/s)": 1.526972 }, { "acc": 0.9802083, "epoch": 18.94539489102414, "grad_norm": 6.466107368469238, "learning_rate": 7.321723931789905e-06, "loss": 0.09391925, "memory(GiB)": 13.7, "step": 40420, "train_speed(iter/s)": 1.526976 }, { "acc": 0.98795795, "epoch": 18.947738457932974, "grad_norm": 6.381444454193115, "learning_rate": 7.321037401209994e-06, "loss": 0.06499511, "memory(GiB)": 13.7, "step": 40425, "train_speed(iter/s)": 1.526983 }, { "acc": 0.98224697, "epoch": 18.95008202484181, "grad_norm": 2.7145345211029053, "learning_rate": 7.320350814851989e-06, "loss": 0.11102011, "memory(GiB)": 13.7, "step": 40430, "train_speed(iter/s)": 1.526992 }, { "acc": 0.98673611, "epoch": 18.952425591750643, "grad_norm": 2.8069233894348145, "learning_rate": 7.319664172732392e-06, "loss": 0.04699164, "memory(GiB)": 13.7, "step": 40435, "train_speed(iter/s)": 1.527001 }, { "acc": 0.996875, "epoch": 18.95476915865948, "grad_norm": 2.4039742946624756, "learning_rate": 7.318977474867706e-06, "loss": 0.04213159, "memory(GiB)": 13.7, "step": 40440, "train_speed(iter/s)": 1.527011 }, { "acc": 0.98221722, "epoch": 18.957112725568315, "grad_norm": 3.267995834350586, "learning_rate": 7.3182907212744366e-06, "loss": 0.07793982, "memory(GiB)": 13.7, "step": 40445, "train_speed(iter/s)": 1.52702 }, { "acc": 0.97333698, "epoch": 18.95945629247715, "grad_norm": 3.052736759185791, "learning_rate": 7.317603911969097e-06, "loss": 0.09593439, "memory(GiB)": 13.7, "step": 40450, "train_speed(iter/s)": 1.527021 }, { "acc": 0.99593754, "epoch": 18.961799859385984, "grad_norm": 0.4898476302623749, "learning_rate": 7.316917046968188e-06, "loss": 0.0435811, "memory(GiB)": 13.7, "step": 40455, "train_speed(iter/s)": 1.527027 }, { "acc": 0.97984066, "epoch": 18.96414342629482, "grad_norm": 5.271886825561523, "learning_rate": 7.3162301262882256e-06, "loss": 0.08645053, "memory(GiB)": 13.7, "step": 40460, "train_speed(iter/s)": 1.527036 }, { "acc": 0.97104168, "epoch": 18.966486993203656, "grad_norm": 0.005362813826650381, "learning_rate": 7.315543149945718e-06, "loss": 0.12693095, "memory(GiB)": 13.7, "step": 40465, "train_speed(iter/s)": 1.527041 }, { "acc": 0.97695198, "epoch": 18.96883056011249, "grad_norm": 5.324333190917969, "learning_rate": 7.314856117957178e-06, "loss": 0.06580147, "memory(GiB)": 13.7, "step": 40470, "train_speed(iter/s)": 1.527053 }, { "acc": 0.98468742, "epoch": 18.971174127021328, "grad_norm": 6.682448863983154, "learning_rate": 7.314169030339121e-06, "loss": 0.0694802, "memory(GiB)": 13.7, "step": 40475, "train_speed(iter/s)": 1.527052 }, { "acc": 0.98800602, "epoch": 18.973517693930162, "grad_norm": 0.048770301043987274, "learning_rate": 7.3134818871080615e-06, "loss": 0.04112307, "memory(GiB)": 13.7, "step": 40480, "train_speed(iter/s)": 1.527059 }, { "acc": 0.98418159, "epoch": 18.975861260838997, "grad_norm": 1.1901462078094482, "learning_rate": 7.312794688280516e-06, "loss": 0.1064983, "memory(GiB)": 13.7, "step": 40485, "train_speed(iter/s)": 1.527065 }, { "acc": 0.96133928, "epoch": 18.97820482774783, "grad_norm": 4.60045051574707, "learning_rate": 7.312107433873001e-06, "loss": 0.14086719, "memory(GiB)": 13.7, "step": 40490, "train_speed(iter/s)": 1.527077 }, { "acc": 0.98153839, "epoch": 18.98054839465667, "grad_norm": 0.2000548392534256, "learning_rate": 7.3114201239020395e-06, "loss": 0.05920653, "memory(GiB)": 13.7, "step": 40495, "train_speed(iter/s)": 1.527081 }, { "acc": 0.98916664, "epoch": 18.982891961565503, "grad_norm": 0.7009406685829163, "learning_rate": 7.31073275838415e-06, "loss": 0.03868142, "memory(GiB)": 13.7, "step": 40500, "train_speed(iter/s)": 1.527082 }, { "acc": 0.97467937, "epoch": 18.985235528474337, "grad_norm": 8.911252975463867, "learning_rate": 7.310045337335855e-06, "loss": 0.14354215, "memory(GiB)": 13.7, "step": 40505, "train_speed(iter/s)": 1.527099 }, { "acc": 0.97145834, "epoch": 18.987579095383172, "grad_norm": 5.924921989440918, "learning_rate": 7.3093578607736755e-06, "loss": 0.14325814, "memory(GiB)": 13.7, "step": 40510, "train_speed(iter/s)": 1.527099 }, { "acc": 0.9639286, "epoch": 18.98992266229201, "grad_norm": 5.6561431884765625, "learning_rate": 7.308670328714141e-06, "loss": 0.14206929, "memory(GiB)": 13.7, "step": 40515, "train_speed(iter/s)": 1.52711 }, { "acc": 0.97875004, "epoch": 18.992266229200844, "grad_norm": 1.48638916015625, "learning_rate": 7.307982741173772e-06, "loss": 0.05726948, "memory(GiB)": 13.7, "step": 40520, "train_speed(iter/s)": 1.527114 }, { "acc": 0.9795928, "epoch": 18.99460979610968, "grad_norm": 3.9138877391815186, "learning_rate": 7.3072950981691e-06, "loss": 0.06739447, "memory(GiB)": 13.7, "step": 40525, "train_speed(iter/s)": 1.527117 }, { "acc": 0.98054104, "epoch": 18.996953363018513, "grad_norm": 3.0424327850341797, "learning_rate": 7.306607399716652e-06, "loss": 0.06770618, "memory(GiB)": 13.7, "step": 40530, "train_speed(iter/s)": 1.527112 }, { "acc": 0.98722591, "epoch": 18.99929692992735, "grad_norm": 0.5346555113792419, "learning_rate": 7.305919645832959e-06, "loss": 0.03742805, "memory(GiB)": 13.7, "step": 40535, "train_speed(iter/s)": 1.52711 }, { "acc": 0.98827114, "epoch": 19.001640496836185, "grad_norm": 5.612593650817871, "learning_rate": 7.305231836534552e-06, "loss": 0.06185616, "memory(GiB)": 13.7, "step": 40540, "train_speed(iter/s)": 1.52708 }, { "acc": 0.9874053, "epoch": 19.00398406374502, "grad_norm": 3.2623255252838135, "learning_rate": 7.304543971837962e-06, "loss": 0.05282832, "memory(GiB)": 13.7, "step": 40545, "train_speed(iter/s)": 1.527085 }, { "acc": 0.98292542, "epoch": 19.006327630653857, "grad_norm": 3.318492889404297, "learning_rate": 7.303856051759725e-06, "loss": 0.06434495, "memory(GiB)": 13.7, "step": 40550, "train_speed(iter/s)": 1.527089 }, { "acc": 0.97413378, "epoch": 19.00867119756269, "grad_norm": 0.8180360794067383, "learning_rate": 7.303168076316374e-06, "loss": 0.08847787, "memory(GiB)": 13.7, "step": 40555, "train_speed(iter/s)": 1.527091 }, { "acc": 0.97416668, "epoch": 19.011014764471525, "grad_norm": 1.3557342290878296, "learning_rate": 7.30248004552445e-06, "loss": 0.06636186, "memory(GiB)": 13.7, "step": 40560, "train_speed(iter/s)": 1.527097 }, { "acc": 0.97374458, "epoch": 19.01335833138036, "grad_norm": 98.5261001586914, "learning_rate": 7.301791959400486e-06, "loss": 0.07385533, "memory(GiB)": 13.7, "step": 40565, "train_speed(iter/s)": 1.527108 }, { "acc": 0.990625, "epoch": 19.015701898289198, "grad_norm": 5.975186347961426, "learning_rate": 7.301103817961025e-06, "loss": 0.06157674, "memory(GiB)": 13.7, "step": 40570, "train_speed(iter/s)": 1.527114 }, { "acc": 0.97807693, "epoch": 19.018045465198032, "grad_norm": 1.8725212812423706, "learning_rate": 7.300415621222605e-06, "loss": 0.08573978, "memory(GiB)": 13.7, "step": 40575, "train_speed(iter/s)": 1.527122 }, { "acc": 1.0, "epoch": 19.020389032106866, "grad_norm": 1.7351198196411133, "learning_rate": 7.299727369201771e-06, "loss": 0.03409621, "memory(GiB)": 13.7, "step": 40580, "train_speed(iter/s)": 1.527131 }, { "acc": 0.97596111, "epoch": 19.0227325990157, "grad_norm": 0.16858766973018646, "learning_rate": 7.299039061915062e-06, "loss": 0.09661869, "memory(GiB)": 13.7, "step": 40585, "train_speed(iter/s)": 1.527137 }, { "acc": 0.97712183, "epoch": 19.02507616592454, "grad_norm": 5.244099140167236, "learning_rate": 7.298350699379028e-06, "loss": 0.09577002, "memory(GiB)": 13.7, "step": 40590, "train_speed(iter/s)": 1.527136 }, { "acc": 0.98874998, "epoch": 19.027419732833373, "grad_norm": 4.714560031890869, "learning_rate": 7.297662281610211e-06, "loss": 0.13052866, "memory(GiB)": 13.7, "step": 40595, "train_speed(iter/s)": 1.527141 }, { "acc": 0.98500004, "epoch": 19.029763299742207, "grad_norm": 0.0035332406405359507, "learning_rate": 7.29697380862516e-06, "loss": 0.03178936, "memory(GiB)": 13.7, "step": 40600, "train_speed(iter/s)": 1.527135 }, { "acc": 0.99499998, "epoch": 19.03210686665104, "grad_norm": 7.41130256652832, "learning_rate": 7.2962852804404214e-06, "loss": 0.01595938, "memory(GiB)": 13.7, "step": 40605, "train_speed(iter/s)": 1.527134 }, { "acc": 0.98627682, "epoch": 19.03445043355988, "grad_norm": 3.8052008152008057, "learning_rate": 7.295596697072549e-06, "loss": 0.12849623, "memory(GiB)": 13.7, "step": 40610, "train_speed(iter/s)": 1.527136 }, { "acc": 0.98203125, "epoch": 19.036794000468714, "grad_norm": 4.4893479347229, "learning_rate": 7.294908058538091e-06, "loss": 0.07057986, "memory(GiB)": 13.7, "step": 40615, "train_speed(iter/s)": 1.527141 }, { "acc": 0.97965279, "epoch": 19.039137567377548, "grad_norm": 4.2541961669921875, "learning_rate": 7.294219364853602e-06, "loss": 0.09238576, "memory(GiB)": 13.7, "step": 40620, "train_speed(iter/s)": 1.527146 }, { "acc": 0.96654758, "epoch": 19.041481134286382, "grad_norm": 27.01740837097168, "learning_rate": 7.2935306160356345e-06, "loss": 0.1542376, "memory(GiB)": 13.7, "step": 40625, "train_speed(iter/s)": 1.527139 }, { "acc": 0.96791134, "epoch": 19.04382470119522, "grad_norm": 5.015472888946533, "learning_rate": 7.292841812100743e-06, "loss": 0.0972839, "memory(GiB)": 13.7, "step": 40630, "train_speed(iter/s)": 1.527137 }, { "acc": 0.98723955, "epoch": 19.046168268104054, "grad_norm": 4.219579219818115, "learning_rate": 7.292152953065485e-06, "loss": 0.07537162, "memory(GiB)": 13.7, "step": 40635, "train_speed(iter/s)": 1.52714 }, { "acc": 0.98610325, "epoch": 19.04851183501289, "grad_norm": 3.802624225616455, "learning_rate": 7.291464038946421e-06, "loss": 0.06608328, "memory(GiB)": 13.7, "step": 40640, "train_speed(iter/s)": 1.527145 }, { "acc": 0.9739584, "epoch": 19.050855401921726, "grad_norm": 1.4575910568237305, "learning_rate": 7.290775069760107e-06, "loss": 0.15939808, "memory(GiB)": 13.7, "step": 40645, "train_speed(iter/s)": 1.527152 }, { "acc": 0.97637787, "epoch": 19.05319896883056, "grad_norm": 8.30521011352539, "learning_rate": 7.290086045523104e-06, "loss": 0.092772, "memory(GiB)": 13.7, "step": 40650, "train_speed(iter/s)": 1.527153 }, { "acc": 0.98062496, "epoch": 19.055542535739395, "grad_norm": 3.775416851043701, "learning_rate": 7.289396966251974e-06, "loss": 0.10563364, "memory(GiB)": 13.7, "step": 40655, "train_speed(iter/s)": 1.527153 }, { "acc": 0.97909718, "epoch": 19.05788610264823, "grad_norm": 4.415823936462402, "learning_rate": 7.288707831963281e-06, "loss": 0.04650522, "memory(GiB)": 13.7, "step": 40660, "train_speed(iter/s)": 1.527158 }, { "acc": 0.97342262, "epoch": 19.060229669557067, "grad_norm": 3.1371071338653564, "learning_rate": 7.2880186426735886e-06, "loss": 0.15957413, "memory(GiB)": 13.7, "step": 40665, "train_speed(iter/s)": 1.52717 }, { "acc": 0.97562504, "epoch": 19.0625732364659, "grad_norm": 5.07778263092041, "learning_rate": 7.287329398399464e-06, "loss": 0.09765732, "memory(GiB)": 13.7, "step": 40670, "train_speed(iter/s)": 1.527186 }, { "acc": 0.99020834, "epoch": 19.064916803374736, "grad_norm": 0.8244647979736328, "learning_rate": 7.2866400991574724e-06, "loss": 0.05982959, "memory(GiB)": 13.7, "step": 40675, "train_speed(iter/s)": 1.527196 }, { "acc": 0.97695513, "epoch": 19.06726037028357, "grad_norm": 1.2921369075775146, "learning_rate": 7.285950744964184e-06, "loss": 0.09975902, "memory(GiB)": 13.7, "step": 40680, "train_speed(iter/s)": 1.527185 }, { "acc": 0.97232885, "epoch": 19.069603937192408, "grad_norm": 13.22964096069336, "learning_rate": 7.285261335836169e-06, "loss": 0.11567994, "memory(GiB)": 13.7, "step": 40685, "train_speed(iter/s)": 1.527191 }, { "acc": 0.96482954, "epoch": 19.071947504101242, "grad_norm": 3.4376308917999268, "learning_rate": 7.284571871789996e-06, "loss": 0.14779997, "memory(GiB)": 13.7, "step": 40690, "train_speed(iter/s)": 1.527194 }, { "acc": 0.98222218, "epoch": 19.074291071010077, "grad_norm": 1.2796865701675415, "learning_rate": 7.283882352842239e-06, "loss": 0.08797609, "memory(GiB)": 13.7, "step": 40695, "train_speed(iter/s)": 1.527193 }, { "acc": 0.98646784, "epoch": 19.07663463791891, "grad_norm": 4.964632987976074, "learning_rate": 7.2831927790094725e-06, "loss": 0.06282972, "memory(GiB)": 13.7, "step": 40700, "train_speed(iter/s)": 1.527184 }, { "acc": 0.99363098, "epoch": 19.07897820482775, "grad_norm": 4.515676975250244, "learning_rate": 7.282503150308271e-06, "loss": 0.02404293, "memory(GiB)": 13.7, "step": 40705, "train_speed(iter/s)": 1.52719 }, { "acc": 0.98374996, "epoch": 19.081321771736583, "grad_norm": 1.6685711145401, "learning_rate": 7.28181346675521e-06, "loss": 0.05904602, "memory(GiB)": 13.7, "step": 40710, "train_speed(iter/s)": 1.527188 }, { "acc": 0.98103628, "epoch": 19.083665338645417, "grad_norm": 0.03329179435968399, "learning_rate": 7.281123728366867e-06, "loss": 0.04482727, "memory(GiB)": 13.7, "step": 40715, "train_speed(iter/s)": 1.5272 }, { "acc": 0.98833332, "epoch": 19.086008905554255, "grad_norm": 3.8475966453552246, "learning_rate": 7.280433935159824e-06, "loss": 0.04994104, "memory(GiB)": 13.7, "step": 40720, "train_speed(iter/s)": 1.527212 }, { "acc": 0.9916338, "epoch": 19.08835247246309, "grad_norm": 4.370674133300781, "learning_rate": 7.279744087150659e-06, "loss": 0.06525351, "memory(GiB)": 13.7, "step": 40725, "train_speed(iter/s)": 1.527207 }, { "acc": 0.98291664, "epoch": 19.090696039371924, "grad_norm": 5.628533363342285, "learning_rate": 7.279054184355954e-06, "loss": 0.0637877, "memory(GiB)": 13.7, "step": 40730, "train_speed(iter/s)": 1.527205 }, { "acc": 0.990625, "epoch": 19.093039606280758, "grad_norm": 1.4915236234664917, "learning_rate": 7.27836422679229e-06, "loss": 0.05289719, "memory(GiB)": 13.7, "step": 40735, "train_speed(iter/s)": 1.527217 }, { "acc": 0.98133965, "epoch": 19.095383173189596, "grad_norm": 1.8262672424316406, "learning_rate": 7.277674214476256e-06, "loss": 0.07685848, "memory(GiB)": 13.7, "step": 40740, "train_speed(iter/s)": 1.52722 }, { "acc": 0.98767357, "epoch": 19.09772674009843, "grad_norm": 5.850494384765625, "learning_rate": 7.276984147424434e-06, "loss": 0.06198493, "memory(GiB)": 13.7, "step": 40745, "train_speed(iter/s)": 1.527226 }, { "acc": 0.96585083, "epoch": 19.100070307007265, "grad_norm": 6.431507110595703, "learning_rate": 7.276294025653412e-06, "loss": 0.12736702, "memory(GiB)": 13.7, "step": 40750, "train_speed(iter/s)": 1.527241 }, { "acc": 0.98282204, "epoch": 19.1024138739161, "grad_norm": 1.3082776069641113, "learning_rate": 7.275603849179778e-06, "loss": 0.08677913, "memory(GiB)": 13.7, "step": 40755, "train_speed(iter/s)": 1.527245 }, { "acc": 0.98874006, "epoch": 19.104757440824937, "grad_norm": 2.483624219894409, "learning_rate": 7.274913618020122e-06, "loss": 0.08851264, "memory(GiB)": 13.7, "step": 40760, "train_speed(iter/s)": 1.527257 }, { "acc": 0.98916664, "epoch": 19.10710100773377, "grad_norm": 5.486103057861328, "learning_rate": 7.274223332191034e-06, "loss": 0.03897981, "memory(GiB)": 13.7, "step": 40765, "train_speed(iter/s)": 1.527261 }, { "acc": 0.9791667, "epoch": 19.109444574642605, "grad_norm": 1.3349860906600952, "learning_rate": 7.273532991709108e-06, "loss": 0.06870196, "memory(GiB)": 13.7, "step": 40770, "train_speed(iter/s)": 1.527267 }, { "acc": 0.99385672, "epoch": 19.11178814155144, "grad_norm": 0.03220184147357941, "learning_rate": 7.272842596590936e-06, "loss": 0.06352448, "memory(GiB)": 13.7, "step": 40775, "train_speed(iter/s)": 1.527272 }, { "acc": 0.98755417, "epoch": 19.114131708460278, "grad_norm": 5.788412094116211, "learning_rate": 7.272152146853112e-06, "loss": 0.04532607, "memory(GiB)": 13.7, "step": 40780, "train_speed(iter/s)": 1.527274 }, { "acc": 0.9874054, "epoch": 19.116475275369112, "grad_norm": 5.008090496063232, "learning_rate": 7.271461642512233e-06, "loss": 0.0456706, "memory(GiB)": 13.7, "step": 40785, "train_speed(iter/s)": 1.527285 }, { "acc": 0.98279762, "epoch": 19.118818842277946, "grad_norm": 4.903613090515137, "learning_rate": 7.2707710835849e-06, "loss": 0.05118472, "memory(GiB)": 13.7, "step": 40790, "train_speed(iter/s)": 1.527299 }, { "acc": 0.98017921, "epoch": 19.121162409186784, "grad_norm": 4.326495170593262, "learning_rate": 7.270080470087707e-06, "loss": 0.07494926, "memory(GiB)": 13.7, "step": 40795, "train_speed(iter/s)": 1.527318 }, { "acc": 0.97666664, "epoch": 19.12350597609562, "grad_norm": 0.27004286646842957, "learning_rate": 7.269389802037255e-06, "loss": 0.07702641, "memory(GiB)": 13.7, "step": 40800, "train_speed(iter/s)": 1.527322 }, { "acc": 0.96571426, "epoch": 19.125849543004453, "grad_norm": 3.953763961791992, "learning_rate": 7.268699079450145e-06, "loss": 0.09718388, "memory(GiB)": 13.7, "step": 40805, "train_speed(iter/s)": 1.527329 }, { "acc": 0.978125, "epoch": 19.128193109913287, "grad_norm": 9.334817886352539, "learning_rate": 7.2680083023429835e-06, "loss": 0.08648086, "memory(GiB)": 13.7, "step": 40810, "train_speed(iter/s)": 1.527332 }, { "acc": 0.98675594, "epoch": 19.130536676822125, "grad_norm": 3.3848013877868652, "learning_rate": 7.267317470732369e-06, "loss": 0.0520898, "memory(GiB)": 13.7, "step": 40815, "train_speed(iter/s)": 1.527347 }, { "acc": 0.97487183, "epoch": 19.13288024373096, "grad_norm": 5.664552211761475, "learning_rate": 7.266626584634911e-06, "loss": 0.0749598, "memory(GiB)": 13.7, "step": 40820, "train_speed(iter/s)": 1.52735 }, { "acc": 0.98317566, "epoch": 19.135223810639793, "grad_norm": 3.074570417404175, "learning_rate": 7.265935644067215e-06, "loss": 0.10914361, "memory(GiB)": 13.7, "step": 40825, "train_speed(iter/s)": 1.527359 }, { "acc": 0.98708344, "epoch": 19.137567377548628, "grad_norm": 0.6385067701339722, "learning_rate": 7.26524464904589e-06, "loss": 0.04404481, "memory(GiB)": 13.7, "step": 40830, "train_speed(iter/s)": 1.527372 }, { "acc": 0.98119297, "epoch": 19.139910944457466, "grad_norm": 4.02125358581543, "learning_rate": 7.264553599587541e-06, "loss": 0.13373679, "memory(GiB)": 13.7, "step": 40835, "train_speed(iter/s)": 1.527375 }, { "acc": 0.96840782, "epoch": 19.1422545113663, "grad_norm": 7.147536754608154, "learning_rate": 7.263862495708782e-06, "loss": 0.12950213, "memory(GiB)": 13.7, "step": 40840, "train_speed(iter/s)": 1.527384 }, { "acc": 0.99513893, "epoch": 19.144598078275134, "grad_norm": 3.788177490234375, "learning_rate": 7.263171337426224e-06, "loss": 0.03628491, "memory(GiB)": 13.7, "step": 40845, "train_speed(iter/s)": 1.527383 }, { "acc": 0.97300596, "epoch": 19.14694164518397, "grad_norm": 3.6102256774902344, "learning_rate": 7.262480124756482e-06, "loss": 0.10029356, "memory(GiB)": 13.7, "step": 40850, "train_speed(iter/s)": 1.527386 }, { "acc": 0.9825695, "epoch": 19.149285212092806, "grad_norm": 0.11605086922645569, "learning_rate": 7.261788857716169e-06, "loss": 0.08388588, "memory(GiB)": 13.7, "step": 40855, "train_speed(iter/s)": 1.527389 }, { "acc": 0.98659096, "epoch": 19.15162877900164, "grad_norm": 2.7455086708068848, "learning_rate": 7.261097536321901e-06, "loss": 0.07592602, "memory(GiB)": 13.7, "step": 40860, "train_speed(iter/s)": 1.527393 }, { "acc": 0.975, "epoch": 19.153972345910475, "grad_norm": 6.9292802810668945, "learning_rate": 7.260406160590294e-06, "loss": 0.07725756, "memory(GiB)": 13.7, "step": 40865, "train_speed(iter/s)": 1.527404 }, { "acc": 0.96215286, "epoch": 19.15631591281931, "grad_norm": 4.785976886749268, "learning_rate": 7.259714730537967e-06, "loss": 0.15308607, "memory(GiB)": 13.7, "step": 40870, "train_speed(iter/s)": 1.527411 }, { "acc": 0.98154764, "epoch": 19.158659479728147, "grad_norm": 0.477171391248703, "learning_rate": 7.2590232461815405e-06, "loss": 0.06690663, "memory(GiB)": 13.7, "step": 40875, "train_speed(iter/s)": 1.527408 }, { "acc": 0.98325729, "epoch": 19.16100304663698, "grad_norm": 3.8170652389526367, "learning_rate": 7.258331707537633e-06, "loss": 0.08242735, "memory(GiB)": 13.7, "step": 40880, "train_speed(iter/s)": 1.52742 }, { "acc": 0.9882143, "epoch": 19.163346613545816, "grad_norm": 0.012331929989159107, "learning_rate": 7.257640114622872e-06, "loss": 0.06659079, "memory(GiB)": 13.7, "step": 40885, "train_speed(iter/s)": 1.527432 }, { "acc": 0.98423615, "epoch": 19.165690180454654, "grad_norm": 2.981367349624634, "learning_rate": 7.256948467453875e-06, "loss": 0.03993378, "memory(GiB)": 13.7, "step": 40890, "train_speed(iter/s)": 1.527436 }, { "acc": 0.97781258, "epoch": 19.168033747363488, "grad_norm": 3.559007406234741, "learning_rate": 7.2562567660472724e-06, "loss": 0.08350036, "memory(GiB)": 13.7, "step": 40895, "train_speed(iter/s)": 1.527438 }, { "acc": 0.99161787, "epoch": 19.170377314272322, "grad_norm": 3.1786599159240723, "learning_rate": 7.2555650104196866e-06, "loss": 0.04081741, "memory(GiB)": 13.7, "step": 40900, "train_speed(iter/s)": 1.527433 }, { "acc": 0.97790184, "epoch": 19.172720881181156, "grad_norm": 5.770028114318848, "learning_rate": 7.254873200587747e-06, "loss": 0.09567906, "memory(GiB)": 13.7, "step": 40905, "train_speed(iter/s)": 1.527439 }, { "acc": 0.99229164, "epoch": 19.175064448089994, "grad_norm": 1.7530348300933838, "learning_rate": 7.254181336568081e-06, "loss": 0.04153556, "memory(GiB)": 13.7, "step": 40910, "train_speed(iter/s)": 1.527453 }, { "acc": 0.97934341, "epoch": 19.17740801499883, "grad_norm": 2.053086757659912, "learning_rate": 7.253489418377321e-06, "loss": 0.11720377, "memory(GiB)": 13.7, "step": 40915, "train_speed(iter/s)": 1.527457 }, { "acc": 0.96645832, "epoch": 19.179751581907663, "grad_norm": 4.2611470222473145, "learning_rate": 7.252797446032096e-06, "loss": 0.09387694, "memory(GiB)": 13.7, "step": 40920, "train_speed(iter/s)": 1.52747 }, { "acc": 0.97458324, "epoch": 19.182095148816497, "grad_norm": 6.561271667480469, "learning_rate": 7.252105419549039e-06, "loss": 0.08607873, "memory(GiB)": 13.7, "step": 40925, "train_speed(iter/s)": 1.527475 }, { "acc": 0.996875, "epoch": 19.184438715725335, "grad_norm": 0.613342821598053, "learning_rate": 7.251413338944786e-06, "loss": 0.01170589, "memory(GiB)": 13.7, "step": 40930, "train_speed(iter/s)": 1.527467 }, { "acc": 0.97672625, "epoch": 19.18678228263417, "grad_norm": 3.4574384689331055, "learning_rate": 7.250721204235971e-06, "loss": 0.0914344, "memory(GiB)": 13.7, "step": 40935, "train_speed(iter/s)": 1.52748 }, { "acc": 0.98812504, "epoch": 19.189125849543004, "grad_norm": 3.9538307189941406, "learning_rate": 7.250029015439231e-06, "loss": 0.04074007, "memory(GiB)": 13.7, "step": 40940, "train_speed(iter/s)": 1.527473 }, { "acc": 0.98915215, "epoch": 19.191469416451838, "grad_norm": 2.405350923538208, "learning_rate": 7.249336772571203e-06, "loss": 0.04738166, "memory(GiB)": 13.7, "step": 40945, "train_speed(iter/s)": 1.527482 }, { "acc": 0.97546701, "epoch": 19.193812983360676, "grad_norm": 6.897458076477051, "learning_rate": 7.248644475648528e-06, "loss": 0.13495384, "memory(GiB)": 13.7, "step": 40950, "train_speed(iter/s)": 1.527483 }, { "acc": 0.96747017, "epoch": 19.19615655026951, "grad_norm": 3.0742695331573486, "learning_rate": 7.247952124687844e-06, "loss": 0.18851941, "memory(GiB)": 13.7, "step": 40955, "train_speed(iter/s)": 1.527492 }, { "acc": 0.98369045, "epoch": 19.198500117178344, "grad_norm": 1.2118760347366333, "learning_rate": 7.247259719705796e-06, "loss": 0.07655265, "memory(GiB)": 13.7, "step": 40960, "train_speed(iter/s)": 1.527487 }, { "acc": 0.98207798, "epoch": 19.200843684087182, "grad_norm": 7.484114646911621, "learning_rate": 7.246567260719024e-06, "loss": 0.04860536, "memory(GiB)": 13.7, "step": 40965, "train_speed(iter/s)": 1.52749 }, { "acc": 0.97895832, "epoch": 19.203187250996017, "grad_norm": 4.5337419509887695, "learning_rate": 7.245874747744176e-06, "loss": 0.05498976, "memory(GiB)": 13.7, "step": 40970, "train_speed(iter/s)": 1.527504 }, { "acc": 0.98363094, "epoch": 19.20553081790485, "grad_norm": 3.981499671936035, "learning_rate": 7.245182180797894e-06, "loss": 0.07089869, "memory(GiB)": 13.7, "step": 40975, "train_speed(iter/s)": 1.52751 }, { "acc": 0.98135414, "epoch": 19.207874384813685, "grad_norm": 4.913613796234131, "learning_rate": 7.244489559896826e-06, "loss": 0.0604543, "memory(GiB)": 13.7, "step": 40980, "train_speed(iter/s)": 1.527515 }, { "acc": 0.98932686, "epoch": 19.210217951722523, "grad_norm": 1.8073872327804565, "learning_rate": 7.243796885057625e-06, "loss": 0.04946855, "memory(GiB)": 13.7, "step": 40985, "train_speed(iter/s)": 1.527527 }, { "acc": 0.975947, "epoch": 19.212561518631357, "grad_norm": 1.4570878744125366, "learning_rate": 7.243104156296933e-06, "loss": 0.11618199, "memory(GiB)": 13.7, "step": 40990, "train_speed(iter/s)": 1.52754 }, { "acc": 0.97395287, "epoch": 19.21490508554019, "grad_norm": 1.5135570764541626, "learning_rate": 7.2424113736314054e-06, "loss": 0.09511563, "memory(GiB)": 13.7, "step": 40995, "train_speed(iter/s)": 1.527555 }, { "acc": 0.98553028, "epoch": 19.217248652449026, "grad_norm": 3.807243824005127, "learning_rate": 7.2417185370776935e-06, "loss": 0.04941288, "memory(GiB)": 13.7, "step": 41000, "train_speed(iter/s)": 1.527563 }, { "acc": 0.97614584, "epoch": 19.219592219357864, "grad_norm": 3.524779796600342, "learning_rate": 7.241025646652451e-06, "loss": 0.07935728, "memory(GiB)": 13.7, "step": 41005, "train_speed(iter/s)": 1.527576 }, { "acc": 0.98447914, "epoch": 19.221935786266698, "grad_norm": 4.135942459106445, "learning_rate": 7.240332702372332e-06, "loss": 0.06460672, "memory(GiB)": 13.7, "step": 41010, "train_speed(iter/s)": 1.527586 }, { "acc": 0.98104172, "epoch": 19.224279353175532, "grad_norm": 7.1795549392700195, "learning_rate": 7.239639704253995e-06, "loss": 0.10831199, "memory(GiB)": 13.7, "step": 41015, "train_speed(iter/s)": 1.527592 }, { "acc": 0.98544636, "epoch": 19.226622920084367, "grad_norm": 4.728976726531982, "learning_rate": 7.238946652314095e-06, "loss": 0.05841358, "memory(GiB)": 13.7, "step": 41020, "train_speed(iter/s)": 1.527593 }, { "acc": 0.97093687, "epoch": 19.228966486993205, "grad_norm": 8.255484580993652, "learning_rate": 7.238253546569291e-06, "loss": 0.13391824, "memory(GiB)": 13.7, "step": 41025, "train_speed(iter/s)": 1.5276 }, { "acc": 0.96976194, "epoch": 19.23131005390204, "grad_norm": 5.061893939971924, "learning_rate": 7.237560387036243e-06, "loss": 0.09461778, "memory(GiB)": 13.7, "step": 41030, "train_speed(iter/s)": 1.527595 }, { "acc": 0.9624054, "epoch": 19.233653620810873, "grad_norm": 63.59672546386719, "learning_rate": 7.236867173731613e-06, "loss": 0.20840242, "memory(GiB)": 13.7, "step": 41035, "train_speed(iter/s)": 1.527604 }, { "acc": 0.98576393, "epoch": 19.23599718771971, "grad_norm": 12.557963371276855, "learning_rate": 7.236173906672064e-06, "loss": 0.1077206, "memory(GiB)": 13.7, "step": 41040, "train_speed(iter/s)": 1.527601 }, { "acc": 0.98261356, "epoch": 19.238340754628545, "grad_norm": 3.389730930328369, "learning_rate": 7.235480585874258e-06, "loss": 0.04271115, "memory(GiB)": 13.7, "step": 41045, "train_speed(iter/s)": 1.5276 }, { "acc": 0.98759155, "epoch": 19.24068432153738, "grad_norm": 0.7403544783592224, "learning_rate": 7.234787211354861e-06, "loss": 0.09331434, "memory(GiB)": 13.7, "step": 41050, "train_speed(iter/s)": 1.527603 }, { "acc": 0.97931089, "epoch": 19.243027888446214, "grad_norm": 3.757882595062256, "learning_rate": 7.23409378313054e-06, "loss": 0.07078798, "memory(GiB)": 13.7, "step": 41055, "train_speed(iter/s)": 1.527613 }, { "acc": 0.98310871, "epoch": 19.245371455355052, "grad_norm": 36.834659576416016, "learning_rate": 7.233400301217962e-06, "loss": 0.11158459, "memory(GiB)": 13.7, "step": 41060, "train_speed(iter/s)": 1.527608 }, { "acc": 0.97120838, "epoch": 19.247715022263886, "grad_norm": 5.670071125030518, "learning_rate": 7.232706765633798e-06, "loss": 0.14205379, "memory(GiB)": 13.7, "step": 41065, "train_speed(iter/s)": 1.527619 }, { "acc": 0.99434528, "epoch": 19.25005858917272, "grad_norm": 2.199228048324585, "learning_rate": 7.2320131763947156e-06, "loss": 0.03927977, "memory(GiB)": 13.7, "step": 41070, "train_speed(iter/s)": 1.527635 }, { "acc": 0.978125, "epoch": 19.252402156081555, "grad_norm": 0.6680294275283813, "learning_rate": 7.2313195335173865e-06, "loss": 0.07953693, "memory(GiB)": 13.7, "step": 41075, "train_speed(iter/s)": 1.527633 }, { "acc": 0.9739584, "epoch": 19.254745722990393, "grad_norm": 4.348896503448486, "learning_rate": 7.230625837018488e-06, "loss": 0.051624, "memory(GiB)": 13.7, "step": 41080, "train_speed(iter/s)": 1.527633 }, { "acc": 0.97673616, "epoch": 19.257089289899227, "grad_norm": 4.567074298858643, "learning_rate": 7.229932086914687e-06, "loss": 0.05160415, "memory(GiB)": 13.7, "step": 41085, "train_speed(iter/s)": 1.527629 }, { "acc": 0.9947917, "epoch": 19.25943285680806, "grad_norm": 3.8711812496185303, "learning_rate": 7.229238283222665e-06, "loss": 0.02900038, "memory(GiB)": 13.7, "step": 41090, "train_speed(iter/s)": 1.527632 }, { "acc": 0.97882442, "epoch": 19.261776423716896, "grad_norm": 5.032181739807129, "learning_rate": 7.228544425959096e-06, "loss": 0.08360716, "memory(GiB)": 13.7, "step": 41095, "train_speed(iter/s)": 1.527635 }, { "acc": 0.97459278, "epoch": 19.264119990625733, "grad_norm": 2.3665900230407715, "learning_rate": 7.2278505151406575e-06, "loss": 0.09308731, "memory(GiB)": 13.7, "step": 41100, "train_speed(iter/s)": 1.527637 }, { "acc": 0.97886362, "epoch": 19.266463557534568, "grad_norm": 6.697636604309082, "learning_rate": 7.2271565507840325e-06, "loss": 0.08950018, "memory(GiB)": 13.7, "step": 41105, "train_speed(iter/s)": 1.527638 }, { "acc": 0.9689682, "epoch": 19.268807124443402, "grad_norm": 8.171992301940918, "learning_rate": 7.226462532905898e-06, "loss": 0.1778504, "memory(GiB)": 13.7, "step": 41110, "train_speed(iter/s)": 1.527646 }, { "acc": 0.96144352, "epoch": 19.27115069135224, "grad_norm": 7.917842388153076, "learning_rate": 7.225768461522937e-06, "loss": 0.12818148, "memory(GiB)": 13.7, "step": 41115, "train_speed(iter/s)": 1.527651 }, { "acc": 0.96102161, "epoch": 19.273494258261074, "grad_norm": 4.1373748779296875, "learning_rate": 7.225074336651833e-06, "loss": 0.14958559, "memory(GiB)": 13.7, "step": 41120, "train_speed(iter/s)": 1.527665 }, { "acc": 0.97822914, "epoch": 19.27583782516991, "grad_norm": 5.258325099945068, "learning_rate": 7.2243801583092696e-06, "loss": 0.0800577, "memory(GiB)": 13.7, "step": 41125, "train_speed(iter/s)": 1.527668 }, { "acc": 0.96995039, "epoch": 19.278181392078743, "grad_norm": 4.266520023345947, "learning_rate": 7.223685926511933e-06, "loss": 0.20325177, "memory(GiB)": 13.7, "step": 41130, "train_speed(iter/s)": 1.527668 }, { "acc": 0.97925596, "epoch": 19.28052495898758, "grad_norm": 4.22844934463501, "learning_rate": 7.222991641276512e-06, "loss": 0.07767604, "memory(GiB)": 13.7, "step": 41135, "train_speed(iter/s)": 1.527678 }, { "acc": 0.98274803, "epoch": 19.282868525896415, "grad_norm": 7.205423355102539, "learning_rate": 7.222297302619692e-06, "loss": 0.11841425, "memory(GiB)": 13.7, "step": 41140, "train_speed(iter/s)": 1.527682 }, { "acc": 0.9707386, "epoch": 19.28521209280525, "grad_norm": 1.2680764198303223, "learning_rate": 7.221602910558166e-06, "loss": 0.09493526, "memory(GiB)": 13.7, "step": 41145, "train_speed(iter/s)": 1.527694 }, { "acc": 0.99136906, "epoch": 19.287555659714084, "grad_norm": 5.875191688537598, "learning_rate": 7.22090846510862e-06, "loss": 0.03733027, "memory(GiB)": 13.7, "step": 41150, "train_speed(iter/s)": 1.527689 }, { "acc": 0.98571434, "epoch": 19.28989922662292, "grad_norm": 4.5427165031433105, "learning_rate": 7.2202139662877515e-06, "loss": 0.09168602, "memory(GiB)": 13.7, "step": 41155, "train_speed(iter/s)": 1.527682 }, { "acc": 0.98055553, "epoch": 19.292242793531756, "grad_norm": 3.596388339996338, "learning_rate": 7.21951941411225e-06, "loss": 0.0332238, "memory(GiB)": 13.7, "step": 41160, "train_speed(iter/s)": 1.527701 }, { "acc": 0.98911514, "epoch": 19.29458636044059, "grad_norm": 0.013634721748530865, "learning_rate": 7.218824808598813e-06, "loss": 0.04724454, "memory(GiB)": 13.7, "step": 41165, "train_speed(iter/s)": 1.527707 }, { "acc": 0.98133926, "epoch": 19.296929927349424, "grad_norm": 0.03015347756445408, "learning_rate": 7.218130149764134e-06, "loss": 0.06102091, "memory(GiB)": 13.7, "step": 41170, "train_speed(iter/s)": 1.527717 }, { "acc": 0.98451843, "epoch": 19.299273494258262, "grad_norm": 2.4038925170898438, "learning_rate": 7.217435437624913e-06, "loss": 0.09880491, "memory(GiB)": 13.7, "step": 41175, "train_speed(iter/s)": 1.527724 }, { "acc": 0.98560095, "epoch": 19.301617061167097, "grad_norm": 4.33309268951416, "learning_rate": 7.2167406721978474e-06, "loss": 0.05789797, "memory(GiB)": 13.7, "step": 41180, "train_speed(iter/s)": 1.527724 }, { "acc": 0.9751874, "epoch": 19.30396062807593, "grad_norm": 7.392646789550781, "learning_rate": 7.216045853499636e-06, "loss": 0.09086185, "memory(GiB)": 13.7, "step": 41185, "train_speed(iter/s)": 1.527717 }, { "acc": 0.98291664, "epoch": 19.306304194984765, "grad_norm": 3.733224630355835, "learning_rate": 7.215350981546981e-06, "loss": 0.04802704, "memory(GiB)": 13.7, "step": 41190, "train_speed(iter/s)": 1.527722 }, { "acc": 0.98598289, "epoch": 19.308647761893603, "grad_norm": 1.4910802841186523, "learning_rate": 7.214656056356585e-06, "loss": 0.06490544, "memory(GiB)": 13.7, "step": 41195, "train_speed(iter/s)": 1.527733 }, { "acc": 0.98517218, "epoch": 19.310991328802437, "grad_norm": 1.929396390914917, "learning_rate": 7.213961077945151e-06, "loss": 0.0591152, "memory(GiB)": 13.7, "step": 41200, "train_speed(iter/s)": 1.527737 }, { "acc": 0.989256, "epoch": 19.31333489571127, "grad_norm": 0.0637800320982933, "learning_rate": 7.213266046329385e-06, "loss": 0.03848526, "memory(GiB)": 13.7, "step": 41205, "train_speed(iter/s)": 1.527754 }, { "acc": 0.9916667, "epoch": 19.31567846262011, "grad_norm": 0.7546210289001465, "learning_rate": 7.212570961525994e-06, "loss": 0.02691688, "memory(GiB)": 13.7, "step": 41210, "train_speed(iter/s)": 1.527767 }, { "acc": 0.99338818, "epoch": 19.318022029528944, "grad_norm": 0.4647310972213745, "learning_rate": 7.2118758235516815e-06, "loss": 0.03734732, "memory(GiB)": 13.7, "step": 41215, "train_speed(iter/s)": 1.527767 }, { "acc": 0.99107647, "epoch": 19.320365596437778, "grad_norm": 0.7259852886199951, "learning_rate": 7.2111806324231604e-06, "loss": 0.03699391, "memory(GiB)": 13.7, "step": 41220, "train_speed(iter/s)": 1.527764 }, { "acc": 0.97250004, "epoch": 19.322709163346612, "grad_norm": 13.090632438659668, "learning_rate": 7.210485388157141e-06, "loss": 0.10982242, "memory(GiB)": 13.7, "step": 41225, "train_speed(iter/s)": 1.527769 }, { "acc": 0.9723485, "epoch": 19.32505273025545, "grad_norm": 0.5884328484535217, "learning_rate": 7.209790090770332e-06, "loss": 0.15231938, "memory(GiB)": 13.7, "step": 41230, "train_speed(iter/s)": 1.52777 }, { "acc": 0.98291092, "epoch": 19.327396297164285, "grad_norm": 5.226930141448975, "learning_rate": 7.209094740279447e-06, "loss": 0.13101898, "memory(GiB)": 13.7, "step": 41235, "train_speed(iter/s)": 1.527788 }, { "acc": 0.97606058, "epoch": 19.32973986407312, "grad_norm": 5.567631244659424, "learning_rate": 7.2083993367012006e-06, "loss": 0.0859561, "memory(GiB)": 13.7, "step": 41240, "train_speed(iter/s)": 1.527796 }, { "acc": 0.96840277, "epoch": 19.332083430981953, "grad_norm": 4.349480152130127, "learning_rate": 7.207703880052308e-06, "loss": 0.13724654, "memory(GiB)": 13.7, "step": 41245, "train_speed(iter/s)": 1.527794 }, { "acc": 0.98602066, "epoch": 19.33442699789079, "grad_norm": 2.2513129711151123, "learning_rate": 7.207008370349485e-06, "loss": 0.0805831, "memory(GiB)": 13.7, "step": 41250, "train_speed(iter/s)": 1.527806 }, { "acc": 0.990625, "epoch": 19.336770564799625, "grad_norm": 0.0667196661233902, "learning_rate": 7.20631280760945e-06, "loss": 0.04660884, "memory(GiB)": 13.7, "step": 41255, "train_speed(iter/s)": 1.52781 }, { "acc": 0.97717266, "epoch": 19.33911413170846, "grad_norm": 3.8638195991516113, "learning_rate": 7.2056171918489204e-06, "loss": 0.06550986, "memory(GiB)": 13.7, "step": 41260, "train_speed(iter/s)": 1.527821 }, { "acc": 0.98599434, "epoch": 19.341457698617294, "grad_norm": 3.4651737213134766, "learning_rate": 7.204921523084619e-06, "loss": 0.05077231, "memory(GiB)": 13.7, "step": 41265, "train_speed(iter/s)": 1.527823 }, { "acc": 0.98675594, "epoch": 19.34380126552613, "grad_norm": 4.011145114898682, "learning_rate": 7.204225801333266e-06, "loss": 0.06527201, "memory(GiB)": 13.7, "step": 41270, "train_speed(iter/s)": 1.527829 }, { "acc": 0.98122025, "epoch": 19.346144832434966, "grad_norm": 5.0175700187683105, "learning_rate": 7.203530026611585e-06, "loss": 0.09821337, "memory(GiB)": 13.7, "step": 41275, "train_speed(iter/s)": 1.52784 }, { "acc": 0.9859375, "epoch": 19.3484883993438, "grad_norm": 7.498138427734375, "learning_rate": 7.2028341989363e-06, "loss": 0.08719592, "memory(GiB)": 13.7, "step": 41280, "train_speed(iter/s)": 1.527856 }, { "acc": 0.97787914, "epoch": 19.35083196625264, "grad_norm": 4.883760452270508, "learning_rate": 7.202138318324135e-06, "loss": 0.08596867, "memory(GiB)": 13.7, "step": 41285, "train_speed(iter/s)": 1.527853 }, { "acc": 0.99050598, "epoch": 19.353175533161473, "grad_norm": 1.6099361181259155, "learning_rate": 7.201442384791819e-06, "loss": 0.04567759, "memory(GiB)": 13.7, "step": 41290, "train_speed(iter/s)": 1.527859 }, { "acc": 0.98091345, "epoch": 19.355519100070307, "grad_norm": 3.6439130306243896, "learning_rate": 7.200746398356078e-06, "loss": 0.05401094, "memory(GiB)": 13.7, "step": 41295, "train_speed(iter/s)": 1.527868 }, { "acc": 0.98187504, "epoch": 19.35786266697914, "grad_norm": 2.8744122982025146, "learning_rate": 7.200050359033642e-06, "loss": 0.07436286, "memory(GiB)": 13.7, "step": 41300, "train_speed(iter/s)": 1.527866 }, { "acc": 0.97956505, "epoch": 19.36020623388798, "grad_norm": 5.687592029571533, "learning_rate": 7.199354266841241e-06, "loss": 0.11292306, "memory(GiB)": 13.7, "step": 41305, "train_speed(iter/s)": 1.527867 }, { "acc": 0.99250002, "epoch": 19.362549800796813, "grad_norm": 0.4244420826435089, "learning_rate": 7.198658121795607e-06, "loss": 0.0539296, "memory(GiB)": 13.7, "step": 41310, "train_speed(iter/s)": 1.527881 }, { "acc": 0.97109852, "epoch": 19.364893367705648, "grad_norm": 2.6388115882873535, "learning_rate": 7.197961923913475e-06, "loss": 0.10593801, "memory(GiB)": 13.7, "step": 41315, "train_speed(iter/s)": 1.52789 }, { "acc": 0.97299681, "epoch": 19.367236934614482, "grad_norm": 5.705484867095947, "learning_rate": 7.197265673211575e-06, "loss": 0.12256958, "memory(GiB)": 13.7, "step": 41320, "train_speed(iter/s)": 1.527885 }, { "acc": 0.9885416, "epoch": 19.36958050152332, "grad_norm": 3.399444103240967, "learning_rate": 7.196569369706647e-06, "loss": 0.02974916, "memory(GiB)": 13.7, "step": 41325, "train_speed(iter/s)": 1.527889 }, { "acc": 0.97408552, "epoch": 19.371924068432154, "grad_norm": 3.5779929161071777, "learning_rate": 7.195873013415425e-06, "loss": 0.10521228, "memory(GiB)": 13.7, "step": 41330, "train_speed(iter/s)": 1.527895 }, { "acc": 0.98669271, "epoch": 19.37426763534099, "grad_norm": 2.1367745399475098, "learning_rate": 7.195176604354649e-06, "loss": 0.04219425, "memory(GiB)": 13.7, "step": 41335, "train_speed(iter/s)": 1.527903 }, { "acc": 0.99229164, "epoch": 19.376611202249823, "grad_norm": 4.230038642883301, "learning_rate": 7.194480142541058e-06, "loss": 0.03255102, "memory(GiB)": 13.7, "step": 41340, "train_speed(iter/s)": 1.527909 }, { "acc": 0.98247023, "epoch": 19.37895476915866, "grad_norm": 1.7487252950668335, "learning_rate": 7.193783627991391e-06, "loss": 0.10671384, "memory(GiB)": 13.7, "step": 41345, "train_speed(iter/s)": 1.527903 }, { "acc": 0.98599205, "epoch": 19.381298336067495, "grad_norm": 4.435184478759766, "learning_rate": 7.19308706072239e-06, "loss": 0.07040792, "memory(GiB)": 13.7, "step": 41350, "train_speed(iter/s)": 1.527898 }, { "acc": 0.9729166, "epoch": 19.38364190297633, "grad_norm": 4.449707984924316, "learning_rate": 7.192390440750802e-06, "loss": 0.05843896, "memory(GiB)": 13.7, "step": 41355, "train_speed(iter/s)": 1.527899 }, { "acc": 0.98705359, "epoch": 19.385985469885163, "grad_norm": 0.12426198273897171, "learning_rate": 7.191693768093369e-06, "loss": 0.06008681, "memory(GiB)": 13.7, "step": 41360, "train_speed(iter/s)": 1.527913 }, { "acc": 0.9802083, "epoch": 19.388329036794, "grad_norm": 5.174968242645264, "learning_rate": 7.190997042766834e-06, "loss": 0.05961273, "memory(GiB)": 13.7, "step": 41365, "train_speed(iter/s)": 1.52793 }, { "acc": 0.975, "epoch": 19.390672603702836, "grad_norm": 6.776872634887695, "learning_rate": 7.190300264787947e-06, "loss": 0.0978574, "memory(GiB)": 13.7, "step": 41370, "train_speed(iter/s)": 1.527927 }, { "acc": 0.99375, "epoch": 19.39301617061167, "grad_norm": 0.04870672523975372, "learning_rate": 7.189603434173456e-06, "loss": 0.02176291, "memory(GiB)": 13.7, "step": 41375, "train_speed(iter/s)": 1.527937 }, { "acc": 0.98663692, "epoch": 19.395359737520508, "grad_norm": 0.6034789085388184, "learning_rate": 7.18890655094011e-06, "loss": 0.06320948, "memory(GiB)": 13.7, "step": 41380, "train_speed(iter/s)": 1.527953 }, { "acc": 0.9838542, "epoch": 19.397703304429342, "grad_norm": 8.68364143371582, "learning_rate": 7.18820961510466e-06, "loss": 0.05270877, "memory(GiB)": 13.7, "step": 41385, "train_speed(iter/s)": 1.527956 }, { "acc": 0.98567905, "epoch": 19.400046871338176, "grad_norm": 1.0845305919647217, "learning_rate": 7.187512626683858e-06, "loss": 0.03296479, "memory(GiB)": 13.7, "step": 41390, "train_speed(iter/s)": 1.527954 }, { "acc": 0.97468748, "epoch": 19.40239043824701, "grad_norm": 5.655324459075928, "learning_rate": 7.186815585694457e-06, "loss": 0.10881863, "memory(GiB)": 13.7, "step": 41395, "train_speed(iter/s)": 1.52796 }, { "acc": 0.97461309, "epoch": 19.40473400515585, "grad_norm": 4.666836738586426, "learning_rate": 7.186118492153213e-06, "loss": 0.16660566, "memory(GiB)": 13.7, "step": 41400, "train_speed(iter/s)": 1.527957 }, { "acc": 0.98133926, "epoch": 19.407077572064683, "grad_norm": 3.137788772583008, "learning_rate": 7.185421346076881e-06, "loss": 0.10300089, "memory(GiB)": 13.7, "step": 41405, "train_speed(iter/s)": 1.527966 }, { "acc": 0.97895832, "epoch": 19.409421138973517, "grad_norm": 5.154242992401123, "learning_rate": 7.184724147482217e-06, "loss": 0.1273126, "memory(GiB)": 13.7, "step": 41410, "train_speed(iter/s)": 1.527975 }, { "acc": 0.96443405, "epoch": 19.41176470588235, "grad_norm": 6.2967681884765625, "learning_rate": 7.1840268963859805e-06, "loss": 0.11649522, "memory(GiB)": 13.7, "step": 41415, "train_speed(iter/s)": 1.527978 }, { "acc": 0.97800598, "epoch": 19.41410827279119, "grad_norm": 5.532027721405029, "learning_rate": 7.1833295928049304e-06, "loss": 0.08851292, "memory(GiB)": 13.7, "step": 41420, "train_speed(iter/s)": 1.52798 }, { "acc": 0.98714018, "epoch": 19.416451839700024, "grad_norm": 4.787674427032471, "learning_rate": 7.182632236755829e-06, "loss": 0.04626784, "memory(GiB)": 13.7, "step": 41425, "train_speed(iter/s)": 1.527982 }, { "acc": 0.98058491, "epoch": 19.418795406608858, "grad_norm": 1.4608272314071655, "learning_rate": 7.181934828255438e-06, "loss": 0.10743241, "memory(GiB)": 13.7, "step": 41430, "train_speed(iter/s)": 1.52798 }, { "acc": 0.98061008, "epoch": 19.421138973517692, "grad_norm": 0.7149006724357605, "learning_rate": 7.181237367320516e-06, "loss": 0.1277873, "memory(GiB)": 13.7, "step": 41435, "train_speed(iter/s)": 1.527991 }, { "acc": 0.99407196, "epoch": 19.42348254042653, "grad_norm": 3.1602892875671387, "learning_rate": 7.180539853967837e-06, "loss": 0.04218167, "memory(GiB)": 13.7, "step": 41440, "train_speed(iter/s)": 1.528009 }, { "acc": 0.98081303, "epoch": 19.425826107335364, "grad_norm": 4.44686222076416, "learning_rate": 7.179842288214159e-06, "loss": 0.06253569, "memory(GiB)": 13.7, "step": 41445, "train_speed(iter/s)": 1.528007 }, { "acc": 0.9776042, "epoch": 19.4281696742442, "grad_norm": 3.44752836227417, "learning_rate": 7.179144670076253e-06, "loss": 0.18670614, "memory(GiB)": 13.7, "step": 41450, "train_speed(iter/s)": 1.528016 }, { "acc": 0.98440475, "epoch": 19.430513241153037, "grad_norm": 7.277156829833984, "learning_rate": 7.178446999570887e-06, "loss": 0.09964452, "memory(GiB)": 13.7, "step": 41455, "train_speed(iter/s)": 1.528023 }, { "acc": 0.98019352, "epoch": 19.43285680806187, "grad_norm": 1.0136185884475708, "learning_rate": 7.177749276714832e-06, "loss": 0.08098761, "memory(GiB)": 13.7, "step": 41460, "train_speed(iter/s)": 1.528029 }, { "acc": 0.97663689, "epoch": 19.435200374970705, "grad_norm": 3.344611406326294, "learning_rate": 7.177051501524853e-06, "loss": 0.06761872, "memory(GiB)": 13.7, "step": 41465, "train_speed(iter/s)": 1.528036 }, { "acc": 0.9791667, "epoch": 19.43754394187954, "grad_norm": 2.2679338455200195, "learning_rate": 7.176353674017728e-06, "loss": 0.06181749, "memory(GiB)": 13.7, "step": 41470, "train_speed(iter/s)": 1.528047 }, { "acc": 0.98812504, "epoch": 19.439887508788377, "grad_norm": 2.8010857105255127, "learning_rate": 7.17565579421023e-06, "loss": 0.02266548, "memory(GiB)": 13.7, "step": 41475, "train_speed(iter/s)": 1.528055 }, { "acc": 0.98023806, "epoch": 19.44223107569721, "grad_norm": 4.831514358520508, "learning_rate": 7.174957862119134e-06, "loss": 0.06389599, "memory(GiB)": 13.7, "step": 41480, "train_speed(iter/s)": 1.52806 }, { "acc": 0.97842264, "epoch": 19.444574642606046, "grad_norm": 2.8824405670166016, "learning_rate": 7.174259877761212e-06, "loss": 0.05747274, "memory(GiB)": 13.7, "step": 41485, "train_speed(iter/s)": 1.528058 }, { "acc": 0.98979168, "epoch": 19.44691820951488, "grad_norm": 3.233600378036499, "learning_rate": 7.173561841153244e-06, "loss": 0.05587375, "memory(GiB)": 13.7, "step": 41490, "train_speed(iter/s)": 1.528065 }, { "acc": 0.9921875, "epoch": 19.449261776423718, "grad_norm": 0.08875705301761627, "learning_rate": 7.172863752312008e-06, "loss": 0.03374099, "memory(GiB)": 13.7, "step": 41495, "train_speed(iter/s)": 1.52807 }, { "acc": 0.99175596, "epoch": 19.451605343332552, "grad_norm": 2.7363851070404053, "learning_rate": 7.172165611254287e-06, "loss": 0.02172886, "memory(GiB)": 13.7, "step": 41500, "train_speed(iter/s)": 1.528076 }, { "acc": 0.97398815, "epoch": 19.453948910241387, "grad_norm": 0.08593534678220749, "learning_rate": 7.171467417996857e-06, "loss": 0.09732594, "memory(GiB)": 13.7, "step": 41505, "train_speed(iter/s)": 1.528073 }, { "acc": 0.98125, "epoch": 19.45629247715022, "grad_norm": 6.042011260986328, "learning_rate": 7.170769172556502e-06, "loss": 0.05636883, "memory(GiB)": 13.7, "step": 41510, "train_speed(iter/s)": 1.528072 }, { "acc": 0.97840776, "epoch": 19.45863604405906, "grad_norm": 3.808448553085327, "learning_rate": 7.170070874950007e-06, "loss": 0.13533654, "memory(GiB)": 13.7, "step": 41515, "train_speed(iter/s)": 1.528064 }, { "acc": 0.97528849, "epoch": 19.460979610967893, "grad_norm": 4.106012344360352, "learning_rate": 7.169372525194156e-06, "loss": 0.09515568, "memory(GiB)": 13.7, "step": 41520, "train_speed(iter/s)": 1.528064 }, { "acc": 0.98550596, "epoch": 19.463323177876727, "grad_norm": 4.545156478881836, "learning_rate": 7.168674123305735e-06, "loss": 0.05761465, "memory(GiB)": 13.7, "step": 41525, "train_speed(iter/s)": 1.528059 }, { "acc": 0.990625, "epoch": 19.465666744785565, "grad_norm": 3.0080442428588867, "learning_rate": 7.167975669301529e-06, "loss": 0.02774061, "memory(GiB)": 13.7, "step": 41530, "train_speed(iter/s)": 1.528066 }, { "acc": 0.98800058, "epoch": 19.4680103116944, "grad_norm": 2.527559757232666, "learning_rate": 7.16727716319833e-06, "loss": 0.05522684, "memory(GiB)": 13.7, "step": 41535, "train_speed(iter/s)": 1.528066 }, { "acc": 0.98633928, "epoch": 19.470353878603234, "grad_norm": 3.3166558742523193, "learning_rate": 7.166578605012926e-06, "loss": 0.10690591, "memory(GiB)": 13.7, "step": 41540, "train_speed(iter/s)": 1.528071 }, { "acc": 0.99375, "epoch": 19.47269744551207, "grad_norm": 3.914269208908081, "learning_rate": 7.165879994762111e-06, "loss": 0.06568027, "memory(GiB)": 13.7, "step": 41545, "train_speed(iter/s)": 1.528077 }, { "acc": 0.97921133, "epoch": 19.475041012420906, "grad_norm": 3.3471999168395996, "learning_rate": 7.1651813324626715e-06, "loss": 0.11311328, "memory(GiB)": 13.7, "step": 41550, "train_speed(iter/s)": 1.528082 }, { "acc": 0.97890787, "epoch": 19.47738457932974, "grad_norm": 5.115413665771484, "learning_rate": 7.164482618131405e-06, "loss": 0.10423553, "memory(GiB)": 13.7, "step": 41555, "train_speed(iter/s)": 1.528075 }, { "acc": 0.98114586, "epoch": 19.479728146238575, "grad_norm": 5.178525447845459, "learning_rate": 7.163783851785106e-06, "loss": 0.11179793, "memory(GiB)": 13.7, "step": 41560, "train_speed(iter/s)": 1.52808 }, { "acc": 0.98865929, "epoch": 19.48207171314741, "grad_norm": 0.38858187198638916, "learning_rate": 7.16308503344057e-06, "loss": 0.03313867, "memory(GiB)": 13.7, "step": 41565, "train_speed(iter/s)": 1.52809 }, { "acc": 0.99395828, "epoch": 19.484415280056247, "grad_norm": 0.012332674115896225, "learning_rate": 7.162386163114596e-06, "loss": 0.05278343, "memory(GiB)": 13.7, "step": 41570, "train_speed(iter/s)": 1.528096 }, { "acc": 0.99125004, "epoch": 19.48675884696508, "grad_norm": 4.947901725769043, "learning_rate": 7.161687240823978e-06, "loss": 0.04376207, "memory(GiB)": 13.7, "step": 41575, "train_speed(iter/s)": 1.528104 }, { "acc": 0.96479168, "epoch": 19.489102413873916, "grad_norm": 0.04196638986468315, "learning_rate": 7.160988266585521e-06, "loss": 0.09187272, "memory(GiB)": 13.7, "step": 41580, "train_speed(iter/s)": 1.528112 }, { "acc": 0.98270836, "epoch": 19.49144598078275, "grad_norm": 3.1644415855407715, "learning_rate": 7.160289240416024e-06, "loss": 0.09626927, "memory(GiB)": 13.7, "step": 41585, "train_speed(iter/s)": 1.528118 }, { "acc": 0.9885417, "epoch": 19.493789547691588, "grad_norm": 1.6149663925170898, "learning_rate": 7.159590162332288e-06, "loss": 0.02745764, "memory(GiB)": 13.7, "step": 41590, "train_speed(iter/s)": 1.528118 }, { "acc": 0.98090277, "epoch": 19.496133114600422, "grad_norm": 7.974827766418457, "learning_rate": 7.158891032351116e-06, "loss": 0.12812989, "memory(GiB)": 13.7, "step": 41595, "train_speed(iter/s)": 1.528119 }, { "acc": 0.97118053, "epoch": 19.498476681509256, "grad_norm": 2.447197675704956, "learning_rate": 7.158191850489318e-06, "loss": 0.13052793, "memory(GiB)": 13.7, "step": 41600, "train_speed(iter/s)": 1.528133 }, { "acc": 0.97800598, "epoch": 19.500820248418094, "grad_norm": 4.66812801361084, "learning_rate": 7.157492616763695e-06, "loss": 0.09397399, "memory(GiB)": 13.7, "step": 41605, "train_speed(iter/s)": 1.528133 }, { "acc": 0.98500004, "epoch": 19.50316381532693, "grad_norm": 5.990415096282959, "learning_rate": 7.156793331191056e-06, "loss": 0.04109554, "memory(GiB)": 13.7, "step": 41610, "train_speed(iter/s)": 1.528134 }, { "acc": 0.98633928, "epoch": 19.505507382235763, "grad_norm": 3.4626691341400146, "learning_rate": 7.156093993788209e-06, "loss": 0.05231309, "memory(GiB)": 13.7, "step": 41615, "train_speed(iter/s)": 1.528138 }, { "acc": 0.97390442, "epoch": 19.507850949144597, "grad_norm": 5.646799087524414, "learning_rate": 7.155394604571963e-06, "loss": 0.07741361, "memory(GiB)": 13.7, "step": 41620, "train_speed(iter/s)": 1.528141 }, { "acc": 0.9901042, "epoch": 19.510194516053435, "grad_norm": 1.1071341037750244, "learning_rate": 7.154695163559133e-06, "loss": 0.03566888, "memory(GiB)": 13.7, "step": 41625, "train_speed(iter/s)": 1.528144 }, { "acc": 0.96365528, "epoch": 19.51253808296227, "grad_norm": 2.108999013900757, "learning_rate": 7.153995670766525e-06, "loss": 0.10832965, "memory(GiB)": 13.7, "step": 41630, "train_speed(iter/s)": 1.528139 }, { "acc": 0.98439846, "epoch": 19.514881649871104, "grad_norm": 5.95400333404541, "learning_rate": 7.153296126210958e-06, "loss": 0.07101707, "memory(GiB)": 13.7, "step": 41635, "train_speed(iter/s)": 1.528144 }, { "acc": 0.97356777, "epoch": 19.517225216779938, "grad_norm": 2.4669442176818848, "learning_rate": 7.152596529909243e-06, "loss": 0.07399138, "memory(GiB)": 13.7, "step": 41640, "train_speed(iter/s)": 1.528148 }, { "acc": 0.98249998, "epoch": 19.519568783688776, "grad_norm": 0.09040409326553345, "learning_rate": 7.1518968818782e-06, "loss": 0.03493479, "memory(GiB)": 13.7, "step": 41645, "train_speed(iter/s)": 1.528149 }, { "acc": 0.99425602, "epoch": 19.52191235059761, "grad_norm": 0.9975153803825378, "learning_rate": 7.151197182134641e-06, "loss": 0.03805352, "memory(GiB)": 13.7, "step": 41650, "train_speed(iter/s)": 1.528156 }, { "acc": 0.98937502, "epoch": 19.524255917506444, "grad_norm": 2.163104295730591, "learning_rate": 7.150497430695389e-06, "loss": 0.07392038, "memory(GiB)": 13.7, "step": 41655, "train_speed(iter/s)": 1.528158 }, { "acc": 0.9885417, "epoch": 19.52659948441528, "grad_norm": 3.5896921157836914, "learning_rate": 7.149797627577263e-06, "loss": 0.05813544, "memory(GiB)": 13.7, "step": 41660, "train_speed(iter/s)": 1.528166 }, { "acc": 0.99375, "epoch": 19.528943051324116, "grad_norm": 3.789773941040039, "learning_rate": 7.1490977727970816e-06, "loss": 0.06163481, "memory(GiB)": 13.7, "step": 41665, "train_speed(iter/s)": 1.528172 }, { "acc": 0.98458338, "epoch": 19.53128661823295, "grad_norm": 0.8769526481628418, "learning_rate": 7.148397866371668e-06, "loss": 0.06743129, "memory(GiB)": 13.7, "step": 41670, "train_speed(iter/s)": 1.528182 }, { "acc": 0.97666664, "epoch": 19.533630185141785, "grad_norm": 3.653240919113159, "learning_rate": 7.147697908317848e-06, "loss": 0.09185694, "memory(GiB)": 13.7, "step": 41675, "train_speed(iter/s)": 1.528188 }, { "acc": 0.97976189, "epoch": 19.53597375205062, "grad_norm": 2.8046987056732178, "learning_rate": 7.1469978986524425e-06, "loss": 0.12136555, "memory(GiB)": 13.7, "step": 41680, "train_speed(iter/s)": 1.528198 }, { "acc": 0.97664261, "epoch": 19.538317318959457, "grad_norm": 1.1102288961410522, "learning_rate": 7.146297837392279e-06, "loss": 0.12853093, "memory(GiB)": 13.7, "step": 41685, "train_speed(iter/s)": 1.5282 }, { "acc": 0.97629623, "epoch": 19.54066088586829, "grad_norm": 4.859766483306885, "learning_rate": 7.145597724554187e-06, "loss": 0.12619495, "memory(GiB)": 13.7, "step": 41690, "train_speed(iter/s)": 1.528198 }, { "acc": 0.9895834, "epoch": 19.543004452777126, "grad_norm": 2.815932035446167, "learning_rate": 7.1448975601549905e-06, "loss": 0.06092559, "memory(GiB)": 13.7, "step": 41695, "train_speed(iter/s)": 1.528194 }, { "acc": 0.97810097, "epoch": 19.545348019685964, "grad_norm": 6.129473686218262, "learning_rate": 7.1441973442115234e-06, "loss": 0.06258665, "memory(GiB)": 13.7, "step": 41700, "train_speed(iter/s)": 1.528197 }, { "acc": 0.98491669, "epoch": 19.547691586594798, "grad_norm": 4.9808878898620605, "learning_rate": 7.143497076740613e-06, "loss": 0.05216493, "memory(GiB)": 13.7, "step": 41705, "train_speed(iter/s)": 1.528208 }, { "acc": 0.996875, "epoch": 19.550035153503632, "grad_norm": 4.235095977783203, "learning_rate": 7.142796757759095e-06, "loss": 0.03100026, "memory(GiB)": 13.7, "step": 41710, "train_speed(iter/s)": 1.528214 }, { "acc": 0.97886906, "epoch": 19.552378720412467, "grad_norm": 1.0857505798339844, "learning_rate": 7.1420963872838e-06, "loss": 0.09995066, "memory(GiB)": 13.7, "step": 41715, "train_speed(iter/s)": 1.528218 }, { "acc": 0.98405933, "epoch": 19.554722287321304, "grad_norm": 11.926003456115723, "learning_rate": 7.141395965331565e-06, "loss": 0.10311475, "memory(GiB)": 13.7, "step": 41720, "train_speed(iter/s)": 1.528224 }, { "acc": 0.98458672, "epoch": 19.55706585423014, "grad_norm": 2.98464298248291, "learning_rate": 7.140695491919223e-06, "loss": 0.03488355, "memory(GiB)": 13.7, "step": 41725, "train_speed(iter/s)": 1.528222 }, { "acc": 0.96365738, "epoch": 19.559409421138973, "grad_norm": 8.58263874053955, "learning_rate": 7.1399949670636125e-06, "loss": 0.11791282, "memory(GiB)": 13.7, "step": 41730, "train_speed(iter/s)": 1.528224 }, { "acc": 0.98656254, "epoch": 19.561752988047807, "grad_norm": 5.781342506408691, "learning_rate": 7.139294390781571e-06, "loss": 0.06697816, "memory(GiB)": 13.7, "step": 41735, "train_speed(iter/s)": 1.528228 }, { "acc": 0.99072914, "epoch": 19.564096554956645, "grad_norm": 0.1728532612323761, "learning_rate": 7.1385937630899406e-06, "loss": 0.05198114, "memory(GiB)": 13.7, "step": 41740, "train_speed(iter/s)": 1.528232 }, { "acc": 0.98090286, "epoch": 19.56644012186548, "grad_norm": 28.58781623840332, "learning_rate": 7.13789308400556e-06, "loss": 0.07744003, "memory(GiB)": 13.7, "step": 41745, "train_speed(iter/s)": 1.528237 }, { "acc": 0.97896824, "epoch": 19.568783688774314, "grad_norm": 6.621273040771484, "learning_rate": 7.13719235354527e-06, "loss": 0.10529463, "memory(GiB)": 13.7, "step": 41750, "train_speed(iter/s)": 1.528241 }, { "acc": 0.97821426, "epoch": 19.571127255683148, "grad_norm": 3.896089792251587, "learning_rate": 7.136491571725917e-06, "loss": 0.07286876, "memory(GiB)": 13.7, "step": 41755, "train_speed(iter/s)": 1.52825 }, { "acc": 0.98791676, "epoch": 19.573470822591986, "grad_norm": 5.689701557159424, "learning_rate": 7.135790738564343e-06, "loss": 0.05127001, "memory(GiB)": 13.7, "step": 41760, "train_speed(iter/s)": 1.528244 }, { "acc": 0.99281254, "epoch": 19.57581438950082, "grad_norm": 0.38574448227882385, "learning_rate": 7.135089854077395e-06, "loss": 0.03728147, "memory(GiB)": 13.7, "step": 41765, "train_speed(iter/s)": 1.528243 }, { "acc": 0.97728624, "epoch": 19.578157956409655, "grad_norm": 0.0019606652203947306, "learning_rate": 7.13438891828192e-06, "loss": 0.04876248, "memory(GiB)": 13.7, "step": 41770, "train_speed(iter/s)": 1.528253 }, { "acc": 0.99750004, "epoch": 19.58050152331849, "grad_norm": 3.2091736793518066, "learning_rate": 7.1336879311947646e-06, "loss": 0.01837756, "memory(GiB)": 13.7, "step": 41775, "train_speed(iter/s)": 1.528246 }, { "acc": 0.9875, "epoch": 19.582845090227327, "grad_norm": 3.354506015777588, "learning_rate": 7.1329868928327795e-06, "loss": 0.06854327, "memory(GiB)": 13.7, "step": 41780, "train_speed(iter/s)": 1.528248 }, { "acc": 0.98183041, "epoch": 19.58518865713616, "grad_norm": 8.95345401763916, "learning_rate": 7.132285803212818e-06, "loss": 0.10010384, "memory(GiB)": 13.7, "step": 41785, "train_speed(iter/s)": 1.52825 }, { "acc": 0.97770834, "epoch": 19.587532224044995, "grad_norm": 51.25531005859375, "learning_rate": 7.131584662351726e-06, "loss": 0.06167374, "memory(GiB)": 13.7, "step": 41790, "train_speed(iter/s)": 1.528253 }, { "acc": 0.97057886, "epoch": 19.589875790953833, "grad_norm": 7.381983757019043, "learning_rate": 7.130883470266361e-06, "loss": 0.1318316, "memory(GiB)": 13.7, "step": 41795, "train_speed(iter/s)": 1.528266 }, { "acc": 0.97372017, "epoch": 19.592219357862668, "grad_norm": 2.6838178634643555, "learning_rate": 7.130182226973577e-06, "loss": 0.06219192, "memory(GiB)": 13.7, "step": 41800, "train_speed(iter/s)": 1.528272 }, { "acc": 0.98125, "epoch": 19.594562924771502, "grad_norm": 5.476414680480957, "learning_rate": 7.129480932490226e-06, "loss": 0.07529919, "memory(GiB)": 13.7, "step": 41805, "train_speed(iter/s)": 1.528277 }, { "acc": 0.98803024, "epoch": 19.596906491680336, "grad_norm": 3.887472152709961, "learning_rate": 7.12877958683317e-06, "loss": 0.04421169, "memory(GiB)": 13.7, "step": 41810, "train_speed(iter/s)": 1.528285 }, { "acc": 0.99196434, "epoch": 19.599250058589174, "grad_norm": 2.944131374359131, "learning_rate": 7.128078190019264e-06, "loss": 0.03804197, "memory(GiB)": 13.7, "step": 41815, "train_speed(iter/s)": 1.528287 }, { "acc": 0.97809219, "epoch": 19.60159362549801, "grad_norm": 4.020191192626953, "learning_rate": 7.127376742065369e-06, "loss": 0.08142618, "memory(GiB)": 13.7, "step": 41820, "train_speed(iter/s)": 1.528302 }, { "acc": 0.97904758, "epoch": 19.603937192406843, "grad_norm": 2.426478385925293, "learning_rate": 7.126675242988343e-06, "loss": 0.07971939, "memory(GiB)": 13.7, "step": 41825, "train_speed(iter/s)": 1.528312 }, { "acc": 0.98515873, "epoch": 19.606280759315677, "grad_norm": 2.490891456604004, "learning_rate": 7.1259736928050505e-06, "loss": 0.04745696, "memory(GiB)": 13.7, "step": 41830, "train_speed(iter/s)": 1.528311 }, { "acc": 0.98277779, "epoch": 19.608624326224515, "grad_norm": 1.9254536628723145, "learning_rate": 7.1252720915323525e-06, "loss": 0.03391439, "memory(GiB)": 13.7, "step": 41835, "train_speed(iter/s)": 1.52831 }, { "acc": 0.98031254, "epoch": 19.61096789313335, "grad_norm": 6.0926971435546875, "learning_rate": 7.124570439187114e-06, "loss": 0.10161215, "memory(GiB)": 13.7, "step": 41840, "train_speed(iter/s)": 1.528315 }, { "acc": 0.96175594, "epoch": 19.613311460042183, "grad_norm": 4.81634521484375, "learning_rate": 7.123868735786199e-06, "loss": 0.11901674, "memory(GiB)": 13.7, "step": 41845, "train_speed(iter/s)": 1.528319 }, { "acc": 0.97476196, "epoch": 19.615655026951018, "grad_norm": 2.001838207244873, "learning_rate": 7.123166981346477e-06, "loss": 0.12101245, "memory(GiB)": 13.7, "step": 41850, "train_speed(iter/s)": 1.528317 }, { "acc": 0.97991476, "epoch": 19.617998593859856, "grad_norm": 0.8213204741477966, "learning_rate": 7.122465175884814e-06, "loss": 0.07886609, "memory(GiB)": 13.7, "step": 41855, "train_speed(iter/s)": 1.528328 }, { "acc": 0.98654766, "epoch": 19.62034216076869, "grad_norm": 4.505244731903076, "learning_rate": 7.121763319418079e-06, "loss": 0.07896245, "memory(GiB)": 13.7, "step": 41860, "train_speed(iter/s)": 1.52834 }, { "acc": 0.96682692, "epoch": 19.622685727677524, "grad_norm": 5.230075359344482, "learning_rate": 7.1210614119631436e-06, "loss": 0.29585462, "memory(GiB)": 13.7, "step": 41865, "train_speed(iter/s)": 1.528334 }, { "acc": 0.98732224, "epoch": 19.625029294586362, "grad_norm": 2.1724183559417725, "learning_rate": 7.120359453536877e-06, "loss": 0.05419577, "memory(GiB)": 13.7, "step": 41870, "train_speed(iter/s)": 1.528342 }, { "acc": 0.96887398, "epoch": 19.627372861495196, "grad_norm": 24.636837005615234, "learning_rate": 7.119657444156155e-06, "loss": 0.17790003, "memory(GiB)": 13.7, "step": 41875, "train_speed(iter/s)": 1.528349 }, { "acc": 0.9802084, "epoch": 19.62971642840403, "grad_norm": 4.106709003448486, "learning_rate": 7.118955383837849e-06, "loss": 0.12778901, "memory(GiB)": 13.7, "step": 41880, "train_speed(iter/s)": 1.528356 }, { "acc": 0.97757282, "epoch": 19.632059995312865, "grad_norm": 6.640895366668701, "learning_rate": 7.118253272598835e-06, "loss": 0.06620509, "memory(GiB)": 13.7, "step": 41885, "train_speed(iter/s)": 1.528365 }, { "acc": 0.98390875, "epoch": 19.634403562221703, "grad_norm": 0.7690528035163879, "learning_rate": 7.117551110455989e-06, "loss": 0.05320154, "memory(GiB)": 13.7, "step": 41890, "train_speed(iter/s)": 1.528369 }, { "acc": 0.98249998, "epoch": 19.636747129130537, "grad_norm": 4.020506858825684, "learning_rate": 7.116848897426191e-06, "loss": 0.06236457, "memory(GiB)": 13.7, "step": 41895, "train_speed(iter/s)": 1.528368 }, { "acc": 0.98181086, "epoch": 19.63909069603937, "grad_norm": 8.749059677124023, "learning_rate": 7.116146633526319e-06, "loss": 0.03225541, "memory(GiB)": 13.7, "step": 41900, "train_speed(iter/s)": 1.528372 }, { "acc": 0.98369789, "epoch": 19.641434262948206, "grad_norm": 3.031830310821533, "learning_rate": 7.11544431877325e-06, "loss": 0.09015379, "memory(GiB)": 13.7, "step": 41905, "train_speed(iter/s)": 1.528377 }, { "acc": 0.97534723, "epoch": 19.643777829857044, "grad_norm": 6.852858066558838, "learning_rate": 7.114741953183869e-06, "loss": 0.09203931, "memory(GiB)": 13.7, "step": 41910, "train_speed(iter/s)": 1.528394 }, { "acc": 0.98832111, "epoch": 19.646121396765878, "grad_norm": 3.408423662185669, "learning_rate": 7.114039536775057e-06, "loss": 0.05702295, "memory(GiB)": 13.7, "step": 41915, "train_speed(iter/s)": 1.528402 }, { "acc": 0.9854167, "epoch": 19.648464963674712, "grad_norm": 1.2271584272384644, "learning_rate": 7.1133370695636986e-06, "loss": 0.0309213, "memory(GiB)": 13.7, "step": 41920, "train_speed(iter/s)": 1.528399 }, { "acc": 0.98002701, "epoch": 19.650808530583546, "grad_norm": 4.3392229080200195, "learning_rate": 7.112634551566677e-06, "loss": 0.07858098, "memory(GiB)": 13.7, "step": 41925, "train_speed(iter/s)": 1.528398 }, { "acc": 0.99073868, "epoch": 19.653152097492384, "grad_norm": 0.023333482444286346, "learning_rate": 7.1119319828008805e-06, "loss": 0.06450515, "memory(GiB)": 13.7, "step": 41930, "train_speed(iter/s)": 1.528398 }, { "acc": 0.99613972, "epoch": 19.65549566440122, "grad_norm": 0.888068675994873, "learning_rate": 7.111229363283194e-06, "loss": 0.01323753, "memory(GiB)": 13.7, "step": 41935, "train_speed(iter/s)": 1.528407 }, { "acc": 0.97726641, "epoch": 19.657839231310053, "grad_norm": 3.5213136672973633, "learning_rate": 7.110526693030512e-06, "loss": 0.12419077, "memory(GiB)": 13.7, "step": 41940, "train_speed(iter/s)": 1.528422 }, { "acc": 0.98383923, "epoch": 19.66018279821889, "grad_norm": 4.535960674285889, "learning_rate": 7.109823972059717e-06, "loss": 0.05375223, "memory(GiB)": 13.7, "step": 41945, "train_speed(iter/s)": 1.528423 }, { "acc": 0.9916666, "epoch": 19.662526365127725, "grad_norm": 11.6879243850708, "learning_rate": 7.109121200387705e-06, "loss": 0.04106311, "memory(GiB)": 13.7, "step": 41950, "train_speed(iter/s)": 1.528441 }, { "acc": 0.98361111, "epoch": 19.66486993203656, "grad_norm": 5.152286529541016, "learning_rate": 7.108418378031364e-06, "loss": 0.08615103, "memory(GiB)": 13.7, "step": 41955, "train_speed(iter/s)": 1.528453 }, { "acc": 0.96956577, "epoch": 19.667213498945394, "grad_norm": 4.7424445152282715, "learning_rate": 7.107715505007593e-06, "loss": 0.08436, "memory(GiB)": 13.7, "step": 41960, "train_speed(iter/s)": 1.528461 }, { "acc": 0.96821423, "epoch": 19.66955706585423, "grad_norm": 1.1423701047897339, "learning_rate": 7.1070125813332834e-06, "loss": 0.12368798, "memory(GiB)": 13.7, "step": 41965, "train_speed(iter/s)": 1.528466 }, { "acc": 0.98874998, "epoch": 19.671900632763066, "grad_norm": 3.392834424972534, "learning_rate": 7.106309607025331e-06, "loss": 0.03478802, "memory(GiB)": 13.7, "step": 41970, "train_speed(iter/s)": 1.528465 }, { "acc": 0.96816463, "epoch": 19.6742441996719, "grad_norm": 3.5203864574432373, "learning_rate": 7.105606582100635e-06, "loss": 0.08433387, "memory(GiB)": 13.7, "step": 41975, "train_speed(iter/s)": 1.52847 }, { "acc": 0.97833328, "epoch": 19.676587766580734, "grad_norm": 0.9210085272789001, "learning_rate": 7.104903506576092e-06, "loss": 0.10135169, "memory(GiB)": 13.7, "step": 41980, "train_speed(iter/s)": 1.528476 }, { "acc": 0.98561964, "epoch": 19.678931333489572, "grad_norm": 1.6573768854141235, "learning_rate": 7.104200380468601e-06, "loss": 0.05763674, "memory(GiB)": 13.7, "step": 41985, "train_speed(iter/s)": 1.528489 }, { "acc": 0.98690481, "epoch": 19.681274900398407, "grad_norm": 0.7259103655815125, "learning_rate": 7.103497203795066e-06, "loss": 0.06106865, "memory(GiB)": 13.7, "step": 41990, "train_speed(iter/s)": 1.528494 }, { "acc": 0.97651787, "epoch": 19.68361846730724, "grad_norm": 6.378525257110596, "learning_rate": 7.102793976572386e-06, "loss": 0.11212232, "memory(GiB)": 13.7, "step": 41995, "train_speed(iter/s)": 1.5285 }, { "acc": 0.98240471, "epoch": 19.685962034216075, "grad_norm": 0.6111050844192505, "learning_rate": 7.102090698817464e-06, "loss": 0.0606078, "memory(GiB)": 13.7, "step": 42000, "train_speed(iter/s)": 1.528501 }, { "acc": 0.98572922, "epoch": 19.688305601124913, "grad_norm": 2.2973570823669434, "learning_rate": 7.1013873705472096e-06, "loss": 0.08376812, "memory(GiB)": 13.7, "step": 42005, "train_speed(iter/s)": 1.528503 }, { "acc": 0.98763885, "epoch": 19.690649168033747, "grad_norm": 2.5532989501953125, "learning_rate": 7.100683991778522e-06, "loss": 0.06214578, "memory(GiB)": 13.7, "step": 42010, "train_speed(iter/s)": 1.528508 }, { "acc": 0.97539139, "epoch": 19.69299273494258, "grad_norm": 4.389504432678223, "learning_rate": 7.099980562528312e-06, "loss": 0.11711493, "memory(GiB)": 13.7, "step": 42015, "train_speed(iter/s)": 1.528519 }, { "acc": 0.96958332, "epoch": 19.69533630185142, "grad_norm": 11.166045188903809, "learning_rate": 7.099277082813484e-06, "loss": 0.07788811, "memory(GiB)": 13.7, "step": 42020, "train_speed(iter/s)": 1.52852 }, { "acc": 0.98071871, "epoch": 19.697679868760254, "grad_norm": 3.6636123657226562, "learning_rate": 7.098573552650954e-06, "loss": 0.11442118, "memory(GiB)": 13.7, "step": 42025, "train_speed(iter/s)": 1.528516 }, { "acc": 0.96921873, "epoch": 19.700023435669088, "grad_norm": 8.596360206604004, "learning_rate": 7.097869972057625e-06, "loss": 0.12888432, "memory(GiB)": 13.7, "step": 42030, "train_speed(iter/s)": 1.528514 }, { "acc": 0.98542738, "epoch": 19.702367002577923, "grad_norm": 3.245697021484375, "learning_rate": 7.097166341050415e-06, "loss": 0.07486069, "memory(GiB)": 13.7, "step": 42035, "train_speed(iter/s)": 1.528522 }, { "acc": 0.9864584, "epoch": 19.70471056948676, "grad_norm": 1.1871732473373413, "learning_rate": 7.096462659646233e-06, "loss": 0.02874867, "memory(GiB)": 13.7, "step": 42040, "train_speed(iter/s)": 1.528527 }, { "acc": 0.98383923, "epoch": 19.707054136395595, "grad_norm": 10.71901798248291, "learning_rate": 7.0957589278619954e-06, "loss": 0.05350701, "memory(GiB)": 13.7, "step": 42045, "train_speed(iter/s)": 1.528534 }, { "acc": 0.95665674, "epoch": 19.70939770330443, "grad_norm": 5.503179550170898, "learning_rate": 7.095055145714616e-06, "loss": 0.13053224, "memory(GiB)": 13.7, "step": 42050, "train_speed(iter/s)": 1.528534 }, { "acc": 0.98279762, "epoch": 19.711741270213263, "grad_norm": 0.0019466179655864835, "learning_rate": 7.09435131322101e-06, "loss": 0.0321832, "memory(GiB)": 13.7, "step": 42055, "train_speed(iter/s)": 1.528548 }, { "acc": 0.98653851, "epoch": 19.7140848371221, "grad_norm": 3.1476216316223145, "learning_rate": 7.093647430398098e-06, "loss": 0.05582693, "memory(GiB)": 13.7, "step": 42060, "train_speed(iter/s)": 1.528547 }, { "acc": 0.96916666, "epoch": 19.716428404030935, "grad_norm": 0.8375788331031799, "learning_rate": 7.0929434972628e-06, "loss": 0.09877847, "memory(GiB)": 13.7, "step": 42065, "train_speed(iter/s)": 1.528556 }, { "acc": 0.97328377, "epoch": 19.71877197093977, "grad_norm": 1.9153376817703247, "learning_rate": 7.092239513832035e-06, "loss": 0.06941941, "memory(GiB)": 13.7, "step": 42070, "train_speed(iter/s)": 1.528558 }, { "acc": 0.98875008, "epoch": 19.721115537848604, "grad_norm": 3.1735544204711914, "learning_rate": 7.091535480122722e-06, "loss": 0.05318099, "memory(GiB)": 13.7, "step": 42075, "train_speed(iter/s)": 1.528574 }, { "acc": 0.9833334, "epoch": 19.723459104757442, "grad_norm": 4.837449073791504, "learning_rate": 7.090831396151786e-06, "loss": 0.05300851, "memory(GiB)": 13.7, "step": 42080, "train_speed(iter/s)": 1.528579 }, { "acc": 0.97755098, "epoch": 19.725802671666276, "grad_norm": 7.027990341186523, "learning_rate": 7.090127261936152e-06, "loss": 0.1119439, "memory(GiB)": 13.7, "step": 42085, "train_speed(iter/s)": 1.528577 }, { "acc": 0.98213177, "epoch": 19.72814623857511, "grad_norm": 6.357176303863525, "learning_rate": 7.089423077492742e-06, "loss": 0.07568784, "memory(GiB)": 13.7, "step": 42090, "train_speed(iter/s)": 1.528578 }, { "acc": 0.97821426, "epoch": 19.73048980548395, "grad_norm": 5.842936992645264, "learning_rate": 7.088718842838484e-06, "loss": 0.10545267, "memory(GiB)": 13.7, "step": 42095, "train_speed(iter/s)": 1.528581 }, { "acc": 0.97797079, "epoch": 19.732833372392783, "grad_norm": 4.80167818069458, "learning_rate": 7.088014557990306e-06, "loss": 0.09492382, "memory(GiB)": 13.7, "step": 42100, "train_speed(iter/s)": 1.528583 }, { "acc": 0.98104172, "epoch": 19.735176939301617, "grad_norm": 5.3802361488342285, "learning_rate": 7.0873102229651355e-06, "loss": 0.07083665, "memory(GiB)": 13.7, "step": 42105, "train_speed(iter/s)": 1.528587 }, { "acc": 0.98131275, "epoch": 19.73752050621045, "grad_norm": 7.121802806854248, "learning_rate": 7.086605837779904e-06, "loss": 0.08999439, "memory(GiB)": 13.7, "step": 42110, "train_speed(iter/s)": 1.528592 }, { "acc": 0.9822916, "epoch": 19.73986407311929, "grad_norm": 9.584193229675293, "learning_rate": 7.085901402451541e-06, "loss": 0.06192895, "memory(GiB)": 13.7, "step": 42115, "train_speed(iter/s)": 1.528601 }, { "acc": 0.99097223, "epoch": 19.742207640028123, "grad_norm": 0.9869366884231567, "learning_rate": 7.085196916996978e-06, "loss": 0.04013818, "memory(GiB)": 13.7, "step": 42120, "train_speed(iter/s)": 1.52861 }, { "acc": 0.98924685, "epoch": 19.744551206936958, "grad_norm": 1.4265778064727783, "learning_rate": 7.084492381433152e-06, "loss": 0.03500065, "memory(GiB)": 13.7, "step": 42125, "train_speed(iter/s)": 1.52862 }, { "acc": 0.97423611, "epoch": 19.746894773845792, "grad_norm": 5.106627941131592, "learning_rate": 7.083787795776994e-06, "loss": 0.06901922, "memory(GiB)": 13.7, "step": 42130, "train_speed(iter/s)": 1.528621 }, { "acc": 0.98749857, "epoch": 19.74923834075463, "grad_norm": 2.6969430446624756, "learning_rate": 7.083083160045443e-06, "loss": 0.0684193, "memory(GiB)": 13.7, "step": 42135, "train_speed(iter/s)": 1.528628 }, { "acc": 0.98062496, "epoch": 19.751581907663464, "grad_norm": 3.649120807647705, "learning_rate": 7.082378474255434e-06, "loss": 0.08399526, "memory(GiB)": 13.7, "step": 42140, "train_speed(iter/s)": 1.528639 }, { "acc": 0.97057867, "epoch": 19.7539254745723, "grad_norm": 7.665320873260498, "learning_rate": 7.081673738423905e-06, "loss": 0.14496951, "memory(GiB)": 13.7, "step": 42145, "train_speed(iter/s)": 1.528651 }, { "acc": 0.9739583, "epoch": 19.756269041481133, "grad_norm": 22.165197372436523, "learning_rate": 7.080968952567797e-06, "loss": 0.08027308, "memory(GiB)": 13.7, "step": 42150, "train_speed(iter/s)": 1.528657 }, { "acc": 0.97937508, "epoch": 19.75861260838997, "grad_norm": 5.996212959289551, "learning_rate": 7.080264116704052e-06, "loss": 0.11392252, "memory(GiB)": 13.7, "step": 42155, "train_speed(iter/s)": 1.528662 }, { "acc": 0.97532053, "epoch": 19.760956175298805, "grad_norm": 6.810344219207764, "learning_rate": 7.079559230849608e-06, "loss": 0.09604558, "memory(GiB)": 13.7, "step": 42160, "train_speed(iter/s)": 1.528673 }, { "acc": 0.98174677, "epoch": 19.76329974220764, "grad_norm": 4.636325836181641, "learning_rate": 7.078854295021411e-06, "loss": 0.09033065, "memory(GiB)": 13.7, "step": 42165, "train_speed(iter/s)": 1.528682 }, { "acc": 0.9863636, "epoch": 19.765643309116474, "grad_norm": 3.7635414600372314, "learning_rate": 7.078149309236404e-06, "loss": 0.03692242, "memory(GiB)": 13.7, "step": 42170, "train_speed(iter/s)": 1.528689 }, { "acc": 0.98264847, "epoch": 19.76798687602531, "grad_norm": 6.0205559730529785, "learning_rate": 7.077444273511534e-06, "loss": 0.08134593, "memory(GiB)": 13.7, "step": 42175, "train_speed(iter/s)": 1.528686 }, { "acc": 0.98411713, "epoch": 19.770330442934146, "grad_norm": 3.446667432785034, "learning_rate": 7.076739187863748e-06, "loss": 0.05375669, "memory(GiB)": 13.7, "step": 42180, "train_speed(iter/s)": 1.528685 }, { "acc": 0.98625002, "epoch": 19.77267400984298, "grad_norm": 3.7357938289642334, "learning_rate": 7.07603405230999e-06, "loss": 0.03353888, "memory(GiB)": 13.7, "step": 42185, "train_speed(iter/s)": 1.528682 }, { "acc": 0.9852273, "epoch": 19.775017576751818, "grad_norm": 1.4521639347076416, "learning_rate": 7.075328866867214e-06, "loss": 0.06202789, "memory(GiB)": 13.7, "step": 42190, "train_speed(iter/s)": 1.528692 }, { "acc": 0.99142323, "epoch": 19.777361143660652, "grad_norm": 1.7356672286987305, "learning_rate": 7.074623631552369e-06, "loss": 0.0501601, "memory(GiB)": 13.7, "step": 42195, "train_speed(iter/s)": 1.528697 }, { "acc": 0.98634796, "epoch": 19.779704710569487, "grad_norm": 4.678407669067383, "learning_rate": 7.073918346382405e-06, "loss": 0.10885773, "memory(GiB)": 13.7, "step": 42200, "train_speed(iter/s)": 1.528701 }, { "acc": 0.99017859, "epoch": 19.78204827747832, "grad_norm": 4.478824615478516, "learning_rate": 7.073213011374274e-06, "loss": 0.08395668, "memory(GiB)": 13.7, "step": 42205, "train_speed(iter/s)": 1.528701 }, { "acc": 0.98472586, "epoch": 19.78439184438716, "grad_norm": 2.732903242111206, "learning_rate": 7.072507626544935e-06, "loss": 0.04494376, "memory(GiB)": 13.7, "step": 42210, "train_speed(iter/s)": 1.528706 }, { "acc": 0.98302078, "epoch": 19.786735411295993, "grad_norm": 2.0157418251037598, "learning_rate": 7.071802191911338e-06, "loss": 0.08112684, "memory(GiB)": 13.7, "step": 42215, "train_speed(iter/s)": 1.528711 }, { "acc": 0.96799603, "epoch": 19.789078978204827, "grad_norm": 1.8048115968704224, "learning_rate": 7.071096707490442e-06, "loss": 0.068648, "memory(GiB)": 13.7, "step": 42220, "train_speed(iter/s)": 1.528708 }, { "acc": 0.97842264, "epoch": 19.79142254511366, "grad_norm": 4.365678787231445, "learning_rate": 7.070391173299203e-06, "loss": 0.06675555, "memory(GiB)": 13.7, "step": 42225, "train_speed(iter/s)": 1.528715 }, { "acc": 0.99020834, "epoch": 19.7937661120225, "grad_norm": 3.207467555999756, "learning_rate": 7.069685589354579e-06, "loss": 0.01971143, "memory(GiB)": 13.7, "step": 42230, "train_speed(iter/s)": 1.528718 }, { "acc": 0.98341351, "epoch": 19.796109678931334, "grad_norm": 2.982577323913574, "learning_rate": 7.0689799556735325e-06, "loss": 0.04105019, "memory(GiB)": 13.7, "step": 42235, "train_speed(iter/s)": 1.528724 }, { "acc": 0.98250008, "epoch": 19.798453245840168, "grad_norm": 4.898038387298584, "learning_rate": 7.068274272273024e-06, "loss": 0.05476723, "memory(GiB)": 13.7, "step": 42240, "train_speed(iter/s)": 1.528745 }, { "acc": 0.98270836, "epoch": 19.800796812749002, "grad_norm": 4.833958625793457, "learning_rate": 7.067568539170014e-06, "loss": 0.0459159, "memory(GiB)": 13.7, "step": 42245, "train_speed(iter/s)": 1.52874 }, { "acc": 0.9864584, "epoch": 19.80314037965784, "grad_norm": 4.654139518737793, "learning_rate": 7.066862756381467e-06, "loss": 0.03457852, "memory(GiB)": 13.7, "step": 42250, "train_speed(iter/s)": 1.528742 }, { "acc": 0.980723, "epoch": 19.805483946566675, "grad_norm": 4.465548515319824, "learning_rate": 7.06615692392435e-06, "loss": 0.05534517, "memory(GiB)": 13.7, "step": 42255, "train_speed(iter/s)": 1.528744 }, { "acc": 0.97964287, "epoch": 19.80782751347551, "grad_norm": 5.951144218444824, "learning_rate": 7.065451041815623e-06, "loss": 0.05955045, "memory(GiB)": 13.7, "step": 42260, "train_speed(iter/s)": 1.528755 }, { "acc": 0.97842255, "epoch": 19.810171080384343, "grad_norm": 14.323807716369629, "learning_rate": 7.064745110072259e-06, "loss": 0.18109376, "memory(GiB)": 13.7, "step": 42265, "train_speed(iter/s)": 1.528767 }, { "acc": 0.97173195, "epoch": 19.81251464729318, "grad_norm": 5.818554878234863, "learning_rate": 7.064039128711223e-06, "loss": 0.07523317, "memory(GiB)": 13.7, "step": 42270, "train_speed(iter/s)": 1.528766 }, { "acc": 0.9836647, "epoch": 19.814858214202015, "grad_norm": 7.846931457519531, "learning_rate": 7.063333097749489e-06, "loss": 0.09798242, "memory(GiB)": 13.7, "step": 42275, "train_speed(iter/s)": 1.528776 }, { "acc": 0.98258934, "epoch": 19.81720178111085, "grad_norm": 4.978397846221924, "learning_rate": 7.0626270172040214e-06, "loss": 0.03533739, "memory(GiB)": 13.7, "step": 42280, "train_speed(iter/s)": 1.528771 }, { "acc": 0.98529758, "epoch": 19.819545348019687, "grad_norm": 0.12286953628063202, "learning_rate": 7.061920887091796e-06, "loss": 0.04782178, "memory(GiB)": 13.7, "step": 42285, "train_speed(iter/s)": 1.528778 }, { "acc": 0.9947917, "epoch": 19.821888914928522, "grad_norm": 3.7464590072631836, "learning_rate": 7.061214707429784e-06, "loss": 0.02384676, "memory(GiB)": 13.7, "step": 42290, "train_speed(iter/s)": 1.528789 }, { "acc": 0.97354174, "epoch": 19.824232481837356, "grad_norm": 3.9764244556427, "learning_rate": 7.060508478234959e-06, "loss": 0.09064956, "memory(GiB)": 13.7, "step": 42295, "train_speed(iter/s)": 1.528793 }, { "acc": 0.98406248, "epoch": 19.82657604874619, "grad_norm": 3.5116078853607178, "learning_rate": 7.059802199524301e-06, "loss": 0.09985587, "memory(GiB)": 13.7, "step": 42300, "train_speed(iter/s)": 1.528802 }, { "acc": 0.9864583, "epoch": 19.82891961565503, "grad_norm": 1.9127297401428223, "learning_rate": 7.059095871314782e-06, "loss": 0.04087673, "memory(GiB)": 13.7, "step": 42305, "train_speed(iter/s)": 1.528809 }, { "acc": 0.97707329, "epoch": 19.831263182563863, "grad_norm": 1.8975944519042969, "learning_rate": 7.058389493623382e-06, "loss": 0.12413819, "memory(GiB)": 13.7, "step": 42310, "train_speed(iter/s)": 1.528814 }, { "acc": 0.97174339, "epoch": 19.833606749472697, "grad_norm": 5.5724077224731445, "learning_rate": 7.057683066467077e-06, "loss": 0.10286272, "memory(GiB)": 13.7, "step": 42315, "train_speed(iter/s)": 1.528821 }, { "acc": 0.98874998, "epoch": 19.83595031638153, "grad_norm": 2.389913320541382, "learning_rate": 7.056976589862851e-06, "loss": 0.02883433, "memory(GiB)": 13.7, "step": 42320, "train_speed(iter/s)": 1.528821 }, { "acc": 0.97163372, "epoch": 19.83829388329037, "grad_norm": 7.709171295166016, "learning_rate": 7.056270063827683e-06, "loss": 0.14057956, "memory(GiB)": 13.7, "step": 42325, "train_speed(iter/s)": 1.528825 }, { "acc": 0.9587923, "epoch": 19.840637450199203, "grad_norm": 11.610840797424316, "learning_rate": 7.055563488378558e-06, "loss": 0.1828909, "memory(GiB)": 13.7, "step": 42330, "train_speed(iter/s)": 1.528843 }, { "acc": 0.97802086, "epoch": 19.842981017108038, "grad_norm": 5.679039478302002, "learning_rate": 7.0548568635324575e-06, "loss": 0.06112351, "memory(GiB)": 13.7, "step": 42335, "train_speed(iter/s)": 1.528845 }, { "acc": 0.97318897, "epoch": 19.845324584016872, "grad_norm": 4.17151403427124, "learning_rate": 7.0541501893063665e-06, "loss": 0.1382834, "memory(GiB)": 13.7, "step": 42340, "train_speed(iter/s)": 1.528849 }, { "acc": 0.9807291, "epoch": 19.84766815092571, "grad_norm": 0.7110930681228638, "learning_rate": 7.053443465717272e-06, "loss": 0.07114583, "memory(GiB)": 13.7, "step": 42345, "train_speed(iter/s)": 1.528862 }, { "acc": 0.98012314, "epoch": 19.850011717834544, "grad_norm": 5.517151832580566, "learning_rate": 7.052736692782163e-06, "loss": 0.11590265, "memory(GiB)": 13.7, "step": 42350, "train_speed(iter/s)": 1.528865 }, { "acc": 0.9734375, "epoch": 19.85235528474338, "grad_norm": 3.772015333175659, "learning_rate": 7.052029870518023e-06, "loss": 0.06925516, "memory(GiB)": 13.7, "step": 42355, "train_speed(iter/s)": 1.528869 }, { "acc": 0.99201393, "epoch": 19.854698851652216, "grad_norm": 0.11916820704936981, "learning_rate": 7.051322998941846e-06, "loss": 0.03618406, "memory(GiB)": 13.7, "step": 42360, "train_speed(iter/s)": 1.528867 }, { "acc": 0.9704464, "epoch": 19.85704241856105, "grad_norm": 2.638580083847046, "learning_rate": 7.050616078070622e-06, "loss": 0.1213724, "memory(GiB)": 13.7, "step": 42365, "train_speed(iter/s)": 1.528863 }, { "acc": 0.9967803, "epoch": 19.859385985469885, "grad_norm": 2.5079994201660156, "learning_rate": 7.049909107921341e-06, "loss": 0.01725849, "memory(GiB)": 13.7, "step": 42370, "train_speed(iter/s)": 1.528865 }, { "acc": 0.98446426, "epoch": 19.86172955237872, "grad_norm": 3.2239301204681396, "learning_rate": 7.049202088510999e-06, "loss": 0.06286411, "memory(GiB)": 13.7, "step": 42375, "train_speed(iter/s)": 1.528861 }, { "acc": 0.98483286, "epoch": 19.864073119287557, "grad_norm": 1.8862924575805664, "learning_rate": 7.048495019856589e-06, "loss": 0.08126646, "memory(GiB)": 13.7, "step": 42380, "train_speed(iter/s)": 1.528858 }, { "acc": 0.98781252, "epoch": 19.86641668619639, "grad_norm": 2.1544735431671143, "learning_rate": 7.047787901975108e-06, "loss": 0.06407104, "memory(GiB)": 13.7, "step": 42385, "train_speed(iter/s)": 1.528861 }, { "acc": 0.98206844, "epoch": 19.868760253105226, "grad_norm": 4.312856674194336, "learning_rate": 7.04708073488355e-06, "loss": 0.06479508, "memory(GiB)": 13.7, "step": 42390, "train_speed(iter/s)": 1.528864 }, { "acc": 0.98562498, "epoch": 19.87110382001406, "grad_norm": 0.905232310295105, "learning_rate": 7.046373518598916e-06, "loss": 0.0267583, "memory(GiB)": 13.7, "step": 42395, "train_speed(iter/s)": 1.528871 }, { "acc": 0.97782078, "epoch": 19.873447386922898, "grad_norm": 5.077579975128174, "learning_rate": 7.045666253138204e-06, "loss": 0.06201464, "memory(GiB)": 13.7, "step": 42400, "train_speed(iter/s)": 1.528881 }, { "acc": 0.97577381, "epoch": 19.875790953831732, "grad_norm": 6.789127826690674, "learning_rate": 7.044958938518414e-06, "loss": 0.08135046, "memory(GiB)": 13.7, "step": 42405, "train_speed(iter/s)": 1.528885 }, { "acc": 0.98119125, "epoch": 19.878134520740566, "grad_norm": 2.604865789413452, "learning_rate": 7.044251574756546e-06, "loss": 0.09094059, "memory(GiB)": 13.7, "step": 42410, "train_speed(iter/s)": 1.528892 }, { "acc": 0.98250008, "epoch": 19.8804780876494, "grad_norm": 0.26215314865112305, "learning_rate": 7.0435441618696055e-06, "loss": 0.04278746, "memory(GiB)": 13.7, "step": 42415, "train_speed(iter/s)": 1.5289 }, { "acc": 0.9865799, "epoch": 19.88282165455824, "grad_norm": 4.503743648529053, "learning_rate": 7.042836699874595e-06, "loss": 0.0635816, "memory(GiB)": 13.7, "step": 42420, "train_speed(iter/s)": 1.528911 }, { "acc": 0.98291664, "epoch": 19.885165221467073, "grad_norm": 1.4762425422668457, "learning_rate": 7.042129188788521e-06, "loss": 0.07065551, "memory(GiB)": 13.7, "step": 42425, "train_speed(iter/s)": 1.528917 }, { "acc": 0.97738094, "epoch": 19.887508788375907, "grad_norm": 1.8767683506011963, "learning_rate": 7.041421628628388e-06, "loss": 0.09484258, "memory(GiB)": 13.7, "step": 42430, "train_speed(iter/s)": 1.528924 }, { "acc": 0.98531246, "epoch": 19.889852355284745, "grad_norm": 5.112805366516113, "learning_rate": 7.040714019411203e-06, "loss": 0.05317325, "memory(GiB)": 13.7, "step": 42435, "train_speed(iter/s)": 1.528921 }, { "acc": 0.96955357, "epoch": 19.89219592219358, "grad_norm": 1.8124394416809082, "learning_rate": 7.040006361153976e-06, "loss": 0.11808813, "memory(GiB)": 13.7, "step": 42440, "train_speed(iter/s)": 1.528924 }, { "acc": 0.98399105, "epoch": 19.894539489102414, "grad_norm": 4.2275872230529785, "learning_rate": 7.039298653873718e-06, "loss": 0.06544496, "memory(GiB)": 13.7, "step": 42445, "train_speed(iter/s)": 1.528928 }, { "acc": 0.983006, "epoch": 19.896883056011248, "grad_norm": 8.673582077026367, "learning_rate": 7.038590897587437e-06, "loss": 0.07358099, "memory(GiB)": 13.7, "step": 42450, "train_speed(iter/s)": 1.528935 }, { "acc": 0.98795137, "epoch": 19.899226622920086, "grad_norm": 4.835342884063721, "learning_rate": 7.037883092312145e-06, "loss": 0.07206754, "memory(GiB)": 13.7, "step": 42455, "train_speed(iter/s)": 1.528925 }, { "acc": 0.9796814, "epoch": 19.90157018982892, "grad_norm": 7.902414798736572, "learning_rate": 7.037175238064859e-06, "loss": 0.10337543, "memory(GiB)": 13.7, "step": 42460, "train_speed(iter/s)": 1.528924 }, { "acc": 0.9760417, "epoch": 19.903913756737754, "grad_norm": 4.606579780578613, "learning_rate": 7.036467334862591e-06, "loss": 0.07843636, "memory(GiB)": 13.7, "step": 42465, "train_speed(iter/s)": 1.528924 }, { "acc": 0.97517853, "epoch": 19.90625732364659, "grad_norm": 0.7192261815071106, "learning_rate": 7.035759382722357e-06, "loss": 0.08788466, "memory(GiB)": 13.7, "step": 42470, "train_speed(iter/s)": 1.528922 }, { "acc": 0.9689045, "epoch": 19.908600890555427, "grad_norm": 10.792057037353516, "learning_rate": 7.035051381661173e-06, "loss": 0.14346477, "memory(GiB)": 13.7, "step": 42475, "train_speed(iter/s)": 1.528927 }, { "acc": 0.9822916, "epoch": 19.91094445746426, "grad_norm": 37.27935028076172, "learning_rate": 7.034343331696059e-06, "loss": 0.06159296, "memory(GiB)": 13.7, "step": 42480, "train_speed(iter/s)": 1.528947 }, { "acc": 0.98467255, "epoch": 19.913288024373095, "grad_norm": 3.0970346927642822, "learning_rate": 7.033635232844032e-06, "loss": 0.0620407, "memory(GiB)": 13.7, "step": 42485, "train_speed(iter/s)": 1.528946 }, { "acc": 0.9683897, "epoch": 19.91563159128193, "grad_norm": 5.6689629554748535, "learning_rate": 7.032927085122113e-06, "loss": 0.09056526, "memory(GiB)": 13.7, "step": 42490, "train_speed(iter/s)": 1.528947 }, { "acc": 0.98482199, "epoch": 19.917975158190767, "grad_norm": 3.839799642562866, "learning_rate": 7.032218888547324e-06, "loss": 0.06240394, "memory(GiB)": 13.7, "step": 42495, "train_speed(iter/s)": 1.528945 }, { "acc": 0.9926137, "epoch": 19.9203187250996, "grad_norm": 0.5554567575454712, "learning_rate": 7.031510643136688e-06, "loss": 0.04474374, "memory(GiB)": 13.7, "step": 42500, "train_speed(iter/s)": 1.528951 }, { "acc": 0.981565, "epoch": 19.922662292008436, "grad_norm": 2.479235887527466, "learning_rate": 7.03080234890723e-06, "loss": 0.10046635, "memory(GiB)": 13.7, "step": 42505, "train_speed(iter/s)": 1.528953 }, { "acc": 0.97986107, "epoch": 19.925005858917274, "grad_norm": 6.93245267868042, "learning_rate": 7.030094005875972e-06, "loss": 0.0658343, "memory(GiB)": 13.7, "step": 42510, "train_speed(iter/s)": 1.52895 }, { "acc": 0.98441353, "epoch": 19.927349425826108, "grad_norm": 2.307419538497925, "learning_rate": 7.029385614059942e-06, "loss": 0.09243143, "memory(GiB)": 13.7, "step": 42515, "train_speed(iter/s)": 1.52896 }, { "acc": 0.98145828, "epoch": 19.929692992734942, "grad_norm": 7.6094512939453125, "learning_rate": 7.028677173476166e-06, "loss": 0.07375474, "memory(GiB)": 13.7, "step": 42520, "train_speed(iter/s)": 1.528965 }, { "acc": 0.9958334, "epoch": 19.932036559643777, "grad_norm": 2.5833866596221924, "learning_rate": 7.027968684141675e-06, "loss": 0.02808493, "memory(GiB)": 13.7, "step": 42525, "train_speed(iter/s)": 1.528966 }, { "acc": 0.98029757, "epoch": 19.934380126552615, "grad_norm": 4.098196029663086, "learning_rate": 7.0272601460734976e-06, "loss": 0.08913426, "memory(GiB)": 13.7, "step": 42530, "train_speed(iter/s)": 1.52897 }, { "acc": 0.97436962, "epoch": 19.93672369346145, "grad_norm": 4.412333011627197, "learning_rate": 7.026551559288665e-06, "loss": 0.10570083, "memory(GiB)": 13.7, "step": 42535, "train_speed(iter/s)": 1.528976 }, { "acc": 0.98125, "epoch": 19.939067260370283, "grad_norm": 3.906878709793091, "learning_rate": 7.025842923804209e-06, "loss": 0.08706755, "memory(GiB)": 13.7, "step": 42540, "train_speed(iter/s)": 1.528989 }, { "acc": 0.9775588, "epoch": 19.941410827279118, "grad_norm": 2.3058698177337646, "learning_rate": 7.025134239637163e-06, "loss": 0.05943752, "memory(GiB)": 13.7, "step": 42545, "train_speed(iter/s)": 1.529 }, { "acc": 0.97457104, "epoch": 19.943754394187955, "grad_norm": 1.0947620868682861, "learning_rate": 7.024425506804561e-06, "loss": 0.11172092, "memory(GiB)": 13.7, "step": 42550, "train_speed(iter/s)": 1.528998 }, { "acc": 0.96488094, "epoch": 19.94609796109679, "grad_norm": 4.071905136108398, "learning_rate": 7.0237167253234385e-06, "loss": 0.15210632, "memory(GiB)": 13.7, "step": 42555, "train_speed(iter/s)": 1.528996 }, { "acc": 0.97425594, "epoch": 19.948441528005624, "grad_norm": 2.1955888271331787, "learning_rate": 7.023007895210834e-06, "loss": 0.06320136, "memory(GiB)": 13.7, "step": 42560, "train_speed(iter/s)": 1.529 }, { "acc": 0.98529758, "epoch": 19.95078509491446, "grad_norm": 7.495973110198975, "learning_rate": 7.022299016483782e-06, "loss": 0.06796426, "memory(GiB)": 13.7, "step": 42565, "train_speed(iter/s)": 1.529007 }, { "acc": 0.98715277, "epoch": 19.953128661823296, "grad_norm": 5.467513084411621, "learning_rate": 7.021590089159326e-06, "loss": 0.05652045, "memory(GiB)": 13.7, "step": 42570, "train_speed(iter/s)": 1.529015 }, { "acc": 0.99487181, "epoch": 19.95547222873213, "grad_norm": 1.8469202518463135, "learning_rate": 7.020881113254503e-06, "loss": 0.02965426, "memory(GiB)": 13.7, "step": 42575, "train_speed(iter/s)": 1.529017 }, { "acc": 0.99300594, "epoch": 19.957815795640965, "grad_norm": 2.516690254211426, "learning_rate": 7.0201720887863554e-06, "loss": 0.04451998, "memory(GiB)": 13.7, "step": 42580, "train_speed(iter/s)": 1.529018 }, { "acc": 0.99571428, "epoch": 19.960159362549803, "grad_norm": 1.4709126949310303, "learning_rate": 7.019463015771925e-06, "loss": 0.06654798, "memory(GiB)": 13.7, "step": 42585, "train_speed(iter/s)": 1.529012 }, { "acc": 0.98458328, "epoch": 19.962502929458637, "grad_norm": 4.508667469024658, "learning_rate": 7.018753894228259e-06, "loss": 0.07401305, "memory(GiB)": 13.7, "step": 42590, "train_speed(iter/s)": 1.529024 }, { "acc": 0.98180561, "epoch": 19.96484649636747, "grad_norm": 1.7512760162353516, "learning_rate": 7.018044724172398e-06, "loss": 0.06082669, "memory(GiB)": 13.7, "step": 42595, "train_speed(iter/s)": 1.529035 }, { "acc": 0.971875, "epoch": 19.967190063276306, "grad_norm": 6.966579437255859, "learning_rate": 7.01733550562139e-06, "loss": 0.1037012, "memory(GiB)": 13.7, "step": 42600, "train_speed(iter/s)": 1.52904 }, { "acc": 0.98586311, "epoch": 19.969533630185143, "grad_norm": 2.975491762161255, "learning_rate": 7.016626238592283e-06, "loss": 0.06589581, "memory(GiB)": 13.7, "step": 42605, "train_speed(iter/s)": 1.529052 }, { "acc": 0.96749992, "epoch": 19.971877197093978, "grad_norm": 3.7488248348236084, "learning_rate": 7.015916923102125e-06, "loss": 0.12434536, "memory(GiB)": 13.7, "step": 42610, "train_speed(iter/s)": 1.529059 }, { "acc": 0.97232952, "epoch": 19.974220764002812, "grad_norm": 3.451406240463257, "learning_rate": 7.015207559167964e-06, "loss": 0.09190772, "memory(GiB)": 13.7, "step": 42615, "train_speed(iter/s)": 1.529069 }, { "acc": 0.97914858, "epoch": 19.976564330911646, "grad_norm": 3.4914324283599854, "learning_rate": 7.014498146806853e-06, "loss": 0.11066811, "memory(GiB)": 13.7, "step": 42620, "train_speed(iter/s)": 1.529074 }, { "acc": 0.98499451, "epoch": 19.978907897820484, "grad_norm": 4.244199752807617, "learning_rate": 7.013788686035844e-06, "loss": 0.10402418, "memory(GiB)": 13.7, "step": 42625, "train_speed(iter/s)": 1.529071 }, { "acc": 0.97130947, "epoch": 19.98125146472932, "grad_norm": 1.5090283155441284, "learning_rate": 7.013079176871986e-06, "loss": 0.12203567, "memory(GiB)": 13.7, "step": 42630, "train_speed(iter/s)": 1.52908 }, { "acc": 0.9760416, "epoch": 19.983595031638153, "grad_norm": 4.139781951904297, "learning_rate": 7.01236961933234e-06, "loss": 0.06007678, "memory(GiB)": 13.7, "step": 42635, "train_speed(iter/s)": 1.529087 }, { "acc": 0.98915176, "epoch": 19.985938598546987, "grad_norm": 4.393584728240967, "learning_rate": 7.011660013433956e-06, "loss": 0.03716225, "memory(GiB)": 13.7, "step": 42640, "train_speed(iter/s)": 1.529075 }, { "acc": 0.98949909, "epoch": 19.988282165455825, "grad_norm": 2.3008358478546143, "learning_rate": 7.010950359193893e-06, "loss": 0.07256016, "memory(GiB)": 13.7, "step": 42645, "train_speed(iter/s)": 1.529081 }, { "acc": 0.96754837, "epoch": 19.99062573236466, "grad_norm": 2.5035295486450195, "learning_rate": 7.010240656629208e-06, "loss": 0.12169495, "memory(GiB)": 13.7, "step": 42650, "train_speed(iter/s)": 1.529095 }, { "acc": 0.98566475, "epoch": 19.992969299273494, "grad_norm": 1.5465106964111328, "learning_rate": 7.009530905756961e-06, "loss": 0.03597987, "memory(GiB)": 13.7, "step": 42655, "train_speed(iter/s)": 1.5291 }, { "acc": 0.98510418, "epoch": 19.995312866182328, "grad_norm": 3.446347236633301, "learning_rate": 7.00882110659421e-06, "loss": 0.03169188, "memory(GiB)": 13.7, "step": 42660, "train_speed(iter/s)": 1.529102 }, { "acc": 0.98031254, "epoch": 19.997656433091166, "grad_norm": 5.529900074005127, "learning_rate": 7.008111259158017e-06, "loss": 0.11577303, "memory(GiB)": 13.7, "step": 42665, "train_speed(iter/s)": 1.529107 }, { "acc": 0.98988094, "epoch": 20.0, "grad_norm": 2.641855001449585, "learning_rate": 7.0074013634654454e-06, "loss": 0.07925223, "memory(GiB)": 13.7, "step": 42670, "train_speed(iter/s)": 1.529093 }, { "acc": 0.98402786, "epoch": 20.002343566908834, "grad_norm": 5.149044990539551, "learning_rate": 7.006691419533559e-06, "loss": 0.04915776, "memory(GiB)": 13.7, "step": 42675, "train_speed(iter/s)": 1.529089 }, { "acc": 0.9791667, "epoch": 20.004687133817672, "grad_norm": 5.7503509521484375, "learning_rate": 7.0059814273794225e-06, "loss": 0.08865201, "memory(GiB)": 13.7, "step": 42680, "train_speed(iter/s)": 1.529096 }, { "acc": 0.97208328, "epoch": 20.007030700726506, "grad_norm": 5.1113739013671875, "learning_rate": 7.0052713870200986e-06, "loss": 0.08276275, "memory(GiB)": 13.7, "step": 42685, "train_speed(iter/s)": 1.529104 }, { "acc": 0.98623514, "epoch": 20.00937426763534, "grad_norm": 2.5092852115631104, "learning_rate": 7.004561298472659e-06, "loss": 0.06972713, "memory(GiB)": 13.7, "step": 42690, "train_speed(iter/s)": 1.529104 }, { "acc": 0.98208332, "epoch": 20.011717834544175, "grad_norm": 10.025911331176758, "learning_rate": 7.003851161754167e-06, "loss": 0.09115951, "memory(GiB)": 13.7, "step": 42695, "train_speed(iter/s)": 1.529101 }, { "acc": 0.97113094, "epoch": 20.014061401453013, "grad_norm": 2.5755367279052734, "learning_rate": 7.003140976881697e-06, "loss": 0.05274148, "memory(GiB)": 13.7, "step": 42700, "train_speed(iter/s)": 1.529101 }, { "acc": 0.97833595, "epoch": 20.016404968361847, "grad_norm": 6.537906646728516, "learning_rate": 7.002430743872315e-06, "loss": 0.11821266, "memory(GiB)": 13.7, "step": 42705, "train_speed(iter/s)": 1.529107 }, { "acc": 0.99145832, "epoch": 20.01874853527068, "grad_norm": 0.620611846446991, "learning_rate": 7.001720462743095e-06, "loss": 0.0328909, "memory(GiB)": 13.7, "step": 42710, "train_speed(iter/s)": 1.529104 }, { "acc": 0.99125004, "epoch": 20.021092102179516, "grad_norm": 2.713902473449707, "learning_rate": 7.001010133511111e-06, "loss": 0.03235422, "memory(GiB)": 13.7, "step": 42715, "train_speed(iter/s)": 1.52911 }, { "acc": 0.9854167, "epoch": 20.023435669088354, "grad_norm": 3.193850040435791, "learning_rate": 7.000299756193437e-06, "loss": 0.05958421, "memory(GiB)": 13.7, "step": 42720, "train_speed(iter/s)": 1.529113 }, { "acc": 0.98964014, "epoch": 20.025779235997188, "grad_norm": 0.3444112539291382, "learning_rate": 6.9995893308071435e-06, "loss": 0.06437583, "memory(GiB)": 13.7, "step": 42725, "train_speed(iter/s)": 1.529121 }, { "acc": 0.98923378, "epoch": 20.028122802906022, "grad_norm": 3.2972280979156494, "learning_rate": 6.998878857369311e-06, "loss": 0.07263502, "memory(GiB)": 13.7, "step": 42730, "train_speed(iter/s)": 1.529125 }, { "acc": 0.98137655, "epoch": 20.030466369814857, "grad_norm": 4.136064052581787, "learning_rate": 6.9981683358970166e-06, "loss": 0.11771333, "memory(GiB)": 13.7, "step": 42735, "train_speed(iter/s)": 1.529133 }, { "acc": 0.98833332, "epoch": 20.032809936723694, "grad_norm": 2.414855480194092, "learning_rate": 6.997457766407337e-06, "loss": 0.03883178, "memory(GiB)": 13.7, "step": 42740, "train_speed(iter/s)": 1.529135 }, { "acc": 0.97555561, "epoch": 20.03515350363253, "grad_norm": 3.887399196624756, "learning_rate": 6.996747148917355e-06, "loss": 0.09342732, "memory(GiB)": 13.7, "step": 42745, "train_speed(iter/s)": 1.529132 }, { "acc": 0.9655489, "epoch": 20.037497070541363, "grad_norm": 3.0847370624542236, "learning_rate": 6.996036483444148e-06, "loss": 0.12789638, "memory(GiB)": 13.7, "step": 42750, "train_speed(iter/s)": 1.52914 }, { "acc": 0.97352715, "epoch": 20.0398406374502, "grad_norm": 6.511684894561768, "learning_rate": 6.995325770004802e-06, "loss": 0.09100327, "memory(GiB)": 13.7, "step": 42755, "train_speed(iter/s)": 1.529151 }, { "acc": 0.97354164, "epoch": 20.042184204359035, "grad_norm": 0.14988723397254944, "learning_rate": 6.994615008616397e-06, "loss": 0.04196019, "memory(GiB)": 13.7, "step": 42760, "train_speed(iter/s)": 1.52916 }, { "acc": 0.97957792, "epoch": 20.04452777126787, "grad_norm": 0.0025231563486158848, "learning_rate": 6.9939041992960174e-06, "loss": 0.1101032, "memory(GiB)": 13.7, "step": 42765, "train_speed(iter/s)": 1.529155 }, { "acc": 0.9874855, "epoch": 20.046871338176704, "grad_norm": 6.8157267570495605, "learning_rate": 6.99319334206075e-06, "loss": 0.06795242, "memory(GiB)": 13.7, "step": 42770, "train_speed(iter/s)": 1.52916 }, { "acc": 0.97880211, "epoch": 20.04921490508554, "grad_norm": 31.919902801513672, "learning_rate": 6.992482436927681e-06, "loss": 0.10832658, "memory(GiB)": 13.7, "step": 42775, "train_speed(iter/s)": 1.529159 }, { "acc": 0.98660793, "epoch": 20.051558471994376, "grad_norm": 16.722957611083984, "learning_rate": 6.991771483913898e-06, "loss": 0.05031723, "memory(GiB)": 13.7, "step": 42780, "train_speed(iter/s)": 1.529156 }, { "acc": 0.984375, "epoch": 20.05390203890321, "grad_norm": 5.101015090942383, "learning_rate": 6.9910604830364925e-06, "loss": 0.06626848, "memory(GiB)": 13.7, "step": 42785, "train_speed(iter/s)": 1.529155 }, { "acc": 0.98559761, "epoch": 20.056245605812045, "grad_norm": 3.1718215942382812, "learning_rate": 6.990349434312551e-06, "loss": 0.06739312, "memory(GiB)": 13.7, "step": 42790, "train_speed(iter/s)": 1.529168 }, { "acc": 0.96508923, "epoch": 20.058589172720882, "grad_norm": 9.283793449401855, "learning_rate": 6.9896383377591656e-06, "loss": 0.12386172, "memory(GiB)": 13.7, "step": 42795, "train_speed(iter/s)": 1.529168 }, { "acc": 0.99354172, "epoch": 20.060932739629717, "grad_norm": 3.124631881713867, "learning_rate": 6.98892719339343e-06, "loss": 0.05827588, "memory(GiB)": 13.7, "step": 42800, "train_speed(iter/s)": 1.529175 }, { "acc": 0.96749458, "epoch": 20.06327630653855, "grad_norm": 7.063018321990967, "learning_rate": 6.988216001232439e-06, "loss": 0.12317208, "memory(GiB)": 13.7, "step": 42805, "train_speed(iter/s)": 1.529179 }, { "acc": 0.96983624, "epoch": 20.065619873447385, "grad_norm": 1.3821501731872559, "learning_rate": 6.987504761293284e-06, "loss": 0.15088024, "memory(GiB)": 13.7, "step": 42810, "train_speed(iter/s)": 1.529178 }, { "acc": 0.97239075, "epoch": 20.067963440356223, "grad_norm": 5.410547733306885, "learning_rate": 6.986793473593061e-06, "loss": 0.10010264, "memory(GiB)": 13.7, "step": 42815, "train_speed(iter/s)": 1.52917 }, { "acc": 0.97186012, "epoch": 20.070307007265058, "grad_norm": 6.218101501464844, "learning_rate": 6.98608213814887e-06, "loss": 0.08550661, "memory(GiB)": 13.7, "step": 42820, "train_speed(iter/s)": 1.52917 }, { "acc": 0.98249998, "epoch": 20.072650574173892, "grad_norm": 0.6302127242088318, "learning_rate": 6.985370754977807e-06, "loss": 0.06232404, "memory(GiB)": 13.7, "step": 42825, "train_speed(iter/s)": 1.529176 }, { "acc": 0.97139883, "epoch": 20.074994141082726, "grad_norm": 3.542330503463745, "learning_rate": 6.984659324096972e-06, "loss": 0.0929317, "memory(GiB)": 13.7, "step": 42830, "train_speed(iter/s)": 1.529182 }, { "acc": 0.97113094, "epoch": 20.077337707991564, "grad_norm": 7.961729049682617, "learning_rate": 6.983947845523465e-06, "loss": 0.09496519, "memory(GiB)": 13.7, "step": 42835, "train_speed(iter/s)": 1.529182 }, { "acc": 0.96176548, "epoch": 20.0796812749004, "grad_norm": 6.321925163269043, "learning_rate": 6.983236319274389e-06, "loss": 0.09244428, "memory(GiB)": 13.7, "step": 42840, "train_speed(iter/s)": 1.52919 }, { "acc": 0.99035721, "epoch": 20.082024841809233, "grad_norm": 1.1441761255264282, "learning_rate": 6.982524745366844e-06, "loss": 0.04920492, "memory(GiB)": 13.7, "step": 42845, "train_speed(iter/s)": 1.529192 }, { "acc": 0.95062504, "epoch": 20.08436840871807, "grad_norm": 4.452850818634033, "learning_rate": 6.9818131238179374e-06, "loss": 0.12840323, "memory(GiB)": 13.7, "step": 42850, "train_speed(iter/s)": 1.529201 }, { "acc": 0.97689972, "epoch": 20.086711975626905, "grad_norm": 3.1502175331115723, "learning_rate": 6.981101454644772e-06, "loss": 0.1374189, "memory(GiB)": 13.7, "step": 42855, "train_speed(iter/s)": 1.529197 }, { "acc": 0.98666668, "epoch": 20.08905554253574, "grad_norm": 0.012781449593603611, "learning_rate": 6.9803897378644535e-06, "loss": 0.02502614, "memory(GiB)": 13.7, "step": 42860, "train_speed(iter/s)": 1.529209 }, { "acc": 0.97646065, "epoch": 20.091399109444573, "grad_norm": 6.3746538162231445, "learning_rate": 6.979677973494091e-06, "loss": 0.10189441, "memory(GiB)": 13.7, "step": 42865, "train_speed(iter/s)": 1.529207 }, { "acc": 0.96342268, "epoch": 20.09374267635341, "grad_norm": 8.982939720153809, "learning_rate": 6.978966161550791e-06, "loss": 0.07935609, "memory(GiB)": 13.7, "step": 42870, "train_speed(iter/s)": 1.529209 }, { "acc": 0.98258934, "epoch": 20.096086243262246, "grad_norm": 20.736852645874023, "learning_rate": 6.978254302051665e-06, "loss": 0.10306966, "memory(GiB)": 13.7, "step": 42875, "train_speed(iter/s)": 1.52921 }, { "acc": 0.96280632, "epoch": 20.09842981017108, "grad_norm": 8.880721092224121, "learning_rate": 6.977542395013822e-06, "loss": 0.1748197, "memory(GiB)": 13.7, "step": 42880, "train_speed(iter/s)": 1.529219 }, { "acc": 0.98152771, "epoch": 20.100773377079914, "grad_norm": 3.61439847946167, "learning_rate": 6.9768304404543774e-06, "loss": 0.11327078, "memory(GiB)": 13.7, "step": 42885, "train_speed(iter/s)": 1.529219 }, { "acc": 0.98179073, "epoch": 20.103116943988752, "grad_norm": 1.7336658239364624, "learning_rate": 6.976118438390439e-06, "loss": 0.08535306, "memory(GiB)": 13.7, "step": 42890, "train_speed(iter/s)": 1.52923 }, { "acc": 0.97113781, "epoch": 20.105460510897586, "grad_norm": 4.100627899169922, "learning_rate": 6.975406388839125e-06, "loss": 0.06250685, "memory(GiB)": 13.7, "step": 42895, "train_speed(iter/s)": 1.529229 }, { "acc": 0.97769375, "epoch": 20.10780407780642, "grad_norm": 4.719798564910889, "learning_rate": 6.974694291817551e-06, "loss": 0.0612036, "memory(GiB)": 13.7, "step": 42900, "train_speed(iter/s)": 1.529235 }, { "acc": 0.9817235, "epoch": 20.110147644715255, "grad_norm": 2.7856922149658203, "learning_rate": 6.973982147342829e-06, "loss": 0.13393593, "memory(GiB)": 13.7, "step": 42905, "train_speed(iter/s)": 1.52924 }, { "acc": 0.9802084, "epoch": 20.112491211624093, "grad_norm": 3.4879555702209473, "learning_rate": 6.97326995543208e-06, "loss": 0.06995247, "memory(GiB)": 13.7, "step": 42910, "train_speed(iter/s)": 1.529247 }, { "acc": 0.9927084, "epoch": 20.114834778532927, "grad_norm": 1.3804665803909302, "learning_rate": 6.972557716102425e-06, "loss": 0.01720818, "memory(GiB)": 13.7, "step": 42915, "train_speed(iter/s)": 1.529244 }, { "acc": 0.967381, "epoch": 20.11717834544176, "grad_norm": 6.809938907623291, "learning_rate": 6.9718454293709795e-06, "loss": 0.13097801, "memory(GiB)": 13.7, "step": 42920, "train_speed(iter/s)": 1.529254 }, { "acc": 0.96416664, "epoch": 20.1195219123506, "grad_norm": 27.51701545715332, "learning_rate": 6.971133095254866e-06, "loss": 0.19838705, "memory(GiB)": 13.7, "step": 42925, "train_speed(iter/s)": 1.529255 }, { "acc": 0.97032194, "epoch": 20.121865479259434, "grad_norm": 3.665926933288574, "learning_rate": 6.970420713771208e-06, "loss": 0.11130331, "memory(GiB)": 13.7, "step": 42930, "train_speed(iter/s)": 1.529258 }, { "acc": 0.96603622, "epoch": 20.124209046168268, "grad_norm": 7.667026996612549, "learning_rate": 6.969708284937125e-06, "loss": 0.09809537, "memory(GiB)": 13.7, "step": 42935, "train_speed(iter/s)": 1.529275 }, { "acc": 0.98041668, "epoch": 20.126552613077102, "grad_norm": 1.6024671792984009, "learning_rate": 6.968995808769748e-06, "loss": 0.1083905, "memory(GiB)": 13.7, "step": 42940, "train_speed(iter/s)": 1.529277 }, { "acc": 0.9864584, "epoch": 20.12889617998594, "grad_norm": 2.968611478805542, "learning_rate": 6.968283285286195e-06, "loss": 0.06802757, "memory(GiB)": 13.7, "step": 42945, "train_speed(iter/s)": 1.529288 }, { "acc": 0.98738966, "epoch": 20.131239746894774, "grad_norm": 1.6930640935897827, "learning_rate": 6.9675707145035995e-06, "loss": 0.06867266, "memory(GiB)": 13.7, "step": 42950, "train_speed(iter/s)": 1.529296 }, { "acc": 0.98707695, "epoch": 20.13358331380361, "grad_norm": 4.211492538452148, "learning_rate": 6.9668580964390856e-06, "loss": 0.06472369, "memory(GiB)": 13.7, "step": 42955, "train_speed(iter/s)": 1.529296 }, { "acc": 0.9515625, "epoch": 20.135926880712443, "grad_norm": 5.736796855926514, "learning_rate": 6.966145431109782e-06, "loss": 0.16967459, "memory(GiB)": 13.7, "step": 42960, "train_speed(iter/s)": 1.529309 }, { "acc": 0.98993511, "epoch": 20.13827044762128, "grad_norm": 1.9009004831314087, "learning_rate": 6.965432718532821e-06, "loss": 0.04349889, "memory(GiB)": 13.7, "step": 42965, "train_speed(iter/s)": 1.529313 }, { "acc": 0.98979168, "epoch": 20.140614014530115, "grad_norm": 6.102888584136963, "learning_rate": 6.964719958725332e-06, "loss": 0.04403322, "memory(GiB)": 13.7, "step": 42970, "train_speed(iter/s)": 1.529321 }, { "acc": 0.98116322, "epoch": 20.14295758143895, "grad_norm": 7.057928562164307, "learning_rate": 6.964007151704449e-06, "loss": 0.02534045, "memory(GiB)": 13.7, "step": 42975, "train_speed(iter/s)": 1.529324 }, { "acc": 0.97278271, "epoch": 20.145301148347784, "grad_norm": 3.583117961883545, "learning_rate": 6.963294297487303e-06, "loss": 0.13698165, "memory(GiB)": 13.7, "step": 42980, "train_speed(iter/s)": 1.52933 }, { "acc": 0.96904774, "epoch": 20.14764471525662, "grad_norm": 5.588911533355713, "learning_rate": 6.962581396091031e-06, "loss": 0.06277509, "memory(GiB)": 13.7, "step": 42985, "train_speed(iter/s)": 1.529326 }, { "acc": 0.98195515, "epoch": 20.149988282165456, "grad_norm": 3.3588004112243652, "learning_rate": 6.96186844753277e-06, "loss": 0.056399, "memory(GiB)": 13.7, "step": 42990, "train_speed(iter/s)": 1.529336 }, { "acc": 0.97334862, "epoch": 20.15233184907429, "grad_norm": 6.985422134399414, "learning_rate": 6.961155451829654e-06, "loss": 0.12596875, "memory(GiB)": 13.7, "step": 42995, "train_speed(iter/s)": 1.52934 }, { "acc": 0.9864584, "epoch": 20.154675415983128, "grad_norm": 2.420628309249878, "learning_rate": 6.960442408998821e-06, "loss": 0.04877515, "memory(GiB)": 13.7, "step": 43000, "train_speed(iter/s)": 1.529344 }, { "acc": 0.97697306, "epoch": 20.157018982891962, "grad_norm": 4.849048614501953, "learning_rate": 6.9597293190574125e-06, "loss": 0.12182425, "memory(GiB)": 13.7, "step": 43005, "train_speed(iter/s)": 1.529354 }, { "acc": 0.98154306, "epoch": 20.159362549800797, "grad_norm": 0.17959868907928467, "learning_rate": 6.959016182022568e-06, "loss": 0.07186555, "memory(GiB)": 13.7, "step": 43010, "train_speed(iter/s)": 1.529358 }, { "acc": 0.9776989, "epoch": 20.16170611670963, "grad_norm": 2.8216171264648438, "learning_rate": 6.958302997911429e-06, "loss": 0.12231964, "memory(GiB)": 13.7, "step": 43015, "train_speed(iter/s)": 1.529362 }, { "acc": 0.9693285, "epoch": 20.16404968361847, "grad_norm": 4.525405406951904, "learning_rate": 6.9575897667411365e-06, "loss": 0.13399645, "memory(GiB)": 13.7, "step": 43020, "train_speed(iter/s)": 1.529364 }, { "acc": 0.9916667, "epoch": 20.166393250527303, "grad_norm": 4.8484368324279785, "learning_rate": 6.956876488528835e-06, "loss": 0.07546449, "memory(GiB)": 13.7, "step": 43025, "train_speed(iter/s)": 1.529375 }, { "acc": 0.98731518, "epoch": 20.168736817436137, "grad_norm": 1.9509345293045044, "learning_rate": 6.9561631632916714e-06, "loss": 0.07215578, "memory(GiB)": 13.7, "step": 43030, "train_speed(iter/s)": 1.529389 }, { "acc": 0.98386364, "epoch": 20.17108038434497, "grad_norm": 0.013219308108091354, "learning_rate": 6.955449791046791e-06, "loss": 0.05903106, "memory(GiB)": 13.7, "step": 43035, "train_speed(iter/s)": 1.529396 }, { "acc": 0.99375, "epoch": 20.17342395125381, "grad_norm": 1.9148591756820679, "learning_rate": 6.95473637181134e-06, "loss": 0.0363567, "memory(GiB)": 13.7, "step": 43040, "train_speed(iter/s)": 1.529402 }, { "acc": 0.97174816, "epoch": 20.175767518162644, "grad_norm": 4.541933536529541, "learning_rate": 6.954022905602464e-06, "loss": 0.08617053, "memory(GiB)": 13.7, "step": 43045, "train_speed(iter/s)": 1.529418 }, { "acc": 0.98678036, "epoch": 20.178111085071478, "grad_norm": 1.1896946430206299, "learning_rate": 6.953309392437317e-06, "loss": 0.07789146, "memory(GiB)": 13.7, "step": 43050, "train_speed(iter/s)": 1.529426 }, { "acc": 0.98139877, "epoch": 20.180454651980313, "grad_norm": 6.436259746551514, "learning_rate": 6.9525958323330485e-06, "loss": 0.07196614, "memory(GiB)": 13.7, "step": 43055, "train_speed(iter/s)": 1.529429 }, { "acc": 0.98419857, "epoch": 20.18279821888915, "grad_norm": 4.113539695739746, "learning_rate": 6.9518822253068086e-06, "loss": 0.10404032, "memory(GiB)": 13.7, "step": 43060, "train_speed(iter/s)": 1.529429 }, { "acc": 0.98800602, "epoch": 20.185141785797985, "grad_norm": 1.449959397315979, "learning_rate": 6.951168571375751e-06, "loss": 0.04697445, "memory(GiB)": 13.7, "step": 43065, "train_speed(iter/s)": 1.529429 }, { "acc": 0.9859127, "epoch": 20.18748535270682, "grad_norm": 0.7430895566940308, "learning_rate": 6.950454870557028e-06, "loss": 0.04707914, "memory(GiB)": 13.7, "step": 43070, "train_speed(iter/s)": 1.529426 }, { "acc": 0.98291664, "epoch": 20.189828919615653, "grad_norm": 1.6960073709487915, "learning_rate": 6.949741122867798e-06, "loss": 0.05944289, "memory(GiB)": 13.7, "step": 43075, "train_speed(iter/s)": 1.529429 }, { "acc": 0.97925596, "epoch": 20.19217248652449, "grad_norm": 3.556460380554199, "learning_rate": 6.949027328325214e-06, "loss": 0.08388428, "memory(GiB)": 13.7, "step": 43080, "train_speed(iter/s)": 1.529431 }, { "acc": 0.98656254, "epoch": 20.194516053433325, "grad_norm": 5.788630962371826, "learning_rate": 6.948313486946434e-06, "loss": 0.03776742, "memory(GiB)": 13.7, "step": 43085, "train_speed(iter/s)": 1.529432 }, { "acc": 0.98300648, "epoch": 20.19685962034216, "grad_norm": 6.237487316131592, "learning_rate": 6.947599598748617e-06, "loss": 0.07757227, "memory(GiB)": 13.7, "step": 43090, "train_speed(iter/s)": 1.529437 }, { "acc": 0.98363094, "epoch": 20.199203187250998, "grad_norm": 9.146502494812012, "learning_rate": 6.946885663748924e-06, "loss": 0.11256499, "memory(GiB)": 13.7, "step": 43095, "train_speed(iter/s)": 1.529435 }, { "acc": 0.98110571, "epoch": 20.201546754159832, "grad_norm": 3.6013991832733154, "learning_rate": 6.946171681964513e-06, "loss": 0.08141807, "memory(GiB)": 13.7, "step": 43100, "train_speed(iter/s)": 1.529444 }, { "acc": 0.99039688, "epoch": 20.203890321068666, "grad_norm": 2.0648415088653564, "learning_rate": 6.945457653412548e-06, "loss": 0.03139406, "memory(GiB)": 13.7, "step": 43105, "train_speed(iter/s)": 1.529455 }, { "acc": 0.97354164, "epoch": 20.2062338879775, "grad_norm": 4.254292011260986, "learning_rate": 6.944743578110187e-06, "loss": 0.07651159, "memory(GiB)": 13.7, "step": 43110, "train_speed(iter/s)": 1.52946 }, { "acc": 0.98562508, "epoch": 20.20857745488634, "grad_norm": 1.8340257406234741, "learning_rate": 6.944029456074602e-06, "loss": 0.03072471, "memory(GiB)": 13.7, "step": 43115, "train_speed(iter/s)": 1.529467 }, { "acc": 0.97537775, "epoch": 20.210921021795173, "grad_norm": 2.4353127479553223, "learning_rate": 6.943315287322952e-06, "loss": 0.11286589, "memory(GiB)": 13.7, "step": 43120, "train_speed(iter/s)": 1.529479 }, { "acc": 0.9901042, "epoch": 20.213264588704007, "grad_norm": 1.313297152519226, "learning_rate": 6.942601071872404e-06, "loss": 0.04301129, "memory(GiB)": 13.7, "step": 43125, "train_speed(iter/s)": 1.529489 }, { "acc": 0.97872019, "epoch": 20.21560815561284, "grad_norm": 4.960835933685303, "learning_rate": 6.941886809740127e-06, "loss": 0.08656917, "memory(GiB)": 13.7, "step": 43130, "train_speed(iter/s)": 1.5295 }, { "acc": 0.991572, "epoch": 20.21795172252168, "grad_norm": 2.7048661708831787, "learning_rate": 6.941172500943291e-06, "loss": 0.04692173, "memory(GiB)": 13.7, "step": 43135, "train_speed(iter/s)": 1.529513 }, { "acc": 0.97744656, "epoch": 20.220295289430513, "grad_norm": 4.098772048950195, "learning_rate": 6.940458145499062e-06, "loss": 0.08590464, "memory(GiB)": 13.7, "step": 43140, "train_speed(iter/s)": 1.52951 }, { "acc": 0.98165178, "epoch": 20.222638856339348, "grad_norm": 4.759925365447998, "learning_rate": 6.939743743424613e-06, "loss": 0.10049781, "memory(GiB)": 13.7, "step": 43145, "train_speed(iter/s)": 1.529513 }, { "acc": 0.99229164, "epoch": 20.224982423248182, "grad_norm": 0.0031938340980559587, "learning_rate": 6.939029294737115e-06, "loss": 0.0356198, "memory(GiB)": 13.7, "step": 43150, "train_speed(iter/s)": 1.529519 }, { "acc": 0.9919445, "epoch": 20.22732599015702, "grad_norm": 6.5727715492248535, "learning_rate": 6.938314799453741e-06, "loss": 0.03942739, "memory(GiB)": 13.7, "step": 43155, "train_speed(iter/s)": 1.529519 }, { "acc": 0.99201393, "epoch": 20.229669557065854, "grad_norm": 1.3338626623153687, "learning_rate": 6.937600257591667e-06, "loss": 0.03415071, "memory(GiB)": 13.7, "step": 43160, "train_speed(iter/s)": 1.529536 }, { "acc": 0.98500004, "epoch": 20.23201312397469, "grad_norm": 4.183506011962891, "learning_rate": 6.936885669168064e-06, "loss": 0.05861154, "memory(GiB)": 13.7, "step": 43165, "train_speed(iter/s)": 1.529538 }, { "acc": 0.98907204, "epoch": 20.234356690883526, "grad_norm": 3.455906867980957, "learning_rate": 6.936171034200114e-06, "loss": 0.06283375, "memory(GiB)": 13.7, "step": 43170, "train_speed(iter/s)": 1.52954 }, { "acc": 0.98967266, "epoch": 20.23670025779236, "grad_norm": 1.9725254774093628, "learning_rate": 6.935456352704988e-06, "loss": 0.02842031, "memory(GiB)": 13.7, "step": 43175, "train_speed(iter/s)": 1.529549 }, { "acc": 0.97382193, "epoch": 20.239043824701195, "grad_norm": 2.631474018096924, "learning_rate": 6.934741624699872e-06, "loss": 0.10228646, "memory(GiB)": 13.7, "step": 43180, "train_speed(iter/s)": 1.529554 }, { "acc": 0.98048611, "epoch": 20.24138739161003, "grad_norm": 0.7050753831863403, "learning_rate": 6.934026850201939e-06, "loss": 0.05299824, "memory(GiB)": 13.7, "step": 43185, "train_speed(iter/s)": 1.52956 }, { "acc": 0.9879735, "epoch": 20.243730958518867, "grad_norm": 2.749758720397949, "learning_rate": 6.933312029228375e-06, "loss": 0.05016726, "memory(GiB)": 13.7, "step": 43190, "train_speed(iter/s)": 1.529561 }, { "acc": 0.98293648, "epoch": 20.2460745254277, "grad_norm": 7.052567005157471, "learning_rate": 6.932597161796356e-06, "loss": 0.1242568, "memory(GiB)": 13.7, "step": 43195, "train_speed(iter/s)": 1.529567 }, { "acc": 0.97671127, "epoch": 20.248418092336536, "grad_norm": 5.382059097290039, "learning_rate": 6.931882247923073e-06, "loss": 0.11019057, "memory(GiB)": 13.7, "step": 43200, "train_speed(iter/s)": 1.529578 }, { "acc": 0.9723958, "epoch": 20.25076165924537, "grad_norm": 6.863547325134277, "learning_rate": 6.931167287625702e-06, "loss": 0.12303668, "memory(GiB)": 13.7, "step": 43205, "train_speed(iter/s)": 1.529583 }, { "acc": 0.9879427, "epoch": 20.253105226154208, "grad_norm": 0.6713981032371521, "learning_rate": 6.930452280921436e-06, "loss": 0.0438715, "memory(GiB)": 13.7, "step": 43210, "train_speed(iter/s)": 1.529588 }, { "acc": 0.97380953, "epoch": 20.255448793063042, "grad_norm": 3.577852964401245, "learning_rate": 6.929737227827454e-06, "loss": 0.11202021, "memory(GiB)": 13.7, "step": 43215, "train_speed(iter/s)": 1.529594 }, { "acc": 0.98708324, "epoch": 20.257792359971877, "grad_norm": 2.624429225921631, "learning_rate": 6.9290221283609486e-06, "loss": 0.04993607, "memory(GiB)": 13.7, "step": 43220, "train_speed(iter/s)": 1.529603 }, { "acc": 0.98475275, "epoch": 20.26013592688071, "grad_norm": 1.8728785514831543, "learning_rate": 6.928306982539107e-06, "loss": 0.06445239, "memory(GiB)": 13.7, "step": 43225, "train_speed(iter/s)": 1.529606 }, { "acc": 0.9885417, "epoch": 20.26247949378955, "grad_norm": 1.2527481317520142, "learning_rate": 6.927591790379119e-06, "loss": 0.06140026, "memory(GiB)": 13.7, "step": 43230, "train_speed(iter/s)": 1.529611 }, { "acc": 0.98571339, "epoch": 20.264823060698383, "grad_norm": 6.910966396331787, "learning_rate": 6.926876551898175e-06, "loss": 0.0855365, "memory(GiB)": 13.7, "step": 43235, "train_speed(iter/s)": 1.529621 }, { "acc": 0.96958332, "epoch": 20.267166627607217, "grad_norm": 4.00539493560791, "learning_rate": 6.926161267113467e-06, "loss": 0.09808171, "memory(GiB)": 13.7, "step": 43240, "train_speed(iter/s)": 1.529619 }, { "acc": 0.97520828, "epoch": 20.269510194516055, "grad_norm": 5.285299301147461, "learning_rate": 6.92544593604219e-06, "loss": 0.07832839, "memory(GiB)": 13.7, "step": 43245, "train_speed(iter/s)": 1.529627 }, { "acc": 0.98125, "epoch": 20.27185376142489, "grad_norm": 3.594033718109131, "learning_rate": 6.924730558701535e-06, "loss": 0.04373265, "memory(GiB)": 13.7, "step": 43250, "train_speed(iter/s)": 1.529638 }, { "acc": 0.97855167, "epoch": 20.274197328333724, "grad_norm": 5.153289318084717, "learning_rate": 6.924015135108699e-06, "loss": 0.07781152, "memory(GiB)": 13.7, "step": 43255, "train_speed(iter/s)": 1.529634 }, { "acc": 0.984375, "epoch": 20.276540895242558, "grad_norm": 2.8725087642669678, "learning_rate": 6.923299665280879e-06, "loss": 0.03838262, "memory(GiB)": 13.7, "step": 43260, "train_speed(iter/s)": 1.529641 }, { "acc": 0.97127972, "epoch": 20.278884462151396, "grad_norm": 5.319100379943848, "learning_rate": 6.922584149235272e-06, "loss": 0.10624238, "memory(GiB)": 13.7, "step": 43265, "train_speed(iter/s)": 1.529653 }, { "acc": 0.96065483, "epoch": 20.28122802906023, "grad_norm": 5.531031608581543, "learning_rate": 6.921868586989075e-06, "loss": 0.14972445, "memory(GiB)": 13.7, "step": 43270, "train_speed(iter/s)": 1.529661 }, { "acc": 0.98165207, "epoch": 20.283571595969065, "grad_norm": 7.338443279266357, "learning_rate": 6.921152978559493e-06, "loss": 0.09315549, "memory(GiB)": 13.7, "step": 43275, "train_speed(iter/s)": 1.529676 }, { "acc": 0.97815895, "epoch": 20.2859151628779, "grad_norm": 8.698067665100098, "learning_rate": 6.92043732396372e-06, "loss": 0.09171109, "memory(GiB)": 13.7, "step": 43280, "train_speed(iter/s)": 1.529684 }, { "acc": 0.9854167, "epoch": 20.288258729786737, "grad_norm": 5.245576858520508, "learning_rate": 6.9197216232189624e-06, "loss": 0.05023766, "memory(GiB)": 13.7, "step": 43285, "train_speed(iter/s)": 1.529697 }, { "acc": 0.98239994, "epoch": 20.29060229669557, "grad_norm": 5.814435005187988, "learning_rate": 6.919005876342421e-06, "loss": 0.07023706, "memory(GiB)": 13.7, "step": 43290, "train_speed(iter/s)": 1.529707 }, { "acc": 0.9888072, "epoch": 20.292945863604405, "grad_norm": 1.9646008014678955, "learning_rate": 6.918290083351302e-06, "loss": 0.05120614, "memory(GiB)": 13.7, "step": 43295, "train_speed(iter/s)": 1.529702 }, { "acc": 0.97250004, "epoch": 20.29528943051324, "grad_norm": 7.8613057136535645, "learning_rate": 6.91757424426281e-06, "loss": 0.08156917, "memory(GiB)": 13.7, "step": 43300, "train_speed(iter/s)": 1.529697 }, { "acc": 0.98669395, "epoch": 20.297632997422077, "grad_norm": 0.9626617431640625, "learning_rate": 6.916858359094152e-06, "loss": 0.05070717, "memory(GiB)": 13.7, "step": 43305, "train_speed(iter/s)": 1.529704 }, { "acc": 0.97942543, "epoch": 20.299976564330912, "grad_norm": 2.5458827018737793, "learning_rate": 6.916142427862533e-06, "loss": 0.0978525, "memory(GiB)": 13.7, "step": 43310, "train_speed(iter/s)": 1.529703 }, { "acc": 0.98359203, "epoch": 20.302320131239746, "grad_norm": 2.7354767322540283, "learning_rate": 6.915426450585164e-06, "loss": 0.06470415, "memory(GiB)": 13.7, "step": 43315, "train_speed(iter/s)": 1.529703 }, { "acc": 0.99000006, "epoch": 20.30466369814858, "grad_norm": 0.6862295866012573, "learning_rate": 6.914710427279254e-06, "loss": 0.0560012, "memory(GiB)": 13.7, "step": 43320, "train_speed(iter/s)": 1.529701 }, { "acc": 0.97925606, "epoch": 20.30700726505742, "grad_norm": 2.5543384552001953, "learning_rate": 6.913994357962016e-06, "loss": 0.04812191, "memory(GiB)": 13.7, "step": 43325, "train_speed(iter/s)": 1.5297 }, { "acc": 0.9833333, "epoch": 20.309350831966253, "grad_norm": 4.411856651306152, "learning_rate": 6.913278242650658e-06, "loss": 0.0470949, "memory(GiB)": 13.7, "step": 43330, "train_speed(iter/s)": 1.529707 }, { "acc": 0.98416672, "epoch": 20.311694398875087, "grad_norm": 1.8751589059829712, "learning_rate": 6.912562081362394e-06, "loss": 0.04901171, "memory(GiB)": 13.7, "step": 43335, "train_speed(iter/s)": 1.529705 }, { "acc": 0.97488098, "epoch": 20.314037965783925, "grad_norm": 3.4389193058013916, "learning_rate": 6.91184587411444e-06, "loss": 0.07991144, "memory(GiB)": 13.7, "step": 43340, "train_speed(iter/s)": 1.529706 }, { "acc": 0.98290653, "epoch": 20.31638153269276, "grad_norm": 3.024152994155884, "learning_rate": 6.911129620924012e-06, "loss": 0.0552735, "memory(GiB)": 13.7, "step": 43345, "train_speed(iter/s)": 1.529713 }, { "acc": 0.98696432, "epoch": 20.318725099601593, "grad_norm": 3.3626797199249268, "learning_rate": 6.910413321808325e-06, "loss": 0.02929048, "memory(GiB)": 13.7, "step": 43350, "train_speed(iter/s)": 1.529721 }, { "acc": 0.98156252, "epoch": 20.321068666510428, "grad_norm": 7.941350936889648, "learning_rate": 6.909696976784595e-06, "loss": 0.05807076, "memory(GiB)": 13.7, "step": 43355, "train_speed(iter/s)": 1.529728 }, { "acc": 0.98213539, "epoch": 20.323412233419266, "grad_norm": 6.38007926940918, "learning_rate": 6.908980585870041e-06, "loss": 0.08498687, "memory(GiB)": 13.7, "step": 43360, "train_speed(iter/s)": 1.529732 }, { "acc": 0.96645298, "epoch": 20.3257558003281, "grad_norm": 14.115815162658691, "learning_rate": 6.908264149081883e-06, "loss": 0.10175678, "memory(GiB)": 13.7, "step": 43365, "train_speed(iter/s)": 1.529738 }, { "acc": 0.98549137, "epoch": 20.328099367236934, "grad_norm": 3.3178164958953857, "learning_rate": 6.907547666437345e-06, "loss": 0.04774139, "memory(GiB)": 13.7, "step": 43370, "train_speed(iter/s)": 1.529739 }, { "acc": 0.97889957, "epoch": 20.33044293414577, "grad_norm": 2.455442190170288, "learning_rate": 6.906831137953646e-06, "loss": 0.0564218, "memory(GiB)": 13.7, "step": 43375, "train_speed(iter/s)": 1.529738 }, { "acc": 0.97624998, "epoch": 20.332786501054606, "grad_norm": 1.507760763168335, "learning_rate": 6.906114563648007e-06, "loss": 0.08115416, "memory(GiB)": 13.7, "step": 43380, "train_speed(iter/s)": 1.529739 }, { "acc": 0.98673611, "epoch": 20.33513006796344, "grad_norm": 3.4063782691955566, "learning_rate": 6.9053979435376565e-06, "loss": 0.03943128, "memory(GiB)": 13.7, "step": 43385, "train_speed(iter/s)": 1.529741 }, { "acc": 0.9802083, "epoch": 20.337473634872275, "grad_norm": 4.661222457885742, "learning_rate": 6.904681277639816e-06, "loss": 0.04270876, "memory(GiB)": 13.7, "step": 43390, "train_speed(iter/s)": 1.52975 }, { "acc": 0.98812504, "epoch": 20.33981720178111, "grad_norm": 0.23141300678253174, "learning_rate": 6.903964565971713e-06, "loss": 0.02918311, "memory(GiB)": 13.7, "step": 43395, "train_speed(iter/s)": 1.529754 }, { "acc": 0.97145834, "epoch": 20.342160768689947, "grad_norm": 5.308662414550781, "learning_rate": 6.903247808550578e-06, "loss": 0.08841689, "memory(GiB)": 13.7, "step": 43400, "train_speed(iter/s)": 1.529753 }, { "acc": 0.9739584, "epoch": 20.34450433559878, "grad_norm": 5.8683552742004395, "learning_rate": 6.902531005393634e-06, "loss": 0.08445164, "memory(GiB)": 13.7, "step": 43405, "train_speed(iter/s)": 1.529756 }, { "acc": 0.97113972, "epoch": 20.346847902507616, "grad_norm": 4.103222370147705, "learning_rate": 6.9018141565181155e-06, "loss": 0.09773127, "memory(GiB)": 13.7, "step": 43410, "train_speed(iter/s)": 1.529768 }, { "acc": 0.98743057, "epoch": 20.349191469416454, "grad_norm": 3.6479263305664062, "learning_rate": 6.90109726194125e-06, "loss": 0.0563224, "memory(GiB)": 13.7, "step": 43415, "train_speed(iter/s)": 1.529773 }, { "acc": 0.9875, "epoch": 20.351535036325288, "grad_norm": 2.9136946201324463, "learning_rate": 6.900380321680269e-06, "loss": 0.05461257, "memory(GiB)": 13.7, "step": 43420, "train_speed(iter/s)": 1.529771 }, { "acc": 0.97904758, "epoch": 20.353878603234122, "grad_norm": 7.2253618240356445, "learning_rate": 6.899663335752406e-06, "loss": 0.12727197, "memory(GiB)": 13.7, "step": 43425, "train_speed(iter/s)": 1.529776 }, { "acc": 0.97301598, "epoch": 20.356222170142956, "grad_norm": 4.208083152770996, "learning_rate": 6.898946304174898e-06, "loss": 0.13245033, "memory(GiB)": 13.7, "step": 43430, "train_speed(iter/s)": 1.529786 }, { "acc": 0.98448944, "epoch": 20.358565737051794, "grad_norm": 0.9559351801872253, "learning_rate": 6.8982292269649766e-06, "loss": 0.06201783, "memory(GiB)": 13.7, "step": 43435, "train_speed(iter/s)": 1.529784 }, { "acc": 0.98696423, "epoch": 20.36090930396063, "grad_norm": 3.3144991397857666, "learning_rate": 6.897512104139881e-06, "loss": 0.05759406, "memory(GiB)": 13.7, "step": 43440, "train_speed(iter/s)": 1.52978 }, { "acc": 0.96989574, "epoch": 20.363252870869463, "grad_norm": 1.6808956861495972, "learning_rate": 6.896794935716843e-06, "loss": 0.10940032, "memory(GiB)": 13.7, "step": 43445, "train_speed(iter/s)": 1.529784 }, { "acc": 0.98329859, "epoch": 20.365596437778297, "grad_norm": 4.849167346954346, "learning_rate": 6.8960777217131085e-06, "loss": 0.05754597, "memory(GiB)": 13.7, "step": 43450, "train_speed(iter/s)": 1.529789 }, { "acc": 0.98604164, "epoch": 20.367940004687135, "grad_norm": 3.629873752593994, "learning_rate": 6.89536046214591e-06, "loss": 0.08556355, "memory(GiB)": 13.7, "step": 43455, "train_speed(iter/s)": 1.529808 }, { "acc": 0.97370033, "epoch": 20.37028357159597, "grad_norm": 5.397490978240967, "learning_rate": 6.894643157032492e-06, "loss": 0.11795013, "memory(GiB)": 13.7, "step": 43460, "train_speed(iter/s)": 1.529817 }, { "acc": 0.97354164, "epoch": 20.372627138504804, "grad_norm": 13.57021427154541, "learning_rate": 6.893925806390096e-06, "loss": 0.08018591, "memory(GiB)": 13.7, "step": 43465, "train_speed(iter/s)": 1.529821 }, { "acc": 0.9729167, "epoch": 20.374970705413638, "grad_norm": 2.2967300415039062, "learning_rate": 6.893208410235963e-06, "loss": 0.05459678, "memory(GiB)": 13.7, "step": 43470, "train_speed(iter/s)": 1.529825 }, { "acc": 0.98604164, "epoch": 20.377314272322476, "grad_norm": 2.5340261459350586, "learning_rate": 6.8924909685873385e-06, "loss": 0.05662187, "memory(GiB)": 13.7, "step": 43475, "train_speed(iter/s)": 1.529836 }, { "acc": 0.98394346, "epoch": 20.37965783923131, "grad_norm": 0.058589860796928406, "learning_rate": 6.891773481461467e-06, "loss": 0.06171498, "memory(GiB)": 13.7, "step": 43480, "train_speed(iter/s)": 1.529843 }, { "acc": 0.98604164, "epoch": 20.382001406140144, "grad_norm": 3.2823147773742676, "learning_rate": 6.891055948875594e-06, "loss": 0.05244086, "memory(GiB)": 13.7, "step": 43485, "train_speed(iter/s)": 1.529857 }, { "acc": 0.9694643, "epoch": 20.384344973048982, "grad_norm": 32.620174407958984, "learning_rate": 6.890338370846965e-06, "loss": 0.07683939, "memory(GiB)": 13.7, "step": 43490, "train_speed(iter/s)": 1.529858 }, { "acc": 0.98611116, "epoch": 20.386688539957817, "grad_norm": 2.3422799110412598, "learning_rate": 6.889620747392834e-06, "loss": 0.06419019, "memory(GiB)": 13.7, "step": 43495, "train_speed(iter/s)": 1.529864 }, { "acc": 0.98000002, "epoch": 20.38903210686665, "grad_norm": 2.019549608230591, "learning_rate": 6.888903078530443e-06, "loss": 0.06234384, "memory(GiB)": 13.7, "step": 43500, "train_speed(iter/s)": 1.529871 }, { "acc": 0.99229164, "epoch": 20.391375673775485, "grad_norm": 1.9546977281570435, "learning_rate": 6.888185364277048e-06, "loss": 0.01666037, "memory(GiB)": 13.7, "step": 43505, "train_speed(iter/s)": 1.529872 }, { "acc": 0.98447914, "epoch": 20.393719240684323, "grad_norm": 3.752774238586426, "learning_rate": 6.8874676046498965e-06, "loss": 0.04039536, "memory(GiB)": 13.7, "step": 43510, "train_speed(iter/s)": 1.529875 }, { "acc": 0.94697771, "epoch": 20.396062807593157, "grad_norm": 7.340468406677246, "learning_rate": 6.886749799666246e-06, "loss": 0.23300509, "memory(GiB)": 13.7, "step": 43515, "train_speed(iter/s)": 1.529886 }, { "acc": 0.98290224, "epoch": 20.39840637450199, "grad_norm": 16.3924617767334, "learning_rate": 6.8860319493433435e-06, "loss": 0.13014396, "memory(GiB)": 13.7, "step": 43520, "train_speed(iter/s)": 1.529887 }, { "acc": 0.98387623, "epoch": 20.400749941410826, "grad_norm": 2.2389116287231445, "learning_rate": 6.88531405369845e-06, "loss": 0.04430646, "memory(GiB)": 13.7, "step": 43525, "train_speed(iter/s)": 1.529899 }, { "acc": 0.98187504, "epoch": 20.403093508319664, "grad_norm": 1.234872817993164, "learning_rate": 6.884596112748819e-06, "loss": 0.08904618, "memory(GiB)": 13.7, "step": 43530, "train_speed(iter/s)": 1.529903 }, { "acc": 0.97268028, "epoch": 20.405437075228498, "grad_norm": 11.800884246826172, "learning_rate": 6.883878126511705e-06, "loss": 0.17137588, "memory(GiB)": 13.7, "step": 43535, "train_speed(iter/s)": 1.529912 }, { "acc": 0.9812912, "epoch": 20.407780642137332, "grad_norm": 3.4445154666900635, "learning_rate": 6.88316009500437e-06, "loss": 0.0679419, "memory(GiB)": 13.7, "step": 43540, "train_speed(iter/s)": 1.529912 }, { "acc": 0.9895833, "epoch": 20.410124209046167, "grad_norm": 2.685162305831909, "learning_rate": 6.88244201824407e-06, "loss": 0.10229853, "memory(GiB)": 13.7, "step": 43545, "train_speed(iter/s)": 1.52992 }, { "acc": 0.9926136, "epoch": 20.412467775955005, "grad_norm": 0.733833909034729, "learning_rate": 6.881723896248067e-06, "loss": 0.06547046, "memory(GiB)": 13.7, "step": 43550, "train_speed(iter/s)": 1.529929 }, { "acc": 0.98883934, "epoch": 20.41481134286384, "grad_norm": 1.494425892829895, "learning_rate": 6.881005729033623e-06, "loss": 0.04588365, "memory(GiB)": 13.7, "step": 43555, "train_speed(iter/s)": 1.52994 }, { "acc": 0.97430553, "epoch": 20.417154909772673, "grad_norm": 4.091037750244141, "learning_rate": 6.880287516617997e-06, "loss": 0.11661806, "memory(GiB)": 13.7, "step": 43560, "train_speed(iter/s)": 1.529947 }, { "acc": 0.9958333, "epoch": 20.419498476681508, "grad_norm": 1.1158219575881958, "learning_rate": 6.879569259018456e-06, "loss": 0.01487607, "memory(GiB)": 13.7, "step": 43565, "train_speed(iter/s)": 1.529956 }, { "acc": 0.9875, "epoch": 20.421842043590345, "grad_norm": 0.004694979637861252, "learning_rate": 6.878850956252264e-06, "loss": 0.04870084, "memory(GiB)": 13.7, "step": 43570, "train_speed(iter/s)": 1.529959 }, { "acc": 0.95915184, "epoch": 20.42418561049918, "grad_norm": 6.343145847320557, "learning_rate": 6.878132608336685e-06, "loss": 0.10272779, "memory(GiB)": 13.7, "step": 43575, "train_speed(iter/s)": 1.529972 }, { "acc": 0.97299824, "epoch": 20.426529177408014, "grad_norm": 7.087283611297607, "learning_rate": 6.8774142152889865e-06, "loss": 0.08425038, "memory(GiB)": 13.7, "step": 43580, "train_speed(iter/s)": 1.529978 }, { "acc": 0.99455357, "epoch": 20.428872744316852, "grad_norm": 0.6283373236656189, "learning_rate": 6.876695777126437e-06, "loss": 0.02883875, "memory(GiB)": 13.7, "step": 43585, "train_speed(iter/s)": 1.529978 }, { "acc": 0.98113098, "epoch": 20.431216311225686, "grad_norm": 4.337409496307373, "learning_rate": 6.875977293866304e-06, "loss": 0.07068046, "memory(GiB)": 13.7, "step": 43590, "train_speed(iter/s)": 1.529982 }, { "acc": 0.96782198, "epoch": 20.43355987813452, "grad_norm": 2.5809593200683594, "learning_rate": 6.875258765525859e-06, "loss": 0.1081666, "memory(GiB)": 13.7, "step": 43595, "train_speed(iter/s)": 1.529994 }, { "acc": 0.98947306, "epoch": 20.435903445043355, "grad_norm": 1.9806193113327026, "learning_rate": 6.874540192122372e-06, "loss": 0.09313995, "memory(GiB)": 13.7, "step": 43600, "train_speed(iter/s)": 1.529995 }, { "acc": 0.98319445, "epoch": 20.438247011952193, "grad_norm": 7.9237213134765625, "learning_rate": 6.8738215736731154e-06, "loss": 0.05158808, "memory(GiB)": 13.7, "step": 43605, "train_speed(iter/s)": 1.529993 }, { "acc": 0.96875, "epoch": 20.440590578861027, "grad_norm": 8.233589172363281, "learning_rate": 6.8731029101953614e-06, "loss": 0.10692503, "memory(GiB)": 13.7, "step": 43610, "train_speed(iter/s)": 1.53 }, { "acc": 0.9833334, "epoch": 20.44293414576986, "grad_norm": 4.961245059967041, "learning_rate": 6.872384201706387e-06, "loss": 0.05582799, "memory(GiB)": 13.7, "step": 43615, "train_speed(iter/s)": 1.529998 }, { "acc": 0.99541664, "epoch": 20.445277712678696, "grad_norm": 2.639037609100342, "learning_rate": 6.871665448223466e-06, "loss": 0.06333228, "memory(GiB)": 13.7, "step": 43620, "train_speed(iter/s)": 1.53 }, { "acc": 0.99145832, "epoch": 20.447621279587533, "grad_norm": 1.1906312704086304, "learning_rate": 6.870946649763875e-06, "loss": 0.04112551, "memory(GiB)": 13.7, "step": 43625, "train_speed(iter/s)": 1.529999 }, { "acc": 0.9770834, "epoch": 20.449964846496368, "grad_norm": 0.4878361225128174, "learning_rate": 6.87022780634489e-06, "loss": 0.067272, "memory(GiB)": 13.7, "step": 43630, "train_speed(iter/s)": 1.530009 }, { "acc": 0.98448868, "epoch": 20.452308413405202, "grad_norm": 1.7360488176345825, "learning_rate": 6.869508917983792e-06, "loss": 0.04838994, "memory(GiB)": 13.7, "step": 43635, "train_speed(iter/s)": 1.530012 }, { "acc": 0.98520832, "epoch": 20.454651980314036, "grad_norm": 3.050046920776367, "learning_rate": 6.86878998469786e-06, "loss": 0.07252973, "memory(GiB)": 13.7, "step": 43640, "train_speed(iter/s)": 1.530018 }, { "acc": 0.98304882, "epoch": 20.456995547222874, "grad_norm": 1.673420786857605, "learning_rate": 6.868071006504375e-06, "loss": 0.05994366, "memory(GiB)": 13.7, "step": 43645, "train_speed(iter/s)": 1.530024 }, { "acc": 0.98130951, "epoch": 20.45933911413171, "grad_norm": 4.3749589920043945, "learning_rate": 6.867351983420616e-06, "loss": 0.07553565, "memory(GiB)": 13.7, "step": 43650, "train_speed(iter/s)": 1.530032 }, { "acc": 0.98028278, "epoch": 20.461682681040543, "grad_norm": 3.4385926723480225, "learning_rate": 6.866632915463871e-06, "loss": 0.07668399, "memory(GiB)": 13.7, "step": 43655, "train_speed(iter/s)": 1.530045 }, { "acc": 0.98490524, "epoch": 20.46402624794938, "grad_norm": 2.262721300125122, "learning_rate": 6.865913802651421e-06, "loss": 0.05461974, "memory(GiB)": 13.7, "step": 43660, "train_speed(iter/s)": 1.530041 }, { "acc": 0.97868738, "epoch": 20.466369814858215, "grad_norm": 5.231484889984131, "learning_rate": 6.8651946450005505e-06, "loss": 0.08583748, "memory(GiB)": 13.7, "step": 43665, "train_speed(iter/s)": 1.530053 }, { "acc": 0.98182545, "epoch": 20.46871338176705, "grad_norm": 4.0707855224609375, "learning_rate": 6.864475442528548e-06, "loss": 0.06690367, "memory(GiB)": 13.7, "step": 43670, "train_speed(iter/s)": 1.530057 }, { "acc": 0.98062496, "epoch": 20.471056948675884, "grad_norm": 5.14019250869751, "learning_rate": 6.863756195252698e-06, "loss": 0.09399484, "memory(GiB)": 13.7, "step": 43675, "train_speed(iter/s)": 1.530068 }, { "acc": 0.98218136, "epoch": 20.47340051558472, "grad_norm": 4.588395595550537, "learning_rate": 6.8630369031902935e-06, "loss": 0.07081218, "memory(GiB)": 13.7, "step": 43680, "train_speed(iter/s)": 1.530079 }, { "acc": 0.96343746, "epoch": 20.475744082493556, "grad_norm": 7.219043731689453, "learning_rate": 6.862317566358616e-06, "loss": 0.07039031, "memory(GiB)": 13.7, "step": 43685, "train_speed(iter/s)": 1.530085 }, { "acc": 0.98307285, "epoch": 20.47808764940239, "grad_norm": 4.365167617797852, "learning_rate": 6.861598184774964e-06, "loss": 0.05582197, "memory(GiB)": 13.7, "step": 43690, "train_speed(iter/s)": 1.530097 }, { "acc": 0.97011671, "epoch": 20.480431216311224, "grad_norm": 3.8872385025024414, "learning_rate": 6.860878758456624e-06, "loss": 0.09155362, "memory(GiB)": 13.7, "step": 43695, "train_speed(iter/s)": 1.530102 }, { "acc": 0.98197384, "epoch": 20.482774783220062, "grad_norm": 1.0485246181488037, "learning_rate": 6.860159287420893e-06, "loss": 0.12490567, "memory(GiB)": 13.7, "step": 43700, "train_speed(iter/s)": 1.530119 }, { "acc": 0.97967415, "epoch": 20.485118350128896, "grad_norm": 5.339112758636475, "learning_rate": 6.859439771685059e-06, "loss": 0.09373255, "memory(GiB)": 13.7, "step": 43705, "train_speed(iter/s)": 1.530133 }, { "acc": 0.97716026, "epoch": 20.48746191703773, "grad_norm": 1.6895928382873535, "learning_rate": 6.858720211266423e-06, "loss": 0.09741701, "memory(GiB)": 13.7, "step": 43710, "train_speed(iter/s)": 1.530141 }, { "acc": 0.98362179, "epoch": 20.489805483946565, "grad_norm": 3.512761354446411, "learning_rate": 6.8580006061822765e-06, "loss": 0.06236425, "memory(GiB)": 13.7, "step": 43715, "train_speed(iter/s)": 1.53015 }, { "acc": 0.97885418, "epoch": 20.492149050855403, "grad_norm": 1.2394192218780518, "learning_rate": 6.857280956449918e-06, "loss": 0.08203412, "memory(GiB)": 13.7, "step": 43720, "train_speed(iter/s)": 1.530154 }, { "acc": 0.98093758, "epoch": 20.494492617764237, "grad_norm": 1.4215227365493774, "learning_rate": 6.856561262086644e-06, "loss": 0.05520902, "memory(GiB)": 13.7, "step": 43725, "train_speed(iter/s)": 1.530163 }, { "acc": 0.97583332, "epoch": 20.49683618467307, "grad_norm": 1.415086269378662, "learning_rate": 6.8558415231097564e-06, "loss": 0.07377958, "memory(GiB)": 13.7, "step": 43730, "train_speed(iter/s)": 1.530164 }, { "acc": 0.99051476, "epoch": 20.499179751581906, "grad_norm": 2.690321445465088, "learning_rate": 6.855121739536553e-06, "loss": 0.04166237, "memory(GiB)": 13.7, "step": 43735, "train_speed(iter/s)": 1.530172 }, { "acc": 0.97666664, "epoch": 20.501523318490744, "grad_norm": 3.6147966384887695, "learning_rate": 6.854401911384336e-06, "loss": 0.07625437, "memory(GiB)": 13.7, "step": 43740, "train_speed(iter/s)": 1.530173 }, { "acc": 0.97817535, "epoch": 20.503866885399578, "grad_norm": 3.4962692260742188, "learning_rate": 6.853682038670411e-06, "loss": 0.11580282, "memory(GiB)": 13.7, "step": 43745, "train_speed(iter/s)": 1.530177 }, { "acc": 0.97145834, "epoch": 20.506210452308412, "grad_norm": 5.097014427185059, "learning_rate": 6.8529621214120735e-06, "loss": 0.09542924, "memory(GiB)": 13.7, "step": 43750, "train_speed(iter/s)": 1.530174 }, { "acc": 0.97843142, "epoch": 20.50855401921725, "grad_norm": 4.663022041320801, "learning_rate": 6.852242159626633e-06, "loss": 0.07564652, "memory(GiB)": 13.7, "step": 43755, "train_speed(iter/s)": 1.53019 }, { "acc": 0.97268314, "epoch": 20.510897586126084, "grad_norm": 5.429072856903076, "learning_rate": 6.851522153331395e-06, "loss": 0.06725763, "memory(GiB)": 13.7, "step": 43760, "train_speed(iter/s)": 1.530194 }, { "acc": 0.97208328, "epoch": 20.51324115303492, "grad_norm": 7.553167343139648, "learning_rate": 6.850802102543666e-06, "loss": 0.10423926, "memory(GiB)": 13.7, "step": 43765, "train_speed(iter/s)": 1.530207 }, { "acc": 0.98705359, "epoch": 20.515584719943753, "grad_norm": 2.4315905570983887, "learning_rate": 6.850082007280754e-06, "loss": 0.06962198, "memory(GiB)": 13.7, "step": 43770, "train_speed(iter/s)": 1.530207 }, { "acc": 0.98974209, "epoch": 20.51792828685259, "grad_norm": 3.4396018981933594, "learning_rate": 6.849361867559965e-06, "loss": 0.06007358, "memory(GiB)": 13.7, "step": 43775, "train_speed(iter/s)": 1.530224 }, { "acc": 0.9875, "epoch": 20.520271853761425, "grad_norm": 0.05321834608912468, "learning_rate": 6.848641683398612e-06, "loss": 0.03203297, "memory(GiB)": 13.7, "step": 43780, "train_speed(iter/s)": 1.530226 }, { "acc": 0.97371531, "epoch": 20.52261542067026, "grad_norm": 3.1878561973571777, "learning_rate": 6.847921454814004e-06, "loss": 0.13923241, "memory(GiB)": 13.7, "step": 43785, "train_speed(iter/s)": 1.530231 }, { "acc": 0.98988094, "epoch": 20.524958987579094, "grad_norm": 2.029202699661255, "learning_rate": 6.847201181823453e-06, "loss": 0.05733329, "memory(GiB)": 13.7, "step": 43790, "train_speed(iter/s)": 1.53024 }, { "acc": 0.97830353, "epoch": 20.52730255448793, "grad_norm": 3.1642258167266846, "learning_rate": 6.846480864444273e-06, "loss": 0.09196686, "memory(GiB)": 13.7, "step": 43795, "train_speed(iter/s)": 1.530245 }, { "acc": 0.96422615, "epoch": 20.529646121396766, "grad_norm": 0.011046177707612514, "learning_rate": 6.845760502693778e-06, "loss": 0.15400674, "memory(GiB)": 13.7, "step": 43800, "train_speed(iter/s)": 1.530253 }, { "acc": 0.97726107, "epoch": 20.5319896883056, "grad_norm": 1.7637006044387817, "learning_rate": 6.845040096589282e-06, "loss": 0.12263665, "memory(GiB)": 13.7, "step": 43805, "train_speed(iter/s)": 1.530255 }, { "acc": 0.97648811, "epoch": 20.534333255214435, "grad_norm": 5.220433235168457, "learning_rate": 6.844319646148102e-06, "loss": 0.05032796, "memory(GiB)": 13.7, "step": 43810, "train_speed(iter/s)": 1.530261 }, { "acc": 0.9916666, "epoch": 20.536676822123273, "grad_norm": 3.2181248664855957, "learning_rate": 6.843599151387556e-06, "loss": 0.0446036, "memory(GiB)": 13.7, "step": 43815, "train_speed(iter/s)": 1.530265 }, { "acc": 0.97666664, "epoch": 20.539020389032107, "grad_norm": 0.6806750893592834, "learning_rate": 6.842878612324959e-06, "loss": 0.0977455, "memory(GiB)": 13.7, "step": 43820, "train_speed(iter/s)": 1.53027 }, { "acc": 0.97854166, "epoch": 20.54136395594094, "grad_norm": 2.9317307472229004, "learning_rate": 6.8421580289776345e-06, "loss": 0.06614208, "memory(GiB)": 13.7, "step": 43825, "train_speed(iter/s)": 1.53028 }, { "acc": 0.97488098, "epoch": 20.54370752284978, "grad_norm": 1.5535247325897217, "learning_rate": 6.841437401362903e-06, "loss": 0.08262705, "memory(GiB)": 13.7, "step": 43830, "train_speed(iter/s)": 1.53028 }, { "acc": 0.98001213, "epoch": 20.546051089758613, "grad_norm": 2.276716947555542, "learning_rate": 6.840716729498081e-06, "loss": 0.06903183, "memory(GiB)": 13.7, "step": 43835, "train_speed(iter/s)": 1.53028 }, { "acc": 0.97281246, "epoch": 20.548394656667448, "grad_norm": 27.02175521850586, "learning_rate": 6.839996013400496e-06, "loss": 0.0970139, "memory(GiB)": 13.7, "step": 43840, "train_speed(iter/s)": 1.530288 }, { "acc": 0.9739584, "epoch": 20.550738223576282, "grad_norm": 7.276187896728516, "learning_rate": 6.83927525308747e-06, "loss": 0.07254001, "memory(GiB)": 13.7, "step": 43845, "train_speed(iter/s)": 1.530305 }, { "acc": 0.97321434, "epoch": 20.55308179048512, "grad_norm": 0.0051470305770635605, "learning_rate": 6.838554448576329e-06, "loss": 0.05011979, "memory(GiB)": 13.7, "step": 43850, "train_speed(iter/s)": 1.530317 }, { "acc": 0.9833334, "epoch": 20.555425357393954, "grad_norm": 8.837397575378418, "learning_rate": 6.837833599884396e-06, "loss": 0.08970519, "memory(GiB)": 13.7, "step": 43855, "train_speed(iter/s)": 1.53032 }, { "acc": 0.98217258, "epoch": 20.55776892430279, "grad_norm": 7.982053279876709, "learning_rate": 6.837112707028999e-06, "loss": 0.06533566, "memory(GiB)": 13.7, "step": 43860, "train_speed(iter/s)": 1.530331 }, { "acc": 0.98875332, "epoch": 20.560112491211623, "grad_norm": 2.596442699432373, "learning_rate": 6.836391770027467e-06, "loss": 0.05153956, "memory(GiB)": 13.7, "step": 43865, "train_speed(iter/s)": 1.530336 }, { "acc": 0.99538689, "epoch": 20.56245605812046, "grad_norm": 1.7111872434616089, "learning_rate": 6.835670788897128e-06, "loss": 0.02901046, "memory(GiB)": 13.7, "step": 43870, "train_speed(iter/s)": 1.530337 }, { "acc": 0.9729167, "epoch": 20.564799625029295, "grad_norm": 6.401788711547852, "learning_rate": 6.834949763655313e-06, "loss": 0.0803555, "memory(GiB)": 13.7, "step": 43875, "train_speed(iter/s)": 1.530342 }, { "acc": 0.98178034, "epoch": 20.56714319193813, "grad_norm": 5.534152030944824, "learning_rate": 6.834228694319352e-06, "loss": 0.05333099, "memory(GiB)": 13.7, "step": 43880, "train_speed(iter/s)": 1.530347 }, { "acc": 0.9848959, "epoch": 20.569486758846963, "grad_norm": 5.576405048370361, "learning_rate": 6.833507580906577e-06, "loss": 0.04562373, "memory(GiB)": 13.7, "step": 43885, "train_speed(iter/s)": 1.530352 }, { "acc": 0.98278542, "epoch": 20.5718303257558, "grad_norm": 3.2477543354034424, "learning_rate": 6.832786423434321e-06, "loss": 0.08708522, "memory(GiB)": 13.7, "step": 43890, "train_speed(iter/s)": 1.530363 }, { "acc": 0.98770828, "epoch": 20.574173892664636, "grad_norm": 4.42844820022583, "learning_rate": 6.83206522191992e-06, "loss": 0.06044736, "memory(GiB)": 13.7, "step": 43895, "train_speed(iter/s)": 1.530371 }, { "acc": 0.97833328, "epoch": 20.57651745957347, "grad_norm": 0.2967662215232849, "learning_rate": 6.831343976380708e-06, "loss": 0.05713992, "memory(GiB)": 13.7, "step": 43900, "train_speed(iter/s)": 1.530377 }, { "acc": 0.98199406, "epoch": 20.578861026482308, "grad_norm": 0.9477602243423462, "learning_rate": 6.830622686834022e-06, "loss": 0.06507831, "memory(GiB)": 13.7, "step": 43905, "train_speed(iter/s)": 1.530381 }, { "acc": 0.99851189, "epoch": 20.581204593391142, "grad_norm": 0.39832383394241333, "learning_rate": 6.829901353297198e-06, "loss": 0.02297971, "memory(GiB)": 13.7, "step": 43910, "train_speed(iter/s)": 1.530386 }, { "acc": 0.98979168, "epoch": 20.583548160299976, "grad_norm": 2.508782386779785, "learning_rate": 6.829179975787577e-06, "loss": 0.03279552, "memory(GiB)": 13.7, "step": 43915, "train_speed(iter/s)": 1.530398 }, { "acc": 0.98973217, "epoch": 20.58589172720881, "grad_norm": 2.1251308917999268, "learning_rate": 6.828458554322497e-06, "loss": 0.05525097, "memory(GiB)": 13.7, "step": 43920, "train_speed(iter/s)": 1.530401 }, { "acc": 0.98447914, "epoch": 20.58823529411765, "grad_norm": 2.966897964477539, "learning_rate": 6.8277370889192964e-06, "loss": 0.05698634, "memory(GiB)": 13.7, "step": 43925, "train_speed(iter/s)": 1.530408 }, { "acc": 0.97716351, "epoch": 20.590578861026483, "grad_norm": 18.48028564453125, "learning_rate": 6.827015579595321e-06, "loss": 0.0933649, "memory(GiB)": 13.7, "step": 43930, "train_speed(iter/s)": 1.530412 }, { "acc": 0.9890625, "epoch": 20.592922427935317, "grad_norm": 4.545013427734375, "learning_rate": 6.8262940263679125e-06, "loss": 0.06028572, "memory(GiB)": 13.7, "step": 43935, "train_speed(iter/s)": 1.530408 }, { "acc": 0.98798609, "epoch": 20.59526599484415, "grad_norm": 0.49135157465934753, "learning_rate": 6.8255724292544125e-06, "loss": 0.09160628, "memory(GiB)": 13.7, "step": 43940, "train_speed(iter/s)": 1.53041 }, { "acc": 0.97364588, "epoch": 20.59760956175299, "grad_norm": 4.624828815460205, "learning_rate": 6.824850788272165e-06, "loss": 0.16619471, "memory(GiB)": 13.7, "step": 43945, "train_speed(iter/s)": 1.530411 }, { "acc": 0.99094696, "epoch": 20.599953128661824, "grad_norm": 2.173421859741211, "learning_rate": 6.824129103438521e-06, "loss": 0.04418758, "memory(GiB)": 13.7, "step": 43950, "train_speed(iter/s)": 1.530416 }, { "acc": 0.98062496, "epoch": 20.602296695570658, "grad_norm": 0.10708870738744736, "learning_rate": 6.823407374770823e-06, "loss": 0.08107315, "memory(GiB)": 13.7, "step": 43955, "train_speed(iter/s)": 1.530418 }, { "acc": 0.9791666, "epoch": 20.604640262479492, "grad_norm": 5.454785346984863, "learning_rate": 6.822685602286422e-06, "loss": 0.08143636, "memory(GiB)": 13.7, "step": 43960, "train_speed(iter/s)": 1.530423 }, { "acc": 0.9854167, "epoch": 20.60698382938833, "grad_norm": 5.666150093078613, "learning_rate": 6.8219637860026646e-06, "loss": 0.04061487, "memory(GiB)": 13.7, "step": 43965, "train_speed(iter/s)": 1.530424 }, { "acc": 0.96316853, "epoch": 20.609327396297164, "grad_norm": 11.759925842285156, "learning_rate": 6.821241925936901e-06, "loss": 0.16566519, "memory(GiB)": 13.7, "step": 43970, "train_speed(iter/s)": 1.530431 }, { "acc": 0.9856945, "epoch": 20.611670963206, "grad_norm": 1.794484257698059, "learning_rate": 6.820520022106484e-06, "loss": 0.07648602, "memory(GiB)": 13.7, "step": 43975, "train_speed(iter/s)": 1.530442 }, { "acc": 0.9875, "epoch": 20.614014530114837, "grad_norm": 0.025789109990000725, "learning_rate": 6.819798074528764e-06, "loss": 0.02906397, "memory(GiB)": 13.7, "step": 43980, "train_speed(iter/s)": 1.530446 }, { "acc": 0.99437504, "epoch": 20.61635809702367, "grad_norm": 4.089196681976318, "learning_rate": 6.819076083221097e-06, "loss": 0.03747811, "memory(GiB)": 13.7, "step": 43985, "train_speed(iter/s)": 1.530451 }, { "acc": 0.97335529, "epoch": 20.618701663932505, "grad_norm": 3.9812464714050293, "learning_rate": 6.818354048200832e-06, "loss": 0.08941962, "memory(GiB)": 13.7, "step": 43990, "train_speed(iter/s)": 1.530457 }, { "acc": 0.96382122, "epoch": 20.62104523084134, "grad_norm": 6.477226734161377, "learning_rate": 6.817631969485332e-06, "loss": 0.18551056, "memory(GiB)": 13.7, "step": 43995, "train_speed(iter/s)": 1.53046 }, { "acc": 0.97302084, "epoch": 20.623388797750177, "grad_norm": 12.317439079284668, "learning_rate": 6.816909847091947e-06, "loss": 0.10475013, "memory(GiB)": 13.7, "step": 44000, "train_speed(iter/s)": 1.530468 }, { "acc": 0.97593746, "epoch": 20.62573236465901, "grad_norm": 2.19035005569458, "learning_rate": 6.816187681038037e-06, "loss": 0.08376532, "memory(GiB)": 13.7, "step": 44005, "train_speed(iter/s)": 1.530481 }, { "acc": 0.97563229, "epoch": 20.628075931567846, "grad_norm": 4.124939441680908, "learning_rate": 6.815465471340959e-06, "loss": 0.08136144, "memory(GiB)": 13.7, "step": 44010, "train_speed(iter/s)": 1.530481 }, { "acc": 0.98231068, "epoch": 20.63041949847668, "grad_norm": 0.15242838859558105, "learning_rate": 6.8147432180180766e-06, "loss": 0.04177653, "memory(GiB)": 13.7, "step": 44015, "train_speed(iter/s)": 1.53048 }, { "acc": 0.96993732, "epoch": 20.632763065385518, "grad_norm": 6.953550815582275, "learning_rate": 6.814020921086743e-06, "loss": 0.10528152, "memory(GiB)": 13.7, "step": 44020, "train_speed(iter/s)": 1.530496 }, { "acc": 0.98016052, "epoch": 20.635106632294352, "grad_norm": 13.33580207824707, "learning_rate": 6.813298580564328e-06, "loss": 0.10023159, "memory(GiB)": 13.7, "step": 44025, "train_speed(iter/s)": 1.530495 }, { "acc": 0.99333324, "epoch": 20.637450199203187, "grad_norm": 0.24026788771152496, "learning_rate": 6.81257619646819e-06, "loss": 0.03235208, "memory(GiB)": 13.7, "step": 44030, "train_speed(iter/s)": 1.530504 }, { "acc": 0.97881947, "epoch": 20.63979376611202, "grad_norm": 3.8140721321105957, "learning_rate": 6.811853768815693e-06, "loss": 0.06099561, "memory(GiB)": 13.7, "step": 44035, "train_speed(iter/s)": 1.530501 }, { "acc": 0.98812752, "epoch": 20.64213733302086, "grad_norm": 3.979434013366699, "learning_rate": 6.811131297624202e-06, "loss": 0.09143854, "memory(GiB)": 13.7, "step": 44040, "train_speed(iter/s)": 1.530502 }, { "acc": 0.98703804, "epoch": 20.644480899929693, "grad_norm": 5.729756832122803, "learning_rate": 6.810408782911083e-06, "loss": 0.07412837, "memory(GiB)": 13.7, "step": 44045, "train_speed(iter/s)": 1.530509 }, { "acc": 0.97861462, "epoch": 20.646824466838527, "grad_norm": 1.7649441957473755, "learning_rate": 6.809686224693703e-06, "loss": 0.11548213, "memory(GiB)": 13.7, "step": 44050, "train_speed(iter/s)": 1.53051 }, { "acc": 0.97562494, "epoch": 20.649168033747365, "grad_norm": 2.369241952896118, "learning_rate": 6.808963622989429e-06, "loss": 0.04679227, "memory(GiB)": 13.7, "step": 44055, "train_speed(iter/s)": 1.530514 }, { "acc": 0.98770828, "epoch": 20.6515116006562, "grad_norm": 5.898197650909424, "learning_rate": 6.808240977815632e-06, "loss": 0.04006638, "memory(GiB)": 13.7, "step": 44060, "train_speed(iter/s)": 1.530515 }, { "acc": 0.98145294, "epoch": 20.653855167565034, "grad_norm": 8.012251853942871, "learning_rate": 6.8075182891896805e-06, "loss": 0.10745833, "memory(GiB)": 13.7, "step": 44065, "train_speed(iter/s)": 1.530514 }, { "acc": 0.97333336, "epoch": 20.65619873447387, "grad_norm": 16.53896141052246, "learning_rate": 6.806795557128946e-06, "loss": 0.11929508, "memory(GiB)": 13.7, "step": 44070, "train_speed(iter/s)": 1.530523 }, { "acc": 0.9916666, "epoch": 20.658542301382706, "grad_norm": 0.12520815432071686, "learning_rate": 6.8060727816508e-06, "loss": 0.05335341, "memory(GiB)": 13.7, "step": 44075, "train_speed(iter/s)": 1.53053 }, { "acc": 0.99375, "epoch": 20.66088586829154, "grad_norm": 5.139510154724121, "learning_rate": 6.805349962772616e-06, "loss": 0.058365, "memory(GiB)": 13.7, "step": 44080, "train_speed(iter/s)": 1.530531 }, { "acc": 0.98133926, "epoch": 20.663229435200375, "grad_norm": 5.92964506149292, "learning_rate": 6.8046271005117685e-06, "loss": 0.04909973, "memory(GiB)": 13.7, "step": 44085, "train_speed(iter/s)": 1.53053 }, { "acc": 0.98258076, "epoch": 20.66557300210921, "grad_norm": 2.3403146266937256, "learning_rate": 6.803904194885632e-06, "loss": 0.07979254, "memory(GiB)": 13.7, "step": 44090, "train_speed(iter/s)": 1.530538 }, { "acc": 0.95270834, "epoch": 20.667916569018047, "grad_norm": 4.941676616668701, "learning_rate": 6.8031812459115845e-06, "loss": 0.10655404, "memory(GiB)": 13.7, "step": 44095, "train_speed(iter/s)": 1.530545 }, { "acc": 0.98594704, "epoch": 20.67026013592688, "grad_norm": 4.669058799743652, "learning_rate": 6.802458253607001e-06, "loss": 0.07675788, "memory(GiB)": 13.7, "step": 44100, "train_speed(iter/s)": 1.530538 }, { "acc": 0.98067703, "epoch": 20.672603702835715, "grad_norm": 6.026564121246338, "learning_rate": 6.801735217989261e-06, "loss": 0.07300574, "memory(GiB)": 13.7, "step": 44105, "train_speed(iter/s)": 1.530546 }, { "acc": 0.98937502, "epoch": 20.67494726974455, "grad_norm": 4.034640789031982, "learning_rate": 6.801012139075744e-06, "loss": 0.03754445, "memory(GiB)": 13.7, "step": 44110, "train_speed(iter/s)": 1.530551 }, { "acc": 0.99020834, "epoch": 20.677290836653388, "grad_norm": 5.2752485275268555, "learning_rate": 6.800289016883832e-06, "loss": 0.04912605, "memory(GiB)": 13.7, "step": 44115, "train_speed(iter/s)": 1.530558 }, { "acc": 0.97508926, "epoch": 20.679634403562222, "grad_norm": 3.86514949798584, "learning_rate": 6.799565851430903e-06, "loss": 0.13768495, "memory(GiB)": 13.7, "step": 44120, "train_speed(iter/s)": 1.530566 }, { "acc": 0.97690973, "epoch": 20.681977970471056, "grad_norm": 2.6582789421081543, "learning_rate": 6.7988426427343415e-06, "loss": 0.10049481, "memory(GiB)": 13.7, "step": 44125, "train_speed(iter/s)": 1.530574 }, { "acc": 0.98592262, "epoch": 20.68432153737989, "grad_norm": 3.033806800842285, "learning_rate": 6.798119390811529e-06, "loss": 0.03955107, "memory(GiB)": 13.7, "step": 44130, "train_speed(iter/s)": 1.53057 }, { "acc": 0.97678108, "epoch": 20.68666510428873, "grad_norm": 6.023529529571533, "learning_rate": 6.797396095679857e-06, "loss": 0.07776125, "memory(GiB)": 13.7, "step": 44135, "train_speed(iter/s)": 1.530585 }, { "acc": 0.98217258, "epoch": 20.689008671197563, "grad_norm": 4.21730375289917, "learning_rate": 6.796672757356702e-06, "loss": 0.06173158, "memory(GiB)": 13.7, "step": 44140, "train_speed(iter/s)": 1.530597 }, { "acc": 0.98687496, "epoch": 20.691352238106397, "grad_norm": 4.122645854949951, "learning_rate": 6.795949375859456e-06, "loss": 0.07648534, "memory(GiB)": 13.7, "step": 44145, "train_speed(iter/s)": 1.530602 }, { "acc": 0.98255215, "epoch": 20.693695805015235, "grad_norm": 0.0027134998235851526, "learning_rate": 6.795225951205504e-06, "loss": 0.05249001, "memory(GiB)": 13.7, "step": 44150, "train_speed(iter/s)": 1.530596 }, { "acc": 0.98760414, "epoch": 20.69603937192407, "grad_norm": 1.0663180351257324, "learning_rate": 6.794502483412239e-06, "loss": 0.04102264, "memory(GiB)": 13.7, "step": 44155, "train_speed(iter/s)": 1.5306 }, { "acc": 0.98297453, "epoch": 20.698382938832903, "grad_norm": 0.5395411849021912, "learning_rate": 6.793778972497047e-06, "loss": 0.06801225, "memory(GiB)": 13.7, "step": 44160, "train_speed(iter/s)": 1.530609 }, { "acc": 0.97188454, "epoch": 20.700726505741738, "grad_norm": 5.100648880004883, "learning_rate": 6.79305541847732e-06, "loss": 0.06883166, "memory(GiB)": 13.7, "step": 44165, "train_speed(iter/s)": 1.530614 }, { "acc": 0.97979164, "epoch": 20.703070072650576, "grad_norm": 3.056452512741089, "learning_rate": 6.79233182137045e-06, "loss": 0.05313833, "memory(GiB)": 13.7, "step": 44170, "train_speed(iter/s)": 1.530614 }, { "acc": 0.99499998, "epoch": 20.70541363955941, "grad_norm": 0.9824780225753784, "learning_rate": 6.7916081811938285e-06, "loss": 0.03476456, "memory(GiB)": 13.7, "step": 44175, "train_speed(iter/s)": 1.53062 }, { "acc": 0.97458334, "epoch": 20.707757206468244, "grad_norm": 6.219205856323242, "learning_rate": 6.7908844979648515e-06, "loss": 0.08260648, "memory(GiB)": 13.7, "step": 44180, "train_speed(iter/s)": 1.530622 }, { "acc": 0.96498013, "epoch": 20.71010077337708, "grad_norm": 5.393190383911133, "learning_rate": 6.790160771700912e-06, "loss": 0.10374207, "memory(GiB)": 13.7, "step": 44185, "train_speed(iter/s)": 1.530632 }, { "acc": 0.98924103, "epoch": 20.712444340285916, "grad_norm": 2.250034809112549, "learning_rate": 6.7894370024194074e-06, "loss": 0.03158803, "memory(GiB)": 13.7, "step": 44190, "train_speed(iter/s)": 1.530642 }, { "acc": 0.99725876, "epoch": 20.71478790719475, "grad_norm": 1.7456587553024292, "learning_rate": 6.788713190137735e-06, "loss": 0.02818391, "memory(GiB)": 13.7, "step": 44195, "train_speed(iter/s)": 1.530649 }, { "acc": 0.98083334, "epoch": 20.717131474103585, "grad_norm": 0.8364801406860352, "learning_rate": 6.7879893348732915e-06, "loss": 0.12690237, "memory(GiB)": 13.7, "step": 44200, "train_speed(iter/s)": 1.530649 }, { "acc": 0.98322916, "epoch": 20.71947504101242, "grad_norm": 3.3355026245117188, "learning_rate": 6.787265436643478e-06, "loss": 0.04973031, "memory(GiB)": 13.7, "step": 44205, "train_speed(iter/s)": 1.530658 }, { "acc": 0.98288565, "epoch": 20.721818607921257, "grad_norm": 4.304259777069092, "learning_rate": 6.786541495465692e-06, "loss": 0.1026352, "memory(GiB)": 13.7, "step": 44210, "train_speed(iter/s)": 1.530647 }, { "acc": 0.97038689, "epoch": 20.72416217483009, "grad_norm": 9.860267639160156, "learning_rate": 6.785817511357337e-06, "loss": 0.10684204, "memory(GiB)": 13.7, "step": 44215, "train_speed(iter/s)": 1.530647 }, { "acc": 0.98440475, "epoch": 20.726505741738926, "grad_norm": 4.6542229652404785, "learning_rate": 6.785093484335813e-06, "loss": 0.04973046, "memory(GiB)": 13.7, "step": 44220, "train_speed(iter/s)": 1.530646 }, { "acc": 0.9757143, "epoch": 20.72884930864776, "grad_norm": 1.9643702507019043, "learning_rate": 6.784369414418526e-06, "loss": 0.08441793, "memory(GiB)": 13.7, "step": 44225, "train_speed(iter/s)": 1.530655 }, { "acc": 0.98137894, "epoch": 20.731192875556598, "grad_norm": 5.883185863494873, "learning_rate": 6.783645301622879e-06, "loss": 0.09817377, "memory(GiB)": 13.7, "step": 44230, "train_speed(iter/s)": 1.530657 }, { "acc": 0.98621111, "epoch": 20.733536442465432, "grad_norm": 1.6789131164550781, "learning_rate": 6.7829211459662755e-06, "loss": 0.0557071, "memory(GiB)": 13.7, "step": 44235, "train_speed(iter/s)": 1.530659 }, { "acc": 0.9793087, "epoch": 20.735880009374267, "grad_norm": 6.395939826965332, "learning_rate": 6.782196947466123e-06, "loss": 0.1100934, "memory(GiB)": 13.7, "step": 44240, "train_speed(iter/s)": 1.530663 }, { "acc": 0.98923616, "epoch": 20.738223576283104, "grad_norm": 2.0608959197998047, "learning_rate": 6.781472706139831e-06, "loss": 0.02220938, "memory(GiB)": 13.7, "step": 44245, "train_speed(iter/s)": 1.530664 }, { "acc": 0.97171135, "epoch": 20.74056714319194, "grad_norm": 4.040205001831055, "learning_rate": 6.780748422004806e-06, "loss": 0.1291236, "memory(GiB)": 13.7, "step": 44250, "train_speed(iter/s)": 1.530665 }, { "acc": 0.97143803, "epoch": 20.742910710100773, "grad_norm": 2.4100430011749268, "learning_rate": 6.7800240950784566e-06, "loss": 0.11829731, "memory(GiB)": 13.7, "step": 44255, "train_speed(iter/s)": 1.530664 }, { "acc": 0.99120913, "epoch": 20.745254277009607, "grad_norm": 2.215653419494629, "learning_rate": 6.779299725378194e-06, "loss": 0.03402124, "memory(GiB)": 13.7, "step": 44260, "train_speed(iter/s)": 1.530675 }, { "acc": 0.98583336, "epoch": 20.747597843918445, "grad_norm": 6.822954177856445, "learning_rate": 6.7785753129214296e-06, "loss": 0.09508907, "memory(GiB)": 13.7, "step": 44265, "train_speed(iter/s)": 1.530678 }, { "acc": 0.97636633, "epoch": 20.74994141082728, "grad_norm": 26.94424819946289, "learning_rate": 6.777850857725577e-06, "loss": 0.07939627, "memory(GiB)": 13.7, "step": 44270, "train_speed(iter/s)": 1.530681 }, { "acc": 0.96597223, "epoch": 20.752284977736114, "grad_norm": 6.9470744132995605, "learning_rate": 6.777126359808049e-06, "loss": 0.08897867, "memory(GiB)": 13.7, "step": 44275, "train_speed(iter/s)": 1.530692 }, { "acc": 0.97091351, "epoch": 20.754628544644948, "grad_norm": 0.16298207640647888, "learning_rate": 6.776401819186259e-06, "loss": 0.08679914, "memory(GiB)": 13.7, "step": 44280, "train_speed(iter/s)": 1.530685 }, { "acc": 0.98274307, "epoch": 20.756972111553786, "grad_norm": 3.7781996726989746, "learning_rate": 6.7756772358776234e-06, "loss": 0.07153726, "memory(GiB)": 13.7, "step": 44285, "train_speed(iter/s)": 1.530693 }, { "acc": 0.9864583, "epoch": 20.75931567846262, "grad_norm": 1.5431747436523438, "learning_rate": 6.77495260989956e-06, "loss": 0.04083216, "memory(GiB)": 13.7, "step": 44290, "train_speed(iter/s)": 1.530698 }, { "acc": 0.97993059, "epoch": 20.761659245371455, "grad_norm": 5.1339006423950195, "learning_rate": 6.774227941269486e-06, "loss": 0.06294951, "memory(GiB)": 13.7, "step": 44295, "train_speed(iter/s)": 1.530715 }, { "acc": 0.99330931, "epoch": 20.76400281228029, "grad_norm": 1.1429893970489502, "learning_rate": 6.773503230004818e-06, "loss": 0.0241888, "memory(GiB)": 13.7, "step": 44300, "train_speed(iter/s)": 1.53072 }, { "acc": 0.99019613, "epoch": 20.766346379189127, "grad_norm": 0.014861092902719975, "learning_rate": 6.7727784761229765e-06, "loss": 0.0428916, "memory(GiB)": 13.7, "step": 44305, "train_speed(iter/s)": 1.530722 }, { "acc": 0.9927084, "epoch": 20.76868994609796, "grad_norm": 1.4757226705551147, "learning_rate": 6.772053679641385e-06, "loss": 0.02412064, "memory(GiB)": 13.7, "step": 44310, "train_speed(iter/s)": 1.53073 }, { "acc": 0.9760417, "epoch": 20.771033513006795, "grad_norm": 2.100344657897949, "learning_rate": 6.77132884057746e-06, "loss": 0.13839586, "memory(GiB)": 13.7, "step": 44315, "train_speed(iter/s)": 1.530735 }, { "acc": 0.9864584, "epoch": 20.773377079915633, "grad_norm": 1.1799527406692505, "learning_rate": 6.7706039589486305e-06, "loss": 0.11103873, "memory(GiB)": 13.7, "step": 44320, "train_speed(iter/s)": 1.530736 }, { "acc": 0.98028765, "epoch": 20.775720646824468, "grad_norm": 2.3181660175323486, "learning_rate": 6.7698790347723155e-06, "loss": 0.1133523, "memory(GiB)": 13.7, "step": 44325, "train_speed(iter/s)": 1.530739 }, { "acc": 0.98916664, "epoch": 20.778064213733302, "grad_norm": 4.536853313446045, "learning_rate": 6.769154068065942e-06, "loss": 0.03970843, "memory(GiB)": 13.7, "step": 44330, "train_speed(iter/s)": 1.530737 }, { "acc": 0.97770834, "epoch": 20.780407780642136, "grad_norm": 4.428859233856201, "learning_rate": 6.768429058846935e-06, "loss": 0.08967526, "memory(GiB)": 13.7, "step": 44335, "train_speed(iter/s)": 1.530738 }, { "acc": 0.97976189, "epoch": 20.782751347550974, "grad_norm": 2.340676784515381, "learning_rate": 6.76770400713272e-06, "loss": 0.05598046, "memory(GiB)": 13.7, "step": 44340, "train_speed(iter/s)": 1.530747 }, { "acc": 0.98724899, "epoch": 20.78509491445981, "grad_norm": 1.63178288936615, "learning_rate": 6.7669789129407295e-06, "loss": 0.06861783, "memory(GiB)": 13.7, "step": 44345, "train_speed(iter/s)": 1.530747 }, { "acc": 0.99020834, "epoch": 20.787438481368643, "grad_norm": 0.07959303259849548, "learning_rate": 6.7662537762883876e-06, "loss": 0.03738518, "memory(GiB)": 13.7, "step": 44350, "train_speed(iter/s)": 1.530747 }, { "acc": 0.98852673, "epoch": 20.789782048277477, "grad_norm": 0.4339005649089813, "learning_rate": 6.765528597193125e-06, "loss": 0.04955882, "memory(GiB)": 13.7, "step": 44355, "train_speed(iter/s)": 1.530752 }, { "acc": 0.9672534, "epoch": 20.792125615186315, "grad_norm": 5.901800155639648, "learning_rate": 6.764803375672374e-06, "loss": 0.10985105, "memory(GiB)": 13.7, "step": 44360, "train_speed(iter/s)": 1.530761 }, { "acc": 0.98811283, "epoch": 20.79446918209515, "grad_norm": 1.562277913093567, "learning_rate": 6.764078111743566e-06, "loss": 0.06296093, "memory(GiB)": 13.7, "step": 44365, "train_speed(iter/s)": 1.53076 }, { "acc": 0.98966227, "epoch": 20.796812749003983, "grad_norm": 2.9562196731567383, "learning_rate": 6.763352805424134e-06, "loss": 0.03510438, "memory(GiB)": 13.7, "step": 44370, "train_speed(iter/s)": 1.530759 }, { "acc": 0.97895222, "epoch": 20.799156315912818, "grad_norm": 2.370203733444214, "learning_rate": 6.762627456731513e-06, "loss": 0.04615958, "memory(GiB)": 13.7, "step": 44375, "train_speed(iter/s)": 1.530766 }, { "acc": 0.98736668, "epoch": 20.801499882821656, "grad_norm": 3.860059976577759, "learning_rate": 6.761902065683137e-06, "loss": 0.04866236, "memory(GiB)": 13.7, "step": 44380, "train_speed(iter/s)": 1.530778 }, { "acc": 0.98618422, "epoch": 20.80384344973049, "grad_norm": 5.371545791625977, "learning_rate": 6.76117663229644e-06, "loss": 0.04154693, "memory(GiB)": 13.7, "step": 44385, "train_speed(iter/s)": 1.530784 }, { "acc": 0.98458872, "epoch": 20.806187016639324, "grad_norm": 2.16475510597229, "learning_rate": 6.760451156588863e-06, "loss": 0.062326, "memory(GiB)": 13.7, "step": 44390, "train_speed(iter/s)": 1.53078 }, { "acc": 0.97967873, "epoch": 20.808530583548162, "grad_norm": 4.602914810180664, "learning_rate": 6.759725638577841e-06, "loss": 0.09443712, "memory(GiB)": 13.7, "step": 44395, "train_speed(iter/s)": 1.530787 }, { "acc": 0.98015871, "epoch": 20.810874150456996, "grad_norm": 0.7364848852157593, "learning_rate": 6.759000078280814e-06, "loss": 0.059637, "memory(GiB)": 13.7, "step": 44400, "train_speed(iter/s)": 1.530789 }, { "acc": 0.97321434, "epoch": 20.81321771736583, "grad_norm": 6.491423606872559, "learning_rate": 6.758274475715223e-06, "loss": 0.0892773, "memory(GiB)": 13.7, "step": 44405, "train_speed(iter/s)": 1.530793 }, { "acc": 0.99187498, "epoch": 20.815561284274665, "grad_norm": 1.1413394212722778, "learning_rate": 6.757548830898508e-06, "loss": 0.02122058, "memory(GiB)": 13.7, "step": 44410, "train_speed(iter/s)": 1.530803 }, { "acc": 0.97732201, "epoch": 20.817904851183503, "grad_norm": 7.967377662658691, "learning_rate": 6.756823143848112e-06, "loss": 0.10122662, "memory(GiB)": 13.7, "step": 44415, "train_speed(iter/s)": 1.530809 }, { "acc": 0.97743549, "epoch": 20.820248418092337, "grad_norm": 1.2531074285507202, "learning_rate": 6.756097414581478e-06, "loss": 0.0613225, "memory(GiB)": 13.7, "step": 44420, "train_speed(iter/s)": 1.530812 }, { "acc": 0.97738094, "epoch": 20.82259198500117, "grad_norm": 4.694484233856201, "learning_rate": 6.755371643116048e-06, "loss": 0.09020921, "memory(GiB)": 13.7, "step": 44425, "train_speed(iter/s)": 1.530818 }, { "acc": 0.98785715, "epoch": 20.824935551910006, "grad_norm": 2.904524803161621, "learning_rate": 6.75464582946927e-06, "loss": 0.06731341, "memory(GiB)": 13.7, "step": 44430, "train_speed(iter/s)": 1.530829 }, { "acc": 1.0, "epoch": 20.827279118818844, "grad_norm": 0.20524898171424866, "learning_rate": 6.7539199736585895e-06, "loss": 0.00245629, "memory(GiB)": 13.7, "step": 44435, "train_speed(iter/s)": 1.530832 }, { "acc": 0.9875, "epoch": 20.829622685727678, "grad_norm": 7.271684169769287, "learning_rate": 6.753194075701452e-06, "loss": 0.03533666, "memory(GiB)": 13.7, "step": 44440, "train_speed(iter/s)": 1.530825 }, { "acc": 0.98383007, "epoch": 20.831966252636512, "grad_norm": 1.1591356992721558, "learning_rate": 6.752468135615306e-06, "loss": 0.0966248, "memory(GiB)": 13.7, "step": 44445, "train_speed(iter/s)": 1.530841 }, { "acc": 0.97104168, "epoch": 20.834309819545346, "grad_norm": 2.9150733947753906, "learning_rate": 6.751742153417604e-06, "loss": 0.07045605, "memory(GiB)": 13.7, "step": 44450, "train_speed(iter/s)": 1.530849 }, { "acc": 0.99459324, "epoch": 20.836653386454184, "grad_norm": 2.4760243892669678, "learning_rate": 6.7510161291257935e-06, "loss": 0.03912026, "memory(GiB)": 13.7, "step": 44455, "train_speed(iter/s)": 1.530856 }, { "acc": 0.9770833, "epoch": 20.83899695336302, "grad_norm": 4.0413713455200195, "learning_rate": 6.750290062757326e-06, "loss": 0.06949366, "memory(GiB)": 13.7, "step": 44460, "train_speed(iter/s)": 1.530863 }, { "acc": 0.99085398, "epoch": 20.841340520271853, "grad_norm": 2.0828969478607178, "learning_rate": 6.749563954329655e-06, "loss": 0.04296305, "memory(GiB)": 13.7, "step": 44465, "train_speed(iter/s)": 1.530873 }, { "acc": 0.98127975, "epoch": 20.84368408718069, "grad_norm": 2.229882001876831, "learning_rate": 6.748837803860229e-06, "loss": 0.06991184, "memory(GiB)": 13.7, "step": 44470, "train_speed(iter/s)": 1.53088 }, { "acc": 0.97312498, "epoch": 20.846027654089525, "grad_norm": 5.038559436798096, "learning_rate": 6.748111611366509e-06, "loss": 0.09498482, "memory(GiB)": 13.7, "step": 44475, "train_speed(iter/s)": 1.530888 }, { "acc": 0.98156252, "epoch": 20.84837122099836, "grad_norm": 4.457985877990723, "learning_rate": 6.747385376865947e-06, "loss": 0.07514111, "memory(GiB)": 13.7, "step": 44480, "train_speed(iter/s)": 1.53089 }, { "acc": 0.98354168, "epoch": 20.850714787907194, "grad_norm": 1.082139253616333, "learning_rate": 6.746659100375998e-06, "loss": 0.04273322, "memory(GiB)": 13.7, "step": 44485, "train_speed(iter/s)": 1.5309 }, { "acc": 0.97175589, "epoch": 20.85305835481603, "grad_norm": 7.102939605712891, "learning_rate": 6.745932781914122e-06, "loss": 0.13258402, "memory(GiB)": 13.7, "step": 44490, "train_speed(iter/s)": 1.530907 }, { "acc": 0.97265873, "epoch": 20.855401921724866, "grad_norm": 6.421051502227783, "learning_rate": 6.745206421497776e-06, "loss": 0.1085444, "memory(GiB)": 13.7, "step": 44495, "train_speed(iter/s)": 1.530922 }, { "acc": 0.97958336, "epoch": 20.8577454886337, "grad_norm": 1.1386834383010864, "learning_rate": 6.744480019144421e-06, "loss": 0.07716981, "memory(GiB)": 13.7, "step": 44500, "train_speed(iter/s)": 1.530927 }, { "acc": 0.97069445, "epoch": 20.860089055542534, "grad_norm": 6.486157417297363, "learning_rate": 6.743753574871513e-06, "loss": 0.08281012, "memory(GiB)": 13.7, "step": 44505, "train_speed(iter/s)": 1.530935 }, { "acc": 0.98897724, "epoch": 20.862432622451372, "grad_norm": 11.086050033569336, "learning_rate": 6.7430270886965165e-06, "loss": 0.04834547, "memory(GiB)": 13.7, "step": 44510, "train_speed(iter/s)": 1.530938 }, { "acc": 0.98049107, "epoch": 20.864776189360207, "grad_norm": 3.60508131980896, "learning_rate": 6.742300560636895e-06, "loss": 0.10664629, "memory(GiB)": 13.7, "step": 44515, "train_speed(iter/s)": 1.530949 }, { "acc": 0.97124996, "epoch": 20.86711975626904, "grad_norm": 4.615251541137695, "learning_rate": 6.74157399071011e-06, "loss": 0.12215513, "memory(GiB)": 13.7, "step": 44520, "train_speed(iter/s)": 1.530962 }, { "acc": 0.98604164, "epoch": 20.869463323177875, "grad_norm": 4.19387674331665, "learning_rate": 6.740847378933625e-06, "loss": 0.07047058, "memory(GiB)": 13.7, "step": 44525, "train_speed(iter/s)": 1.530966 }, { "acc": 0.99190483, "epoch": 20.871806890086713, "grad_norm": 0.049812477082014084, "learning_rate": 6.740120725324908e-06, "loss": 0.03879059, "memory(GiB)": 13.7, "step": 44530, "train_speed(iter/s)": 1.530968 }, { "acc": 0.98318901, "epoch": 20.874150456995547, "grad_norm": 2.5709218978881836, "learning_rate": 6.739394029901422e-06, "loss": 0.06466587, "memory(GiB)": 13.7, "step": 44535, "train_speed(iter/s)": 1.530975 }, { "acc": 0.9885417, "epoch": 20.87649402390438, "grad_norm": 4.523813724517822, "learning_rate": 6.7386672926806375e-06, "loss": 0.05035682, "memory(GiB)": 13.7, "step": 44540, "train_speed(iter/s)": 1.530973 }, { "acc": 0.990625, "epoch": 20.87883759081322, "grad_norm": 4.273780822753906, "learning_rate": 6.73794051368002e-06, "loss": 0.03070654, "memory(GiB)": 13.7, "step": 44545, "train_speed(iter/s)": 1.530986 }, { "acc": 0.99453125, "epoch": 20.881181157722054, "grad_norm": 3.414881467819214, "learning_rate": 6.7372136929170425e-06, "loss": 0.05099702, "memory(GiB)": 13.7, "step": 44550, "train_speed(iter/s)": 1.530996 }, { "acc": 0.99080353, "epoch": 20.883524724630888, "grad_norm": 0.03423050791025162, "learning_rate": 6.7364868304091725e-06, "loss": 0.03725285, "memory(GiB)": 13.7, "step": 44555, "train_speed(iter/s)": 1.530995 }, { "acc": 0.97687492, "epoch": 20.885868291539722, "grad_norm": 1.7085202932357788, "learning_rate": 6.735759926173884e-06, "loss": 0.08373971, "memory(GiB)": 13.7, "step": 44560, "train_speed(iter/s)": 1.530991 }, { "acc": 0.98468132, "epoch": 20.88821185844856, "grad_norm": 7.613056182861328, "learning_rate": 6.735032980228644e-06, "loss": 0.12102379, "memory(GiB)": 13.7, "step": 44565, "train_speed(iter/s)": 1.531 }, { "acc": 0.97666664, "epoch": 20.890555425357395, "grad_norm": 8.494806289672852, "learning_rate": 6.734305992590932e-06, "loss": 0.10437543, "memory(GiB)": 13.7, "step": 44570, "train_speed(iter/s)": 1.531003 }, { "acc": 0.98633013, "epoch": 20.89289899226623, "grad_norm": 1.5742419958114624, "learning_rate": 6.733578963278219e-06, "loss": 0.09119124, "memory(GiB)": 13.7, "step": 44575, "train_speed(iter/s)": 1.530998 }, { "acc": 0.98061008, "epoch": 20.895242559175063, "grad_norm": 2.3336434364318848, "learning_rate": 6.732851892307981e-06, "loss": 0.07827483, "memory(GiB)": 13.7, "step": 44580, "train_speed(iter/s)": 1.531 }, { "acc": 0.97861109, "epoch": 20.8975861260839, "grad_norm": 3.6061761379241943, "learning_rate": 6.732124779697697e-06, "loss": 0.08076812, "memory(GiB)": 13.7, "step": 44585, "train_speed(iter/s)": 1.531 }, { "acc": 0.99175596, "epoch": 20.899929692992735, "grad_norm": 2.0312774181365967, "learning_rate": 6.7313976254648405e-06, "loss": 0.03042867, "memory(GiB)": 13.7, "step": 44590, "train_speed(iter/s)": 1.530999 }, { "acc": 0.97174683, "epoch": 20.90227325990157, "grad_norm": 1.379814863204956, "learning_rate": 6.730670429626893e-06, "loss": 0.10043287, "memory(GiB)": 13.7, "step": 44595, "train_speed(iter/s)": 1.531009 }, { "acc": 0.9871726, "epoch": 20.904616826810404, "grad_norm": 4.131186008453369, "learning_rate": 6.72994319220133e-06, "loss": 0.08885337, "memory(GiB)": 13.7, "step": 44600, "train_speed(iter/s)": 1.531007 }, { "acc": 0.96782198, "epoch": 20.906960393719242, "grad_norm": 4.044793605804443, "learning_rate": 6.729215913205635e-06, "loss": 0.18561497, "memory(GiB)": 13.7, "step": 44605, "train_speed(iter/s)": 1.531004 }, { "acc": 0.9764286, "epoch": 20.909303960628076, "grad_norm": 3.5710582733154297, "learning_rate": 6.728488592657289e-06, "loss": 0.05923043, "memory(GiB)": 13.7, "step": 44610, "train_speed(iter/s)": 1.531006 }, { "acc": 0.98483629, "epoch": 20.91164752753691, "grad_norm": 4.725132942199707, "learning_rate": 6.7277612305737746e-06, "loss": 0.07756861, "memory(GiB)": 13.7, "step": 44615, "train_speed(iter/s)": 1.53101 }, { "acc": 0.98624992, "epoch": 20.913991094445745, "grad_norm": 0.7494615912437439, "learning_rate": 6.727033826972574e-06, "loss": 0.03920366, "memory(GiB)": 13.7, "step": 44620, "train_speed(iter/s)": 1.531013 }, { "acc": 0.97383928, "epoch": 20.916334661354583, "grad_norm": 5.375242710113525, "learning_rate": 6.726306381871174e-06, "loss": 0.11096128, "memory(GiB)": 13.7, "step": 44625, "train_speed(iter/s)": 1.531024 }, { "acc": 0.97416668, "epoch": 20.918678228263417, "grad_norm": 3.8208937644958496, "learning_rate": 6.725578895287057e-06, "loss": 0.08977787, "memory(GiB)": 13.7, "step": 44630, "train_speed(iter/s)": 1.531027 }, { "acc": 0.98029766, "epoch": 20.92102179517225, "grad_norm": 4.127885818481445, "learning_rate": 6.724851367237709e-06, "loss": 0.0664508, "memory(GiB)": 13.7, "step": 44635, "train_speed(iter/s)": 1.531026 }, { "acc": 0.98270836, "epoch": 20.92336536208109, "grad_norm": 1.0120402574539185, "learning_rate": 6.724123797740622e-06, "loss": 0.05805365, "memory(GiB)": 13.7, "step": 44640, "train_speed(iter/s)": 1.531027 }, { "acc": 0.98553028, "epoch": 20.925708928989923, "grad_norm": 2.3726229667663574, "learning_rate": 6.723396186813281e-06, "loss": 0.08020121, "memory(GiB)": 13.7, "step": 44645, "train_speed(iter/s)": 1.531023 }, { "acc": 0.9951128, "epoch": 20.928052495898758, "grad_norm": 3.663969039916992, "learning_rate": 6.722668534473173e-06, "loss": 0.06069303, "memory(GiB)": 13.7, "step": 44650, "train_speed(iter/s)": 1.531026 }, { "acc": 0.9723238, "epoch": 20.930396062807592, "grad_norm": 8.473962783813477, "learning_rate": 6.721940840737794e-06, "loss": 0.08971895, "memory(GiB)": 13.7, "step": 44655, "train_speed(iter/s)": 1.531035 }, { "acc": 0.98181505, "epoch": 20.93273962971643, "grad_norm": 2.2537224292755127, "learning_rate": 6.721213105624632e-06, "loss": 0.0748333, "memory(GiB)": 13.7, "step": 44660, "train_speed(iter/s)": 1.531036 }, { "acc": 0.9926136, "epoch": 20.935083196625264, "grad_norm": 1.0738455057144165, "learning_rate": 6.720485329151179e-06, "loss": 0.02821171, "memory(GiB)": 13.7, "step": 44665, "train_speed(iter/s)": 1.531042 }, { "acc": 0.98788691, "epoch": 20.9374267635341, "grad_norm": 5.045886993408203, "learning_rate": 6.719757511334931e-06, "loss": 0.08415197, "memory(GiB)": 13.7, "step": 44670, "train_speed(iter/s)": 1.531051 }, { "acc": 0.9927083, "epoch": 20.939770330442933, "grad_norm": 1.131544589996338, "learning_rate": 6.719029652193378e-06, "loss": 0.03922398, "memory(GiB)": 13.7, "step": 44675, "train_speed(iter/s)": 1.531061 }, { "acc": 0.96812496, "epoch": 20.94211389735177, "grad_norm": 3.243734359741211, "learning_rate": 6.718301751744021e-06, "loss": 0.14485469, "memory(GiB)": 13.7, "step": 44680, "train_speed(iter/s)": 1.531062 }, { "acc": 0.98758926, "epoch": 20.944457464260605, "grad_norm": 1.5498894453048706, "learning_rate": 6.71757381000435e-06, "loss": 0.03338836, "memory(GiB)": 13.7, "step": 44685, "train_speed(iter/s)": 1.531054 }, { "acc": 0.9833334, "epoch": 20.94680103116944, "grad_norm": 0.01123141497373581, "learning_rate": 6.7168458269918694e-06, "loss": 0.08122149, "memory(GiB)": 13.7, "step": 44690, "train_speed(iter/s)": 1.531061 }, { "acc": 0.98761368, "epoch": 20.949144598078274, "grad_norm": 2.2061095237731934, "learning_rate": 6.71611780272407e-06, "loss": 0.05612482, "memory(GiB)": 13.7, "step": 44695, "train_speed(iter/s)": 1.53107 }, { "acc": 0.96925726, "epoch": 20.95148816498711, "grad_norm": 5.942690372467041, "learning_rate": 6.715389737218457e-06, "loss": 0.11220977, "memory(GiB)": 13.7, "step": 44700, "train_speed(iter/s)": 1.531076 }, { "acc": 0.99000721, "epoch": 20.953831731895946, "grad_norm": 5.324471473693848, "learning_rate": 6.7146616304925295e-06, "loss": 0.04641431, "memory(GiB)": 13.7, "step": 44705, "train_speed(iter/s)": 1.531077 }, { "acc": 0.9885417, "epoch": 20.95617529880478, "grad_norm": 4.615057945251465, "learning_rate": 6.713933482563787e-06, "loss": 0.04097865, "memory(GiB)": 13.7, "step": 44710, "train_speed(iter/s)": 1.531069 }, { "acc": 0.9967804, "epoch": 20.958518865713614, "grad_norm": 1.7050050497055054, "learning_rate": 6.713205293449731e-06, "loss": 0.06413161, "memory(GiB)": 13.7, "step": 44715, "train_speed(iter/s)": 1.531068 }, { "acc": 0.97569447, "epoch": 20.960862432622452, "grad_norm": 5.794363498687744, "learning_rate": 6.712477063167867e-06, "loss": 0.10311053, "memory(GiB)": 13.7, "step": 44720, "train_speed(iter/s)": 1.531079 }, { "acc": 0.98916664, "epoch": 20.963205999531286, "grad_norm": 4.118561744689941, "learning_rate": 6.711748791735701e-06, "loss": 0.04472931, "memory(GiB)": 13.7, "step": 44725, "train_speed(iter/s)": 1.531074 }, { "acc": 0.97599201, "epoch": 20.96554956644012, "grad_norm": 4.265405178070068, "learning_rate": 6.711020479170733e-06, "loss": 0.06048562, "memory(GiB)": 13.7, "step": 44730, "train_speed(iter/s)": 1.531076 }, { "acc": 0.9869791, "epoch": 20.96789313334896, "grad_norm": 5.7723493576049805, "learning_rate": 6.710292125490475e-06, "loss": 0.04931844, "memory(GiB)": 13.7, "step": 44735, "train_speed(iter/s)": 1.531082 }, { "acc": 0.97918892, "epoch": 20.970236700257793, "grad_norm": 4.169780731201172, "learning_rate": 6.709563730712428e-06, "loss": 0.10355132, "memory(GiB)": 13.7, "step": 44740, "train_speed(iter/s)": 1.53108 }, { "acc": 0.971875, "epoch": 20.972580267166627, "grad_norm": 6.506795883178711, "learning_rate": 6.708835294854107e-06, "loss": 0.11316088, "memory(GiB)": 13.7, "step": 44745, "train_speed(iter/s)": 1.531087 }, { "acc": 0.99499998, "epoch": 20.97492383407546, "grad_norm": 1.900629997253418, "learning_rate": 6.708106817933017e-06, "loss": 0.02029094, "memory(GiB)": 13.7, "step": 44750, "train_speed(iter/s)": 1.531089 }, { "acc": 0.99809208, "epoch": 20.9772674009843, "grad_norm": 2.2644379138946533, "learning_rate": 6.707378299966668e-06, "loss": 0.02766067, "memory(GiB)": 13.7, "step": 44755, "train_speed(iter/s)": 1.531095 }, { "acc": 0.95573864, "epoch": 20.979610967893134, "grad_norm": 6.679560661315918, "learning_rate": 6.706649740972572e-06, "loss": 0.15131423, "memory(GiB)": 13.7, "step": 44760, "train_speed(iter/s)": 1.531109 }, { "acc": 0.98379993, "epoch": 20.981954534801968, "grad_norm": 2.2414042949676514, "learning_rate": 6.705921140968241e-06, "loss": 0.07432488, "memory(GiB)": 13.7, "step": 44765, "train_speed(iter/s)": 1.53112 }, { "acc": 0.98258934, "epoch": 20.984298101710802, "grad_norm": 2.507632255554199, "learning_rate": 6.70519249997119e-06, "loss": 0.08032151, "memory(GiB)": 13.7, "step": 44770, "train_speed(iter/s)": 1.531128 }, { "acc": 0.98729172, "epoch": 20.98664166861964, "grad_norm": 1.4116696119308472, "learning_rate": 6.704463817998932e-06, "loss": 0.04806965, "memory(GiB)": 13.7, "step": 44775, "train_speed(iter/s)": 1.531121 }, { "acc": 0.98434982, "epoch": 20.988985235528475, "grad_norm": 1.5434610843658447, "learning_rate": 6.7037350950689805e-06, "loss": 0.04637965, "memory(GiB)": 13.7, "step": 44780, "train_speed(iter/s)": 1.531122 }, { "acc": 0.9864583, "epoch": 20.99132880243731, "grad_norm": 0.018393568694591522, "learning_rate": 6.703006331198852e-06, "loss": 0.04769358, "memory(GiB)": 13.7, "step": 44785, "train_speed(iter/s)": 1.531128 }, { "acc": 0.97977676, "epoch": 20.993672369346143, "grad_norm": 4.1909871101379395, "learning_rate": 6.702277526406067e-06, "loss": 0.15030563, "memory(GiB)": 13.7, "step": 44790, "train_speed(iter/s)": 1.531131 }, { "acc": 0.97354174, "epoch": 20.99601593625498, "grad_norm": 5.8252129554748535, "learning_rate": 6.7015486807081396e-06, "loss": 0.11912944, "memory(GiB)": 13.7, "step": 44795, "train_speed(iter/s)": 1.531135 }, { "acc": 0.99018736, "epoch": 20.998359503163815, "grad_norm": 0.6615282893180847, "learning_rate": 6.700819794122591e-06, "loss": 0.02743959, "memory(GiB)": 13.7, "step": 44800, "train_speed(iter/s)": 1.531144 }, { "acc": 0.98194447, "epoch": 21.00070307007265, "grad_norm": 6.20623779296875, "learning_rate": 6.700090866666939e-06, "loss": 0.06863287, "memory(GiB)": 13.7, "step": 44805, "train_speed(iter/s)": 1.531116 }, { "acc": 0.98000002, "epoch": 21.003046636981487, "grad_norm": 5.715022087097168, "learning_rate": 6.699361898358707e-06, "loss": 0.06610413, "memory(GiB)": 13.7, "step": 44810, "train_speed(iter/s)": 1.531117 }, { "acc": 0.97894344, "epoch": 21.00539020389032, "grad_norm": 6.842389106750488, "learning_rate": 6.6986328892154165e-06, "loss": 0.07216516, "memory(GiB)": 13.7, "step": 44815, "train_speed(iter/s)": 1.531124 }, { "acc": 0.98500004, "epoch": 21.007733770799156, "grad_norm": 0.0028345161117613316, "learning_rate": 6.69790383925459e-06, "loss": 0.05278707, "memory(GiB)": 13.7, "step": 44820, "train_speed(iter/s)": 1.531133 }, { "acc": 0.9645834, "epoch": 21.01007733770799, "grad_norm": 2.954435348510742, "learning_rate": 6.69717474849375e-06, "loss": 0.12637327, "memory(GiB)": 13.7, "step": 44825, "train_speed(iter/s)": 1.531131 }, { "acc": 0.99258928, "epoch": 21.012420904616828, "grad_norm": 0.1993451714515686, "learning_rate": 6.6964456169504254e-06, "loss": 0.02831405, "memory(GiB)": 13.7, "step": 44830, "train_speed(iter/s)": 1.531136 }, { "acc": 0.965625, "epoch": 21.014764471525663, "grad_norm": 5.96613883972168, "learning_rate": 6.69571644464214e-06, "loss": 0.12446337, "memory(GiB)": 13.7, "step": 44835, "train_speed(iter/s)": 1.531147 }, { "acc": 0.98924103, "epoch": 21.017108038434497, "grad_norm": 10.800680160522461, "learning_rate": 6.69498723158642e-06, "loss": 0.02498266, "memory(GiB)": 13.7, "step": 44840, "train_speed(iter/s)": 1.531148 }, { "acc": 0.98599205, "epoch": 21.01945160534333, "grad_norm": 4.3574676513671875, "learning_rate": 6.694257977800793e-06, "loss": 0.07372782, "memory(GiB)": 13.7, "step": 44845, "train_speed(iter/s)": 1.531152 }, { "acc": 0.97250004, "epoch": 21.02179517225217, "grad_norm": 0.8853711485862732, "learning_rate": 6.6935286833027875e-06, "loss": 0.09185852, "memory(GiB)": 13.7, "step": 44850, "train_speed(iter/s)": 1.531156 }, { "acc": 0.97969704, "epoch": 21.024138739161003, "grad_norm": 7.685592174530029, "learning_rate": 6.692799348109936e-06, "loss": 0.07478585, "memory(GiB)": 13.7, "step": 44855, "train_speed(iter/s)": 1.531168 }, { "acc": 0.97773542, "epoch": 21.026482306069838, "grad_norm": 3.989149332046509, "learning_rate": 6.692069972239768e-06, "loss": 0.11469508, "memory(GiB)": 13.7, "step": 44860, "train_speed(iter/s)": 1.531164 }, { "acc": 0.965625, "epoch": 21.028825872978672, "grad_norm": 4.846180438995361, "learning_rate": 6.691340555709814e-06, "loss": 0.09988818, "memory(GiB)": 13.7, "step": 44865, "train_speed(iter/s)": 1.531172 }, { "acc": 0.98402777, "epoch": 21.03116943988751, "grad_norm": 3.7501797676086426, "learning_rate": 6.690611098537609e-06, "loss": 0.06291035, "memory(GiB)": 13.7, "step": 44870, "train_speed(iter/s)": 1.53117 }, { "acc": 0.98604164, "epoch": 21.033513006796344, "grad_norm": 2.5281355381011963, "learning_rate": 6.6898816007406865e-06, "loss": 0.03205238, "memory(GiB)": 13.7, "step": 44875, "train_speed(iter/s)": 1.531162 }, { "acc": 0.98291664, "epoch": 21.03585657370518, "grad_norm": 2.6612613201141357, "learning_rate": 6.689152062336578e-06, "loss": 0.05242189, "memory(GiB)": 13.7, "step": 44880, "train_speed(iter/s)": 1.531161 }, { "acc": 0.97416668, "epoch": 21.038200140614016, "grad_norm": 5.623122215270996, "learning_rate": 6.688422483342824e-06, "loss": 0.08272551, "memory(GiB)": 13.7, "step": 44885, "train_speed(iter/s)": 1.531167 }, { "acc": 0.97707787, "epoch": 21.04054370752285, "grad_norm": 3.576986789703369, "learning_rate": 6.687692863776957e-06, "loss": 0.06135723, "memory(GiB)": 13.7, "step": 44890, "train_speed(iter/s)": 1.531172 }, { "acc": 0.98378963, "epoch": 21.042887274431685, "grad_norm": 3.0165212154388428, "learning_rate": 6.686963203656517e-06, "loss": 0.05166059, "memory(GiB)": 13.7, "step": 44895, "train_speed(iter/s)": 1.53117 }, { "acc": 0.97871838, "epoch": 21.04523084134052, "grad_norm": 3.3340706825256348, "learning_rate": 6.686233502999041e-06, "loss": 0.11220319, "memory(GiB)": 13.7, "step": 44900, "train_speed(iter/s)": 1.531174 }, { "acc": 0.98125, "epoch": 21.047574408249357, "grad_norm": 4.643891334533691, "learning_rate": 6.685503761822071e-06, "loss": 0.14210142, "memory(GiB)": 13.7, "step": 44905, "train_speed(iter/s)": 1.53118 }, { "acc": 0.98611107, "epoch": 21.04991797515819, "grad_norm": 6.921894550323486, "learning_rate": 6.684773980143145e-06, "loss": 0.06451306, "memory(GiB)": 13.7, "step": 44910, "train_speed(iter/s)": 1.531191 }, { "acc": 0.988731, "epoch": 21.052261542067026, "grad_norm": 0.08614814281463623, "learning_rate": 6.684044157979808e-06, "loss": 0.05855316, "memory(GiB)": 13.7, "step": 44915, "train_speed(iter/s)": 1.531199 }, { "acc": 0.99186954, "epoch": 21.05460510897586, "grad_norm": 0.2115950733423233, "learning_rate": 6.683314295349599e-06, "loss": 0.03812336, "memory(GiB)": 13.7, "step": 44920, "train_speed(iter/s)": 1.531211 }, { "acc": 0.99079857, "epoch": 21.056948675884698, "grad_norm": 1.500306487083435, "learning_rate": 6.6825843922700615e-06, "loss": 0.05173935, "memory(GiB)": 13.7, "step": 44925, "train_speed(iter/s)": 1.531211 }, { "acc": 0.97719698, "epoch": 21.059292242793532, "grad_norm": 2.777716875076294, "learning_rate": 6.681854448758744e-06, "loss": 0.11263323, "memory(GiB)": 13.7, "step": 44930, "train_speed(iter/s)": 1.531213 }, { "acc": 0.98203526, "epoch": 21.061635809702366, "grad_norm": 2.455371618270874, "learning_rate": 6.681124464833187e-06, "loss": 0.10027764, "memory(GiB)": 13.7, "step": 44935, "train_speed(iter/s)": 1.531214 }, { "acc": 0.9833334, "epoch": 21.0639793766112, "grad_norm": 4.547337055206299, "learning_rate": 6.680394440510941e-06, "loss": 0.0584689, "memory(GiB)": 13.7, "step": 44940, "train_speed(iter/s)": 1.531222 }, { "acc": 0.99107151, "epoch": 21.06632294352004, "grad_norm": 1.3317227363586426, "learning_rate": 6.679664375809549e-06, "loss": 0.04415488, "memory(GiB)": 13.7, "step": 44945, "train_speed(iter/s)": 1.531227 }, { "acc": 0.98779764, "epoch": 21.068666510428873, "grad_norm": 3.71291184425354, "learning_rate": 6.6789342707465635e-06, "loss": 0.05160912, "memory(GiB)": 13.7, "step": 44950, "train_speed(iter/s)": 1.531224 }, { "acc": 0.97208519, "epoch": 21.071010077337707, "grad_norm": 2.7435367107391357, "learning_rate": 6.678204125339533e-06, "loss": 0.10998533, "memory(GiB)": 13.7, "step": 44955, "train_speed(iter/s)": 1.53123 }, { "acc": 0.99125004, "epoch": 21.073353644246545, "grad_norm": 17.006484985351562, "learning_rate": 6.677473939606006e-06, "loss": 0.02959132, "memory(GiB)": 13.7, "step": 44960, "train_speed(iter/s)": 1.531246 }, { "acc": 0.9927084, "epoch": 21.07569721115538, "grad_norm": 2.6364524364471436, "learning_rate": 6.676743713563534e-06, "loss": 0.03899453, "memory(GiB)": 13.7, "step": 44965, "train_speed(iter/s)": 1.531251 }, { "acc": 0.98800602, "epoch": 21.078040778064214, "grad_norm": 3.399088144302368, "learning_rate": 6.67601344722967e-06, "loss": 0.05788382, "memory(GiB)": 13.7, "step": 44970, "train_speed(iter/s)": 1.531255 }, { "acc": 0.9921875, "epoch": 21.080384344973048, "grad_norm": 1.0874288082122803, "learning_rate": 6.675283140621968e-06, "loss": 0.03274246, "memory(GiB)": 13.7, "step": 44975, "train_speed(iter/s)": 1.531268 }, { "acc": 0.97987175, "epoch": 21.082727911881886, "grad_norm": 5.540447235107422, "learning_rate": 6.674552793757983e-06, "loss": 0.10570216, "memory(GiB)": 13.7, "step": 44980, "train_speed(iter/s)": 1.531282 }, { "acc": 0.98562498, "epoch": 21.08507147879072, "grad_norm": 7.192895412445068, "learning_rate": 6.673822406655267e-06, "loss": 0.03204185, "memory(GiB)": 13.7, "step": 44985, "train_speed(iter/s)": 1.531281 }, { "acc": 0.97377977, "epoch": 21.087415045699554, "grad_norm": 6.916034698486328, "learning_rate": 6.6730919793313766e-06, "loss": 0.08932257, "memory(GiB)": 13.7, "step": 44990, "train_speed(iter/s)": 1.531285 }, { "acc": 0.9693552, "epoch": 21.08975861260839, "grad_norm": 4.456735610961914, "learning_rate": 6.672361511803871e-06, "loss": 0.18947017, "memory(GiB)": 13.7, "step": 44995, "train_speed(iter/s)": 1.531282 }, { "acc": 0.97704067, "epoch": 21.092102179517227, "grad_norm": 1.6895140409469604, "learning_rate": 6.671631004090307e-06, "loss": 0.07308249, "memory(GiB)": 13.7, "step": 45000, "train_speed(iter/s)": 1.531286 }, { "acc": 0.98217258, "epoch": 21.09444574642606, "grad_norm": 1.114567756652832, "learning_rate": 6.670900456208245e-06, "loss": 0.10242566, "memory(GiB)": 13.7, "step": 45005, "train_speed(iter/s)": 1.531286 }, { "acc": 0.98761368, "epoch": 21.096789313334895, "grad_norm": 0.8798320889472961, "learning_rate": 6.6701698681752414e-06, "loss": 0.02461046, "memory(GiB)": 13.7, "step": 45010, "train_speed(iter/s)": 1.531292 }, { "acc": 0.9859375, "epoch": 21.09913288024373, "grad_norm": 6.110057353973389, "learning_rate": 6.66943924000886e-06, "loss": 0.06433185, "memory(GiB)": 13.7, "step": 45015, "train_speed(iter/s)": 1.531296 }, { "acc": 0.97573795, "epoch": 21.101476447152567, "grad_norm": 5.045635223388672, "learning_rate": 6.668708571726663e-06, "loss": 0.06822846, "memory(GiB)": 13.7, "step": 45020, "train_speed(iter/s)": 1.531305 }, { "acc": 0.97193146, "epoch": 21.1038200140614, "grad_norm": 3.693830728530884, "learning_rate": 6.667977863346212e-06, "loss": 0.09585921, "memory(GiB)": 13.7, "step": 45025, "train_speed(iter/s)": 1.531304 }, { "acc": 0.99375, "epoch": 21.106163580970236, "grad_norm": 0.08424653857946396, "learning_rate": 6.667247114885072e-06, "loss": 0.03791384, "memory(GiB)": 13.7, "step": 45030, "train_speed(iter/s)": 1.531309 }, { "acc": 0.96749992, "epoch": 21.10850714787907, "grad_norm": 1.4741352796554565, "learning_rate": 6.666516326360805e-06, "loss": 0.11536309, "memory(GiB)": 13.7, "step": 45035, "train_speed(iter/s)": 1.531314 }, { "acc": 0.97406254, "epoch": 21.110850714787908, "grad_norm": 5.032463550567627, "learning_rate": 6.665785497790982e-06, "loss": 0.0682609, "memory(GiB)": 13.7, "step": 45040, "train_speed(iter/s)": 1.531315 }, { "acc": 0.99020834, "epoch": 21.113194281696742, "grad_norm": 5.8018341064453125, "learning_rate": 6.6650546291931654e-06, "loss": 0.02451425, "memory(GiB)": 13.7, "step": 45045, "train_speed(iter/s)": 1.531317 }, { "acc": 0.99091339, "epoch": 21.115537848605577, "grad_norm": 3.2872166633605957, "learning_rate": 6.664323720584923e-06, "loss": 0.0398407, "memory(GiB)": 13.7, "step": 45050, "train_speed(iter/s)": 1.531328 }, { "acc": 0.98299103, "epoch": 21.117881415514415, "grad_norm": 16.837326049804688, "learning_rate": 6.663592771983823e-06, "loss": 0.06733086, "memory(GiB)": 13.7, "step": 45055, "train_speed(iter/s)": 1.531327 }, { "acc": 0.98551636, "epoch": 21.12022498242325, "grad_norm": 10.193446159362793, "learning_rate": 6.662861783407441e-06, "loss": 0.10658264, "memory(GiB)": 13.7, "step": 45060, "train_speed(iter/s)": 1.531322 }, { "acc": 0.9733036, "epoch": 21.122568549332083, "grad_norm": 4.155091762542725, "learning_rate": 6.662130754873338e-06, "loss": 0.10002421, "memory(GiB)": 13.7, "step": 45065, "train_speed(iter/s)": 1.531324 }, { "acc": 0.98520832, "epoch": 21.124912116240917, "grad_norm": 1.5580118894577026, "learning_rate": 6.661399686399093e-06, "loss": 0.05050496, "memory(GiB)": 13.7, "step": 45070, "train_speed(iter/s)": 1.531329 }, { "acc": 0.99051476, "epoch": 21.127255683149755, "grad_norm": 0.6516587138175964, "learning_rate": 6.660668578002277e-06, "loss": 0.07093334, "memory(GiB)": 13.7, "step": 45075, "train_speed(iter/s)": 1.531332 }, { "acc": 0.96493959, "epoch": 21.12959925005859, "grad_norm": 1.640082836151123, "learning_rate": 6.659937429700461e-06, "loss": 0.15287974, "memory(GiB)": 13.7, "step": 45080, "train_speed(iter/s)": 1.531346 }, { "acc": 0.9739584, "epoch": 21.131942816967424, "grad_norm": 6.150023460388184, "learning_rate": 6.6592062415112214e-06, "loss": 0.05633129, "memory(GiB)": 13.7, "step": 45085, "train_speed(iter/s)": 1.531358 }, { "acc": 0.99187498, "epoch": 21.13428638387626, "grad_norm": 4.235775947570801, "learning_rate": 6.658475013452133e-06, "loss": 0.06060449, "memory(GiB)": 13.7, "step": 45090, "train_speed(iter/s)": 1.531363 }, { "acc": 0.98833332, "epoch": 21.136629950785096, "grad_norm": 1.3229721784591675, "learning_rate": 6.6577437455407725e-06, "loss": 0.06874914, "memory(GiB)": 13.7, "step": 45095, "train_speed(iter/s)": 1.531365 }, { "acc": 0.97173615, "epoch": 21.13897351769393, "grad_norm": 4.338655471801758, "learning_rate": 6.657012437794714e-06, "loss": 0.14363412, "memory(GiB)": 13.7, "step": 45100, "train_speed(iter/s)": 1.531371 }, { "acc": 0.98625002, "epoch": 21.141317084602765, "grad_norm": 6.599111557006836, "learning_rate": 6.656281090231542e-06, "loss": 0.07182986, "memory(GiB)": 13.7, "step": 45105, "train_speed(iter/s)": 1.53137 }, { "acc": 0.9927083, "epoch": 21.1436606515116, "grad_norm": 0.01589900441467762, "learning_rate": 6.655549702868831e-06, "loss": 0.07755538, "memory(GiB)": 13.7, "step": 45110, "train_speed(iter/s)": 1.53138 }, { "acc": 0.9746726, "epoch": 21.146004218420437, "grad_norm": 2.6500866413116455, "learning_rate": 6.654818275724163e-06, "loss": 0.08239367, "memory(GiB)": 13.7, "step": 45115, "train_speed(iter/s)": 1.531391 }, { "acc": 0.99321432, "epoch": 21.14834778532927, "grad_norm": 4.271852493286133, "learning_rate": 6.654086808815117e-06, "loss": 0.05897148, "memory(GiB)": 13.7, "step": 45120, "train_speed(iter/s)": 1.531392 }, { "acc": 0.97666664, "epoch": 21.150691352238105, "grad_norm": 3.147031784057617, "learning_rate": 6.653355302159278e-06, "loss": 0.10337806, "memory(GiB)": 13.7, "step": 45125, "train_speed(iter/s)": 1.531397 }, { "acc": 0.98313093, "epoch": 21.153034919146943, "grad_norm": 3.757675886154175, "learning_rate": 6.652623755774227e-06, "loss": 0.04728682, "memory(GiB)": 13.7, "step": 45130, "train_speed(iter/s)": 1.531401 }, { "acc": 0.98416672, "epoch": 21.155378486055778, "grad_norm": 6.86496639251709, "learning_rate": 6.65189216967755e-06, "loss": 0.06623827, "memory(GiB)": 13.7, "step": 45135, "train_speed(iter/s)": 1.531406 }, { "acc": 0.9610363, "epoch": 21.157722052964612, "grad_norm": 5.83012056350708, "learning_rate": 6.651160543886828e-06, "loss": 0.1654596, "memory(GiB)": 13.7, "step": 45140, "train_speed(iter/s)": 1.531412 }, { "acc": 0.98249998, "epoch": 21.160065619873446, "grad_norm": 3.2625741958618164, "learning_rate": 6.650428878419651e-06, "loss": 0.03862801, "memory(GiB)": 13.7, "step": 45145, "train_speed(iter/s)": 1.531407 }, { "acc": 0.99263897, "epoch": 21.162409186782284, "grad_norm": 2.241105794906616, "learning_rate": 6.649697173293604e-06, "loss": 0.05193208, "memory(GiB)": 13.7, "step": 45150, "train_speed(iter/s)": 1.531417 }, { "acc": 0.98829823, "epoch": 21.16475275369112, "grad_norm": 4.053981304168701, "learning_rate": 6.648965428526275e-06, "loss": 0.04249665, "memory(GiB)": 13.7, "step": 45155, "train_speed(iter/s)": 1.531424 }, { "acc": 0.9854166, "epoch": 21.167096320599953, "grad_norm": 5.420108318328857, "learning_rate": 6.648233644135252e-06, "loss": 0.04308438, "memory(GiB)": 13.7, "step": 45160, "train_speed(iter/s)": 1.53143 }, { "acc": 0.97559977, "epoch": 21.169439887508787, "grad_norm": 5.894508361816406, "learning_rate": 6.647501820138126e-06, "loss": 0.08202165, "memory(GiB)": 13.7, "step": 45165, "train_speed(iter/s)": 1.531435 }, { "acc": 0.99347591, "epoch": 21.171783454417625, "grad_norm": 3.318303346633911, "learning_rate": 6.646769956552488e-06, "loss": 0.02882118, "memory(GiB)": 13.7, "step": 45170, "train_speed(iter/s)": 1.531424 }, { "acc": 0.99018307, "epoch": 21.17412702132646, "grad_norm": 0.095299132168293, "learning_rate": 6.646038053395927e-06, "loss": 0.05297893, "memory(GiB)": 13.7, "step": 45175, "train_speed(iter/s)": 1.531427 }, { "acc": 0.9822917, "epoch": 21.176470588235293, "grad_norm": 4.73011589050293, "learning_rate": 6.6453061106860375e-06, "loss": 0.09130036, "memory(GiB)": 13.7, "step": 45180, "train_speed(iter/s)": 1.531434 }, { "acc": 0.975947, "epoch": 21.178814155144128, "grad_norm": 3.134755849838257, "learning_rate": 6.644574128440412e-06, "loss": 0.05432015, "memory(GiB)": 13.7, "step": 45185, "train_speed(iter/s)": 1.531437 }, { "acc": 0.98374996, "epoch": 21.181157722052966, "grad_norm": 5.510105133056641, "learning_rate": 6.643842106676648e-06, "loss": 0.07299696, "memory(GiB)": 13.7, "step": 45190, "train_speed(iter/s)": 1.531439 }, { "acc": 0.98829861, "epoch": 21.1835012889618, "grad_norm": 1.688657283782959, "learning_rate": 6.643110045412336e-06, "loss": 0.04489538, "memory(GiB)": 13.7, "step": 45195, "train_speed(iter/s)": 1.531433 }, { "acc": 0.99019346, "epoch": 21.185844855870634, "grad_norm": 1.736748218536377, "learning_rate": 6.642377944665078e-06, "loss": 0.06312954, "memory(GiB)": 13.7, "step": 45200, "train_speed(iter/s)": 1.531433 }, { "acc": 0.99750004, "epoch": 21.18818842277947, "grad_norm": 0.662604570388794, "learning_rate": 6.641645804452465e-06, "loss": 0.01321471, "memory(GiB)": 13.7, "step": 45205, "train_speed(iter/s)": 1.531441 }, { "acc": 0.97872028, "epoch": 21.190531989688306, "grad_norm": 4.2128825187683105, "learning_rate": 6.6409136247921e-06, "loss": 0.05803641, "memory(GiB)": 13.7, "step": 45210, "train_speed(iter/s)": 1.53145 }, { "acc": 0.98760414, "epoch": 21.19287555659714, "grad_norm": 6.310714244842529, "learning_rate": 6.64018140570158e-06, "loss": 0.05629848, "memory(GiB)": 13.7, "step": 45215, "train_speed(iter/s)": 1.531456 }, { "acc": 0.97761364, "epoch": 21.195219123505975, "grad_norm": 6.265804290771484, "learning_rate": 6.639449147198506e-06, "loss": 0.06663269, "memory(GiB)": 13.7, "step": 45220, "train_speed(iter/s)": 1.53146 }, { "acc": 0.96333332, "epoch": 21.197562690414813, "grad_norm": 5.258280277252197, "learning_rate": 6.638716849300479e-06, "loss": 0.07111601, "memory(GiB)": 13.7, "step": 45225, "train_speed(iter/s)": 1.531476 }, { "acc": 0.9916666, "epoch": 21.199906257323647, "grad_norm": 0.12757207453250885, "learning_rate": 6.6379845120251005e-06, "loss": 0.02554919, "memory(GiB)": 13.7, "step": 45230, "train_speed(iter/s)": 1.531475 }, { "acc": 0.97217264, "epoch": 21.20224982423248, "grad_norm": 3.7595438957214355, "learning_rate": 6.637252135389977e-06, "loss": 0.09730495, "memory(GiB)": 13.7, "step": 45235, "train_speed(iter/s)": 1.531482 }, { "acc": 0.97423611, "epoch": 21.204593391141316, "grad_norm": 5.503237247467041, "learning_rate": 6.636519719412705e-06, "loss": 0.10126338, "memory(GiB)": 13.7, "step": 45240, "train_speed(iter/s)": 1.53149 }, { "acc": 0.99196434, "epoch": 21.206936958050154, "grad_norm": 2.756443738937378, "learning_rate": 6.635787264110896e-06, "loss": 0.03052447, "memory(GiB)": 13.7, "step": 45245, "train_speed(iter/s)": 1.531494 }, { "acc": 0.984132, "epoch": 21.209280524958988, "grad_norm": 0.018965529277920723, "learning_rate": 6.6350547695021516e-06, "loss": 0.07387136, "memory(GiB)": 13.7, "step": 45250, "train_speed(iter/s)": 1.531507 }, { "acc": 0.99738102, "epoch": 21.211624091867822, "grad_norm": 3.03513765335083, "learning_rate": 6.634322235604084e-06, "loss": 0.06004207, "memory(GiB)": 13.7, "step": 45255, "train_speed(iter/s)": 1.531507 }, { "acc": 0.96997032, "epoch": 21.213967658776657, "grad_norm": 3.5355167388916016, "learning_rate": 6.633589662434294e-06, "loss": 0.17954388, "memory(GiB)": 13.7, "step": 45260, "train_speed(iter/s)": 1.531511 }, { "acc": 0.97726908, "epoch": 21.216311225685494, "grad_norm": 2.9869272708892822, "learning_rate": 6.632857050010395e-06, "loss": 0.0931664, "memory(GiB)": 13.7, "step": 45265, "train_speed(iter/s)": 1.531519 }, { "acc": 0.97875004, "epoch": 21.21865479259433, "grad_norm": 18.568607330322266, "learning_rate": 6.632124398349995e-06, "loss": 0.12131863, "memory(GiB)": 13.7, "step": 45270, "train_speed(iter/s)": 1.531525 }, { "acc": 0.98552084, "epoch": 21.220998359503163, "grad_norm": 3.328256845474243, "learning_rate": 6.631391707470705e-06, "loss": 0.05468477, "memory(GiB)": 13.7, "step": 45275, "train_speed(iter/s)": 1.531529 }, { "acc": 0.98153839, "epoch": 21.223341926411997, "grad_norm": 5.414792537689209, "learning_rate": 6.630658977390135e-06, "loss": 0.08487884, "memory(GiB)": 13.7, "step": 45280, "train_speed(iter/s)": 1.531537 }, { "acc": 0.9802084, "epoch": 21.225685493320835, "grad_norm": 5.631307601928711, "learning_rate": 6.6299262081259e-06, "loss": 0.08279359, "memory(GiB)": 13.7, "step": 45285, "train_speed(iter/s)": 1.531554 }, { "acc": 0.99235344, "epoch": 21.22802906022967, "grad_norm": 0.030039552599191666, "learning_rate": 6.629193399695612e-06, "loss": 0.03738582, "memory(GiB)": 13.7, "step": 45290, "train_speed(iter/s)": 1.531554 }, { "acc": 0.9927084, "epoch": 21.230372627138504, "grad_norm": 2.6922388076782227, "learning_rate": 6.628460552116886e-06, "loss": 0.03091882, "memory(GiB)": 13.7, "step": 45295, "train_speed(iter/s)": 1.531546 }, { "acc": 0.9817709, "epoch": 21.23271619404734, "grad_norm": 6.044933319091797, "learning_rate": 6.6277276654073356e-06, "loss": 0.06096555, "memory(GiB)": 13.7, "step": 45300, "train_speed(iter/s)": 1.531543 }, { "acc": 0.9864584, "epoch": 21.235059760956176, "grad_norm": 1.5576905012130737, "learning_rate": 6.6269947395845755e-06, "loss": 0.05103286, "memory(GiB)": 13.7, "step": 45305, "train_speed(iter/s)": 1.531549 }, { "acc": 0.98183422, "epoch": 21.23740332786501, "grad_norm": 3.0015881061553955, "learning_rate": 6.6262617746662295e-06, "loss": 0.06573352, "memory(GiB)": 13.7, "step": 45310, "train_speed(iter/s)": 1.531555 }, { "acc": 0.98291664, "epoch": 21.239746894773845, "grad_norm": 4.187086582183838, "learning_rate": 6.625528770669909e-06, "loss": 0.03107095, "memory(GiB)": 13.7, "step": 45315, "train_speed(iter/s)": 1.531561 }, { "acc": 0.97927084, "epoch": 21.242090461682682, "grad_norm": 4.2906270027160645, "learning_rate": 6.6247957276132365e-06, "loss": 0.08522144, "memory(GiB)": 13.7, "step": 45320, "train_speed(iter/s)": 1.531563 }, { "acc": 0.984375, "epoch": 21.244434028591517, "grad_norm": 1.7638990879058838, "learning_rate": 6.62406264551383e-06, "loss": 0.04568271, "memory(GiB)": 13.7, "step": 45325, "train_speed(iter/s)": 1.531563 }, { "acc": 0.97374458, "epoch": 21.24677759550035, "grad_norm": 24.428686141967773, "learning_rate": 6.623329524389312e-06, "loss": 0.09505573, "memory(GiB)": 13.7, "step": 45330, "train_speed(iter/s)": 1.531568 }, { "acc": 0.99375, "epoch": 21.249121162409185, "grad_norm": 0.22817189991474152, "learning_rate": 6.622596364257305e-06, "loss": 0.02381302, "memory(GiB)": 13.7, "step": 45335, "train_speed(iter/s)": 1.531568 }, { "acc": 0.97053032, "epoch": 21.251464729318023, "grad_norm": 4.501731872558594, "learning_rate": 6.621863165135429e-06, "loss": 0.06927231, "memory(GiB)": 13.7, "step": 45340, "train_speed(iter/s)": 1.531563 }, { "acc": 0.9760417, "epoch": 21.253808296226858, "grad_norm": 3.2541074752807617, "learning_rate": 6.62112992704131e-06, "loss": 0.09626948, "memory(GiB)": 13.7, "step": 45345, "train_speed(iter/s)": 1.53156 }, { "acc": 0.97889137, "epoch": 21.256151863135692, "grad_norm": 4.405556678771973, "learning_rate": 6.620396649992569e-06, "loss": 0.05463818, "memory(GiB)": 13.7, "step": 45350, "train_speed(iter/s)": 1.53157 }, { "acc": 0.984375, "epoch": 21.258495430044526, "grad_norm": 3.476034164428711, "learning_rate": 6.619663334006837e-06, "loss": 0.07552056, "memory(GiB)": 13.7, "step": 45355, "train_speed(iter/s)": 1.531572 }, { "acc": 0.9723959, "epoch": 21.260838996953364, "grad_norm": 4.049610614776611, "learning_rate": 6.618929979101738e-06, "loss": 0.08613364, "memory(GiB)": 13.7, "step": 45360, "train_speed(iter/s)": 1.531574 }, { "acc": 0.97394352, "epoch": 21.2631825638622, "grad_norm": 4.721536636352539, "learning_rate": 6.618196585294898e-06, "loss": 0.10474503, "memory(GiB)": 13.7, "step": 45365, "train_speed(iter/s)": 1.531581 }, { "acc": 0.96833334, "epoch": 21.265526130771033, "grad_norm": 3.9855284690856934, "learning_rate": 6.617463152603945e-06, "loss": 0.06047847, "memory(GiB)": 13.7, "step": 45370, "train_speed(iter/s)": 1.53158 }, { "acc": 0.9777977, "epoch": 21.26786969767987, "grad_norm": 3.7938976287841797, "learning_rate": 6.616729681046513e-06, "loss": 0.05045991, "memory(GiB)": 13.7, "step": 45375, "train_speed(iter/s)": 1.531584 }, { "acc": 0.9770834, "epoch": 21.270213264588705, "grad_norm": 3.685424327850342, "learning_rate": 6.615996170640228e-06, "loss": 0.07221541, "memory(GiB)": 13.7, "step": 45380, "train_speed(iter/s)": 1.531586 }, { "acc": 0.98511372, "epoch": 21.27255683149754, "grad_norm": 4.198516845703125, "learning_rate": 6.615262621402724e-06, "loss": 0.04603292, "memory(GiB)": 13.7, "step": 45385, "train_speed(iter/s)": 1.531591 }, { "acc": 0.98770828, "epoch": 21.274900398406373, "grad_norm": 7.710209369659424, "learning_rate": 6.6145290333516285e-06, "loss": 0.07317541, "memory(GiB)": 13.7, "step": 45390, "train_speed(iter/s)": 1.531594 }, { "acc": 0.99031248, "epoch": 21.27724396531521, "grad_norm": 1.5550754070281982, "learning_rate": 6.613795406504578e-06, "loss": 0.10507607, "memory(GiB)": 13.7, "step": 45395, "train_speed(iter/s)": 1.531601 }, { "acc": 0.98485994, "epoch": 21.279587532224046, "grad_norm": 5.058174133300781, "learning_rate": 6.613061740879208e-06, "loss": 0.05649056, "memory(GiB)": 13.7, "step": 45400, "train_speed(iter/s)": 1.531598 }, { "acc": 0.99083328, "epoch": 21.28193109913288, "grad_norm": 3.3434574604034424, "learning_rate": 6.612328036493151e-06, "loss": 0.02393427, "memory(GiB)": 13.7, "step": 45405, "train_speed(iter/s)": 1.531596 }, { "acc": 0.9802084, "epoch": 21.284274666041714, "grad_norm": 8.530454635620117, "learning_rate": 6.611594293364042e-06, "loss": 0.10432971, "memory(GiB)": 13.7, "step": 45410, "train_speed(iter/s)": 1.531601 }, { "acc": 0.98322773, "epoch": 21.286618232950552, "grad_norm": 5.960795879364014, "learning_rate": 6.610860511509519e-06, "loss": 0.06310949, "memory(GiB)": 13.7, "step": 45415, "train_speed(iter/s)": 1.531607 }, { "acc": 0.98727684, "epoch": 21.288961799859386, "grad_norm": 3.6996612548828125, "learning_rate": 6.610126690947223e-06, "loss": 0.05498562, "memory(GiB)": 13.7, "step": 45420, "train_speed(iter/s)": 1.531614 }, { "acc": 0.98916664, "epoch": 21.29130536676822, "grad_norm": 3.31803035736084, "learning_rate": 6.609392831694785e-06, "loss": 0.03676786, "memory(GiB)": 13.7, "step": 45425, "train_speed(iter/s)": 1.53162 }, { "acc": 0.99359379, "epoch": 21.293648933677055, "grad_norm": 1.533935546875, "learning_rate": 6.608658933769851e-06, "loss": 0.04013298, "memory(GiB)": 13.7, "step": 45430, "train_speed(iter/s)": 1.531622 }, { "acc": 0.97625008, "epoch": 21.295992500585893, "grad_norm": 0.32783785462379456, "learning_rate": 6.607924997190058e-06, "loss": 0.17589312, "memory(GiB)": 13.7, "step": 45435, "train_speed(iter/s)": 1.531631 }, { "acc": 0.97477684, "epoch": 21.298336067494727, "grad_norm": 1.7196717262268066, "learning_rate": 6.6071910219730515e-06, "loss": 0.09458301, "memory(GiB)": 13.7, "step": 45440, "train_speed(iter/s)": 1.531642 }, { "acc": 0.98604164, "epoch": 21.30067963440356, "grad_norm": 5.433367729187012, "learning_rate": 6.60645700813647e-06, "loss": 0.04211103, "memory(GiB)": 13.7, "step": 45445, "train_speed(iter/s)": 1.531646 }, { "acc": 0.96708336, "epoch": 21.3030232013124, "grad_norm": 2.1526753902435303, "learning_rate": 6.605722955697957e-06, "loss": 0.08453435, "memory(GiB)": 13.7, "step": 45450, "train_speed(iter/s)": 1.53166 }, { "acc": 0.97946434, "epoch": 21.305366768221234, "grad_norm": 11.339032173156738, "learning_rate": 6.6049888646751595e-06, "loss": 0.07667629, "memory(GiB)": 13.7, "step": 45455, "train_speed(iter/s)": 1.531662 }, { "acc": 0.98874998, "epoch": 21.307710335130068, "grad_norm": 1.5144827365875244, "learning_rate": 6.604254735085719e-06, "loss": 0.03936381, "memory(GiB)": 13.7, "step": 45460, "train_speed(iter/s)": 1.531667 }, { "acc": 0.97562504, "epoch": 21.310053902038902, "grad_norm": 6.510912895202637, "learning_rate": 6.603520566947284e-06, "loss": 0.08955774, "memory(GiB)": 13.7, "step": 45465, "train_speed(iter/s)": 1.531672 }, { "acc": 0.97924681, "epoch": 21.31239746894774, "grad_norm": 2.7400565147399902, "learning_rate": 6.602786360277503e-06, "loss": 0.10042455, "memory(GiB)": 13.7, "step": 45470, "train_speed(iter/s)": 1.531672 }, { "acc": 0.97889957, "epoch": 21.314741035856574, "grad_norm": 4.998267650604248, "learning_rate": 6.60205211509402e-06, "loss": 0.07897088, "memory(GiB)": 13.7, "step": 45475, "train_speed(iter/s)": 1.53167 }, { "acc": 0.9782671, "epoch": 21.31708460276541, "grad_norm": 4.083322525024414, "learning_rate": 6.601317831414484e-06, "loss": 0.11146564, "memory(GiB)": 13.7, "step": 45480, "train_speed(iter/s)": 1.531674 }, { "acc": 0.97615528, "epoch": 21.319428169674243, "grad_norm": 0.9350776076316833, "learning_rate": 6.60058350925655e-06, "loss": 0.09045612, "memory(GiB)": 13.7, "step": 45485, "train_speed(iter/s)": 1.531677 }, { "acc": 0.96752701, "epoch": 21.32177173658308, "grad_norm": 6.0686354637146, "learning_rate": 6.599849148637862e-06, "loss": 0.08228704, "memory(GiB)": 13.7, "step": 45490, "train_speed(iter/s)": 1.531678 }, { "acc": 0.98397818, "epoch": 21.324115303491915, "grad_norm": 0.04208456352353096, "learning_rate": 6.599114749576076e-06, "loss": 0.06916408, "memory(GiB)": 13.7, "step": 45495, "train_speed(iter/s)": 1.531678 }, { "acc": 0.97633934, "epoch": 21.32645887040075, "grad_norm": 4.356300354003906, "learning_rate": 6.598380312088843e-06, "loss": 0.09708217, "memory(GiB)": 13.7, "step": 45500, "train_speed(iter/s)": 1.531669 }, { "acc": 0.99159718, "epoch": 21.328802437309584, "grad_norm": 0.09344740211963654, "learning_rate": 6.597645836193819e-06, "loss": 0.04712425, "memory(GiB)": 13.7, "step": 45505, "train_speed(iter/s)": 1.531671 }, { "acc": 0.98479166, "epoch": 21.33114600421842, "grad_norm": 3.213212251663208, "learning_rate": 6.596911321908653e-06, "loss": 0.06701191, "memory(GiB)": 13.7, "step": 45510, "train_speed(iter/s)": 1.531676 }, { "acc": 0.99312496, "epoch": 21.333489571127256, "grad_norm": 0.293742835521698, "learning_rate": 6.596176769251005e-06, "loss": 0.02772228, "memory(GiB)": 13.7, "step": 45515, "train_speed(iter/s)": 1.531671 }, { "acc": 0.98314562, "epoch": 21.33583313803609, "grad_norm": 3.8301773071289062, "learning_rate": 6.595442178238528e-06, "loss": 0.06837687, "memory(GiB)": 13.7, "step": 45520, "train_speed(iter/s)": 1.531668 }, { "acc": 0.9895833, "epoch": 21.338176704944924, "grad_norm": 2.1827433109283447, "learning_rate": 6.594707548888883e-06, "loss": 0.04323187, "memory(GiB)": 13.7, "step": 45525, "train_speed(iter/s)": 1.531673 }, { "acc": 0.98419876, "epoch": 21.340520271853762, "grad_norm": 3.7137649059295654, "learning_rate": 6.593972881219724e-06, "loss": 0.06927556, "memory(GiB)": 13.7, "step": 45530, "train_speed(iter/s)": 1.531669 }, { "acc": 0.98187504, "epoch": 21.342863838762597, "grad_norm": 5.34446382522583, "learning_rate": 6.5932381752487125e-06, "loss": 0.07419381, "memory(GiB)": 13.7, "step": 45535, "train_speed(iter/s)": 1.531674 }, { "acc": 0.9760416, "epoch": 21.34520740567143, "grad_norm": 2.3283448219299316, "learning_rate": 6.59250343099351e-06, "loss": 0.11539615, "memory(GiB)": 13.7, "step": 45540, "train_speed(iter/s)": 1.531681 }, { "acc": 0.97781258, "epoch": 21.34755097258027, "grad_norm": 5.068085193634033, "learning_rate": 6.591768648471773e-06, "loss": 0.07943352, "memory(GiB)": 13.7, "step": 45545, "train_speed(iter/s)": 1.531675 }, { "acc": 0.99125004, "epoch": 21.349894539489103, "grad_norm": 3.8049864768981934, "learning_rate": 6.5910338277011655e-06, "loss": 0.03496405, "memory(GiB)": 13.7, "step": 45550, "train_speed(iter/s)": 1.531678 }, { "acc": 0.978125, "epoch": 21.352238106397937, "grad_norm": 4.45882511138916, "learning_rate": 6.590298968699349e-06, "loss": 0.0704271, "memory(GiB)": 13.7, "step": 45555, "train_speed(iter/s)": 1.53168 }, { "acc": 0.98402786, "epoch": 21.35458167330677, "grad_norm": 1.748759388923645, "learning_rate": 6.589564071483991e-06, "loss": 0.06750606, "memory(GiB)": 13.7, "step": 45560, "train_speed(iter/s)": 1.531685 }, { "acc": 0.98187504, "epoch": 21.35692524021561, "grad_norm": 4.031177997589111, "learning_rate": 6.588829136072752e-06, "loss": 0.04593237, "memory(GiB)": 13.7, "step": 45565, "train_speed(iter/s)": 1.531702 }, { "acc": 0.97958336, "epoch": 21.359268807124444, "grad_norm": 3.2707178592681885, "learning_rate": 6.588094162483299e-06, "loss": 0.04070712, "memory(GiB)": 13.7, "step": 45570, "train_speed(iter/s)": 1.531706 }, { "acc": 0.9825695, "epoch": 21.361612374033278, "grad_norm": 0.1465514749288559, "learning_rate": 6.587359150733296e-06, "loss": 0.03459482, "memory(GiB)": 13.7, "step": 45575, "train_speed(iter/s)": 1.531708 }, { "acc": 0.98605728, "epoch": 21.363955940942112, "grad_norm": 0.8652646541595459, "learning_rate": 6.586624100840413e-06, "loss": 0.0783321, "memory(GiB)": 13.7, "step": 45580, "train_speed(iter/s)": 1.53171 }, { "acc": 0.97003202, "epoch": 21.36629950785095, "grad_norm": 17.830333709716797, "learning_rate": 6.5858890128223185e-06, "loss": 0.13829038, "memory(GiB)": 13.7, "step": 45585, "train_speed(iter/s)": 1.531709 }, { "acc": 0.98562498, "epoch": 21.368643074759785, "grad_norm": 0.7297029495239258, "learning_rate": 6.58515388669668e-06, "loss": 0.03462033, "memory(GiB)": 13.7, "step": 45590, "train_speed(iter/s)": 1.531715 }, { "acc": 0.98803034, "epoch": 21.37098664166862, "grad_norm": 0.013585137203335762, "learning_rate": 6.58441872248117e-06, "loss": 0.06797822, "memory(GiB)": 13.7, "step": 45595, "train_speed(iter/s)": 1.531729 }, { "acc": 0.97697296, "epoch": 21.373330208577453, "grad_norm": 6.531499862670898, "learning_rate": 6.5836835201934535e-06, "loss": 0.06728396, "memory(GiB)": 13.7, "step": 45600, "train_speed(iter/s)": 1.53173 }, { "acc": 0.984375, "epoch": 21.37567377548629, "grad_norm": 2.853672981262207, "learning_rate": 6.582948279851209e-06, "loss": 0.04964918, "memory(GiB)": 13.7, "step": 45605, "train_speed(iter/s)": 1.531736 }, { "acc": 0.99177084, "epoch": 21.378017342395125, "grad_norm": 0.1530545949935913, "learning_rate": 6.582213001472105e-06, "loss": 0.0520709, "memory(GiB)": 13.7, "step": 45610, "train_speed(iter/s)": 1.531739 }, { "acc": 0.99736118, "epoch": 21.38036090930396, "grad_norm": 0.5887911319732666, "learning_rate": 6.58147768507382e-06, "loss": 0.03883368, "memory(GiB)": 13.7, "step": 45615, "train_speed(iter/s)": 1.531743 }, { "acc": 0.98395834, "epoch": 21.382704476212798, "grad_norm": 5.81978178024292, "learning_rate": 6.580742330674022e-06, "loss": 0.0482796, "memory(GiB)": 13.7, "step": 45620, "train_speed(iter/s)": 1.531752 }, { "acc": 0.98781252, "epoch": 21.385048043121632, "grad_norm": 3.793531656265259, "learning_rate": 6.580006938290391e-06, "loss": 0.07002146, "memory(GiB)": 13.7, "step": 45625, "train_speed(iter/s)": 1.531758 }, { "acc": 0.99092255, "epoch": 21.387391610030466, "grad_norm": 1.3640979528427124, "learning_rate": 6.579271507940602e-06, "loss": 0.04983958, "memory(GiB)": 13.7, "step": 45630, "train_speed(iter/s)": 1.531761 }, { "acc": 0.98363094, "epoch": 21.3897351769393, "grad_norm": 3.474332094192505, "learning_rate": 6.578536039642334e-06, "loss": 0.04438803, "memory(GiB)": 13.7, "step": 45635, "train_speed(iter/s)": 1.531763 }, { "acc": 0.98258934, "epoch": 21.39207874384814, "grad_norm": 7.450661659240723, "learning_rate": 6.577800533413261e-06, "loss": 0.06588825, "memory(GiB)": 13.7, "step": 45640, "train_speed(iter/s)": 1.531764 }, { "acc": 0.97279758, "epoch": 21.394422310756973, "grad_norm": 4.771098613739014, "learning_rate": 6.577064989271068e-06, "loss": 0.09393095, "memory(GiB)": 13.7, "step": 45645, "train_speed(iter/s)": 1.531776 }, { "acc": 0.98827381, "epoch": 21.396765877665807, "grad_norm": 5.883480548858643, "learning_rate": 6.576329407233432e-06, "loss": 0.07489716, "memory(GiB)": 13.7, "step": 45650, "train_speed(iter/s)": 1.531779 }, { "acc": 0.98475151, "epoch": 21.39910944457464, "grad_norm": 5.27689790725708, "learning_rate": 6.575593787318033e-06, "loss": 0.05672002, "memory(GiB)": 13.7, "step": 45655, "train_speed(iter/s)": 1.531793 }, { "acc": 0.97092266, "epoch": 21.40145301148348, "grad_norm": 5.133403778076172, "learning_rate": 6.574858129542553e-06, "loss": 0.08917947, "memory(GiB)": 13.7, "step": 45660, "train_speed(iter/s)": 1.531801 }, { "acc": 0.98895836, "epoch": 21.403796578392313, "grad_norm": 4.826709270477295, "learning_rate": 6.574122433924677e-06, "loss": 0.03737404, "memory(GiB)": 13.7, "step": 45665, "train_speed(iter/s)": 1.531799 }, { "acc": 0.99229164, "epoch": 21.406140145301148, "grad_norm": 2.9677891731262207, "learning_rate": 6.573386700482086e-06, "loss": 0.04566068, "memory(GiB)": 13.7, "step": 45670, "train_speed(iter/s)": 1.531805 }, { "acc": 0.99125004, "epoch": 21.408483712209982, "grad_norm": 4.140109062194824, "learning_rate": 6.572650929232468e-06, "loss": 0.04658329, "memory(GiB)": 13.7, "step": 45675, "train_speed(iter/s)": 1.531814 }, { "acc": 0.978125, "epoch": 21.41082727911882, "grad_norm": 5.794869422912598, "learning_rate": 6.571915120193505e-06, "loss": 0.08751257, "memory(GiB)": 13.7, "step": 45680, "train_speed(iter/s)": 1.53182 }, { "acc": 0.97872028, "epoch": 21.413170846027654, "grad_norm": 1.3094643354415894, "learning_rate": 6.571179273382885e-06, "loss": 0.07963055, "memory(GiB)": 13.7, "step": 45685, "train_speed(iter/s)": 1.531824 }, { "acc": 0.99323864, "epoch": 21.41551441293649, "grad_norm": 0.08108195662498474, "learning_rate": 6.5704433888182974e-06, "loss": 0.05399829, "memory(GiB)": 13.7, "step": 45690, "train_speed(iter/s)": 1.531823 }, { "acc": 0.98715458, "epoch": 21.417857979845323, "grad_norm": 5.6013503074646, "learning_rate": 6.569707466517428e-06, "loss": 0.06388748, "memory(GiB)": 13.7, "step": 45695, "train_speed(iter/s)": 1.531823 }, { "acc": 0.98642368, "epoch": 21.42020154675416, "grad_norm": 0.6141123175621033, "learning_rate": 6.5689715064979655e-06, "loss": 0.11557738, "memory(GiB)": 13.7, "step": 45700, "train_speed(iter/s)": 1.531816 }, { "acc": 0.96715889, "epoch": 21.422545113662995, "grad_norm": 3.3778116703033447, "learning_rate": 6.5682355087776005e-06, "loss": 0.13852837, "memory(GiB)": 13.7, "step": 45705, "train_speed(iter/s)": 1.531811 }, { "acc": 0.98809528, "epoch": 21.42488868057183, "grad_norm": 2.710360527038574, "learning_rate": 6.567499473374023e-06, "loss": 0.05786572, "memory(GiB)": 13.7, "step": 45710, "train_speed(iter/s)": 1.531812 }, { "acc": 0.98974533, "epoch": 21.427232247480667, "grad_norm": 5.147988796234131, "learning_rate": 6.566763400304927e-06, "loss": 0.04107712, "memory(GiB)": 13.7, "step": 45715, "train_speed(iter/s)": 1.531808 }, { "acc": 0.98194447, "epoch": 21.4295758143895, "grad_norm": 3.500462293624878, "learning_rate": 6.5660272895880066e-06, "loss": 0.05378977, "memory(GiB)": 13.7, "step": 45720, "train_speed(iter/s)": 1.531815 }, { "acc": 0.97811956, "epoch": 21.431919381298336, "grad_norm": 4.689563751220703, "learning_rate": 6.5652911412409525e-06, "loss": 0.11097696, "memory(GiB)": 13.7, "step": 45725, "train_speed(iter/s)": 1.531805 }, { "acc": 0.97999992, "epoch": 21.43426294820717, "grad_norm": 0.5739614963531494, "learning_rate": 6.564554955281458e-06, "loss": 0.08441732, "memory(GiB)": 13.7, "step": 45730, "train_speed(iter/s)": 1.531814 }, { "acc": 0.96726189, "epoch": 21.436606515116008, "grad_norm": 8.157670974731445, "learning_rate": 6.563818731727223e-06, "loss": 0.08657138, "memory(GiB)": 13.7, "step": 45735, "train_speed(iter/s)": 1.53182 }, { "acc": 0.97861118, "epoch": 21.438950082024842, "grad_norm": 19.674877166748047, "learning_rate": 6.56308247059594e-06, "loss": 0.18354499, "memory(GiB)": 13.7, "step": 45740, "train_speed(iter/s)": 1.531825 }, { "acc": 0.98289261, "epoch": 21.441293648933677, "grad_norm": 0.7678974270820618, "learning_rate": 6.5623461719053085e-06, "loss": 0.08947589, "memory(GiB)": 13.7, "step": 45745, "train_speed(iter/s)": 1.531832 }, { "acc": 0.97927084, "epoch": 21.44363721584251, "grad_norm": 3.1451687812805176, "learning_rate": 6.561609835673026e-06, "loss": 0.04507279, "memory(GiB)": 13.7, "step": 45750, "train_speed(iter/s)": 1.53183 }, { "acc": 0.96931553, "epoch": 21.44598078275135, "grad_norm": 6.908126354217529, "learning_rate": 6.560873461916793e-06, "loss": 0.14222674, "memory(GiB)": 13.7, "step": 45755, "train_speed(iter/s)": 1.531838 }, { "acc": 0.97895832, "epoch": 21.448324349660183, "grad_norm": 0.9255964159965515, "learning_rate": 6.560137050654306e-06, "loss": 0.05260792, "memory(GiB)": 13.7, "step": 45760, "train_speed(iter/s)": 1.531848 }, { "acc": 0.97779751, "epoch": 21.450667916569017, "grad_norm": 3.4138171672821045, "learning_rate": 6.559400601903269e-06, "loss": 0.0767769, "memory(GiB)": 13.7, "step": 45765, "train_speed(iter/s)": 1.531854 }, { "acc": 0.97205353, "epoch": 21.45301148347785, "grad_norm": 2.763032913208008, "learning_rate": 6.558664115681384e-06, "loss": 0.11593473, "memory(GiB)": 13.7, "step": 45770, "train_speed(iter/s)": 1.531867 }, { "acc": 0.97809868, "epoch": 21.45535505038669, "grad_norm": 6.605988025665283, "learning_rate": 6.5579275920063515e-06, "loss": 0.08742123, "memory(GiB)": 13.7, "step": 45775, "train_speed(iter/s)": 1.531865 }, { "acc": 0.98601189, "epoch": 21.457698617295524, "grad_norm": 4.135819911956787, "learning_rate": 6.557191030895877e-06, "loss": 0.07207862, "memory(GiB)": 13.7, "step": 45780, "train_speed(iter/s)": 1.531876 }, { "acc": 0.98594704, "epoch": 21.460042184204358, "grad_norm": 1.2803773880004883, "learning_rate": 6.556454432367663e-06, "loss": 0.0575179, "memory(GiB)": 13.7, "step": 45785, "train_speed(iter/s)": 1.531878 }, { "acc": 0.97875004, "epoch": 21.462385751113196, "grad_norm": 4.927475452423096, "learning_rate": 6.555717796439417e-06, "loss": 0.0628974, "memory(GiB)": 13.7, "step": 45790, "train_speed(iter/s)": 1.531884 }, { "acc": 0.9895834, "epoch": 21.46472931802203, "grad_norm": 1.7787977457046509, "learning_rate": 6.554981123128844e-06, "loss": 0.0326279, "memory(GiB)": 13.7, "step": 45795, "train_speed(iter/s)": 1.531884 }, { "acc": 0.96821423, "epoch": 21.467072884930865, "grad_norm": 6.830376148223877, "learning_rate": 6.554244412453653e-06, "loss": 0.11449747, "memory(GiB)": 13.7, "step": 45800, "train_speed(iter/s)": 1.531895 }, { "acc": 0.99035721, "epoch": 21.4694164518397, "grad_norm": 7.688717365264893, "learning_rate": 6.553507664431549e-06, "loss": 0.04117126, "memory(GiB)": 13.7, "step": 45805, "train_speed(iter/s)": 1.531902 }, { "acc": 0.97850876, "epoch": 21.471760018748537, "grad_norm": 8.943836212158203, "learning_rate": 6.552770879080244e-06, "loss": 0.13059237, "memory(GiB)": 13.7, "step": 45810, "train_speed(iter/s)": 1.531913 }, { "acc": 0.9848959, "epoch": 21.47410358565737, "grad_norm": 4.83905029296875, "learning_rate": 6.552034056417444e-06, "loss": 0.0665118, "memory(GiB)": 13.7, "step": 45815, "train_speed(iter/s)": 1.531918 }, { "acc": 0.98490534, "epoch": 21.476447152566205, "grad_norm": 1.1302895545959473, "learning_rate": 6.551297196460867e-06, "loss": 0.0450913, "memory(GiB)": 13.7, "step": 45820, "train_speed(iter/s)": 1.531931 }, { "acc": 0.98666668, "epoch": 21.47879071947504, "grad_norm": 19.953582763671875, "learning_rate": 6.550560299228217e-06, "loss": 0.03791747, "memory(GiB)": 13.7, "step": 45825, "train_speed(iter/s)": 1.531946 }, { "acc": 0.975, "epoch": 21.481134286383877, "grad_norm": 0.013740146532654762, "learning_rate": 6.54982336473721e-06, "loss": 0.08595739, "memory(GiB)": 13.7, "step": 45830, "train_speed(iter/s)": 1.531946 }, { "acc": 0.9780303, "epoch": 21.48347785329271, "grad_norm": 1.6403779983520508, "learning_rate": 6.5490863930055605e-06, "loss": 0.07731611, "memory(GiB)": 13.7, "step": 45835, "train_speed(iter/s)": 1.531952 }, { "acc": 0.97573318, "epoch": 21.485821420201546, "grad_norm": 5.008092880249023, "learning_rate": 6.548349384050981e-06, "loss": 0.06459826, "memory(GiB)": 13.7, "step": 45840, "train_speed(iter/s)": 1.53196 }, { "acc": 0.98288689, "epoch": 21.48816498711038, "grad_norm": 4.63018274307251, "learning_rate": 6.547612337891188e-06, "loss": 0.07061805, "memory(GiB)": 13.7, "step": 45845, "train_speed(iter/s)": 1.531967 }, { "acc": 0.97842264, "epoch": 21.490508554019218, "grad_norm": 1.2385982275009155, "learning_rate": 6.546875254543895e-06, "loss": 0.08986535, "memory(GiB)": 13.7, "step": 45850, "train_speed(iter/s)": 1.531972 }, { "acc": 0.98447914, "epoch": 21.492852120928053, "grad_norm": 1.9444185495376587, "learning_rate": 6.546138134026823e-06, "loss": 0.04039429, "memory(GiB)": 13.7, "step": 45855, "train_speed(iter/s)": 1.531975 }, { "acc": 0.97936954, "epoch": 21.495195687836887, "grad_norm": 2.061277389526367, "learning_rate": 6.545400976357688e-06, "loss": 0.0756461, "memory(GiB)": 13.7, "step": 45860, "train_speed(iter/s)": 1.531987 }, { "acc": 0.98687496, "epoch": 21.497539254745725, "grad_norm": 1.190561056137085, "learning_rate": 6.5446637815542094e-06, "loss": 0.02875624, "memory(GiB)": 13.7, "step": 45865, "train_speed(iter/s)": 1.531981 }, { "acc": 0.98250399, "epoch": 21.49988282165456, "grad_norm": 4.257355690002441, "learning_rate": 6.543926549634103e-06, "loss": 0.05222452, "memory(GiB)": 13.7, "step": 45870, "train_speed(iter/s)": 1.53199 }, { "acc": 0.9669445, "epoch": 21.502226388563393, "grad_norm": 0.44596150517463684, "learning_rate": 6.543189280615096e-06, "loss": 0.08325907, "memory(GiB)": 13.7, "step": 45875, "train_speed(iter/s)": 1.53199 }, { "acc": 0.97979164, "epoch": 21.504569955472228, "grad_norm": 3.5167076587677, "learning_rate": 6.542451974514907e-06, "loss": 0.02642128, "memory(GiB)": 13.7, "step": 45880, "train_speed(iter/s)": 1.531988 }, { "acc": 0.9729166, "epoch": 21.506913522381065, "grad_norm": 4.1673736572265625, "learning_rate": 6.541714631351257e-06, "loss": 0.07079757, "memory(GiB)": 13.7, "step": 45885, "train_speed(iter/s)": 1.53199 }, { "acc": 0.97372589, "epoch": 21.5092570892899, "grad_norm": 6.271239280700684, "learning_rate": 6.54097725114187e-06, "loss": 0.09328436, "memory(GiB)": 13.7, "step": 45890, "train_speed(iter/s)": 1.531994 }, { "acc": 0.98633928, "epoch": 21.511600656198734, "grad_norm": 4.005577564239502, "learning_rate": 6.540239833904472e-06, "loss": 0.05660725, "memory(GiB)": 13.7, "step": 45895, "train_speed(iter/s)": 1.531987 }, { "acc": 0.9927084, "epoch": 21.51394422310757, "grad_norm": 1.4126176834106445, "learning_rate": 6.539502379656788e-06, "loss": 0.0279922, "memory(GiB)": 13.7, "step": 45900, "train_speed(iter/s)": 1.531999 }, { "acc": 0.97607222, "epoch": 21.516287790016406, "grad_norm": 6.510622978210449, "learning_rate": 6.5387648884165425e-06, "loss": 0.11894796, "memory(GiB)": 13.7, "step": 45905, "train_speed(iter/s)": 1.532005 }, { "acc": 0.984375, "epoch": 21.51863135692524, "grad_norm": 1.5534110069274902, "learning_rate": 6.538027360201461e-06, "loss": 0.04524749, "memory(GiB)": 13.7, "step": 45910, "train_speed(iter/s)": 1.532023 }, { "acc": 0.9777976, "epoch": 21.520974923834075, "grad_norm": 3.2385342121124268, "learning_rate": 6.537289795029272e-06, "loss": 0.09622644, "memory(GiB)": 13.7, "step": 45915, "train_speed(iter/s)": 1.532027 }, { "acc": 0.98252974, "epoch": 21.52331849074291, "grad_norm": 4.837905406951904, "learning_rate": 6.5365521929177064e-06, "loss": 0.08780221, "memory(GiB)": 13.7, "step": 45920, "train_speed(iter/s)": 1.532033 }, { "acc": 0.98536701, "epoch": 21.525662057651747, "grad_norm": 1.1514678001403809, "learning_rate": 6.535814553884493e-06, "loss": 0.03337761, "memory(GiB)": 13.7, "step": 45925, "train_speed(iter/s)": 1.532028 }, { "acc": 0.97593746, "epoch": 21.52800562456058, "grad_norm": 5.950214385986328, "learning_rate": 6.5350768779473625e-06, "loss": 0.08555853, "memory(GiB)": 13.7, "step": 45930, "train_speed(iter/s)": 1.53204 }, { "acc": 0.996875, "epoch": 21.530349191469416, "grad_norm": 2.004153251647949, "learning_rate": 6.534339165124043e-06, "loss": 0.0159502, "memory(GiB)": 13.7, "step": 45935, "train_speed(iter/s)": 1.532054 }, { "acc": 0.99152775, "epoch": 21.532692758378253, "grad_norm": 0.3848721981048584, "learning_rate": 6.53360141543227e-06, "loss": 0.03957843, "memory(GiB)": 13.7, "step": 45940, "train_speed(iter/s)": 1.532057 }, { "acc": 0.98888922, "epoch": 21.535036325287088, "grad_norm": 2.734562397003174, "learning_rate": 6.532863628889776e-06, "loss": 0.05782138, "memory(GiB)": 13.7, "step": 45945, "train_speed(iter/s)": 1.532061 }, { "acc": 0.99071865, "epoch": 21.537379892195922, "grad_norm": 1.9019544124603271, "learning_rate": 6.532125805514295e-06, "loss": 0.07393255, "memory(GiB)": 13.7, "step": 45950, "train_speed(iter/s)": 1.532073 }, { "acc": 0.97723665, "epoch": 21.539723459104756, "grad_norm": 4.843438148498535, "learning_rate": 6.5313879453235594e-06, "loss": 0.15556918, "memory(GiB)": 13.7, "step": 45955, "train_speed(iter/s)": 1.532077 }, { "acc": 0.97258835, "epoch": 21.542067026013594, "grad_norm": 5.389438629150391, "learning_rate": 6.530650048335308e-06, "loss": 0.11908894, "memory(GiB)": 13.7, "step": 45960, "train_speed(iter/s)": 1.532081 }, { "acc": 0.97875004, "epoch": 21.54441059292243, "grad_norm": 0.48850756883621216, "learning_rate": 6.529912114567277e-06, "loss": 0.0541164, "memory(GiB)": 13.7, "step": 45965, "train_speed(iter/s)": 1.532084 }, { "acc": 0.97885418, "epoch": 21.546754159831263, "grad_norm": 1.6457399129867554, "learning_rate": 6.529174144037203e-06, "loss": 0.11304789, "memory(GiB)": 13.7, "step": 45970, "train_speed(iter/s)": 1.532086 }, { "acc": 0.97145147, "epoch": 21.549097726740097, "grad_norm": 2.0522305965423584, "learning_rate": 6.528436136762826e-06, "loss": 0.13298922, "memory(GiB)": 13.7, "step": 45975, "train_speed(iter/s)": 1.532096 }, { "acc": 0.98714285, "epoch": 21.551441293648935, "grad_norm": 0.06945810467004776, "learning_rate": 6.52769809276188e-06, "loss": 0.09291592, "memory(GiB)": 13.7, "step": 45980, "train_speed(iter/s)": 1.53209 }, { "acc": 0.975947, "epoch": 21.55378486055777, "grad_norm": 0.03690246120095253, "learning_rate": 6.526960012052113e-06, "loss": 0.07311282, "memory(GiB)": 13.7, "step": 45985, "train_speed(iter/s)": 1.532099 }, { "acc": 0.9822917, "epoch": 21.556128427466604, "grad_norm": 5.938742160797119, "learning_rate": 6.52622189465126e-06, "loss": 0.07735461, "memory(GiB)": 13.7, "step": 45990, "train_speed(iter/s)": 1.53211 }, { "acc": 0.97770214, "epoch": 21.558471994375438, "grad_norm": 4.688400745391846, "learning_rate": 6.525483740577067e-06, "loss": 0.09881949, "memory(GiB)": 13.7, "step": 45995, "train_speed(iter/s)": 1.532117 }, { "acc": 0.98283844, "epoch": 21.560815561284276, "grad_norm": 5.232926368713379, "learning_rate": 6.524745549847271e-06, "loss": 0.07755338, "memory(GiB)": 13.7, "step": 46000, "train_speed(iter/s)": 1.532125 }, { "acc": 0.9984375, "epoch": 21.56315912819311, "grad_norm": 1.125167727470398, "learning_rate": 6.524007322479623e-06, "loss": 0.03211247, "memory(GiB)": 13.7, "step": 46005, "train_speed(iter/s)": 1.532124 }, { "acc": 0.98784447, "epoch": 21.565502695101944, "grad_norm": 2.665083885192871, "learning_rate": 6.523269058491864e-06, "loss": 0.07091261, "memory(GiB)": 13.7, "step": 46010, "train_speed(iter/s)": 1.532124 }, { "acc": 0.99561014, "epoch": 21.56784626201078, "grad_norm": 5.153646469116211, "learning_rate": 6.522530757901738e-06, "loss": 0.03200677, "memory(GiB)": 13.7, "step": 46015, "train_speed(iter/s)": 1.532122 }, { "acc": 0.98943453, "epoch": 21.570189828919617, "grad_norm": 1.8005143404006958, "learning_rate": 6.521792420726995e-06, "loss": 0.09621933, "memory(GiB)": 13.7, "step": 46020, "train_speed(iter/s)": 1.532134 }, { "acc": 0.98312502, "epoch": 21.57253339582845, "grad_norm": 3.22879958152771, "learning_rate": 6.521054046985377e-06, "loss": 0.05204319, "memory(GiB)": 13.7, "step": 46025, "train_speed(iter/s)": 1.532143 }, { "acc": 0.99298611, "epoch": 21.574876962737285, "grad_norm": 0.6562358140945435, "learning_rate": 6.520315636694638e-06, "loss": 0.01527425, "memory(GiB)": 13.7, "step": 46030, "train_speed(iter/s)": 1.532154 }, { "acc": 0.98106365, "epoch": 21.577220529646123, "grad_norm": 4.425942420959473, "learning_rate": 6.519577189872522e-06, "loss": 0.05813229, "memory(GiB)": 13.7, "step": 46035, "train_speed(iter/s)": 1.53216 }, { "acc": 0.98957787, "epoch": 21.579564096554957, "grad_norm": 2.3914694786071777, "learning_rate": 6.518838706536781e-06, "loss": 0.05135188, "memory(GiB)": 13.7, "step": 46040, "train_speed(iter/s)": 1.532168 }, { "acc": 0.97145834, "epoch": 21.58190766346379, "grad_norm": 4.102187156677246, "learning_rate": 6.518100186705167e-06, "loss": 0.05851078, "memory(GiB)": 13.7, "step": 46045, "train_speed(iter/s)": 1.532166 }, { "acc": 0.98104172, "epoch": 21.584251230372626, "grad_norm": 0.2323547601699829, "learning_rate": 6.51736163039543e-06, "loss": 0.07810836, "memory(GiB)": 13.7, "step": 46050, "train_speed(iter/s)": 1.532172 }, { "acc": 0.98113098, "epoch": 21.586594797281464, "grad_norm": 5.298161506652832, "learning_rate": 6.516623037625321e-06, "loss": 0.05839688, "memory(GiB)": 13.7, "step": 46055, "train_speed(iter/s)": 1.532182 }, { "acc": 0.98946428, "epoch": 21.588938364190298, "grad_norm": 4.9152302742004395, "learning_rate": 6.515884408412597e-06, "loss": 0.04702511, "memory(GiB)": 13.7, "step": 46060, "train_speed(iter/s)": 1.532191 }, { "acc": 0.99375, "epoch": 21.591281931099132, "grad_norm": 0.10801918059587479, "learning_rate": 6.515145742775008e-06, "loss": 0.01131599, "memory(GiB)": 13.7, "step": 46065, "train_speed(iter/s)": 1.532191 }, { "acc": 0.98858175, "epoch": 21.593625498007967, "grad_norm": 3.9877824783325195, "learning_rate": 6.514407040730314e-06, "loss": 0.06012251, "memory(GiB)": 13.7, "step": 46070, "train_speed(iter/s)": 1.532199 }, { "acc": 0.9854167, "epoch": 21.595969064916805, "grad_norm": 5.609597206115723, "learning_rate": 6.513668302296266e-06, "loss": 0.04185009, "memory(GiB)": 13.7, "step": 46075, "train_speed(iter/s)": 1.532199 }, { "acc": 0.98270836, "epoch": 21.59831263182564, "grad_norm": 5.003026008605957, "learning_rate": 6.512929527490626e-06, "loss": 0.05620853, "memory(GiB)": 13.7, "step": 46080, "train_speed(iter/s)": 1.532197 }, { "acc": 0.9803175, "epoch": 21.600656198734473, "grad_norm": 2.2556779384613037, "learning_rate": 6.512190716331146e-06, "loss": 0.08293778, "memory(GiB)": 13.7, "step": 46085, "train_speed(iter/s)": 1.532199 }, { "acc": 0.99125004, "epoch": 21.602999765643307, "grad_norm": 3.0660009384155273, "learning_rate": 6.511451868835588e-06, "loss": 0.0286903, "memory(GiB)": 13.7, "step": 46090, "train_speed(iter/s)": 1.532204 }, { "acc": 0.99336309, "epoch": 21.605343332552145, "grad_norm": 0.8275740742683411, "learning_rate": 6.510712985021712e-06, "loss": 0.05133159, "memory(GiB)": 13.7, "step": 46095, "train_speed(iter/s)": 1.532204 }, { "acc": 0.98937502, "epoch": 21.60768689946098, "grad_norm": 1.605509877204895, "learning_rate": 6.509974064907278e-06, "loss": 0.0437223, "memory(GiB)": 13.7, "step": 46100, "train_speed(iter/s)": 1.532215 }, { "acc": 0.97974205, "epoch": 21.610030466369814, "grad_norm": 6.807834625244141, "learning_rate": 6.509235108510044e-06, "loss": 0.09934487, "memory(GiB)": 13.7, "step": 46105, "train_speed(iter/s)": 1.532217 }, { "acc": 0.9802084, "epoch": 21.612374033278652, "grad_norm": 1.5843961238861084, "learning_rate": 6.508496115847776e-06, "loss": 0.09049564, "memory(GiB)": 13.7, "step": 46110, "train_speed(iter/s)": 1.532217 }, { "acc": 0.99404764, "epoch": 21.614717600187486, "grad_norm": 1.2020574808120728, "learning_rate": 6.5077570869382365e-06, "loss": 0.03575161, "memory(GiB)": 13.7, "step": 46115, "train_speed(iter/s)": 1.532224 }, { "acc": 0.97217255, "epoch": 21.61706116709632, "grad_norm": 1.3042229413986206, "learning_rate": 6.507018021799186e-06, "loss": 0.0709629, "memory(GiB)": 13.7, "step": 46120, "train_speed(iter/s)": 1.532224 }, { "acc": 0.98657198, "epoch": 21.619404734005155, "grad_norm": 4.479179382324219, "learning_rate": 6.506278920448393e-06, "loss": 0.04420617, "memory(GiB)": 13.7, "step": 46125, "train_speed(iter/s)": 1.532221 }, { "acc": 0.99187498, "epoch": 21.621748300913993, "grad_norm": 1.7296801805496216, "learning_rate": 6.505539782903622e-06, "loss": 0.01315591, "memory(GiB)": 13.7, "step": 46130, "train_speed(iter/s)": 1.532231 }, { "acc": 0.97373514, "epoch": 21.624091867822827, "grad_norm": 6.530942916870117, "learning_rate": 6.504800609182638e-06, "loss": 0.1206985, "memory(GiB)": 13.7, "step": 46135, "train_speed(iter/s)": 1.532232 }, { "acc": 0.96937504, "epoch": 21.62643543473166, "grad_norm": 0.001943010138347745, "learning_rate": 6.5040613993032085e-06, "loss": 0.06280042, "memory(GiB)": 13.7, "step": 46140, "train_speed(iter/s)": 1.532235 }, { "acc": 0.98874998, "epoch": 21.628779001640495, "grad_norm": 1.5095127820968628, "learning_rate": 6.503322153283105e-06, "loss": 0.02837851, "memory(GiB)": 13.7, "step": 46145, "train_speed(iter/s)": 1.532233 }, { "acc": 0.97204857, "epoch": 21.631122568549333, "grad_norm": 3.647134780883789, "learning_rate": 6.502582871140093e-06, "loss": 0.10027039, "memory(GiB)": 13.7, "step": 46150, "train_speed(iter/s)": 1.532235 }, { "acc": 0.97358284, "epoch": 21.633466135458168, "grad_norm": 6.330902099609375, "learning_rate": 6.501843552891943e-06, "loss": 0.1108181, "memory(GiB)": 13.7, "step": 46155, "train_speed(iter/s)": 1.532239 }, { "acc": 0.97019348, "epoch": 21.635809702367002, "grad_norm": 4.904604434967041, "learning_rate": 6.5011041985564275e-06, "loss": 0.09726892, "memory(GiB)": 13.7, "step": 46160, "train_speed(iter/s)": 1.532246 }, { "acc": 0.98078136, "epoch": 21.638153269275836, "grad_norm": 3.3439202308654785, "learning_rate": 6.500364808151316e-06, "loss": 0.09129862, "memory(GiB)": 13.7, "step": 46165, "train_speed(iter/s)": 1.532244 }, { "acc": 0.98529758, "epoch": 21.640496836184674, "grad_norm": 2.113199234008789, "learning_rate": 6.499625381694382e-06, "loss": 0.03345184, "memory(GiB)": 13.7, "step": 46170, "train_speed(iter/s)": 1.532249 }, { "acc": 0.97002182, "epoch": 21.64284040309351, "grad_norm": 1.1909997463226318, "learning_rate": 6.498885919203398e-06, "loss": 0.10293759, "memory(GiB)": 13.7, "step": 46175, "train_speed(iter/s)": 1.532257 }, { "acc": 0.97937498, "epoch": 21.645183970002343, "grad_norm": 1.0317435264587402, "learning_rate": 6.4981464206961416e-06, "loss": 0.07531786, "memory(GiB)": 13.7, "step": 46180, "train_speed(iter/s)": 1.532261 }, { "acc": 0.97696428, "epoch": 21.647527536911177, "grad_norm": 2.8927266597747803, "learning_rate": 6.497406886190383e-06, "loss": 0.08503565, "memory(GiB)": 13.7, "step": 46185, "train_speed(iter/s)": 1.532261 }, { "acc": 0.98946428, "epoch": 21.649871103820015, "grad_norm": 4.730291843414307, "learning_rate": 6.496667315703901e-06, "loss": 0.03525939, "memory(GiB)": 13.7, "step": 46190, "train_speed(iter/s)": 1.532261 }, { "acc": 0.98869047, "epoch": 21.65221467072885, "grad_norm": 7.934533596038818, "learning_rate": 6.495927709254473e-06, "loss": 0.04727418, "memory(GiB)": 13.7, "step": 46195, "train_speed(iter/s)": 1.532274 }, { "acc": 0.984375, "epoch": 21.654558237637684, "grad_norm": 9.763971328735352, "learning_rate": 6.495188066859875e-06, "loss": 0.05853305, "memory(GiB)": 13.7, "step": 46200, "train_speed(iter/s)": 1.532285 }, { "acc": 0.97906246, "epoch": 21.65690180454652, "grad_norm": 3.5957980155944824, "learning_rate": 6.494448388537886e-06, "loss": 0.05383561, "memory(GiB)": 13.7, "step": 46205, "train_speed(iter/s)": 1.532283 }, { "acc": 0.99097757, "epoch": 21.659245371455356, "grad_norm": 4.2159247398376465, "learning_rate": 6.493708674306286e-06, "loss": 0.05486084, "memory(GiB)": 13.7, "step": 46210, "train_speed(iter/s)": 1.532292 }, { "acc": 0.98604164, "epoch": 21.66158893836419, "grad_norm": 2.6191329956054688, "learning_rate": 6.492968924182856e-06, "loss": 0.03654965, "memory(GiB)": 13.7, "step": 46215, "train_speed(iter/s)": 1.532299 }, { "acc": 0.97166672, "epoch": 21.663932505273024, "grad_norm": 4.706645965576172, "learning_rate": 6.492229138185376e-06, "loss": 0.05629981, "memory(GiB)": 13.7, "step": 46220, "train_speed(iter/s)": 1.532301 }, { "acc": 0.98567705, "epoch": 21.666276072181862, "grad_norm": 3.654848098754883, "learning_rate": 6.491489316331627e-06, "loss": 0.04691867, "memory(GiB)": 13.7, "step": 46225, "train_speed(iter/s)": 1.532303 }, { "acc": 0.97458334, "epoch": 21.668619639090696, "grad_norm": 7.301771640777588, "learning_rate": 6.490749458639393e-06, "loss": 0.06722131, "memory(GiB)": 13.7, "step": 46230, "train_speed(iter/s)": 1.532303 }, { "acc": 0.97771778, "epoch": 21.67096320599953, "grad_norm": 3.7109322547912598, "learning_rate": 6.490009565126458e-06, "loss": 0.0627073, "memory(GiB)": 13.7, "step": 46235, "train_speed(iter/s)": 1.532304 }, { "acc": 0.97770834, "epoch": 21.673306772908365, "grad_norm": 14.382997512817383, "learning_rate": 6.489269635810609e-06, "loss": 0.05422174, "memory(GiB)": 13.7, "step": 46240, "train_speed(iter/s)": 1.53231 }, { "acc": 0.96903267, "epoch": 21.675650339817203, "grad_norm": 8.473625183105469, "learning_rate": 6.4885296707096265e-06, "loss": 0.07858831, "memory(GiB)": 13.7, "step": 46245, "train_speed(iter/s)": 1.532314 }, { "acc": 0.97824993, "epoch": 21.677993906726037, "grad_norm": 5.678072929382324, "learning_rate": 6.487789669841299e-06, "loss": 0.05266908, "memory(GiB)": 13.7, "step": 46250, "train_speed(iter/s)": 1.53232 }, { "acc": 0.98698864, "epoch": 21.68033747363487, "grad_norm": 5.741931915283203, "learning_rate": 6.487049633223416e-06, "loss": 0.05870774, "memory(GiB)": 13.7, "step": 46255, "train_speed(iter/s)": 1.532335 }, { "acc": 0.9872139, "epoch": 21.682681040543706, "grad_norm": 3.869459390640259, "learning_rate": 6.486309560873763e-06, "loss": 0.05639451, "memory(GiB)": 13.7, "step": 46260, "train_speed(iter/s)": 1.532346 }, { "acc": 0.98825922, "epoch": 21.685024607452544, "grad_norm": 2.739071846008301, "learning_rate": 6.4855694528101285e-06, "loss": 0.05478604, "memory(GiB)": 13.7, "step": 46265, "train_speed(iter/s)": 1.53236 }, { "acc": 0.97258015, "epoch": 21.687368174361378, "grad_norm": 2.895444631576538, "learning_rate": 6.484829309050303e-06, "loss": 0.07529644, "memory(GiB)": 13.7, "step": 46270, "train_speed(iter/s)": 1.532367 }, { "acc": 0.96606522, "epoch": 21.689711741270212, "grad_norm": 3.711599588394165, "learning_rate": 6.484089129612076e-06, "loss": 0.13018975, "memory(GiB)": 13.7, "step": 46275, "train_speed(iter/s)": 1.532374 }, { "acc": 0.99437504, "epoch": 21.69205530817905, "grad_norm": 3.837414026260376, "learning_rate": 6.483348914513243e-06, "loss": 0.05307522, "memory(GiB)": 13.7, "step": 46280, "train_speed(iter/s)": 1.53237 }, { "acc": 0.98451385, "epoch": 21.694398875087884, "grad_norm": 2.099064588546753, "learning_rate": 6.482608663771592e-06, "loss": 0.09531187, "memory(GiB)": 13.7, "step": 46285, "train_speed(iter/s)": 1.532376 }, { "acc": 0.96681919, "epoch": 21.69674244199672, "grad_norm": 9.480232238769531, "learning_rate": 6.4818683774049195e-06, "loss": 0.12976099, "memory(GiB)": 13.7, "step": 46290, "train_speed(iter/s)": 1.532378 }, { "acc": 0.97489586, "epoch": 21.699086008905553, "grad_norm": 7.673602104187012, "learning_rate": 6.481128055431016e-06, "loss": 0.08903515, "memory(GiB)": 13.7, "step": 46295, "train_speed(iter/s)": 1.532379 }, { "acc": 0.96834679, "epoch": 21.70142957581439, "grad_norm": 0.3750980496406555, "learning_rate": 6.480387697867681e-06, "loss": 0.06955136, "memory(GiB)": 13.7, "step": 46300, "train_speed(iter/s)": 1.532388 }, { "acc": 0.98160725, "epoch": 21.703773142723225, "grad_norm": 8.171712875366211, "learning_rate": 6.479647304732706e-06, "loss": 0.07439008, "memory(GiB)": 13.7, "step": 46305, "train_speed(iter/s)": 1.5324 }, { "acc": 0.98351192, "epoch": 21.70611670963206, "grad_norm": 2.444720506668091, "learning_rate": 6.478906876043888e-06, "loss": 0.02976416, "memory(GiB)": 13.7, "step": 46310, "train_speed(iter/s)": 1.5324 }, { "acc": 0.98666668, "epoch": 21.708460276540894, "grad_norm": 0.2098316252231598, "learning_rate": 6.478166411819026e-06, "loss": 0.0421249, "memory(GiB)": 13.7, "step": 46315, "train_speed(iter/s)": 1.532411 }, { "acc": 0.97770834, "epoch": 21.71080384344973, "grad_norm": 2.643465518951416, "learning_rate": 6.477425912075919e-06, "loss": 0.08147126, "memory(GiB)": 13.7, "step": 46320, "train_speed(iter/s)": 1.532418 }, { "acc": 0.990625, "epoch": 21.713147410358566, "grad_norm": 1.3504599332809448, "learning_rate": 6.4766853768323645e-06, "loss": 0.01604834, "memory(GiB)": 13.7, "step": 46325, "train_speed(iter/s)": 1.532432 }, { "acc": 0.96811962, "epoch": 21.7154909772674, "grad_norm": 6.125937461853027, "learning_rate": 6.475944806106164e-06, "loss": 0.14072471, "memory(GiB)": 13.7, "step": 46330, "train_speed(iter/s)": 1.532439 }, { "acc": 0.97958336, "epoch": 21.717834544176235, "grad_norm": 3.385798692703247, "learning_rate": 6.475204199915117e-06, "loss": 0.07291998, "memory(GiB)": 13.7, "step": 46335, "train_speed(iter/s)": 1.532448 }, { "acc": 0.975, "epoch": 21.720178111085072, "grad_norm": 3.9290342330932617, "learning_rate": 6.474463558277024e-06, "loss": 0.080168, "memory(GiB)": 13.7, "step": 46340, "train_speed(iter/s)": 1.532459 }, { "acc": 0.97863102, "epoch": 21.722521677993907, "grad_norm": 1.1301754713058472, "learning_rate": 6.473722881209692e-06, "loss": 0.07131982, "memory(GiB)": 13.7, "step": 46345, "train_speed(iter/s)": 1.532466 }, { "acc": 0.98456421, "epoch": 21.72486524490274, "grad_norm": 2.314509153366089, "learning_rate": 6.47298216873092e-06, "loss": 0.08503966, "memory(GiB)": 13.7, "step": 46350, "train_speed(iter/s)": 1.532466 }, { "acc": 0.9795928, "epoch": 21.72720881181158, "grad_norm": 7.292415142059326, "learning_rate": 6.472241420858516e-06, "loss": 0.05660006, "memory(GiB)": 13.7, "step": 46355, "train_speed(iter/s)": 1.532477 }, { "acc": 0.98923607, "epoch": 21.729552378720413, "grad_norm": 2.506080389022827, "learning_rate": 6.471500637610282e-06, "loss": 0.0307891, "memory(GiB)": 13.7, "step": 46360, "train_speed(iter/s)": 1.532473 }, { "acc": 0.97999992, "epoch": 21.731895945629248, "grad_norm": 2.200268030166626, "learning_rate": 6.470759819004027e-06, "loss": 0.05338804, "memory(GiB)": 13.7, "step": 46365, "train_speed(iter/s)": 1.532474 }, { "acc": 0.9850893, "epoch": 21.734239512538082, "grad_norm": 2.598477363586426, "learning_rate": 6.470018965057554e-06, "loss": 0.03847233, "memory(GiB)": 13.7, "step": 46370, "train_speed(iter/s)": 1.532484 }, { "acc": 0.97175598, "epoch": 21.73658307944692, "grad_norm": 5.580733299255371, "learning_rate": 6.4692780757886735e-06, "loss": 0.10850589, "memory(GiB)": 13.7, "step": 46375, "train_speed(iter/s)": 1.532481 }, { "acc": 0.9796196, "epoch": 21.738926646355754, "grad_norm": 10.663408279418945, "learning_rate": 6.468537151215194e-06, "loss": 0.10258675, "memory(GiB)": 13.7, "step": 46380, "train_speed(iter/s)": 1.53249 }, { "acc": 0.98518314, "epoch": 21.74127021326459, "grad_norm": 2.7066195011138916, "learning_rate": 6.467796191354925e-06, "loss": 0.08179011, "memory(GiB)": 13.7, "step": 46385, "train_speed(iter/s)": 1.532499 }, { "acc": 0.96651039, "epoch": 21.743613780173423, "grad_norm": 6.3559746742248535, "learning_rate": 6.4670551962256724e-06, "loss": 0.11931281, "memory(GiB)": 13.7, "step": 46390, "train_speed(iter/s)": 1.532499 }, { "acc": 0.97801466, "epoch": 21.74595734708226, "grad_norm": 4.321533203125, "learning_rate": 6.466314165845253e-06, "loss": 0.09312396, "memory(GiB)": 13.7, "step": 46395, "train_speed(iter/s)": 1.532495 }, { "acc": 0.97872028, "epoch": 21.748300913991095, "grad_norm": 5.966681480407715, "learning_rate": 6.465573100231477e-06, "loss": 0.05806627, "memory(GiB)": 13.7, "step": 46400, "train_speed(iter/s)": 1.532497 }, { "acc": 0.98587456, "epoch": 21.75064448089993, "grad_norm": 1.7583997249603271, "learning_rate": 6.464831999402158e-06, "loss": 0.07899633, "memory(GiB)": 13.7, "step": 46405, "train_speed(iter/s)": 1.532505 }, { "acc": 0.97512312, "epoch": 21.752988047808763, "grad_norm": 2.549196720123291, "learning_rate": 6.464090863375108e-06, "loss": 0.14246962, "memory(GiB)": 13.7, "step": 46410, "train_speed(iter/s)": 1.532508 }, { "acc": 0.97863102, "epoch": 21.7553316147176, "grad_norm": 4.308985233306885, "learning_rate": 6.463349692168139e-06, "loss": 0.06431578, "memory(GiB)": 13.7, "step": 46415, "train_speed(iter/s)": 1.532517 }, { "acc": 0.97799101, "epoch": 21.757675181626436, "grad_norm": 4.697840690612793, "learning_rate": 6.462608485799072e-06, "loss": 0.16442103, "memory(GiB)": 13.7, "step": 46420, "train_speed(iter/s)": 1.532521 }, { "acc": 0.9782589, "epoch": 21.76001874853527, "grad_norm": 4.29317569732666, "learning_rate": 6.461867244285718e-06, "loss": 0.07250884, "memory(GiB)": 13.7, "step": 46425, "train_speed(iter/s)": 1.532529 }, { "acc": 0.9916667, "epoch": 21.762362315444108, "grad_norm": 0.4203006625175476, "learning_rate": 6.4611259676459004e-06, "loss": 0.02707373, "memory(GiB)": 13.7, "step": 46430, "train_speed(iter/s)": 1.532535 }, { "acc": 0.97264881, "epoch": 21.764705882352942, "grad_norm": 2.959195375442505, "learning_rate": 6.46038465589743e-06, "loss": 0.09248139, "memory(GiB)": 13.7, "step": 46435, "train_speed(iter/s)": 1.532531 }, { "acc": 0.99375, "epoch": 21.767049449261776, "grad_norm": 0.006857988424599171, "learning_rate": 6.459643309058129e-06, "loss": 0.01293687, "memory(GiB)": 13.7, "step": 46440, "train_speed(iter/s)": 1.532539 }, { "acc": 0.97618055, "epoch": 21.76939301617061, "grad_norm": 2.495408296585083, "learning_rate": 6.458901927145818e-06, "loss": 0.14785428, "memory(GiB)": 13.7, "step": 46445, "train_speed(iter/s)": 1.532541 }, { "acc": 0.9785183, "epoch": 21.77173658307945, "grad_norm": 1.0360568761825562, "learning_rate": 6.458160510178314e-06, "loss": 0.07074375, "memory(GiB)": 13.7, "step": 46450, "train_speed(iter/s)": 1.532547 }, { "acc": 0.98592262, "epoch": 21.774080149988283, "grad_norm": 2.7732577323913574, "learning_rate": 6.45741905817344e-06, "loss": 0.04148527, "memory(GiB)": 13.7, "step": 46455, "train_speed(iter/s)": 1.532555 }, { "acc": 0.98562508, "epoch": 21.776423716897117, "grad_norm": 0.7928597927093506, "learning_rate": 6.456677571149021e-06, "loss": 0.05912288, "memory(GiB)": 13.7, "step": 46460, "train_speed(iter/s)": 1.532564 }, { "acc": 0.98534718, "epoch": 21.77876728380595, "grad_norm": 2.87290096282959, "learning_rate": 6.455936049122874e-06, "loss": 0.06414924, "memory(GiB)": 13.7, "step": 46465, "train_speed(iter/s)": 1.532559 }, { "acc": 0.9811553, "epoch": 21.78111085071479, "grad_norm": 6.42610502243042, "learning_rate": 6.455194492112828e-06, "loss": 0.05755045, "memory(GiB)": 13.7, "step": 46470, "train_speed(iter/s)": 1.532556 }, { "acc": 0.96011362, "epoch": 21.783454417623624, "grad_norm": 12.062535285949707, "learning_rate": 6.454452900136703e-06, "loss": 0.13760616, "memory(GiB)": 13.7, "step": 46475, "train_speed(iter/s)": 1.532557 }, { "acc": 0.97072306, "epoch": 21.785797984532458, "grad_norm": 4.523905277252197, "learning_rate": 6.4537112732123286e-06, "loss": 0.08055674, "memory(GiB)": 13.7, "step": 46480, "train_speed(iter/s)": 1.532554 }, { "acc": 0.9719717, "epoch": 21.788141551441292, "grad_norm": 3.9120447635650635, "learning_rate": 6.452969611357529e-06, "loss": 0.10444028, "memory(GiB)": 13.7, "step": 46485, "train_speed(iter/s)": 1.532562 }, { "acc": 0.98008928, "epoch": 21.79048511835013, "grad_norm": 9.158357620239258, "learning_rate": 6.452227914590132e-06, "loss": 0.07504901, "memory(GiB)": 13.7, "step": 46490, "train_speed(iter/s)": 1.532567 }, { "acc": 0.97718754, "epoch": 21.792828685258964, "grad_norm": 3.630967378616333, "learning_rate": 6.451486182927965e-06, "loss": 0.05636238, "memory(GiB)": 13.7, "step": 46495, "train_speed(iter/s)": 1.532568 }, { "acc": 0.98500004, "epoch": 21.7951722521678, "grad_norm": 0.0982251986861229, "learning_rate": 6.450744416388855e-06, "loss": 0.03082762, "memory(GiB)": 13.7, "step": 46500, "train_speed(iter/s)": 1.532573 }, { "acc": 0.98125, "epoch": 21.797515819076633, "grad_norm": 4.080347061157227, "learning_rate": 6.450002614990638e-06, "loss": 0.04798967, "memory(GiB)": 13.7, "step": 46505, "train_speed(iter/s)": 1.53258 }, { "acc": 0.98299675, "epoch": 21.79985938598547, "grad_norm": 3.0748109817504883, "learning_rate": 6.449260778751136e-06, "loss": 0.0792246, "memory(GiB)": 13.7, "step": 46510, "train_speed(iter/s)": 1.532583 }, { "acc": 0.97833328, "epoch": 21.802202952894305, "grad_norm": 6.165016174316406, "learning_rate": 6.448518907688188e-06, "loss": 0.08323079, "memory(GiB)": 13.7, "step": 46515, "train_speed(iter/s)": 1.532585 }, { "acc": 0.98005199, "epoch": 21.80454651980314, "grad_norm": 4.29157829284668, "learning_rate": 6.447777001819621e-06, "loss": 0.09601527, "memory(GiB)": 13.7, "step": 46520, "train_speed(iter/s)": 1.53258 }, { "acc": 0.98062506, "epoch": 21.806890086711977, "grad_norm": 6.985991954803467, "learning_rate": 6.4470350611632684e-06, "loss": 0.10708411, "memory(GiB)": 13.7, "step": 46525, "train_speed(iter/s)": 1.532589 }, { "acc": 0.99510422, "epoch": 21.80923365362081, "grad_norm": 3.199877977371216, "learning_rate": 6.446293085736967e-06, "loss": 0.05043236, "memory(GiB)": 13.7, "step": 46530, "train_speed(iter/s)": 1.532594 }, { "acc": 0.9742857, "epoch": 21.811577220529646, "grad_norm": 9.11594009399414, "learning_rate": 6.445551075558548e-06, "loss": 0.08876181, "memory(GiB)": 13.7, "step": 46535, "train_speed(iter/s)": 1.532601 }, { "acc": 0.9885417, "epoch": 21.81392078743848, "grad_norm": 0.16484513878822327, "learning_rate": 6.444809030645849e-06, "loss": 0.03010672, "memory(GiB)": 13.7, "step": 46540, "train_speed(iter/s)": 1.532608 }, { "acc": 0.98363094, "epoch": 21.816264354347318, "grad_norm": 4.312172889709473, "learning_rate": 6.444066951016707e-06, "loss": 0.06911666, "memory(GiB)": 13.7, "step": 46545, "train_speed(iter/s)": 1.532609 }, { "acc": 0.95883932, "epoch": 21.818607921256152, "grad_norm": 7.130578994750977, "learning_rate": 6.443324836688957e-06, "loss": 0.1478595, "memory(GiB)": 13.7, "step": 46550, "train_speed(iter/s)": 1.532605 }, { "acc": 0.97738094, "epoch": 21.820951488164987, "grad_norm": 7.357491493225098, "learning_rate": 6.442582687680436e-06, "loss": 0.06597658, "memory(GiB)": 13.7, "step": 46555, "train_speed(iter/s)": 1.532603 }, { "acc": 0.97673759, "epoch": 21.82329505507382, "grad_norm": 6.506857395172119, "learning_rate": 6.441840504008988e-06, "loss": 0.09215817, "memory(GiB)": 13.7, "step": 46560, "train_speed(iter/s)": 1.532602 }, { "acc": 0.97529764, "epoch": 21.82563862198266, "grad_norm": 0.9309580326080322, "learning_rate": 6.441098285692447e-06, "loss": 0.10767099, "memory(GiB)": 13.7, "step": 46565, "train_speed(iter/s)": 1.532602 }, { "acc": 0.98383923, "epoch": 21.827982188891493, "grad_norm": 4.074269771575928, "learning_rate": 6.440356032748659e-06, "loss": 0.05260448, "memory(GiB)": 13.7, "step": 46570, "train_speed(iter/s)": 1.532609 }, { "acc": 0.97190514, "epoch": 21.830325755800327, "grad_norm": 7.11305046081543, "learning_rate": 6.43961374519546e-06, "loss": 0.17154762, "memory(GiB)": 13.7, "step": 46575, "train_speed(iter/s)": 1.532615 }, { "acc": 0.98091345, "epoch": 21.83266932270916, "grad_norm": 3.9952456951141357, "learning_rate": 6.4388714230506954e-06, "loss": 0.06273322, "memory(GiB)": 13.7, "step": 46580, "train_speed(iter/s)": 1.532619 }, { "acc": 0.98659973, "epoch": 21.835012889618, "grad_norm": 2.9084579944610596, "learning_rate": 6.438129066332207e-06, "loss": 0.08516053, "memory(GiB)": 13.7, "step": 46585, "train_speed(iter/s)": 1.532627 }, { "acc": 0.98291664, "epoch": 21.837356456526834, "grad_norm": 3.881568193435669, "learning_rate": 6.4373866750578376e-06, "loss": 0.11868962, "memory(GiB)": 13.7, "step": 46590, "train_speed(iter/s)": 1.532635 }, { "acc": 0.99115524, "epoch": 21.839700023435668, "grad_norm": 2.0750691890716553, "learning_rate": 6.436644249245434e-06, "loss": 0.03250952, "memory(GiB)": 13.7, "step": 46595, "train_speed(iter/s)": 1.532645 }, { "acc": 0.97778845, "epoch": 21.842043590344506, "grad_norm": 6.070793628692627, "learning_rate": 6.435901788912841e-06, "loss": 0.07576559, "memory(GiB)": 13.7, "step": 46600, "train_speed(iter/s)": 1.532638 }, { "acc": 0.9836607, "epoch": 21.84438715725334, "grad_norm": 4.318224906921387, "learning_rate": 6.435159294077904e-06, "loss": 0.04632322, "memory(GiB)": 13.7, "step": 46605, "train_speed(iter/s)": 1.532646 }, { "acc": 0.9760417, "epoch": 21.846730724162175, "grad_norm": 1.540022611618042, "learning_rate": 6.4344167647584706e-06, "loss": 0.03722794, "memory(GiB)": 13.7, "step": 46610, "train_speed(iter/s)": 1.532653 }, { "acc": 0.99154758, "epoch": 21.84907429107101, "grad_norm": 4.742194175720215, "learning_rate": 6.433674200972391e-06, "loss": 0.05814493, "memory(GiB)": 13.7, "step": 46615, "train_speed(iter/s)": 1.53266 }, { "acc": 0.97666664, "epoch": 21.851417857979847, "grad_norm": 10.310885429382324, "learning_rate": 6.43293160273751e-06, "loss": 0.08688661, "memory(GiB)": 13.7, "step": 46620, "train_speed(iter/s)": 1.532655 }, { "acc": 0.97669287, "epoch": 21.85376142488868, "grad_norm": 0.5116618871688843, "learning_rate": 6.43218897007168e-06, "loss": 0.09845121, "memory(GiB)": 13.7, "step": 46625, "train_speed(iter/s)": 1.532662 }, { "acc": 0.98422346, "epoch": 21.856104991797515, "grad_norm": 1.472424864768982, "learning_rate": 6.43144630299275e-06, "loss": 0.08320083, "memory(GiB)": 13.7, "step": 46630, "train_speed(iter/s)": 1.532667 }, { "acc": 0.9927084, "epoch": 21.85844855870635, "grad_norm": 2.674781322479248, "learning_rate": 6.430703601518573e-06, "loss": 0.04484403, "memory(GiB)": 13.7, "step": 46635, "train_speed(iter/s)": 1.532667 }, { "acc": 0.98010416, "epoch": 21.860792125615188, "grad_norm": 6.043496608734131, "learning_rate": 6.429960865666998e-06, "loss": 0.12396179, "memory(GiB)": 13.7, "step": 46640, "train_speed(iter/s)": 1.532671 }, { "acc": 0.98410797, "epoch": 21.863135692524022, "grad_norm": 4.390298366546631, "learning_rate": 6.429218095455881e-06, "loss": 0.06356346, "memory(GiB)": 13.7, "step": 46645, "train_speed(iter/s)": 1.532671 }, { "acc": 0.97312508, "epoch": 21.865479259432856, "grad_norm": 3.1192643642425537, "learning_rate": 6.4284752909030755e-06, "loss": 0.06343403, "memory(GiB)": 13.7, "step": 46650, "train_speed(iter/s)": 1.532673 }, { "acc": 0.97666664, "epoch": 21.86782282634169, "grad_norm": 0.9895345568656921, "learning_rate": 6.427732452026434e-06, "loss": 0.1217242, "memory(GiB)": 13.7, "step": 46655, "train_speed(iter/s)": 1.532677 }, { "acc": 0.98297615, "epoch": 21.87016639325053, "grad_norm": 4.058197498321533, "learning_rate": 6.426989578843816e-06, "loss": 0.07308624, "memory(GiB)": 13.7, "step": 46660, "train_speed(iter/s)": 1.532688 }, { "acc": 0.99642859, "epoch": 21.872509960159363, "grad_norm": 3.358607292175293, "learning_rate": 6.4262466713730706e-06, "loss": 0.02776558, "memory(GiB)": 13.7, "step": 46665, "train_speed(iter/s)": 1.532696 }, { "acc": 0.97965279, "epoch": 21.874853527068197, "grad_norm": 0.6053252220153809, "learning_rate": 6.425503729632061e-06, "loss": 0.0399866, "memory(GiB)": 13.7, "step": 46670, "train_speed(iter/s)": 1.532709 }, { "acc": 0.97613096, "epoch": 21.87719709397703, "grad_norm": 5.866880416870117, "learning_rate": 6.424760753638643e-06, "loss": 0.13803381, "memory(GiB)": 13.7, "step": 46675, "train_speed(iter/s)": 1.532715 }, { "acc": 0.96967268, "epoch": 21.87954066088587, "grad_norm": 4.765735149383545, "learning_rate": 6.4240177434106775e-06, "loss": 0.0924238, "memory(GiB)": 13.7, "step": 46680, "train_speed(iter/s)": 1.532718 }, { "acc": 0.98145828, "epoch": 21.881884227794703, "grad_norm": 5.6209917068481445, "learning_rate": 6.42327469896602e-06, "loss": 0.08480033, "memory(GiB)": 13.7, "step": 46685, "train_speed(iter/s)": 1.532717 }, { "acc": 0.96937504, "epoch": 21.884227794703538, "grad_norm": 7.024327278137207, "learning_rate": 6.422531620322534e-06, "loss": 0.0989276, "memory(GiB)": 13.7, "step": 46690, "train_speed(iter/s)": 1.53273 }, { "acc": 0.9760417, "epoch": 21.886571361612376, "grad_norm": 2.5776588916778564, "learning_rate": 6.421788507498079e-06, "loss": 0.08780552, "memory(GiB)": 13.7, "step": 46695, "train_speed(iter/s)": 1.532727 }, { "acc": 0.9881485, "epoch": 21.88891492852121, "grad_norm": 3.631286382675171, "learning_rate": 6.421045360510517e-06, "loss": 0.08753051, "memory(GiB)": 13.7, "step": 46700, "train_speed(iter/s)": 1.532732 }, { "acc": 0.97351189, "epoch": 21.891258495430044, "grad_norm": 4.7990899085998535, "learning_rate": 6.420302179377712e-06, "loss": 0.07014555, "memory(GiB)": 13.7, "step": 46705, "train_speed(iter/s)": 1.532752 }, { "acc": 0.98029766, "epoch": 21.89360206233888, "grad_norm": 2.206678867340088, "learning_rate": 6.419558964117527e-06, "loss": 0.14459972, "memory(GiB)": 13.7, "step": 46710, "train_speed(iter/s)": 1.532752 }, { "acc": 0.9825922, "epoch": 21.895945629247716, "grad_norm": 0.01913938671350479, "learning_rate": 6.418815714747826e-06, "loss": 0.05907083, "memory(GiB)": 13.7, "step": 46715, "train_speed(iter/s)": 1.532759 }, { "acc": 0.98174677, "epoch": 21.89828919615655, "grad_norm": 2.3226966857910156, "learning_rate": 6.418072431286476e-06, "loss": 0.05660135, "memory(GiB)": 13.7, "step": 46720, "train_speed(iter/s)": 1.532766 }, { "acc": 0.98528118, "epoch": 21.900632763065385, "grad_norm": 1.9981420040130615, "learning_rate": 6.4173291137513415e-06, "loss": 0.03299971, "memory(GiB)": 13.7, "step": 46725, "train_speed(iter/s)": 1.532772 }, { "acc": 0.98094501, "epoch": 21.90297632997422, "grad_norm": 4.2655253410339355, "learning_rate": 6.416585762160289e-06, "loss": 0.07354581, "memory(GiB)": 13.7, "step": 46730, "train_speed(iter/s)": 1.532782 }, { "acc": 0.98500004, "epoch": 21.905319896883057, "grad_norm": 5.8969502449035645, "learning_rate": 6.415842376531187e-06, "loss": 0.0362033, "memory(GiB)": 13.7, "step": 46735, "train_speed(iter/s)": 1.532786 }, { "acc": 0.984375, "epoch": 21.90766346379189, "grad_norm": 2.4530410766601562, "learning_rate": 6.415098956881904e-06, "loss": 0.05390028, "memory(GiB)": 13.7, "step": 46740, "train_speed(iter/s)": 1.532796 }, { "acc": 0.97962828, "epoch": 21.910007030700726, "grad_norm": 4.432638645172119, "learning_rate": 6.414355503230311e-06, "loss": 0.08028097, "memory(GiB)": 13.7, "step": 46745, "train_speed(iter/s)": 1.532796 }, { "acc": 0.98499994, "epoch": 21.91235059760956, "grad_norm": 0.5090783834457397, "learning_rate": 6.413612015594274e-06, "loss": 0.0488575, "memory(GiB)": 13.7, "step": 46750, "train_speed(iter/s)": 1.532804 }, { "acc": 0.99613972, "epoch": 21.914694164518398, "grad_norm": 0.1179511621594429, "learning_rate": 6.412868493991669e-06, "loss": 0.0361312, "memory(GiB)": 13.7, "step": 46755, "train_speed(iter/s)": 1.532812 }, { "acc": 0.98708324, "epoch": 21.917037731427232, "grad_norm": 0.8486884832382202, "learning_rate": 6.4121249384403636e-06, "loss": 0.03159897, "memory(GiB)": 13.7, "step": 46760, "train_speed(iter/s)": 1.532813 }, { "acc": 0.97937508, "epoch": 21.919381298336067, "grad_norm": 4.964811325073242, "learning_rate": 6.411381348958235e-06, "loss": 0.05132757, "memory(GiB)": 13.7, "step": 46765, "train_speed(iter/s)": 1.532827 }, { "acc": 0.96855774, "epoch": 21.921724865244904, "grad_norm": 4.7547736167907715, "learning_rate": 6.410637725563151e-06, "loss": 0.10468391, "memory(GiB)": 13.7, "step": 46770, "train_speed(iter/s)": 1.532829 }, { "acc": 0.99125004, "epoch": 21.92406843215374, "grad_norm": 1.3638635873794556, "learning_rate": 6.409894068272988e-06, "loss": 0.03999363, "memory(GiB)": 13.7, "step": 46775, "train_speed(iter/s)": 1.532825 }, { "acc": 0.98757439, "epoch": 21.926411999062573, "grad_norm": 1.6946079730987549, "learning_rate": 6.4091503771056244e-06, "loss": 0.07221477, "memory(GiB)": 13.7, "step": 46780, "train_speed(iter/s)": 1.532824 }, { "acc": 0.98555555, "epoch": 21.928755565971407, "grad_norm": 8.57718276977539, "learning_rate": 6.408406652078934e-06, "loss": 0.07787089, "memory(GiB)": 13.7, "step": 46785, "train_speed(iter/s)": 1.532819 }, { "acc": 0.98416672, "epoch": 21.931099132880245, "grad_norm": 7.1355085372924805, "learning_rate": 6.4076628932107905e-06, "loss": 0.06855927, "memory(GiB)": 13.7, "step": 46790, "train_speed(iter/s)": 1.532825 }, { "acc": 0.9822588, "epoch": 21.93344269978908, "grad_norm": 0.016328880563378334, "learning_rate": 6.406919100519074e-06, "loss": 0.06173728, "memory(GiB)": 13.7, "step": 46795, "train_speed(iter/s)": 1.532834 }, { "acc": 0.97020226, "epoch": 21.935786266697914, "grad_norm": 3.858142852783203, "learning_rate": 6.406175274021664e-06, "loss": 0.08211889, "memory(GiB)": 13.7, "step": 46800, "train_speed(iter/s)": 1.532843 }, { "acc": 0.9739584, "epoch": 21.938129833606748, "grad_norm": 4.626747131347656, "learning_rate": 6.405431413736439e-06, "loss": 0.09777952, "memory(GiB)": 13.7, "step": 46805, "train_speed(iter/s)": 1.532856 }, { "acc": 0.95560398, "epoch": 21.940473400515586, "grad_norm": 7.290787696838379, "learning_rate": 6.404687519681276e-06, "loss": 0.10514711, "memory(GiB)": 13.7, "step": 46810, "train_speed(iter/s)": 1.53287 }, { "acc": 0.9802084, "epoch": 21.94281696742442, "grad_norm": 3.624912977218628, "learning_rate": 6.40394359187406e-06, "loss": 0.05451217, "memory(GiB)": 13.7, "step": 46815, "train_speed(iter/s)": 1.532883 }, { "acc": 0.96806545, "epoch": 21.945160534333255, "grad_norm": 6.5538105964660645, "learning_rate": 6.40319963033267e-06, "loss": 0.09747145, "memory(GiB)": 13.7, "step": 46820, "train_speed(iter/s)": 1.532884 }, { "acc": 0.99156246, "epoch": 21.94750410124209, "grad_norm": 3.526430368423462, "learning_rate": 6.40245563507499e-06, "loss": 0.05075606, "memory(GiB)": 13.7, "step": 46825, "train_speed(iter/s)": 1.532887 }, { "acc": 0.9838542, "epoch": 21.949847668150927, "grad_norm": 6.202216625213623, "learning_rate": 6.401711606118902e-06, "loss": 0.07241756, "memory(GiB)": 13.7, "step": 46830, "train_speed(iter/s)": 1.532889 }, { "acc": 0.98238096, "epoch": 21.95219123505976, "grad_norm": 4.9439921379089355, "learning_rate": 6.40096754348229e-06, "loss": 0.04744408, "memory(GiB)": 13.7, "step": 46835, "train_speed(iter/s)": 1.532888 }, { "acc": 0.9928977, "epoch": 21.954534801968595, "grad_norm": 0.01158547680824995, "learning_rate": 6.400223447183039e-06, "loss": 0.06826002, "memory(GiB)": 13.7, "step": 46840, "train_speed(iter/s)": 1.532883 }, { "acc": 0.9777977, "epoch": 21.956878368877433, "grad_norm": 0.6909923553466797, "learning_rate": 6.399479317239036e-06, "loss": 0.08332529, "memory(GiB)": 13.7, "step": 46845, "train_speed(iter/s)": 1.532891 }, { "acc": 0.98819447, "epoch": 21.959221935786267, "grad_norm": 2.9911036491394043, "learning_rate": 6.398735153668164e-06, "loss": 0.06371725, "memory(GiB)": 13.7, "step": 46850, "train_speed(iter/s)": 1.532899 }, { "acc": 0.97758007, "epoch": 21.9615655026951, "grad_norm": 2.385066270828247, "learning_rate": 6.397990956488315e-06, "loss": 0.08963536, "memory(GiB)": 13.7, "step": 46855, "train_speed(iter/s)": 1.532892 }, { "acc": 0.98944445, "epoch": 21.963909069603936, "grad_norm": 0.9219667911529541, "learning_rate": 6.397246725717371e-06, "loss": 0.0320455, "memory(GiB)": 13.7, "step": 46860, "train_speed(iter/s)": 1.532896 }, { "acc": 0.98145828, "epoch": 21.966252636512774, "grad_norm": 3.7994046211242676, "learning_rate": 6.396502461373227e-06, "loss": 0.07158599, "memory(GiB)": 13.7, "step": 46865, "train_speed(iter/s)": 1.532895 }, { "acc": 0.99091339, "epoch": 21.96859620342161, "grad_norm": 4.793947219848633, "learning_rate": 6.395758163473771e-06, "loss": 0.03893954, "memory(GiB)": 13.7, "step": 46870, "train_speed(iter/s)": 1.532902 }, { "acc": 0.98093128, "epoch": 21.970939770330443, "grad_norm": 3.498866081237793, "learning_rate": 6.395013832036891e-06, "loss": 0.06547012, "memory(GiB)": 13.7, "step": 46875, "train_speed(iter/s)": 1.532907 }, { "acc": 0.97500954, "epoch": 21.973283337239277, "grad_norm": 3.0838115215301514, "learning_rate": 6.394269467080479e-06, "loss": 0.06201262, "memory(GiB)": 13.7, "step": 46880, "train_speed(iter/s)": 1.532914 }, { "acc": 0.98826389, "epoch": 21.975626904148115, "grad_norm": 4.502452373504639, "learning_rate": 6.393525068622429e-06, "loss": 0.04119316, "memory(GiB)": 13.7, "step": 46885, "train_speed(iter/s)": 1.53292 }, { "acc": 0.97920141, "epoch": 21.97797047105695, "grad_norm": 3.190528154373169, "learning_rate": 6.392780636680633e-06, "loss": 0.06306376, "memory(GiB)": 13.7, "step": 46890, "train_speed(iter/s)": 1.532932 }, { "acc": 0.97312498, "epoch": 21.980314037965783, "grad_norm": 21.38848114013672, "learning_rate": 6.392036171272985e-06, "loss": 0.08082982, "memory(GiB)": 13.7, "step": 46895, "train_speed(iter/s)": 1.532937 }, { "acc": 0.99375, "epoch": 21.982657604874618, "grad_norm": 0.005411479622125626, "learning_rate": 6.391291672417378e-06, "loss": 0.01788145, "memory(GiB)": 13.7, "step": 46900, "train_speed(iter/s)": 1.532942 }, { "acc": 0.99301472, "epoch": 21.985001171783455, "grad_norm": 3.850038528442383, "learning_rate": 6.390547140131707e-06, "loss": 0.038265, "memory(GiB)": 13.7, "step": 46905, "train_speed(iter/s)": 1.532953 }, { "acc": 0.9749053, "epoch": 21.98734473869229, "grad_norm": 1.6413706541061401, "learning_rate": 6.389802574433874e-06, "loss": 0.06977049, "memory(GiB)": 13.7, "step": 46910, "train_speed(iter/s)": 1.532959 }, { "acc": 0.9947917, "epoch": 21.989688305601124, "grad_norm": 3.2379298210144043, "learning_rate": 6.389057975341767e-06, "loss": 0.01193658, "memory(GiB)": 13.7, "step": 46915, "train_speed(iter/s)": 1.53296 }, { "acc": 0.9979166, "epoch": 21.992031872509962, "grad_norm": 2.786045789718628, "learning_rate": 6.388313342873289e-06, "loss": 0.02183221, "memory(GiB)": 13.7, "step": 46920, "train_speed(iter/s)": 1.532978 }, { "acc": 0.98978634, "epoch": 21.994375439418796, "grad_norm": 0.9407172799110413, "learning_rate": 6.387568677046337e-06, "loss": 0.05709087, "memory(GiB)": 13.7, "step": 46925, "train_speed(iter/s)": 1.532987 }, { "acc": 0.97891941, "epoch": 21.99671900632763, "grad_norm": 6.4286789894104, "learning_rate": 6.386823977878813e-06, "loss": 0.08454995, "memory(GiB)": 13.7, "step": 46930, "train_speed(iter/s)": 1.532999 }, { "acc": 0.99312496, "epoch": 21.999062573236465, "grad_norm": 4.849210262298584, "learning_rate": 6.3860792453886125e-06, "loss": 0.06214079, "memory(GiB)": 13.7, "step": 46935, "train_speed(iter/s)": 1.532994 }, { "acc": 0.98549671, "epoch": 22.001406140145303, "grad_norm": 3.9421398639678955, "learning_rate": 6.385334479593641e-06, "loss": 0.04651133, "memory(GiB)": 13.7, "step": 46940, "train_speed(iter/s)": 1.532966 }, { "acc": 0.98010416, "epoch": 22.003749707054137, "grad_norm": 3.6559226512908936, "learning_rate": 6.384589680511798e-06, "loss": 0.06377088, "memory(GiB)": 13.7, "step": 46945, "train_speed(iter/s)": 1.532974 }, { "acc": 0.97488098, "epoch": 22.00609327396297, "grad_norm": 1.5699914693832397, "learning_rate": 6.383844848160985e-06, "loss": 0.11645859, "memory(GiB)": 13.7, "step": 46950, "train_speed(iter/s)": 1.532984 }, { "acc": 0.98984375, "epoch": 22.008436840871806, "grad_norm": 4.693421363830566, "learning_rate": 6.383099982559105e-06, "loss": 0.04607746, "memory(GiB)": 13.7, "step": 46955, "train_speed(iter/s)": 1.532987 }, { "acc": 0.98660717, "epoch": 22.010780407780643, "grad_norm": 4.175610065460205, "learning_rate": 6.3823550837240665e-06, "loss": 0.03882556, "memory(GiB)": 13.7, "step": 46960, "train_speed(iter/s)": 1.532977 }, { "acc": 0.99457417, "epoch": 22.013123974689478, "grad_norm": 1.138475775718689, "learning_rate": 6.381610151673772e-06, "loss": 0.05174043, "memory(GiB)": 13.7, "step": 46965, "train_speed(iter/s)": 1.532971 }, { "acc": 0.98402786, "epoch": 22.015467541598312, "grad_norm": 2.5982115268707275, "learning_rate": 6.380865186426124e-06, "loss": 0.06810834, "memory(GiB)": 13.7, "step": 46970, "train_speed(iter/s)": 1.53297 }, { "acc": 0.97521172, "epoch": 22.017811108507146, "grad_norm": 1.0470478534698486, "learning_rate": 6.380120187999034e-06, "loss": 0.05441579, "memory(GiB)": 13.7, "step": 46975, "train_speed(iter/s)": 1.532976 }, { "acc": 0.98425598, "epoch": 22.020154675415984, "grad_norm": 0.011861066333949566, "learning_rate": 6.379375156410406e-06, "loss": 0.08633111, "memory(GiB)": 13.7, "step": 46980, "train_speed(iter/s)": 1.532973 }, { "acc": 0.98702946, "epoch": 22.02249824232482, "grad_norm": 3.532759189605713, "learning_rate": 6.378630091678148e-06, "loss": 0.06820214, "memory(GiB)": 13.7, "step": 46985, "train_speed(iter/s)": 1.532981 }, { "acc": 0.9854167, "epoch": 22.024841809233653, "grad_norm": 8.755796432495117, "learning_rate": 6.3778849938201715e-06, "loss": 0.05229097, "memory(GiB)": 13.7, "step": 46990, "train_speed(iter/s)": 1.532985 }, { "acc": 0.98491297, "epoch": 22.027185376142487, "grad_norm": 4.905913352966309, "learning_rate": 6.377139862854387e-06, "loss": 0.06607793, "memory(GiB)": 13.7, "step": 46995, "train_speed(iter/s)": 1.532993 }, { "acc": 0.98467264, "epoch": 22.029528943051325, "grad_norm": 0.01483170222491026, "learning_rate": 6.376394698798699e-06, "loss": 0.07403023, "memory(GiB)": 13.7, "step": 47000, "train_speed(iter/s)": 1.532997 }, { "acc": 0.98520298, "epoch": 22.03187250996016, "grad_norm": 2.8333616256713867, "learning_rate": 6.375649501671025e-06, "loss": 0.06790371, "memory(GiB)": 13.7, "step": 47005, "train_speed(iter/s)": 1.533002 }, { "acc": 0.98488102, "epoch": 22.034216076868994, "grad_norm": 1.8997081518173218, "learning_rate": 6.374904271489274e-06, "loss": 0.03383532, "memory(GiB)": 13.7, "step": 47010, "train_speed(iter/s)": 1.533007 }, { "acc": 0.98125, "epoch": 22.03655964377783, "grad_norm": 0.971325159072876, "learning_rate": 6.37415900827136e-06, "loss": 0.03701002, "memory(GiB)": 13.7, "step": 47015, "train_speed(iter/s)": 1.53301 }, { "acc": 0.97372475, "epoch": 22.038903210686666, "grad_norm": 0.00462969858199358, "learning_rate": 6.373413712035194e-06, "loss": 0.08806071, "memory(GiB)": 13.7, "step": 47020, "train_speed(iter/s)": 1.533014 }, { "acc": 0.97189484, "epoch": 22.0412467775955, "grad_norm": 9.941402435302734, "learning_rate": 6.372668382798694e-06, "loss": 0.08860581, "memory(GiB)": 13.7, "step": 47025, "train_speed(iter/s)": 1.533018 }, { "acc": 0.98835392, "epoch": 22.043590344504334, "grad_norm": 0.04612239450216293, "learning_rate": 6.371923020579775e-06, "loss": 0.06565026, "memory(GiB)": 13.7, "step": 47030, "train_speed(iter/s)": 1.533019 }, { "acc": 0.97802086, "epoch": 22.045933911413172, "grad_norm": 2.725510597229004, "learning_rate": 6.371177625396352e-06, "loss": 0.08733321, "memory(GiB)": 13.7, "step": 47035, "train_speed(iter/s)": 1.533019 }, { "acc": 0.98625002, "epoch": 22.048277478322007, "grad_norm": 4.360051155090332, "learning_rate": 6.370432197266342e-06, "loss": 0.05125335, "memory(GiB)": 13.7, "step": 47040, "train_speed(iter/s)": 1.533034 }, { "acc": 0.97401514, "epoch": 22.05062104523084, "grad_norm": 1.9540129899978638, "learning_rate": 6.369686736207661e-06, "loss": 0.09606023, "memory(GiB)": 13.7, "step": 47045, "train_speed(iter/s)": 1.533035 }, { "acc": 0.98477678, "epoch": 22.052964612139675, "grad_norm": 2.4263930320739746, "learning_rate": 6.36894124223823e-06, "loss": 0.05372475, "memory(GiB)": 13.7, "step": 47050, "train_speed(iter/s)": 1.533036 }, { "acc": 0.97259674, "epoch": 22.055308179048513, "grad_norm": 3.453071355819702, "learning_rate": 6.3681957153759675e-06, "loss": 0.12169654, "memory(GiB)": 13.7, "step": 47055, "train_speed(iter/s)": 1.53304 }, { "acc": 0.97406254, "epoch": 22.057651745957347, "grad_norm": 1.1542044878005981, "learning_rate": 6.3674501556387934e-06, "loss": 0.0764595, "memory(GiB)": 13.7, "step": 47060, "train_speed(iter/s)": 1.533042 }, { "acc": 0.97986107, "epoch": 22.05999531286618, "grad_norm": 3.2676258087158203, "learning_rate": 6.366704563044627e-06, "loss": 0.04408332, "memory(GiB)": 13.7, "step": 47065, "train_speed(iter/s)": 1.533035 }, { "acc": 0.97327585, "epoch": 22.062338879775016, "grad_norm": 4.951587677001953, "learning_rate": 6.365958937611393e-06, "loss": 0.10639429, "memory(GiB)": 13.7, "step": 47070, "train_speed(iter/s)": 1.533045 }, { "acc": 0.97061014, "epoch": 22.064682446683854, "grad_norm": 2.9189767837524414, "learning_rate": 6.365213279357011e-06, "loss": 0.09253383, "memory(GiB)": 13.7, "step": 47075, "train_speed(iter/s)": 1.533047 }, { "acc": 0.975, "epoch": 22.067026013592688, "grad_norm": 5.3552937507629395, "learning_rate": 6.364467588299404e-06, "loss": 0.07355853, "memory(GiB)": 13.7, "step": 47080, "train_speed(iter/s)": 1.533046 }, { "acc": 0.9916667, "epoch": 22.069369580501522, "grad_norm": 0.008933622390031815, "learning_rate": 6.363721864456498e-06, "loss": 0.03667824, "memory(GiB)": 13.7, "step": 47085, "train_speed(iter/s)": 1.533047 }, { "acc": 0.98621244, "epoch": 22.07171314741036, "grad_norm": 4.338749885559082, "learning_rate": 6.3629761078462195e-06, "loss": 0.05855279, "memory(GiB)": 13.7, "step": 47090, "train_speed(iter/s)": 1.533055 }, { "acc": 0.96892853, "epoch": 22.074056714319195, "grad_norm": 6.645414352416992, "learning_rate": 6.362230318486489e-06, "loss": 0.10789945, "memory(GiB)": 13.7, "step": 47095, "train_speed(iter/s)": 1.53306 }, { "acc": 0.99300594, "epoch": 22.07640028122803, "grad_norm": 2.923295259475708, "learning_rate": 6.361484496395236e-06, "loss": 0.02645006, "memory(GiB)": 13.7, "step": 47100, "train_speed(iter/s)": 1.533062 }, { "acc": 0.99597225, "epoch": 22.078743848136863, "grad_norm": 0.2058204859495163, "learning_rate": 6.360738641590386e-06, "loss": 0.03507694, "memory(GiB)": 13.7, "step": 47105, "train_speed(iter/s)": 1.533063 }, { "acc": 0.97806549, "epoch": 22.0810874150457, "grad_norm": 5.556941032409668, "learning_rate": 6.3599927540898675e-06, "loss": 0.10192716, "memory(GiB)": 13.7, "step": 47110, "train_speed(iter/s)": 1.533071 }, { "acc": 0.98767853, "epoch": 22.083430981954535, "grad_norm": 5.388921737670898, "learning_rate": 6.359246833911611e-06, "loss": 0.05139198, "memory(GiB)": 13.7, "step": 47115, "train_speed(iter/s)": 1.533072 }, { "acc": 0.98963795, "epoch": 22.08577454886337, "grad_norm": 3.3696699142456055, "learning_rate": 6.358500881073544e-06, "loss": 0.04909279, "memory(GiB)": 13.7, "step": 47120, "train_speed(iter/s)": 1.533077 }, { "acc": 0.97686958, "epoch": 22.088118115772204, "grad_norm": 2.2034966945648193, "learning_rate": 6.3577548955935955e-06, "loss": 0.09307355, "memory(GiB)": 13.7, "step": 47125, "train_speed(iter/s)": 1.533082 }, { "acc": 0.96923609, "epoch": 22.090461682681042, "grad_norm": 70.74580383300781, "learning_rate": 6.357008877489701e-06, "loss": 0.13593243, "memory(GiB)": 13.7, "step": 47130, "train_speed(iter/s)": 1.533084 }, { "acc": 0.97863092, "epoch": 22.092805249589876, "grad_norm": 3.1991753578186035, "learning_rate": 6.356262826779789e-06, "loss": 0.0422882, "memory(GiB)": 13.7, "step": 47135, "train_speed(iter/s)": 1.533086 }, { "acc": 0.98383923, "epoch": 22.09514881649871, "grad_norm": 3.0293643474578857, "learning_rate": 6.355516743481791e-06, "loss": 0.04498068, "memory(GiB)": 13.7, "step": 47140, "train_speed(iter/s)": 1.533091 }, { "acc": 0.98096685, "epoch": 22.097492383407545, "grad_norm": 1.6394726037979126, "learning_rate": 6.354770627613642e-06, "loss": 0.06946207, "memory(GiB)": 13.7, "step": 47145, "train_speed(iter/s)": 1.533096 }, { "acc": 0.97089453, "epoch": 22.099835950316383, "grad_norm": 2.37129545211792, "learning_rate": 6.354024479193277e-06, "loss": 0.11071916, "memory(GiB)": 13.7, "step": 47150, "train_speed(iter/s)": 1.533107 }, { "acc": 0.98455353, "epoch": 22.102179517225217, "grad_norm": 3.9062702655792236, "learning_rate": 6.35327829823863e-06, "loss": 0.07850535, "memory(GiB)": 13.7, "step": 47155, "train_speed(iter/s)": 1.533117 }, { "acc": 0.97039146, "epoch": 22.10452308413405, "grad_norm": 6.252151966094971, "learning_rate": 6.3525320847676385e-06, "loss": 0.16500996, "memory(GiB)": 13.7, "step": 47160, "train_speed(iter/s)": 1.533116 }, { "acc": 0.969909, "epoch": 22.106866651042886, "grad_norm": 7.196018218994141, "learning_rate": 6.351785838798234e-06, "loss": 0.14290918, "memory(GiB)": 13.7, "step": 47165, "train_speed(iter/s)": 1.533103 }, { "acc": 0.97520828, "epoch": 22.109210217951723, "grad_norm": 6.534291744232178, "learning_rate": 6.351039560348362e-06, "loss": 0.1235587, "memory(GiB)": 13.7, "step": 47170, "train_speed(iter/s)": 1.533102 }, { "acc": 0.98425598, "epoch": 22.111553784860558, "grad_norm": 4.126155853271484, "learning_rate": 6.350293249435954e-06, "loss": 0.05571526, "memory(GiB)": 13.7, "step": 47175, "train_speed(iter/s)": 1.53311 }, { "acc": 0.99020824, "epoch": 22.113897351769392, "grad_norm": 0.7490464448928833, "learning_rate": 6.349546906078949e-06, "loss": 0.0461688, "memory(GiB)": 13.7, "step": 47180, "train_speed(iter/s)": 1.533108 }, { "acc": 0.98482647, "epoch": 22.11624091867823, "grad_norm": 3.1085195541381836, "learning_rate": 6.348800530295289e-06, "loss": 0.07806154, "memory(GiB)": 13.7, "step": 47185, "train_speed(iter/s)": 1.533108 }, { "acc": 0.97523518, "epoch": 22.118584485587064, "grad_norm": 11.798225402832031, "learning_rate": 6.348054122102915e-06, "loss": 0.09605932, "memory(GiB)": 13.7, "step": 47190, "train_speed(iter/s)": 1.533115 }, { "acc": 0.9875, "epoch": 22.1209280524959, "grad_norm": 4.449764251708984, "learning_rate": 6.347307681519765e-06, "loss": 0.04644055, "memory(GiB)": 13.7, "step": 47195, "train_speed(iter/s)": 1.53312 }, { "acc": 0.99285717, "epoch": 22.123271619404733, "grad_norm": 2.2032904624938965, "learning_rate": 6.346561208563784e-06, "loss": 0.05064454, "memory(GiB)": 13.7, "step": 47200, "train_speed(iter/s)": 1.533119 }, { "acc": 0.97156258, "epoch": 22.12561518631357, "grad_norm": 12.410943984985352, "learning_rate": 6.345814703252914e-06, "loss": 0.08385151, "memory(GiB)": 13.7, "step": 47205, "train_speed(iter/s)": 1.53312 }, { "acc": 0.98208666, "epoch": 22.127958753222405, "grad_norm": 0.9443114995956421, "learning_rate": 6.345068165605098e-06, "loss": 0.03493115, "memory(GiB)": 13.7, "step": 47210, "train_speed(iter/s)": 1.533125 }, { "acc": 0.98680553, "epoch": 22.13030232013124, "grad_norm": 4.54049825668335, "learning_rate": 6.344321595638281e-06, "loss": 0.05748783, "memory(GiB)": 13.7, "step": 47215, "train_speed(iter/s)": 1.533127 }, { "acc": 0.990625, "epoch": 22.132645887040074, "grad_norm": 3.4893007278442383, "learning_rate": 6.343574993370408e-06, "loss": 0.04484532, "memory(GiB)": 13.7, "step": 47220, "train_speed(iter/s)": 1.533122 }, { "acc": 0.98353634, "epoch": 22.13498945394891, "grad_norm": 4.808688640594482, "learning_rate": 6.3428283588194225e-06, "loss": 0.1086158, "memory(GiB)": 13.7, "step": 47225, "train_speed(iter/s)": 1.53312 }, { "acc": 0.98102684, "epoch": 22.137333020857746, "grad_norm": 6.171502113342285, "learning_rate": 6.3420816920032745e-06, "loss": 0.06026507, "memory(GiB)": 13.7, "step": 47230, "train_speed(iter/s)": 1.533121 }, { "acc": 0.98395834, "epoch": 22.13967658776658, "grad_norm": 2.9457874298095703, "learning_rate": 6.341334992939911e-06, "loss": 0.03235781, "memory(GiB)": 13.7, "step": 47235, "train_speed(iter/s)": 1.533129 }, { "acc": 0.9958334, "epoch": 22.142020154675414, "grad_norm": 0.4436013996601105, "learning_rate": 6.340588261647276e-06, "loss": 0.04958219, "memory(GiB)": 13.7, "step": 47240, "train_speed(iter/s)": 1.53313 }, { "acc": 0.98520832, "epoch": 22.144363721584252, "grad_norm": 4.957324504852295, "learning_rate": 6.339841498143324e-06, "loss": 0.07412198, "memory(GiB)": 13.7, "step": 47245, "train_speed(iter/s)": 1.53313 }, { "acc": 0.981007, "epoch": 22.146707288493086, "grad_norm": 3.9215049743652344, "learning_rate": 6.339094702446002e-06, "loss": 0.04381339, "memory(GiB)": 13.7, "step": 47250, "train_speed(iter/s)": 1.533142 }, { "acc": 0.98195515, "epoch": 22.14905085540192, "grad_norm": 9.639697074890137, "learning_rate": 6.338347874573263e-06, "loss": 0.08850282, "memory(GiB)": 13.7, "step": 47255, "train_speed(iter/s)": 1.533154 }, { "acc": 0.9777976, "epoch": 22.15139442231076, "grad_norm": 9.863539695739746, "learning_rate": 6.337601014543053e-06, "loss": 0.07975044, "memory(GiB)": 13.7, "step": 47260, "train_speed(iter/s)": 1.533156 }, { "acc": 0.98884811, "epoch": 22.153737989219593, "grad_norm": 4.615975856781006, "learning_rate": 6.33685412237333e-06, "loss": 0.04630264, "memory(GiB)": 13.7, "step": 47265, "train_speed(iter/s)": 1.533165 }, { "acc": 0.96986294, "epoch": 22.156081556128427, "grad_norm": 3.1663732528686523, "learning_rate": 6.336107198082042e-06, "loss": 0.09937193, "memory(GiB)": 13.7, "step": 47270, "train_speed(iter/s)": 1.533169 }, { "acc": 0.97201462, "epoch": 22.15842512303726, "grad_norm": 7.066352844238281, "learning_rate": 6.335360241687147e-06, "loss": 0.16542175, "memory(GiB)": 13.7, "step": 47275, "train_speed(iter/s)": 1.533182 }, { "acc": 0.97941923, "epoch": 22.1607686899461, "grad_norm": 9.631901741027832, "learning_rate": 6.3346132532065956e-06, "loss": 0.05791402, "memory(GiB)": 13.7, "step": 47280, "train_speed(iter/s)": 1.533184 }, { "acc": 0.97165184, "epoch": 22.163112256854934, "grad_norm": 6.340571880340576, "learning_rate": 6.3338662326583444e-06, "loss": 0.08824279, "memory(GiB)": 13.7, "step": 47285, "train_speed(iter/s)": 1.533188 }, { "acc": 0.97875004, "epoch": 22.165455823763768, "grad_norm": 7.231012344360352, "learning_rate": 6.3331191800603495e-06, "loss": 0.12864282, "memory(GiB)": 13.7, "step": 47290, "train_speed(iter/s)": 1.533189 }, { "acc": 0.97508154, "epoch": 22.167799390672602, "grad_norm": 0.043500710278749466, "learning_rate": 6.33237209543057e-06, "loss": 0.09157939, "memory(GiB)": 13.7, "step": 47295, "train_speed(iter/s)": 1.533185 }, { "acc": 0.98666124, "epoch": 22.17014295758144, "grad_norm": 3.979990243911743, "learning_rate": 6.331624978786959e-06, "loss": 0.04113408, "memory(GiB)": 13.7, "step": 47300, "train_speed(iter/s)": 1.533187 }, { "acc": 0.97934027, "epoch": 22.172486524490274, "grad_norm": 5.293113708496094, "learning_rate": 6.330877830147475e-06, "loss": 0.10337024, "memory(GiB)": 13.7, "step": 47305, "train_speed(iter/s)": 1.533193 }, { "acc": 0.97857151, "epoch": 22.17483009139911, "grad_norm": 5.752584457397461, "learning_rate": 6.330130649530081e-06, "loss": 0.04779108, "memory(GiB)": 13.7, "step": 47310, "train_speed(iter/s)": 1.533201 }, { "acc": 0.9863636, "epoch": 22.177173658307943, "grad_norm": 3.9642927646636963, "learning_rate": 6.329383436952732e-06, "loss": 0.08960969, "memory(GiB)": 13.7, "step": 47315, "train_speed(iter/s)": 1.533207 }, { "acc": 0.98403845, "epoch": 22.17951722521678, "grad_norm": 3.180072546005249, "learning_rate": 6.328636192433392e-06, "loss": 0.0511292, "memory(GiB)": 13.7, "step": 47320, "train_speed(iter/s)": 1.533208 }, { "acc": 0.96812496, "epoch": 22.181860792125615, "grad_norm": 6.876164436340332, "learning_rate": 6.327888915990021e-06, "loss": 0.07424294, "memory(GiB)": 13.7, "step": 47325, "train_speed(iter/s)": 1.533215 }, { "acc": 0.9953125, "epoch": 22.18420435903445, "grad_norm": 2.7049922943115234, "learning_rate": 6.327141607640581e-06, "loss": 0.0190007, "memory(GiB)": 13.7, "step": 47330, "train_speed(iter/s)": 1.533223 }, { "acc": 0.96788692, "epoch": 22.186547925943287, "grad_norm": 8.192732810974121, "learning_rate": 6.326394267403037e-06, "loss": 0.16412774, "memory(GiB)": 13.7, "step": 47335, "train_speed(iter/s)": 1.533227 }, { "acc": 0.97014523, "epoch": 22.18889149285212, "grad_norm": 4.089386463165283, "learning_rate": 6.32564689529535e-06, "loss": 0.12148538, "memory(GiB)": 13.7, "step": 47340, "train_speed(iter/s)": 1.53323 }, { "acc": 0.97723217, "epoch": 22.191235059760956, "grad_norm": 4.804081439971924, "learning_rate": 6.3248994913354845e-06, "loss": 0.15901532, "memory(GiB)": 13.7, "step": 47345, "train_speed(iter/s)": 1.533234 }, { "acc": 0.98198872, "epoch": 22.19357862666979, "grad_norm": 1.0094233751296997, "learning_rate": 6.324152055541406e-06, "loss": 0.12348558, "memory(GiB)": 13.7, "step": 47350, "train_speed(iter/s)": 1.533233 }, { "acc": 0.98675594, "epoch": 22.195922193578628, "grad_norm": 0.35903990268707275, "learning_rate": 6.323404587931082e-06, "loss": 0.04091326, "memory(GiB)": 13.7, "step": 47355, "train_speed(iter/s)": 1.53324 }, { "acc": 0.96875, "epoch": 22.198265760487462, "grad_norm": 4.970489978790283, "learning_rate": 6.322657088522475e-06, "loss": 0.10065172, "memory(GiB)": 13.7, "step": 47360, "train_speed(iter/s)": 1.53324 }, { "acc": 0.98819447, "epoch": 22.200609327396297, "grad_norm": 2.488342523574829, "learning_rate": 6.321909557333557e-06, "loss": 0.08118376, "memory(GiB)": 13.7, "step": 47365, "train_speed(iter/s)": 1.53324 }, { "acc": 0.99259806, "epoch": 22.20295289430513, "grad_norm": 1.698533535003662, "learning_rate": 6.3211619943822945e-06, "loss": 0.05284263, "memory(GiB)": 13.7, "step": 47370, "train_speed(iter/s)": 1.533239 }, { "acc": 0.9744298, "epoch": 22.20529646121397, "grad_norm": 1.0309948921203613, "learning_rate": 6.320414399686655e-06, "loss": 0.08888578, "memory(GiB)": 13.7, "step": 47375, "train_speed(iter/s)": 1.533248 }, { "acc": 0.99206848, "epoch": 22.207640028122803, "grad_norm": 3.8497211933135986, "learning_rate": 6.3196667732646126e-06, "loss": 0.06220874, "memory(GiB)": 13.7, "step": 47380, "train_speed(iter/s)": 1.533251 }, { "acc": 0.98675594, "epoch": 22.209983595031638, "grad_norm": 1.4969744682312012, "learning_rate": 6.318919115134133e-06, "loss": 0.02360741, "memory(GiB)": 13.7, "step": 47385, "train_speed(iter/s)": 1.533258 }, { "acc": 0.98779764, "epoch": 22.212327161940472, "grad_norm": 4.264286518096924, "learning_rate": 6.318171425313189e-06, "loss": 0.0517253, "memory(GiB)": 13.7, "step": 47390, "train_speed(iter/s)": 1.533255 }, { "acc": 0.98335228, "epoch": 22.21467072884931, "grad_norm": 1.2119451761245728, "learning_rate": 6.3174237038197525e-06, "loss": 0.06376378, "memory(GiB)": 13.7, "step": 47395, "train_speed(iter/s)": 1.533257 }, { "acc": 0.98239584, "epoch": 22.217014295758144, "grad_norm": 4.275272846221924, "learning_rate": 6.3166759506717965e-06, "loss": 0.06698225, "memory(GiB)": 13.7, "step": 47400, "train_speed(iter/s)": 1.533251 }, { "acc": 0.99278851, "epoch": 22.21935786266698, "grad_norm": 1.4773716926574707, "learning_rate": 6.315928165887295e-06, "loss": 0.03226556, "memory(GiB)": 13.7, "step": 47405, "train_speed(iter/s)": 1.533258 }, { "acc": 0.98781242, "epoch": 22.221701429575816, "grad_norm": 3.200871467590332, "learning_rate": 6.315180349484222e-06, "loss": 0.02507175, "memory(GiB)": 13.7, "step": 47410, "train_speed(iter/s)": 1.533261 }, { "acc": 0.98627863, "epoch": 22.22404499648465, "grad_norm": 3.751347064971924, "learning_rate": 6.3144325014805515e-06, "loss": 0.06881682, "memory(GiB)": 13.7, "step": 47415, "train_speed(iter/s)": 1.533269 }, { "acc": 0.9776, "epoch": 22.226388563393485, "grad_norm": 4.40123176574707, "learning_rate": 6.313684621894263e-06, "loss": 0.06659092, "memory(GiB)": 13.7, "step": 47420, "train_speed(iter/s)": 1.533275 }, { "acc": 0.98354578, "epoch": 22.22873213030232, "grad_norm": 7.101879119873047, "learning_rate": 6.3129367107433296e-06, "loss": 0.08719271, "memory(GiB)": 13.7, "step": 47425, "train_speed(iter/s)": 1.533283 }, { "acc": 0.98708334, "epoch": 22.231075697211157, "grad_norm": 3.651054859161377, "learning_rate": 6.312188768045727e-06, "loss": 0.08378664, "memory(GiB)": 13.7, "step": 47430, "train_speed(iter/s)": 1.533282 }, { "acc": 0.98708334, "epoch": 22.23341926411999, "grad_norm": 0.05687132850289345, "learning_rate": 6.311440793819438e-06, "loss": 0.03165156, "memory(GiB)": 13.7, "step": 47435, "train_speed(iter/s)": 1.533274 }, { "acc": 0.97041664, "epoch": 22.235762831028826, "grad_norm": 0.27493515610694885, "learning_rate": 6.3106927880824376e-06, "loss": 0.09068262, "memory(GiB)": 13.7, "step": 47440, "train_speed(iter/s)": 1.533278 }, { "acc": 0.98421841, "epoch": 22.23810639793766, "grad_norm": 6.3124237060546875, "learning_rate": 6.3099447508527075e-06, "loss": 0.04765555, "memory(GiB)": 13.7, "step": 47445, "train_speed(iter/s)": 1.533279 }, { "acc": 0.99479532, "epoch": 22.240449964846498, "grad_norm": 2.2726333141326904, "learning_rate": 6.3091966821482275e-06, "loss": 0.03292384, "memory(GiB)": 13.7, "step": 47450, "train_speed(iter/s)": 1.533279 }, { "acc": 0.97243156, "epoch": 22.242793531755332, "grad_norm": 1.7143276929855347, "learning_rate": 6.308448581986978e-06, "loss": 0.10332596, "memory(GiB)": 13.7, "step": 47455, "train_speed(iter/s)": 1.533278 }, { "acc": 0.98809528, "epoch": 22.245137098664166, "grad_norm": 3.930720329284668, "learning_rate": 6.307700450386941e-06, "loss": 0.04177783, "memory(GiB)": 13.7, "step": 47460, "train_speed(iter/s)": 1.533288 }, { "acc": 0.98440475, "epoch": 22.247480665573, "grad_norm": 1.1684342622756958, "learning_rate": 6.306952287366103e-06, "loss": 0.04276931, "memory(GiB)": 13.7, "step": 47465, "train_speed(iter/s)": 1.533299 }, { "acc": 1.0, "epoch": 22.24982423248184, "grad_norm": 0.0024395522195845842, "learning_rate": 6.306204092942442e-06, "loss": 0.01375239, "memory(GiB)": 13.7, "step": 47470, "train_speed(iter/s)": 1.533303 }, { "acc": 0.96655636, "epoch": 22.252167799390673, "grad_norm": 7.053746223449707, "learning_rate": 6.305455867133944e-06, "loss": 0.08710415, "memory(GiB)": 13.7, "step": 47475, "train_speed(iter/s)": 1.533302 }, { "acc": 0.99181547, "epoch": 22.254511366299507, "grad_norm": 2.5649125576019287, "learning_rate": 6.304707609958594e-06, "loss": 0.02883343, "memory(GiB)": 13.7, "step": 47480, "train_speed(iter/s)": 1.533302 }, { "acc": 0.99011364, "epoch": 22.25685493320834, "grad_norm": 1.6264225244522095, "learning_rate": 6.303959321434377e-06, "loss": 0.03856547, "memory(GiB)": 13.7, "step": 47485, "train_speed(iter/s)": 1.533302 }, { "acc": 0.99028845, "epoch": 22.25919850011718, "grad_norm": 0.3045576214790344, "learning_rate": 6.3032110015792795e-06, "loss": 0.04643966, "memory(GiB)": 13.7, "step": 47490, "train_speed(iter/s)": 1.533308 }, { "acc": 0.98388891, "epoch": 22.261542067026014, "grad_norm": 5.942150115966797, "learning_rate": 6.3024626504112904e-06, "loss": 0.04311225, "memory(GiB)": 13.7, "step": 47495, "train_speed(iter/s)": 1.533311 }, { "acc": 0.99383011, "epoch": 22.263885633934848, "grad_norm": 4.113958358764648, "learning_rate": 6.301714267948397e-06, "loss": 0.01965587, "memory(GiB)": 13.7, "step": 47500, "train_speed(iter/s)": 1.533308 }, { "acc": 0.97423496, "epoch": 22.266229200843686, "grad_norm": 2.8333585262298584, "learning_rate": 6.30096585420859e-06, "loss": 0.08187833, "memory(GiB)": 13.7, "step": 47505, "train_speed(iter/s)": 1.533311 }, { "acc": 0.98571434, "epoch": 22.26857276775252, "grad_norm": 3.1259098052978516, "learning_rate": 6.300217409209851e-06, "loss": 0.06747521, "memory(GiB)": 13.7, "step": 47510, "train_speed(iter/s)": 1.533311 }, { "acc": 0.98672619, "epoch": 22.270916334661354, "grad_norm": 9.474556922912598, "learning_rate": 6.299468932970179e-06, "loss": 0.04592739, "memory(GiB)": 13.7, "step": 47515, "train_speed(iter/s)": 1.533309 }, { "acc": 0.99229164, "epoch": 22.27325990157019, "grad_norm": 2.6708316802978516, "learning_rate": 6.2987204255075585e-06, "loss": 0.03610697, "memory(GiB)": 13.7, "step": 47520, "train_speed(iter/s)": 1.533316 }, { "acc": 0.9791666, "epoch": 22.275603468479027, "grad_norm": 3.374537706375122, "learning_rate": 6.297971886839985e-06, "loss": 0.06644291, "memory(GiB)": 13.7, "step": 47525, "train_speed(iter/s)": 1.533323 }, { "acc": 0.98117065, "epoch": 22.27794703538786, "grad_norm": 5.672805309295654, "learning_rate": 6.297223316985452e-06, "loss": 0.06389247, "memory(GiB)": 13.7, "step": 47530, "train_speed(iter/s)": 1.533324 }, { "acc": 0.97486115, "epoch": 22.280290602296695, "grad_norm": 0.8366833925247192, "learning_rate": 6.296474715961948e-06, "loss": 0.09484472, "memory(GiB)": 13.7, "step": 47535, "train_speed(iter/s)": 1.533333 }, { "acc": 0.96932964, "epoch": 22.28263416920553, "grad_norm": 4.548503875732422, "learning_rate": 6.29572608378747e-06, "loss": 0.09264854, "memory(GiB)": 13.7, "step": 47540, "train_speed(iter/s)": 1.533334 }, { "acc": 0.97342262, "epoch": 22.284977736114367, "grad_norm": 2.793494701385498, "learning_rate": 6.294977420480013e-06, "loss": 0.05826836, "memory(GiB)": 13.7, "step": 47545, "train_speed(iter/s)": 1.533331 }, { "acc": 0.97724209, "epoch": 22.2873213030232, "grad_norm": 2.6321403980255127, "learning_rate": 6.29422872605757e-06, "loss": 0.09632358, "memory(GiB)": 13.7, "step": 47550, "train_speed(iter/s)": 1.533336 }, { "acc": 0.9887845, "epoch": 22.289664869932036, "grad_norm": 2.5583059787750244, "learning_rate": 6.293480000538141e-06, "loss": 0.04700825, "memory(GiB)": 13.7, "step": 47555, "train_speed(iter/s)": 1.533338 }, { "acc": 0.96907196, "epoch": 22.29200843684087, "grad_norm": 2.8700382709503174, "learning_rate": 6.292731243939719e-06, "loss": 0.10773473, "memory(GiB)": 13.7, "step": 47560, "train_speed(iter/s)": 1.53335 }, { "acc": 0.99270287, "epoch": 22.294352003749708, "grad_norm": 2.463916540145874, "learning_rate": 6.291982456280303e-06, "loss": 0.03799018, "memory(GiB)": 13.7, "step": 47565, "train_speed(iter/s)": 1.533361 }, { "acc": 0.98369818, "epoch": 22.296695570658542, "grad_norm": 2.906187057495117, "learning_rate": 6.2912336375778916e-06, "loss": 0.06213102, "memory(GiB)": 13.7, "step": 47570, "train_speed(iter/s)": 1.533363 }, { "acc": 0.97678146, "epoch": 22.299039137567377, "grad_norm": 2.9869441986083984, "learning_rate": 6.290484787850484e-06, "loss": 0.07215122, "memory(GiB)": 13.7, "step": 47575, "train_speed(iter/s)": 1.533366 }, { "acc": 0.97696438, "epoch": 22.301382704476215, "grad_norm": 3.824108362197876, "learning_rate": 6.289735907116079e-06, "loss": 0.09557527, "memory(GiB)": 13.7, "step": 47580, "train_speed(iter/s)": 1.533368 }, { "acc": 0.98500004, "epoch": 22.30372627138505, "grad_norm": 0.9230487942695618, "learning_rate": 6.288986995392681e-06, "loss": 0.04383899, "memory(GiB)": 13.7, "step": 47585, "train_speed(iter/s)": 1.533362 }, { "acc": 0.98660717, "epoch": 22.306069838293883, "grad_norm": 1.9072015285491943, "learning_rate": 6.288238052698288e-06, "loss": 0.0446929, "memory(GiB)": 13.7, "step": 47590, "train_speed(iter/s)": 1.533367 }, { "acc": 0.97958336, "epoch": 22.308413405202717, "grad_norm": 5.347455024719238, "learning_rate": 6.287489079050901e-06, "loss": 0.06262488, "memory(GiB)": 13.7, "step": 47595, "train_speed(iter/s)": 1.533367 }, { "acc": 0.97416668, "epoch": 22.310756972111555, "grad_norm": 8.956596374511719, "learning_rate": 6.286740074468526e-06, "loss": 0.12498147, "memory(GiB)": 13.7, "step": 47600, "train_speed(iter/s)": 1.533363 }, { "acc": 0.9841918, "epoch": 22.31310053902039, "grad_norm": 4.337295055389404, "learning_rate": 6.2859910389691644e-06, "loss": 0.04941776, "memory(GiB)": 13.7, "step": 47605, "train_speed(iter/s)": 1.533376 }, { "acc": 0.97559528, "epoch": 22.315444105929224, "grad_norm": 5.036278247833252, "learning_rate": 6.285241972570821e-06, "loss": 0.11143612, "memory(GiB)": 13.7, "step": 47610, "train_speed(iter/s)": 1.533372 }, { "acc": 0.98186264, "epoch": 22.317787672838058, "grad_norm": 7.135065078735352, "learning_rate": 6.284492875291502e-06, "loss": 0.0580398, "memory(GiB)": 13.7, "step": 47615, "train_speed(iter/s)": 1.533369 }, { "acc": 1.0, "epoch": 22.320131239746896, "grad_norm": 0.02167113684117794, "learning_rate": 6.2837437471492125e-06, "loss": 0.01042347, "memory(GiB)": 13.7, "step": 47620, "train_speed(iter/s)": 1.53337 }, { "acc": 0.9786211, "epoch": 22.32247480665573, "grad_norm": 2.7370245456695557, "learning_rate": 6.2829945881619596e-06, "loss": 0.07958609, "memory(GiB)": 13.7, "step": 47625, "train_speed(iter/s)": 1.533374 }, { "acc": 0.98358135, "epoch": 22.324818373564565, "grad_norm": 1.3530739545822144, "learning_rate": 6.28224539834775e-06, "loss": 0.0693002, "memory(GiB)": 13.7, "step": 47630, "train_speed(iter/s)": 1.533369 }, { "acc": 0.98067703, "epoch": 22.3271619404734, "grad_norm": 5.411835193634033, "learning_rate": 6.281496177724593e-06, "loss": 0.04877635, "memory(GiB)": 13.7, "step": 47635, "train_speed(iter/s)": 1.53337 }, { "acc": 0.96958332, "epoch": 22.329505507382237, "grad_norm": 6.070980072021484, "learning_rate": 6.2807469263104935e-06, "loss": 0.14747964, "memory(GiB)": 13.7, "step": 47640, "train_speed(iter/s)": 1.533367 }, { "acc": 0.97370911, "epoch": 22.33184907429107, "grad_norm": 4.108757019042969, "learning_rate": 6.2799976441234656e-06, "loss": 0.07412827, "memory(GiB)": 13.7, "step": 47645, "train_speed(iter/s)": 1.533369 }, { "acc": 0.9901042, "epoch": 22.334192641199905, "grad_norm": 2.258636713027954, "learning_rate": 6.2792483311815175e-06, "loss": 0.06185893, "memory(GiB)": 13.7, "step": 47650, "train_speed(iter/s)": 1.533372 }, { "acc": 0.99593754, "epoch": 22.33653620810874, "grad_norm": 0.8217688202857971, "learning_rate": 6.278498987502659e-06, "loss": 0.01226588, "memory(GiB)": 13.7, "step": 47655, "train_speed(iter/s)": 1.533374 }, { "acc": 0.98708334, "epoch": 22.338879775017578, "grad_norm": 0.4907490015029907, "learning_rate": 6.277749613104905e-06, "loss": 0.02735084, "memory(GiB)": 13.7, "step": 47660, "train_speed(iter/s)": 1.533372 }, { "acc": 0.98500004, "epoch": 22.341223341926412, "grad_norm": 4.553249835968018, "learning_rate": 6.2770002080062665e-06, "loss": 0.05737728, "memory(GiB)": 13.7, "step": 47665, "train_speed(iter/s)": 1.533372 }, { "acc": 0.9833334, "epoch": 22.343566908835246, "grad_norm": 6.883594989776611, "learning_rate": 6.276250772224756e-06, "loss": 0.09924996, "memory(GiB)": 13.7, "step": 47670, "train_speed(iter/s)": 1.53338 }, { "acc": 0.97531252, "epoch": 22.345910475744084, "grad_norm": 4.830722808837891, "learning_rate": 6.27550130577839e-06, "loss": 0.07712454, "memory(GiB)": 13.7, "step": 47675, "train_speed(iter/s)": 1.533387 }, { "acc": 0.98328381, "epoch": 22.34825404265292, "grad_norm": 1.0839189291000366, "learning_rate": 6.27475180868518e-06, "loss": 0.08099992, "memory(GiB)": 13.7, "step": 47680, "train_speed(iter/s)": 1.533397 }, { "acc": 0.99020824, "epoch": 22.350597609561753, "grad_norm": 0.6591397523880005, "learning_rate": 6.274002280963142e-06, "loss": 0.02884654, "memory(GiB)": 13.7, "step": 47685, "train_speed(iter/s)": 1.533406 }, { "acc": 0.97664146, "epoch": 22.352941176470587, "grad_norm": 11.73169231414795, "learning_rate": 6.273252722630294e-06, "loss": 0.0968063, "memory(GiB)": 13.7, "step": 47690, "train_speed(iter/s)": 1.533412 }, { "acc": 0.9833333, "epoch": 22.355284743379425, "grad_norm": 3.33465313911438, "learning_rate": 6.272503133704649e-06, "loss": 0.04148592, "memory(GiB)": 13.7, "step": 47695, "train_speed(iter/s)": 1.533413 }, { "acc": 0.96625004, "epoch": 22.35762831028826, "grad_norm": 2.8711116313934326, "learning_rate": 6.271753514204231e-06, "loss": 0.08287007, "memory(GiB)": 13.7, "step": 47700, "train_speed(iter/s)": 1.533422 }, { "acc": 0.98055973, "epoch": 22.359971877197093, "grad_norm": 2.8309051990509033, "learning_rate": 6.271003864147054e-06, "loss": 0.11237724, "memory(GiB)": 13.7, "step": 47705, "train_speed(iter/s)": 1.533431 }, { "acc": 0.98467264, "epoch": 22.362315444105928, "grad_norm": 2.078678607940674, "learning_rate": 6.270254183551137e-06, "loss": 0.04368238, "memory(GiB)": 13.7, "step": 47710, "train_speed(iter/s)": 1.533434 }, { "acc": 0.9815814, "epoch": 22.364659011014766, "grad_norm": 5.616445064544678, "learning_rate": 6.2695044724345025e-06, "loss": 0.06961998, "memory(GiB)": 13.7, "step": 47715, "train_speed(iter/s)": 1.533439 }, { "acc": 0.97855911, "epoch": 22.3670025779236, "grad_norm": 1.781058669090271, "learning_rate": 6.268754730815168e-06, "loss": 0.07711701, "memory(GiB)": 13.7, "step": 47720, "train_speed(iter/s)": 1.533433 }, { "acc": 0.97763672, "epoch": 22.369346144832434, "grad_norm": 5.585373401641846, "learning_rate": 6.268004958711159e-06, "loss": 0.07405511, "memory(GiB)": 13.7, "step": 47725, "train_speed(iter/s)": 1.53343 }, { "acc": 0.99613094, "epoch": 22.37168971174127, "grad_norm": 1.1065679788589478, "learning_rate": 6.267255156140492e-06, "loss": 0.01541019, "memory(GiB)": 13.7, "step": 47730, "train_speed(iter/s)": 1.533439 }, { "acc": 0.9791667, "epoch": 22.374033278650106, "grad_norm": 3.2449450492858887, "learning_rate": 6.266505323121193e-06, "loss": 0.03744549, "memory(GiB)": 13.7, "step": 47735, "train_speed(iter/s)": 1.533442 }, { "acc": 0.996875, "epoch": 22.37637684555894, "grad_norm": 3.2157301902770996, "learning_rate": 6.265755459671284e-06, "loss": 0.02067181, "memory(GiB)": 13.7, "step": 47740, "train_speed(iter/s)": 1.533447 }, { "acc": 0.98140755, "epoch": 22.378720412467775, "grad_norm": 14.204875946044922, "learning_rate": 6.2650055658087936e-06, "loss": 0.10466685, "memory(GiB)": 13.7, "step": 47745, "train_speed(iter/s)": 1.533448 }, { "acc": 0.952598, "epoch": 22.381063979376613, "grad_norm": 6.3637237548828125, "learning_rate": 6.264255641551742e-06, "loss": 0.13733509, "memory(GiB)": 13.7, "step": 47750, "train_speed(iter/s)": 1.533456 }, { "acc": 0.99375, "epoch": 22.383407546285447, "grad_norm": 4.410087585449219, "learning_rate": 6.2635056869181565e-06, "loss": 0.03437076, "memory(GiB)": 13.7, "step": 47755, "train_speed(iter/s)": 1.533462 }, { "acc": 0.98946428, "epoch": 22.38575111319428, "grad_norm": 0.6584946513175964, "learning_rate": 6.262755701926065e-06, "loss": 0.04356792, "memory(GiB)": 13.7, "step": 47760, "train_speed(iter/s)": 1.533466 }, { "acc": 0.96528854, "epoch": 22.388094680103116, "grad_norm": 3.27116060256958, "learning_rate": 6.262005686593492e-06, "loss": 0.11637635, "memory(GiB)": 13.7, "step": 47765, "train_speed(iter/s)": 1.533459 }, { "acc": 0.96776295, "epoch": 22.390438247011954, "grad_norm": 3.4233386516571045, "learning_rate": 6.261255640938465e-06, "loss": 0.12487857, "memory(GiB)": 13.7, "step": 47770, "train_speed(iter/s)": 1.533461 }, { "acc": 0.99192705, "epoch": 22.392781813920788, "grad_norm": 2.688666582107544, "learning_rate": 6.260505564979017e-06, "loss": 0.04569461, "memory(GiB)": 13.7, "step": 47775, "train_speed(iter/s)": 1.533459 }, { "acc": 0.98973217, "epoch": 22.395125380829622, "grad_norm": 5.643017292022705, "learning_rate": 6.2597554587331745e-06, "loss": 0.05575242, "memory(GiB)": 13.7, "step": 47780, "train_speed(iter/s)": 1.533462 }, { "acc": 0.98758154, "epoch": 22.397468947738457, "grad_norm": 5.60380220413208, "learning_rate": 6.259005322218965e-06, "loss": 0.03613524, "memory(GiB)": 13.7, "step": 47785, "train_speed(iter/s)": 1.533469 }, { "acc": 0.97361107, "epoch": 22.399812514647294, "grad_norm": 7.4358906745910645, "learning_rate": 6.258255155454425e-06, "loss": 0.11433887, "memory(GiB)": 13.7, "step": 47790, "train_speed(iter/s)": 1.533472 }, { "acc": 0.97321434, "epoch": 22.40215608155613, "grad_norm": 3.9721312522888184, "learning_rate": 6.257504958457582e-06, "loss": 0.0885512, "memory(GiB)": 13.7, "step": 47795, "train_speed(iter/s)": 1.533475 }, { "acc": 0.98968754, "epoch": 22.404499648464963, "grad_norm": 6.184988975524902, "learning_rate": 6.256754731246469e-06, "loss": 0.08136398, "memory(GiB)": 13.7, "step": 47800, "train_speed(iter/s)": 1.533486 }, { "acc": 0.99571428, "epoch": 22.406843215373797, "grad_norm": 0.3588254451751709, "learning_rate": 6.25600447383912e-06, "loss": 0.02897062, "memory(GiB)": 13.7, "step": 47805, "train_speed(iter/s)": 1.533488 }, { "acc": 0.98334484, "epoch": 22.409186782282635, "grad_norm": 0.010807293467223644, "learning_rate": 6.255254186253568e-06, "loss": 0.06677435, "memory(GiB)": 13.7, "step": 47810, "train_speed(iter/s)": 1.533494 }, { "acc": 0.98187494, "epoch": 22.41153034919147, "grad_norm": 0.9279181957244873, "learning_rate": 6.254503868507847e-06, "loss": 0.05178336, "memory(GiB)": 13.7, "step": 47815, "train_speed(iter/s)": 1.533503 }, { "acc": 0.97038689, "epoch": 22.413873916100304, "grad_norm": 1.9403760433197021, "learning_rate": 6.253753520619995e-06, "loss": 0.09455852, "memory(GiB)": 13.7, "step": 47820, "train_speed(iter/s)": 1.533514 }, { "acc": 0.97887306, "epoch": 22.41621748300914, "grad_norm": 5.068410873413086, "learning_rate": 6.253003142608044e-06, "loss": 0.12152274, "memory(GiB)": 13.7, "step": 47825, "train_speed(iter/s)": 1.53352 }, { "acc": 0.98708334, "epoch": 22.418561049917976, "grad_norm": 3.076251745223999, "learning_rate": 6.252252734490031e-06, "loss": 0.05012128, "memory(GiB)": 13.7, "step": 47830, "train_speed(iter/s)": 1.533528 }, { "acc": 0.98520298, "epoch": 22.42090461682681, "grad_norm": 4.380109786987305, "learning_rate": 6.251502296283994e-06, "loss": 0.08831706, "memory(GiB)": 13.7, "step": 47835, "train_speed(iter/s)": 1.533531 }, { "acc": 0.98633928, "epoch": 22.423248183735645, "grad_norm": 2.9865458011627197, "learning_rate": 6.250751828007974e-06, "loss": 0.05297316, "memory(GiB)": 13.7, "step": 47840, "train_speed(iter/s)": 1.533548 }, { "acc": 0.99125004, "epoch": 22.425591750644482, "grad_norm": 3.539914846420288, "learning_rate": 6.250001329680008e-06, "loss": 0.02143817, "memory(GiB)": 13.7, "step": 47845, "train_speed(iter/s)": 1.533554 }, { "acc": 0.9906023, "epoch": 22.427935317553317, "grad_norm": 0.19213959574699402, "learning_rate": 6.249250801318134e-06, "loss": 0.03809699, "memory(GiB)": 13.7, "step": 47850, "train_speed(iter/s)": 1.533557 }, { "acc": 0.97229166, "epoch": 22.43027888446215, "grad_norm": 6.478331089019775, "learning_rate": 6.248500242940393e-06, "loss": 0.07553897, "memory(GiB)": 13.7, "step": 47855, "train_speed(iter/s)": 1.533561 }, { "acc": 0.9875, "epoch": 22.432622451370985, "grad_norm": 3.920926332473755, "learning_rate": 6.2477496545648255e-06, "loss": 0.03358745, "memory(GiB)": 13.7, "step": 47860, "train_speed(iter/s)": 1.533577 }, { "acc": 0.98708334, "epoch": 22.434966018279823, "grad_norm": 3.1519200801849365, "learning_rate": 6.246999036209475e-06, "loss": 0.04261495, "memory(GiB)": 13.7, "step": 47865, "train_speed(iter/s)": 1.533587 }, { "acc": 0.99541664, "epoch": 22.437309585188657, "grad_norm": 1.0818507671356201, "learning_rate": 6.246248387892382e-06, "loss": 0.01897144, "memory(GiB)": 13.7, "step": 47870, "train_speed(iter/s)": 1.533589 }, { "acc": 0.99142857, "epoch": 22.439653152097492, "grad_norm": 1.402764081954956, "learning_rate": 6.245497709631589e-06, "loss": 0.02889695, "memory(GiB)": 13.7, "step": 47875, "train_speed(iter/s)": 1.533594 }, { "acc": 0.98918743, "epoch": 22.441996719006326, "grad_norm": 5.066995620727539, "learning_rate": 6.244747001445143e-06, "loss": 0.03283133, "memory(GiB)": 13.7, "step": 47880, "train_speed(iter/s)": 1.533592 }, { "acc": 0.98132439, "epoch": 22.444340285915164, "grad_norm": 1.2176100015640259, "learning_rate": 6.243996263351089e-06, "loss": 0.06971579, "memory(GiB)": 13.7, "step": 47885, "train_speed(iter/s)": 1.533601 }, { "acc": 0.98773031, "epoch": 22.446683852824, "grad_norm": 0.37778306007385254, "learning_rate": 6.243245495367466e-06, "loss": 0.0631373, "memory(GiB)": 13.7, "step": 47890, "train_speed(iter/s)": 1.533615 }, { "acc": 0.98633928, "epoch": 22.449027419732833, "grad_norm": 2.6470420360565186, "learning_rate": 6.242494697512326e-06, "loss": 0.0351535, "memory(GiB)": 13.7, "step": 47895, "train_speed(iter/s)": 1.53362 }, { "acc": 0.97593136, "epoch": 22.45137098664167, "grad_norm": 4.323358058929443, "learning_rate": 6.241743869803714e-06, "loss": 0.08288118, "memory(GiB)": 13.7, "step": 47900, "train_speed(iter/s)": 1.533627 }, { "acc": 0.98270836, "epoch": 22.453714553550505, "grad_norm": 3.444894790649414, "learning_rate": 6.240993012259674e-06, "loss": 0.04049203, "memory(GiB)": 13.7, "step": 47905, "train_speed(iter/s)": 1.53363 }, { "acc": 0.96821432, "epoch": 22.45605812045934, "grad_norm": 4.902509689331055, "learning_rate": 6.2402421248982606e-06, "loss": 0.06867858, "memory(GiB)": 13.7, "step": 47910, "train_speed(iter/s)": 1.533637 }, { "acc": 0.9822916, "epoch": 22.458401687368173, "grad_norm": 1.4284013509750366, "learning_rate": 6.239491207737518e-06, "loss": 0.03775364, "memory(GiB)": 13.7, "step": 47915, "train_speed(iter/s)": 1.533639 }, { "acc": 0.97531252, "epoch": 22.46074525427701, "grad_norm": 2.5643396377563477, "learning_rate": 6.238740260795499e-06, "loss": 0.11143166, "memory(GiB)": 13.7, "step": 47920, "train_speed(iter/s)": 1.533637 }, { "acc": 0.98880682, "epoch": 22.463088821185845, "grad_norm": 8.103924751281738, "learning_rate": 6.237989284090251e-06, "loss": 0.0663583, "memory(GiB)": 13.7, "step": 47925, "train_speed(iter/s)": 1.533635 }, { "acc": 0.99236107, "epoch": 22.46543238809468, "grad_norm": 1.6786673069000244, "learning_rate": 6.237238277639826e-06, "loss": 0.02533894, "memory(GiB)": 13.7, "step": 47930, "train_speed(iter/s)": 1.533631 }, { "acc": 0.97448864, "epoch": 22.467775955003514, "grad_norm": 6.933476448059082, "learning_rate": 6.236487241462276e-06, "loss": 0.08540065, "memory(GiB)": 13.7, "step": 47935, "train_speed(iter/s)": 1.533627 }, { "acc": 0.97692461, "epoch": 22.470119521912352, "grad_norm": 6.487515926361084, "learning_rate": 6.235736175575652e-06, "loss": 0.09700124, "memory(GiB)": 13.7, "step": 47940, "train_speed(iter/s)": 1.533622 }, { "acc": 0.99333334, "epoch": 22.472463088821186, "grad_norm": 0.1590648740530014, "learning_rate": 6.234985079998008e-06, "loss": 0.01188393, "memory(GiB)": 13.7, "step": 47945, "train_speed(iter/s)": 1.533618 }, { "acc": 0.97937498, "epoch": 22.47480665573002, "grad_norm": 4.22947359085083, "learning_rate": 6.2342339547474e-06, "loss": 0.07310678, "memory(GiB)": 13.7, "step": 47950, "train_speed(iter/s)": 1.533627 }, { "acc": 0.9791666, "epoch": 22.477150222638855, "grad_norm": 4.24565315246582, "learning_rate": 6.233482799841881e-06, "loss": 0.0605274, "memory(GiB)": 13.7, "step": 47955, "train_speed(iter/s)": 1.533632 }, { "acc": 0.9854166, "epoch": 22.479493789547693, "grad_norm": 3.791700839996338, "learning_rate": 6.2327316152995046e-06, "loss": 0.05349498, "memory(GiB)": 13.7, "step": 47960, "train_speed(iter/s)": 1.533632 }, { "acc": 0.97739582, "epoch": 22.481837356456527, "grad_norm": 4.186174392700195, "learning_rate": 6.23198040113833e-06, "loss": 0.08569324, "memory(GiB)": 13.7, "step": 47965, "train_speed(iter/s)": 1.533629 }, { "acc": 0.98573389, "epoch": 22.48418092336536, "grad_norm": 4.998338222503662, "learning_rate": 6.2312291573764125e-06, "loss": 0.04709283, "memory(GiB)": 13.7, "step": 47970, "train_speed(iter/s)": 1.533635 }, { "acc": 0.99070511, "epoch": 22.486524490274196, "grad_norm": 2.345585823059082, "learning_rate": 6.230477884031808e-06, "loss": 0.06813156, "memory(GiB)": 13.7, "step": 47975, "train_speed(iter/s)": 1.533642 }, { "acc": 0.96250534, "epoch": 22.488868057183033, "grad_norm": 5.0832953453063965, "learning_rate": 6.229726581122576e-06, "loss": 0.114429, "memory(GiB)": 13.7, "step": 47980, "train_speed(iter/s)": 1.533639 }, { "acc": 0.98205929, "epoch": 22.491211624091868, "grad_norm": 1.5910649299621582, "learning_rate": 6.228975248666775e-06, "loss": 0.10595922, "memory(GiB)": 13.7, "step": 47985, "train_speed(iter/s)": 1.533645 }, { "acc": 0.98937492, "epoch": 22.493555191000702, "grad_norm": 2.1484243869781494, "learning_rate": 6.228223886682466e-06, "loss": 0.04181986, "memory(GiB)": 13.7, "step": 47990, "train_speed(iter/s)": 1.533644 }, { "acc": 0.99120045, "epoch": 22.49589875790954, "grad_norm": 2.112004041671753, "learning_rate": 6.227472495187708e-06, "loss": 0.05412889, "memory(GiB)": 13.7, "step": 47995, "train_speed(iter/s)": 1.533652 }, { "acc": 0.9859375, "epoch": 22.498242324818374, "grad_norm": 5.438291072845459, "learning_rate": 6.226721074200562e-06, "loss": 0.06268896, "memory(GiB)": 13.7, "step": 48000, "train_speed(iter/s)": 1.533653 }, { "acc": 0.98208332, "epoch": 22.50058589172721, "grad_norm": 6.337208271026611, "learning_rate": 6.22596962373909e-06, "loss": 0.04796525, "memory(GiB)": 13.7, "step": 48005, "train_speed(iter/s)": 1.533663 }, { "acc": 0.97940483, "epoch": 22.502929458636043, "grad_norm": 5.11169958114624, "learning_rate": 6.225218143821355e-06, "loss": 0.05072351, "memory(GiB)": 13.7, "step": 48010, "train_speed(iter/s)": 1.533668 }, { "acc": 0.98364563, "epoch": 22.50527302554488, "grad_norm": 0.5273395776748657, "learning_rate": 6.2244666344654185e-06, "loss": 0.05914098, "memory(GiB)": 13.7, "step": 48015, "train_speed(iter/s)": 1.533674 }, { "acc": 0.96782198, "epoch": 22.507616592453715, "grad_norm": 7.185523509979248, "learning_rate": 6.223715095689343e-06, "loss": 0.13827003, "memory(GiB)": 13.7, "step": 48020, "train_speed(iter/s)": 1.533682 }, { "acc": 0.98631945, "epoch": 22.50996015936255, "grad_norm": 3.8098695278167725, "learning_rate": 6.222963527511199e-06, "loss": 0.02081678, "memory(GiB)": 13.7, "step": 48025, "train_speed(iter/s)": 1.533683 }, { "acc": 0.98526783, "epoch": 22.512303726271384, "grad_norm": 3.2022337913513184, "learning_rate": 6.222211929949044e-06, "loss": 0.07258967, "memory(GiB)": 13.7, "step": 48030, "train_speed(iter/s)": 1.533683 }, { "acc": 0.9854167, "epoch": 22.51464729318022, "grad_norm": 7.154712200164795, "learning_rate": 6.22146030302095e-06, "loss": 0.03354428, "memory(GiB)": 13.7, "step": 48035, "train_speed(iter/s)": 1.533688 }, { "acc": 0.996875, "epoch": 22.516990860089056, "grad_norm": 2.5729966163635254, "learning_rate": 6.220708646744981e-06, "loss": 0.01982103, "memory(GiB)": 13.7, "step": 48040, "train_speed(iter/s)": 1.533694 }, { "acc": 0.97355194, "epoch": 22.51933442699789, "grad_norm": 3.2400357723236084, "learning_rate": 6.219956961139206e-06, "loss": 0.09733586, "memory(GiB)": 13.7, "step": 48045, "train_speed(iter/s)": 1.5337 }, { "acc": 0.98500004, "epoch": 22.521677993906724, "grad_norm": 4.011753082275391, "learning_rate": 6.219205246221691e-06, "loss": 0.06516314, "memory(GiB)": 13.7, "step": 48050, "train_speed(iter/s)": 1.533702 }, { "acc": 0.98015881, "epoch": 22.524021560815562, "grad_norm": 4.3865556716918945, "learning_rate": 6.2184535020105065e-06, "loss": 0.07193329, "memory(GiB)": 13.7, "step": 48055, "train_speed(iter/s)": 1.533702 }, { "acc": 0.97250004, "epoch": 22.526365127724397, "grad_norm": 7.126108169555664, "learning_rate": 6.217701728523721e-06, "loss": 0.10045105, "memory(GiB)": 13.7, "step": 48060, "train_speed(iter/s)": 1.533706 }, { "acc": 0.98761978, "epoch": 22.52870869463323, "grad_norm": 0.007336195092648268, "learning_rate": 6.216949925779402e-06, "loss": 0.06458335, "memory(GiB)": 13.7, "step": 48065, "train_speed(iter/s)": 1.533714 }, { "acc": 0.97241879, "epoch": 22.531052261542065, "grad_norm": 2.7323508262634277, "learning_rate": 6.216198093795627e-06, "loss": 0.10125971, "memory(GiB)": 13.7, "step": 48070, "train_speed(iter/s)": 1.533723 }, { "acc": 0.98217258, "epoch": 22.533395828450903, "grad_norm": 1.6323974132537842, "learning_rate": 6.2154462325904605e-06, "loss": 0.04507741, "memory(GiB)": 13.7, "step": 48075, "train_speed(iter/s)": 1.533726 }, { "acc": 0.98083334, "epoch": 22.535739395359737, "grad_norm": 5.754765510559082, "learning_rate": 6.214694342181979e-06, "loss": 0.11117159, "memory(GiB)": 13.7, "step": 48080, "train_speed(iter/s)": 1.533719 }, { "acc": 0.98000002, "epoch": 22.53808296226857, "grad_norm": 0.02324046939611435, "learning_rate": 6.213942422588255e-06, "loss": 0.02908243, "memory(GiB)": 13.7, "step": 48085, "train_speed(iter/s)": 1.533719 }, { "acc": 0.98006945, "epoch": 22.54042652917741, "grad_norm": 0.18499360978603363, "learning_rate": 6.21319047382736e-06, "loss": 0.10677271, "memory(GiB)": 13.7, "step": 48090, "train_speed(iter/s)": 1.533713 }, { "acc": 0.98000002, "epoch": 22.542770096086244, "grad_norm": 4.090323448181152, "learning_rate": 6.212438495917374e-06, "loss": 0.04788403, "memory(GiB)": 13.7, "step": 48095, "train_speed(iter/s)": 1.533716 }, { "acc": 0.97202053, "epoch": 22.545113662995078, "grad_norm": 10.426212310791016, "learning_rate": 6.211686488876365e-06, "loss": 0.08243387, "memory(GiB)": 13.7, "step": 48100, "train_speed(iter/s)": 1.533717 }, { "acc": 0.9754261, "epoch": 22.547457229903912, "grad_norm": 5.255057334899902, "learning_rate": 6.210934452722413e-06, "loss": 0.14640354, "memory(GiB)": 13.7, "step": 48105, "train_speed(iter/s)": 1.533721 }, { "acc": 0.98864088, "epoch": 22.54980079681275, "grad_norm": 0.03990241140127182, "learning_rate": 6.210182387473592e-06, "loss": 0.08591093, "memory(GiB)": 13.7, "step": 48110, "train_speed(iter/s)": 1.533721 }, { "acc": 0.98678036, "epoch": 22.552144363721585, "grad_norm": 9.775144577026367, "learning_rate": 6.209430293147981e-06, "loss": 0.07210501, "memory(GiB)": 13.7, "step": 48115, "train_speed(iter/s)": 1.533725 }, { "acc": 0.98604164, "epoch": 22.55448793063042, "grad_norm": 3.4128434658050537, "learning_rate": 6.208678169763656e-06, "loss": 0.04880608, "memory(GiB)": 13.7, "step": 48120, "train_speed(iter/s)": 1.533736 }, { "acc": 0.98728628, "epoch": 22.556831497539253, "grad_norm": 6.127282619476318, "learning_rate": 6.2079260173387e-06, "loss": 0.06925354, "memory(GiB)": 13.7, "step": 48125, "train_speed(iter/s)": 1.533744 }, { "acc": 0.99229164, "epoch": 22.55917506444809, "grad_norm": 0.008600356988608837, "learning_rate": 6.2071738358911885e-06, "loss": 0.02462909, "memory(GiB)": 13.7, "step": 48130, "train_speed(iter/s)": 1.533744 }, { "acc": 0.97781248, "epoch": 22.561518631356925, "grad_norm": 7.452932357788086, "learning_rate": 6.206421625439201e-06, "loss": 0.12591051, "memory(GiB)": 13.7, "step": 48135, "train_speed(iter/s)": 1.533751 }, { "acc": 0.98395834, "epoch": 22.56386219826576, "grad_norm": 0.9574805498123169, "learning_rate": 6.20566938600082e-06, "loss": 0.0698553, "memory(GiB)": 13.7, "step": 48140, "train_speed(iter/s)": 1.533757 }, { "acc": 0.96580048, "epoch": 22.566205765174594, "grad_norm": 7.618346691131592, "learning_rate": 6.204917117594127e-06, "loss": 0.1129263, "memory(GiB)": 13.7, "step": 48145, "train_speed(iter/s)": 1.533759 }, { "acc": 0.97411194, "epoch": 22.568549332083432, "grad_norm": 4.15953254699707, "learning_rate": 6.204164820237203e-06, "loss": 0.09289188, "memory(GiB)": 13.7, "step": 48150, "train_speed(iter/s)": 1.533762 }, { "acc": 0.97777004, "epoch": 22.570892898992266, "grad_norm": 0.8212922811508179, "learning_rate": 6.20341249394813e-06, "loss": 0.0843195, "memory(GiB)": 13.7, "step": 48155, "train_speed(iter/s)": 1.533761 }, { "acc": 0.97351189, "epoch": 22.5732364659011, "grad_norm": 5.538153171539307, "learning_rate": 6.2026601387449925e-06, "loss": 0.16127269, "memory(GiB)": 13.7, "step": 48160, "train_speed(iter/s)": 1.53376 }, { "acc": 0.97218752, "epoch": 22.57558003280994, "grad_norm": 0.3841826915740967, "learning_rate": 6.201907754645875e-06, "loss": 0.09461628, "memory(GiB)": 13.7, "step": 48165, "train_speed(iter/s)": 1.533762 }, { "acc": 0.9714529, "epoch": 22.577923599718773, "grad_norm": 1.0407953262329102, "learning_rate": 6.201155341668863e-06, "loss": 0.12269113, "memory(GiB)": 13.7, "step": 48170, "train_speed(iter/s)": 1.533763 }, { "acc": 0.98129168, "epoch": 22.580267166627607, "grad_norm": 0.05706987529993057, "learning_rate": 6.20040289983204e-06, "loss": 0.10702943, "memory(GiB)": 13.7, "step": 48175, "train_speed(iter/s)": 1.533766 }, { "acc": 0.98744049, "epoch": 22.58261073353644, "grad_norm": 4.233736991882324, "learning_rate": 6.199650429153494e-06, "loss": 0.05233061, "memory(GiB)": 13.7, "step": 48180, "train_speed(iter/s)": 1.533761 }, { "acc": 0.98125, "epoch": 22.58495430044528, "grad_norm": 3.216034412384033, "learning_rate": 6.19889792965131e-06, "loss": 0.04522839, "memory(GiB)": 13.7, "step": 48185, "train_speed(iter/s)": 1.533768 }, { "acc": 0.98508015, "epoch": 22.587297867354113, "grad_norm": 0.08009501546621323, "learning_rate": 6.1981454013435784e-06, "loss": 0.07551749, "memory(GiB)": 13.7, "step": 48190, "train_speed(iter/s)": 1.533776 }, { "acc": 0.98675594, "epoch": 22.589641434262948, "grad_norm": 1.4210385084152222, "learning_rate": 6.197392844248384e-06, "loss": 0.0577117, "memory(GiB)": 13.7, "step": 48195, "train_speed(iter/s)": 1.533784 }, { "acc": 0.9833334, "epoch": 22.591985001171782, "grad_norm": 0.304787278175354, "learning_rate": 6.196640258383819e-06, "loss": 0.0692618, "memory(GiB)": 13.7, "step": 48200, "train_speed(iter/s)": 1.533782 }, { "acc": 0.96361618, "epoch": 22.59432856808062, "grad_norm": 3.5453333854675293, "learning_rate": 6.195887643767973e-06, "loss": 0.13348358, "memory(GiB)": 13.7, "step": 48205, "train_speed(iter/s)": 1.533782 }, { "acc": 0.97076397, "epoch": 22.596672134989454, "grad_norm": 2.9335858821868896, "learning_rate": 6.1951350004189324e-06, "loss": 0.09767742, "memory(GiB)": 13.7, "step": 48210, "train_speed(iter/s)": 1.533781 }, { "acc": 0.984375, "epoch": 22.59901570189829, "grad_norm": 5.004127502441406, "learning_rate": 6.194382328354792e-06, "loss": 0.09327281, "memory(GiB)": 13.7, "step": 48215, "train_speed(iter/s)": 1.533775 }, { "acc": 0.97304068, "epoch": 22.601359268807123, "grad_norm": 7.384366989135742, "learning_rate": 6.193629627593645e-06, "loss": 0.08897718, "memory(GiB)": 13.7, "step": 48220, "train_speed(iter/s)": 1.533777 }, { "acc": 0.98104172, "epoch": 22.60370283571596, "grad_norm": 1.4728573560714722, "learning_rate": 6.192876898153579e-06, "loss": 0.06981148, "memory(GiB)": 13.7, "step": 48225, "train_speed(iter/s)": 1.533784 }, { "acc": 0.97663374, "epoch": 22.606046402624795, "grad_norm": 5.253028392791748, "learning_rate": 6.19212414005269e-06, "loss": 0.05359168, "memory(GiB)": 13.7, "step": 48230, "train_speed(iter/s)": 1.533785 }, { "acc": 0.97753191, "epoch": 22.60838996953363, "grad_norm": 2.4163901805877686, "learning_rate": 6.191371353309073e-06, "loss": 0.09712948, "memory(GiB)": 13.7, "step": 48235, "train_speed(iter/s)": 1.533782 }, { "acc": 0.99237175, "epoch": 22.610733536442467, "grad_norm": 1.8193796873092651, "learning_rate": 6.1906185379408176e-06, "loss": 0.05602303, "memory(GiB)": 13.7, "step": 48240, "train_speed(iter/s)": 1.533787 }, { "acc": 0.98187504, "epoch": 22.6130771033513, "grad_norm": 3.672621011734009, "learning_rate": 6.1898656939660264e-06, "loss": 0.07248375, "memory(GiB)": 13.7, "step": 48245, "train_speed(iter/s)": 1.533802 }, { "acc": 0.97694054, "epoch": 22.615420670260136, "grad_norm": 1.4787596464157104, "learning_rate": 6.18911282140279e-06, "loss": 0.06254992, "memory(GiB)": 13.7, "step": 48250, "train_speed(iter/s)": 1.533806 }, { "acc": 0.97886362, "epoch": 22.61776423716897, "grad_norm": 8.102099418640137, "learning_rate": 6.188359920269203e-06, "loss": 0.11870893, "memory(GiB)": 13.7, "step": 48255, "train_speed(iter/s)": 1.533807 }, { "acc": 0.97909727, "epoch": 22.620107804077808, "grad_norm": 6.898923873901367, "learning_rate": 6.187606990583371e-06, "loss": 0.07311157, "memory(GiB)": 13.7, "step": 48260, "train_speed(iter/s)": 1.533809 }, { "acc": 0.98189487, "epoch": 22.622451370986642, "grad_norm": 2.5516791343688965, "learning_rate": 6.186854032363385e-06, "loss": 0.05912351, "memory(GiB)": 13.7, "step": 48265, "train_speed(iter/s)": 1.533812 }, { "acc": 0.98352947, "epoch": 22.624794937895476, "grad_norm": 3.923452138900757, "learning_rate": 6.186101045627344e-06, "loss": 0.06200954, "memory(GiB)": 13.7, "step": 48270, "train_speed(iter/s)": 1.533809 }, { "acc": 0.98785715, "epoch": 22.62713850480431, "grad_norm": 5.747720241546631, "learning_rate": 6.18534803039335e-06, "loss": 0.06906493, "memory(GiB)": 13.7, "step": 48275, "train_speed(iter/s)": 1.533808 }, { "acc": 0.96372032, "epoch": 22.62948207171315, "grad_norm": 5.705726146697998, "learning_rate": 6.184594986679502e-06, "loss": 0.09130554, "memory(GiB)": 13.7, "step": 48280, "train_speed(iter/s)": 1.533814 }, { "acc": 0.99312496, "epoch": 22.631825638621983, "grad_norm": 1.0432238578796387, "learning_rate": 6.183841914503898e-06, "loss": 0.02112585, "memory(GiB)": 13.7, "step": 48285, "train_speed(iter/s)": 1.533808 }, { "acc": 0.99045324, "epoch": 22.634169205530817, "grad_norm": 0.032948244363069534, "learning_rate": 6.183088813884646e-06, "loss": 0.04170829, "memory(GiB)": 13.7, "step": 48290, "train_speed(iter/s)": 1.533809 }, { "acc": 0.9760417, "epoch": 22.63651277243965, "grad_norm": 3.30731201171875, "learning_rate": 6.182335684839843e-06, "loss": 0.06860075, "memory(GiB)": 13.7, "step": 48295, "train_speed(iter/s)": 1.533813 }, { "acc": 0.99211807, "epoch": 22.63885633934849, "grad_norm": 2.5944831371307373, "learning_rate": 6.181582527387592e-06, "loss": 0.03739304, "memory(GiB)": 13.7, "step": 48300, "train_speed(iter/s)": 1.533817 }, { "acc": 0.97868061, "epoch": 22.641199906257324, "grad_norm": 0.042024437338113785, "learning_rate": 6.180829341545997e-06, "loss": 0.07311081, "memory(GiB)": 13.7, "step": 48305, "train_speed(iter/s)": 1.533825 }, { "acc": 0.98125, "epoch": 22.643543473166158, "grad_norm": 2.3167648315429688, "learning_rate": 6.180076127333163e-06, "loss": 0.05423028, "memory(GiB)": 13.7, "step": 48310, "train_speed(iter/s)": 1.533834 }, { "acc": 0.98529758, "epoch": 22.645887040074996, "grad_norm": 10.431130409240723, "learning_rate": 6.179322884767194e-06, "loss": 0.05633703, "memory(GiB)": 13.7, "step": 48315, "train_speed(iter/s)": 1.533835 }, { "acc": 0.9690279, "epoch": 22.64823060698383, "grad_norm": 5.68934965133667, "learning_rate": 6.178569613866196e-06, "loss": 0.08536187, "memory(GiB)": 13.7, "step": 48320, "train_speed(iter/s)": 1.533835 }, { "acc": 0.97281246, "epoch": 22.650574173892664, "grad_norm": 4.128961563110352, "learning_rate": 6.177816314648274e-06, "loss": 0.09834158, "memory(GiB)": 13.7, "step": 48325, "train_speed(iter/s)": 1.533832 }, { "acc": 0.98145828, "epoch": 22.6529177408015, "grad_norm": 4.415132522583008, "learning_rate": 6.177062987131533e-06, "loss": 0.03436561, "memory(GiB)": 13.7, "step": 48330, "train_speed(iter/s)": 1.533839 }, { "acc": 0.9958333, "epoch": 22.655261307710337, "grad_norm": 3.7478675842285156, "learning_rate": 6.176309631334086e-06, "loss": 0.0303749, "memory(GiB)": 13.7, "step": 48335, "train_speed(iter/s)": 1.533836 }, { "acc": 0.98406754, "epoch": 22.65760487461917, "grad_norm": 0.928066074848175, "learning_rate": 6.175556247274038e-06, "loss": 0.04493741, "memory(GiB)": 13.7, "step": 48340, "train_speed(iter/s)": 1.533842 }, { "acc": 0.98071432, "epoch": 22.659948441528005, "grad_norm": 0.17127956449985504, "learning_rate": 6.1748028349695e-06, "loss": 0.04642284, "memory(GiB)": 13.7, "step": 48345, "train_speed(iter/s)": 1.533839 }, { "acc": 0.9704567, "epoch": 22.66229200843684, "grad_norm": 2.2282087802886963, "learning_rate": 6.174049394438578e-06, "loss": 0.14468093, "memory(GiB)": 13.7, "step": 48350, "train_speed(iter/s)": 1.533846 }, { "acc": 0.98386364, "epoch": 22.664635575345677, "grad_norm": 1.5539603233337402, "learning_rate": 6.173295925699384e-06, "loss": 0.06025672, "memory(GiB)": 13.7, "step": 48355, "train_speed(iter/s)": 1.533844 }, { "acc": 0.98055058, "epoch": 22.66697914225451, "grad_norm": 3.1897146701812744, "learning_rate": 6.1725424287700274e-06, "loss": 0.09833012, "memory(GiB)": 13.7, "step": 48360, "train_speed(iter/s)": 1.533852 }, { "acc": 0.9848959, "epoch": 22.669322709163346, "grad_norm": 3.9962925910949707, "learning_rate": 6.171788903668623e-06, "loss": 0.06854541, "memory(GiB)": 13.7, "step": 48365, "train_speed(iter/s)": 1.533851 }, { "acc": 0.97092257, "epoch": 22.67166627607218, "grad_norm": 4.4222540855407715, "learning_rate": 6.171035350413281e-06, "loss": 0.10841846, "memory(GiB)": 13.7, "step": 48370, "train_speed(iter/s)": 1.533861 }, { "acc": 0.958006, "epoch": 22.674009842981018, "grad_norm": 3.044001817703247, "learning_rate": 6.170281769022114e-06, "loss": 0.09754025, "memory(GiB)": 13.7, "step": 48375, "train_speed(iter/s)": 1.533861 }, { "acc": 0.97914562, "epoch": 22.676353409889852, "grad_norm": 3.1610465049743652, "learning_rate": 6.169528159513237e-06, "loss": 0.09435003, "memory(GiB)": 13.7, "step": 48380, "train_speed(iter/s)": 1.533873 }, { "acc": 0.98145828, "epoch": 22.678696976798687, "grad_norm": 0.9910581111907959, "learning_rate": 6.168774521904764e-06, "loss": 0.04275051, "memory(GiB)": 13.7, "step": 48385, "train_speed(iter/s)": 1.533878 }, { "acc": 0.98441353, "epoch": 22.681040543707525, "grad_norm": 6.263608932495117, "learning_rate": 6.16802085621481e-06, "loss": 0.07398487, "memory(GiB)": 13.7, "step": 48390, "train_speed(iter/s)": 1.533875 }, { "acc": 0.97250004, "epoch": 22.68338411061636, "grad_norm": 7.437841415405273, "learning_rate": 6.16726716246149e-06, "loss": 0.15575584, "memory(GiB)": 13.7, "step": 48395, "train_speed(iter/s)": 1.533887 }, { "acc": 0.9746726, "epoch": 22.685727677525193, "grad_norm": 9.846830368041992, "learning_rate": 6.16651344066292e-06, "loss": 0.09350613, "memory(GiB)": 13.7, "step": 48400, "train_speed(iter/s)": 1.533875 }, { "acc": 0.98152771, "epoch": 22.688071244434028, "grad_norm": 4.832249164581299, "learning_rate": 6.165759690837216e-06, "loss": 0.06939878, "memory(GiB)": 13.7, "step": 48405, "train_speed(iter/s)": 1.533873 }, { "acc": 0.98706226, "epoch": 22.690414811342865, "grad_norm": 2.790851354598999, "learning_rate": 6.1650059130025e-06, "loss": 0.05322753, "memory(GiB)": 13.7, "step": 48410, "train_speed(iter/s)": 1.533879 }, { "acc": 0.99117556, "epoch": 22.6927583782517, "grad_norm": 3.3092286586761475, "learning_rate": 6.164252107176885e-06, "loss": 0.06859804, "memory(GiB)": 13.7, "step": 48415, "train_speed(iter/s)": 1.533883 }, { "acc": 0.97352066, "epoch": 22.695101945160534, "grad_norm": 7.524496555328369, "learning_rate": 6.1634982733784945e-06, "loss": 0.06207139, "memory(GiB)": 13.7, "step": 48420, "train_speed(iter/s)": 1.533885 }, { "acc": 0.98862181, "epoch": 22.69744551206937, "grad_norm": 4.152733325958252, "learning_rate": 6.1627444116254455e-06, "loss": 0.04668548, "memory(GiB)": 13.7, "step": 48425, "train_speed(iter/s)": 1.53388 }, { "acc": 0.97588968, "epoch": 22.699789078978206, "grad_norm": 4.935661315917969, "learning_rate": 6.161990521935859e-06, "loss": 0.0887507, "memory(GiB)": 13.7, "step": 48430, "train_speed(iter/s)": 1.533881 }, { "acc": 0.9848958, "epoch": 22.70213264588704, "grad_norm": 2.2024457454681396, "learning_rate": 6.161236604327856e-06, "loss": 0.06130051, "memory(GiB)": 13.7, "step": 48435, "train_speed(iter/s)": 1.533886 }, { "acc": 0.98431549, "epoch": 22.704476212795875, "grad_norm": 2.711527109146118, "learning_rate": 6.1604826588195596e-06, "loss": 0.06521565, "memory(GiB)": 13.7, "step": 48440, "train_speed(iter/s)": 1.533885 }, { "acc": 0.98916664, "epoch": 22.70681977970471, "grad_norm": 0.9564262628555298, "learning_rate": 6.1597286854290885e-06, "loss": 0.03146562, "memory(GiB)": 13.7, "step": 48445, "train_speed(iter/s)": 1.533887 }, { "acc": 0.984375, "epoch": 22.709163346613547, "grad_norm": 1.3000824451446533, "learning_rate": 6.15897468417457e-06, "loss": 0.05619313, "memory(GiB)": 13.7, "step": 48450, "train_speed(iter/s)": 1.533888 }, { "acc": 0.98562498, "epoch": 22.71150691352238, "grad_norm": 2.8501152992248535, "learning_rate": 6.158220655074126e-06, "loss": 0.06549388, "memory(GiB)": 13.7, "step": 48455, "train_speed(iter/s)": 1.533894 }, { "acc": 0.99125004, "epoch": 22.713850480431216, "grad_norm": 2.7885453701019287, "learning_rate": 6.1574665981458795e-06, "loss": 0.02700629, "memory(GiB)": 13.7, "step": 48460, "train_speed(iter/s)": 1.533894 }, { "acc": 0.98407736, "epoch": 22.71619404734005, "grad_norm": 5.29310941696167, "learning_rate": 6.156712513407959e-06, "loss": 0.04337985, "memory(GiB)": 13.7, "step": 48465, "train_speed(iter/s)": 1.533898 }, { "acc": 0.98676472, "epoch": 22.718537614248888, "grad_norm": 4.208798408508301, "learning_rate": 6.1559584008784866e-06, "loss": 0.067397, "memory(GiB)": 13.7, "step": 48470, "train_speed(iter/s)": 1.533902 }, { "acc": 0.9765564, "epoch": 22.720881181157722, "grad_norm": 2.706545829772949, "learning_rate": 6.15520426057559e-06, "loss": 0.10204036, "memory(GiB)": 13.7, "step": 48475, "train_speed(iter/s)": 1.533902 }, { "acc": 0.9854166, "epoch": 22.723224748066556, "grad_norm": 3.9463727474212646, "learning_rate": 6.154450092517398e-06, "loss": 0.04669956, "memory(GiB)": 13.7, "step": 48480, "train_speed(iter/s)": 1.533901 }, { "acc": 0.9894886, "epoch": 22.725568314975394, "grad_norm": 4.822598457336426, "learning_rate": 6.153695896722036e-06, "loss": 0.07163035, "memory(GiB)": 13.7, "step": 48485, "train_speed(iter/s)": 1.533904 }, { "acc": 0.99511366, "epoch": 22.72791188188423, "grad_norm": 1.4724698066711426, "learning_rate": 6.152941673207633e-06, "loss": 0.0184512, "memory(GiB)": 13.7, "step": 48490, "train_speed(iter/s)": 1.533903 }, { "acc": 0.97468748, "epoch": 22.730255448793063, "grad_norm": 2.7814981937408447, "learning_rate": 6.152187421992319e-06, "loss": 0.09845232, "memory(GiB)": 13.7, "step": 48495, "train_speed(iter/s)": 1.533912 }, { "acc": 0.98929501, "epoch": 22.732599015701897, "grad_norm": 3.5156519412994385, "learning_rate": 6.151433143094222e-06, "loss": 0.05076762, "memory(GiB)": 13.7, "step": 48500, "train_speed(iter/s)": 1.53391 }, { "acc": 0.96899805, "epoch": 22.734942582610735, "grad_norm": 6.456539154052734, "learning_rate": 6.150678836531473e-06, "loss": 0.09145167, "memory(GiB)": 13.7, "step": 48505, "train_speed(iter/s)": 1.533912 }, { "acc": 0.98833332, "epoch": 22.73728614951957, "grad_norm": 4.560024738311768, "learning_rate": 6.149924502322203e-06, "loss": 0.0508592, "memory(GiB)": 13.7, "step": 48510, "train_speed(iter/s)": 1.533915 }, { "acc": 0.9822916, "epoch": 22.739629716428404, "grad_norm": 4.939488410949707, "learning_rate": 6.149170140484546e-06, "loss": 0.05121942, "memory(GiB)": 13.7, "step": 48515, "train_speed(iter/s)": 1.533921 }, { "acc": 0.99403839, "epoch": 22.741973283337238, "grad_norm": 0.3770681917667389, "learning_rate": 6.148415751036633e-06, "loss": 0.0210107, "memory(GiB)": 13.7, "step": 48520, "train_speed(iter/s)": 1.533923 }, { "acc": 0.98842258, "epoch": 22.744316850246076, "grad_norm": 0.9464574456214905, "learning_rate": 6.147661333996595e-06, "loss": 0.03009626, "memory(GiB)": 13.7, "step": 48525, "train_speed(iter/s)": 1.533933 }, { "acc": 0.98916664, "epoch": 22.74666041715491, "grad_norm": 5.69285774230957, "learning_rate": 6.146906889382567e-06, "loss": 0.05835047, "memory(GiB)": 13.7, "step": 48530, "train_speed(iter/s)": 1.53394 }, { "acc": 0.98157816, "epoch": 22.749003984063744, "grad_norm": 13.522293090820312, "learning_rate": 6.1461524172126826e-06, "loss": 0.07607574, "memory(GiB)": 13.7, "step": 48535, "train_speed(iter/s)": 1.533952 }, { "acc": 0.9788393, "epoch": 22.75134755097258, "grad_norm": 3.392648220062256, "learning_rate": 6.14539791750508e-06, "loss": 0.12494409, "memory(GiB)": 13.7, "step": 48540, "train_speed(iter/s)": 1.533949 }, { "acc": 0.98683519, "epoch": 22.753691117881417, "grad_norm": 3.0316781997680664, "learning_rate": 6.144643390277892e-06, "loss": 0.04145751, "memory(GiB)": 13.7, "step": 48545, "train_speed(iter/s)": 1.533955 }, { "acc": 0.99340277, "epoch": 22.75603468479025, "grad_norm": 3.741579055786133, "learning_rate": 6.143888835549255e-06, "loss": 0.0388335, "memory(GiB)": 13.7, "step": 48550, "train_speed(iter/s)": 1.533956 }, { "acc": 0.98592262, "epoch": 22.758378251699085, "grad_norm": 0.5242931246757507, "learning_rate": 6.143134253337308e-06, "loss": 0.06889409, "memory(GiB)": 13.7, "step": 48555, "train_speed(iter/s)": 1.533958 }, { "acc": 0.98779755, "epoch": 22.76072181860792, "grad_norm": 3.66615629196167, "learning_rate": 6.142379643660188e-06, "loss": 0.03343278, "memory(GiB)": 13.7, "step": 48560, "train_speed(iter/s)": 1.533955 }, { "acc": 0.9780303, "epoch": 22.763065385516757, "grad_norm": 2.206186532974243, "learning_rate": 6.1416250065360305e-06, "loss": 0.10517944, "memory(GiB)": 13.7, "step": 48565, "train_speed(iter/s)": 1.533953 }, { "acc": 0.98544645, "epoch": 22.76540895242559, "grad_norm": 0.6882966160774231, "learning_rate": 6.140870341982977e-06, "loss": 0.05159996, "memory(GiB)": 13.7, "step": 48570, "train_speed(iter/s)": 1.533953 }, { "acc": 0.98340282, "epoch": 22.767752519334426, "grad_norm": 2.718127489089966, "learning_rate": 6.14011565001917e-06, "loss": 0.06323619, "memory(GiB)": 13.7, "step": 48575, "train_speed(iter/s)": 1.533954 }, { "acc": 0.99702587, "epoch": 22.770096086243264, "grad_norm": 0.005686737596988678, "learning_rate": 6.1393609306627435e-06, "loss": 0.03200508, "memory(GiB)": 13.7, "step": 48580, "train_speed(iter/s)": 1.533957 }, { "acc": 0.99187508, "epoch": 22.772439653152098, "grad_norm": 0.46922221779823303, "learning_rate": 6.138606183931842e-06, "loss": 0.01748937, "memory(GiB)": 13.7, "step": 48585, "train_speed(iter/s)": 1.533969 }, { "acc": 0.98520832, "epoch": 22.774783220060932, "grad_norm": 1.0696152448654175, "learning_rate": 6.1378514098446066e-06, "loss": 0.06885806, "memory(GiB)": 13.7, "step": 48590, "train_speed(iter/s)": 1.533983 }, { "acc": 0.996875, "epoch": 22.777126786969767, "grad_norm": 0.6084509491920471, "learning_rate": 6.137096608419182e-06, "loss": 0.0144539, "memory(GiB)": 13.7, "step": 48595, "train_speed(iter/s)": 1.533984 }, { "acc": 0.98763885, "epoch": 22.779470353878605, "grad_norm": 3.9315028190612793, "learning_rate": 6.136341779673708e-06, "loss": 0.0283556, "memory(GiB)": 13.7, "step": 48600, "train_speed(iter/s)": 1.533982 }, { "acc": 0.98715944, "epoch": 22.78181392078744, "grad_norm": 8.556495666503906, "learning_rate": 6.135586923626329e-06, "loss": 0.07470585, "memory(GiB)": 13.7, "step": 48605, "train_speed(iter/s)": 1.533985 }, { "acc": 0.99571428, "epoch": 22.784157487696273, "grad_norm": 1.9216278791427612, "learning_rate": 6.134832040295188e-06, "loss": 0.02143475, "memory(GiB)": 13.7, "step": 48610, "train_speed(iter/s)": 1.533993 }, { "acc": 0.97520828, "epoch": 22.786501054605107, "grad_norm": 2.624762773513794, "learning_rate": 6.134077129698433e-06, "loss": 0.12196674, "memory(GiB)": 13.7, "step": 48615, "train_speed(iter/s)": 1.534 }, { "acc": 0.97621298, "epoch": 22.788844621513945, "grad_norm": 3.3324153423309326, "learning_rate": 6.133322191854206e-06, "loss": 0.06049433, "memory(GiB)": 13.7, "step": 48620, "train_speed(iter/s)": 1.534006 }, { "acc": 0.97875004, "epoch": 22.79118818842278, "grad_norm": 9.437943458557129, "learning_rate": 6.132567226780656e-06, "loss": 0.06312081, "memory(GiB)": 13.7, "step": 48625, "train_speed(iter/s)": 1.534003 }, { "acc": 0.97802086, "epoch": 22.793531755331614, "grad_norm": 1.213590145111084, "learning_rate": 6.131812234495931e-06, "loss": 0.06584411, "memory(GiB)": 13.7, "step": 48630, "train_speed(iter/s)": 1.534003 }, { "acc": 0.99167728, "epoch": 22.795875322240448, "grad_norm": 5.007377624511719, "learning_rate": 6.131057215018174e-06, "loss": 0.0341819, "memory(GiB)": 13.7, "step": 48635, "train_speed(iter/s)": 1.53401 }, { "acc": 0.9833334, "epoch": 22.798218889149286, "grad_norm": 1.1435531377792358, "learning_rate": 6.130302168365537e-06, "loss": 0.05849091, "memory(GiB)": 13.7, "step": 48640, "train_speed(iter/s)": 1.534004 }, { "acc": 0.97770824, "epoch": 22.80056245605812, "grad_norm": 3.5320212841033936, "learning_rate": 6.129547094556167e-06, "loss": 0.05530846, "memory(GiB)": 13.7, "step": 48645, "train_speed(iter/s)": 1.534009 }, { "acc": 0.98539562, "epoch": 22.802906022966955, "grad_norm": 6.6924028396606445, "learning_rate": 6.128791993608214e-06, "loss": 0.0681603, "memory(GiB)": 13.7, "step": 48650, "train_speed(iter/s)": 1.534009 }, { "acc": 0.98488102, "epoch": 22.805249589875793, "grad_norm": 3.200178384780884, "learning_rate": 6.128036865539828e-06, "loss": 0.0487069, "memory(GiB)": 13.7, "step": 48655, "train_speed(iter/s)": 1.534017 }, { "acc": 0.97927084, "epoch": 22.807593156784627, "grad_norm": 3.4063847064971924, "learning_rate": 6.127281710369162e-06, "loss": 0.06207017, "memory(GiB)": 13.7, "step": 48660, "train_speed(iter/s)": 1.534018 }, { "acc": 0.98258934, "epoch": 22.80993672369346, "grad_norm": 6.467215538024902, "learning_rate": 6.126526528114362e-06, "loss": 0.04973081, "memory(GiB)": 13.7, "step": 48665, "train_speed(iter/s)": 1.534018 }, { "acc": 0.98401785, "epoch": 22.812280290602295, "grad_norm": 3.6611855030059814, "learning_rate": 6.125771318793586e-06, "loss": 0.04919195, "memory(GiB)": 13.7, "step": 48670, "train_speed(iter/s)": 1.53402 }, { "acc": 0.97530022, "epoch": 22.814623857511133, "grad_norm": 6.530482292175293, "learning_rate": 6.125016082424984e-06, "loss": 0.12067416, "memory(GiB)": 13.7, "step": 48675, "train_speed(iter/s)": 1.534029 }, { "acc": 0.98736115, "epoch": 22.816967424419968, "grad_norm": 1.4488130807876587, "learning_rate": 6.1242608190267114e-06, "loss": 0.05416067, "memory(GiB)": 13.7, "step": 48680, "train_speed(iter/s)": 1.534034 }, { "acc": 0.98781252, "epoch": 22.819310991328802, "grad_norm": 43.64523696899414, "learning_rate": 6.1235055286169174e-06, "loss": 0.03778247, "memory(GiB)": 13.7, "step": 48685, "train_speed(iter/s)": 1.534039 }, { "acc": 0.98955135, "epoch": 22.821654558237636, "grad_norm": 2.041339874267578, "learning_rate": 6.1227502112137614e-06, "loss": 0.05397136, "memory(GiB)": 13.7, "step": 48690, "train_speed(iter/s)": 1.53405 }, { "acc": 0.96304569, "epoch": 22.823998125146474, "grad_norm": 6.462604999542236, "learning_rate": 6.121994866835395e-06, "loss": 0.09223705, "memory(GiB)": 13.7, "step": 48695, "train_speed(iter/s)": 1.534052 }, { "acc": 0.98362179, "epoch": 22.82634169205531, "grad_norm": 5.626167297363281, "learning_rate": 6.121239495499979e-06, "loss": 0.07234426, "memory(GiB)": 13.7, "step": 48700, "train_speed(iter/s)": 1.534053 }, { "acc": 0.98291664, "epoch": 22.828685258964143, "grad_norm": 0.020845230668783188, "learning_rate": 6.120484097225668e-06, "loss": 0.04659103, "memory(GiB)": 13.7, "step": 48705, "train_speed(iter/s)": 1.534061 }, { "acc": 0.97426615, "epoch": 22.831028825872977, "grad_norm": 3.344413995742798, "learning_rate": 6.119728672030616e-06, "loss": 0.11332794, "memory(GiB)": 13.7, "step": 48710, "train_speed(iter/s)": 1.534063 }, { "acc": 0.99197922, "epoch": 22.833372392781815, "grad_norm": 1.988366961479187, "learning_rate": 6.118973219932985e-06, "loss": 0.02666543, "memory(GiB)": 13.7, "step": 48715, "train_speed(iter/s)": 1.534068 }, { "acc": 0.9649107, "epoch": 22.83571595969065, "grad_norm": 0.3728489577770233, "learning_rate": 6.118217740950934e-06, "loss": 0.15216222, "memory(GiB)": 13.7, "step": 48720, "train_speed(iter/s)": 1.534072 }, { "acc": 0.98467255, "epoch": 22.838059526599483, "grad_norm": 1.0725865364074707, "learning_rate": 6.1174622351026195e-06, "loss": 0.04955727, "memory(GiB)": 13.7, "step": 48725, "train_speed(iter/s)": 1.534075 }, { "acc": 0.96071434, "epoch": 22.84040309350832, "grad_norm": 7.50118350982666, "learning_rate": 6.116706702406201e-06, "loss": 0.09477735, "memory(GiB)": 13.7, "step": 48730, "train_speed(iter/s)": 1.534082 }, { "acc": 0.97101765, "epoch": 22.842746660417156, "grad_norm": 0.03758137300610542, "learning_rate": 6.1159511428798415e-06, "loss": 0.06243213, "memory(GiB)": 13.7, "step": 48735, "train_speed(iter/s)": 1.534092 }, { "acc": 0.96613092, "epoch": 22.84509022732599, "grad_norm": 32.61577224731445, "learning_rate": 6.115195556541699e-06, "loss": 0.13979864, "memory(GiB)": 13.7, "step": 48740, "train_speed(iter/s)": 1.534099 }, { "acc": 0.97791672, "epoch": 22.847433794234824, "grad_norm": 7.0167927742004395, "learning_rate": 6.1144399434099385e-06, "loss": 0.07069064, "memory(GiB)": 13.7, "step": 48745, "train_speed(iter/s)": 1.534105 }, { "acc": 0.98197918, "epoch": 22.849777361143662, "grad_norm": 2.9111902713775635, "learning_rate": 6.1136843035027225e-06, "loss": 0.04217055, "memory(GiB)": 13.7, "step": 48750, "train_speed(iter/s)": 1.534104 }, { "acc": 0.97726765, "epoch": 22.852120928052496, "grad_norm": 1.195932149887085, "learning_rate": 6.1129286368382124e-06, "loss": 0.05130181, "memory(GiB)": 13.7, "step": 48755, "train_speed(iter/s)": 1.534107 }, { "acc": 0.99437504, "epoch": 22.85446449496133, "grad_norm": 3.651010036468506, "learning_rate": 6.112172943434573e-06, "loss": 0.01423631, "memory(GiB)": 13.7, "step": 48760, "train_speed(iter/s)": 1.534109 }, { "acc": 0.9804018, "epoch": 22.856808061870165, "grad_norm": 2.6434309482574463, "learning_rate": 6.111417223309969e-06, "loss": 0.06446615, "memory(GiB)": 13.7, "step": 48765, "train_speed(iter/s)": 1.534117 }, { "acc": 0.98022728, "epoch": 22.859151628779003, "grad_norm": 2.885413646697998, "learning_rate": 6.1106614764825625e-06, "loss": 0.06211655, "memory(GiB)": 13.7, "step": 48770, "train_speed(iter/s)": 1.534123 }, { "acc": 0.96468754, "epoch": 22.861495195687837, "grad_norm": 2.78429913520813, "learning_rate": 6.109905702970521e-06, "loss": 0.07058558, "memory(GiB)": 13.7, "step": 48775, "train_speed(iter/s)": 1.534131 }, { "acc": 0.984375, "epoch": 22.86383876259667, "grad_norm": 6.239201068878174, "learning_rate": 6.1091499027920136e-06, "loss": 0.0481514, "memory(GiB)": 13.7, "step": 48780, "train_speed(iter/s)": 1.534128 }, { "acc": 0.9888195, "epoch": 22.866182329505506, "grad_norm": 6.115329742431641, "learning_rate": 6.108394075965201e-06, "loss": 0.05650775, "memory(GiB)": 13.7, "step": 48785, "train_speed(iter/s)": 1.53413 }, { "acc": 0.97696428, "epoch": 22.868525896414344, "grad_norm": 11.686731338500977, "learning_rate": 6.107638222508258e-06, "loss": 0.07590575, "memory(GiB)": 13.7, "step": 48790, "train_speed(iter/s)": 1.534141 }, { "acc": 0.98946428, "epoch": 22.870869463323178, "grad_norm": 1.8681349754333496, "learning_rate": 6.106882342439346e-06, "loss": 0.03893637, "memory(GiB)": 13.7, "step": 48795, "train_speed(iter/s)": 1.534148 }, { "acc": 0.97909718, "epoch": 22.873213030232012, "grad_norm": 3.202070951461792, "learning_rate": 6.10612643577664e-06, "loss": 0.08667553, "memory(GiB)": 13.7, "step": 48800, "train_speed(iter/s)": 1.534156 }, { "acc": 0.9831399, "epoch": 22.87555659714085, "grad_norm": 0.7585034966468811, "learning_rate": 6.105370502538307e-06, "loss": 0.06639737, "memory(GiB)": 13.7, "step": 48805, "train_speed(iter/s)": 1.534157 }, { "acc": 0.99508934, "epoch": 22.877900164049684, "grad_norm": 2.7759153842926025, "learning_rate": 6.104614542742516e-06, "loss": 0.0287676, "memory(GiB)": 13.7, "step": 48810, "train_speed(iter/s)": 1.53416 }, { "acc": 0.96150303, "epoch": 22.88024373095852, "grad_norm": 5.262535572052002, "learning_rate": 6.103858556407439e-06, "loss": 0.10361128, "memory(GiB)": 13.7, "step": 48815, "train_speed(iter/s)": 1.534167 }, { "acc": 0.9926136, "epoch": 22.882587297867353, "grad_norm": 4.48681116104126, "learning_rate": 6.103102543551247e-06, "loss": 0.0475583, "memory(GiB)": 13.7, "step": 48820, "train_speed(iter/s)": 1.534166 }, { "acc": 0.99079857, "epoch": 22.88493086477619, "grad_norm": 0.41916656494140625, "learning_rate": 6.1023465041921114e-06, "loss": 0.046349, "memory(GiB)": 13.7, "step": 48825, "train_speed(iter/s)": 1.534161 }, { "acc": 0.98101759, "epoch": 22.887274431685025, "grad_norm": 5.069061279296875, "learning_rate": 6.101590438348206e-06, "loss": 0.06489873, "memory(GiB)": 13.7, "step": 48830, "train_speed(iter/s)": 1.534171 }, { "acc": 0.9828125, "epoch": 22.88961799859386, "grad_norm": 19.899181365966797, "learning_rate": 6.100834346037704e-06, "loss": 0.06641537, "memory(GiB)": 13.7, "step": 48835, "train_speed(iter/s)": 1.534175 }, { "acc": 0.98390865, "epoch": 22.891961565502694, "grad_norm": 1.8272995948791504, "learning_rate": 6.10007822727878e-06, "loss": 0.05241271, "memory(GiB)": 13.7, "step": 48840, "train_speed(iter/s)": 1.534166 }, { "acc": 0.99087791, "epoch": 22.89430513241153, "grad_norm": 0.01681077666580677, "learning_rate": 6.0993220820896055e-06, "loss": 0.05132298, "memory(GiB)": 13.7, "step": 48845, "train_speed(iter/s)": 1.534167 }, { "acc": 0.99082794, "epoch": 22.896648699320366, "grad_norm": 3.649036407470703, "learning_rate": 6.098565910488361e-06, "loss": 0.04164436, "memory(GiB)": 13.7, "step": 48850, "train_speed(iter/s)": 1.534164 }, { "acc": 0.97461948, "epoch": 22.8989922662292, "grad_norm": 5.112249851226807, "learning_rate": 6.097809712493219e-06, "loss": 0.08816904, "memory(GiB)": 13.7, "step": 48855, "train_speed(iter/s)": 1.534169 }, { "acc": 0.97092266, "epoch": 22.901335833138035, "grad_norm": 2.1563329696655273, "learning_rate": 6.097053488122354e-06, "loss": 0.1129941, "memory(GiB)": 13.7, "step": 48860, "train_speed(iter/s)": 1.534176 }, { "acc": 0.98842268, "epoch": 22.903679400046872, "grad_norm": 2.37294340133667, "learning_rate": 6.096297237393949e-06, "loss": 0.03547561, "memory(GiB)": 13.7, "step": 48865, "train_speed(iter/s)": 1.53418 }, { "acc": 0.98111115, "epoch": 22.906022966955707, "grad_norm": 0.003022474469617009, "learning_rate": 6.095540960326175e-06, "loss": 0.0868257, "memory(GiB)": 13.7, "step": 48870, "train_speed(iter/s)": 1.534186 }, { "acc": 0.98623514, "epoch": 22.90836653386454, "grad_norm": 5.473296165466309, "learning_rate": 6.094784656937216e-06, "loss": 0.04262894, "memory(GiB)": 13.7, "step": 48875, "train_speed(iter/s)": 1.534197 }, { "acc": 0.98571434, "epoch": 22.91071010077338, "grad_norm": 4.710103511810303, "learning_rate": 6.094028327245247e-06, "loss": 0.05143528, "memory(GiB)": 13.7, "step": 48880, "train_speed(iter/s)": 1.534194 }, { "acc": 0.97788696, "epoch": 22.913053667682213, "grad_norm": 4.9034223556518555, "learning_rate": 6.093271971268451e-06, "loss": 0.06188838, "memory(GiB)": 13.7, "step": 48885, "train_speed(iter/s)": 1.534203 }, { "acc": 0.98760309, "epoch": 22.915397234591047, "grad_norm": 4.176013946533203, "learning_rate": 6.092515589025008e-06, "loss": 0.05999079, "memory(GiB)": 13.7, "step": 48890, "train_speed(iter/s)": 1.534196 }, { "acc": 0.97133932, "epoch": 22.917740801499882, "grad_norm": 3.6238110065460205, "learning_rate": 6.091759180533097e-06, "loss": 0.08412308, "memory(GiB)": 13.7, "step": 48895, "train_speed(iter/s)": 1.534205 }, { "acc": 0.99875002, "epoch": 22.92008436840872, "grad_norm": 1.2082195281982422, "learning_rate": 6.091002745810899e-06, "loss": 0.02130113, "memory(GiB)": 13.7, "step": 48900, "train_speed(iter/s)": 1.534201 }, { "acc": 0.98738098, "epoch": 22.922427935317554, "grad_norm": 3.601135730743408, "learning_rate": 6.090246284876597e-06, "loss": 0.08215479, "memory(GiB)": 13.7, "step": 48905, "train_speed(iter/s)": 1.534199 }, { "acc": 0.99004421, "epoch": 22.92477150222639, "grad_norm": 3.221125364303589, "learning_rate": 6.0894897977483765e-06, "loss": 0.05476837, "memory(GiB)": 13.7, "step": 48910, "train_speed(iter/s)": 1.534196 }, { "acc": 0.98392448, "epoch": 22.927115069135223, "grad_norm": 2.5519156455993652, "learning_rate": 6.088733284444417e-06, "loss": 0.11708884, "memory(GiB)": 13.7, "step": 48915, "train_speed(iter/s)": 1.534203 }, { "acc": 0.98175602, "epoch": 22.92945863604406, "grad_norm": 1.3746618032455444, "learning_rate": 6.087976744982907e-06, "loss": 0.09524797, "memory(GiB)": 13.7, "step": 48920, "train_speed(iter/s)": 1.534212 }, { "acc": 0.9953125, "epoch": 22.931802202952895, "grad_norm": 2.9505455493927, "learning_rate": 6.087220179382027e-06, "loss": 0.0166209, "memory(GiB)": 13.7, "step": 48925, "train_speed(iter/s)": 1.534219 }, { "acc": 0.98880215, "epoch": 22.93414576986173, "grad_norm": 3.70202374458313, "learning_rate": 6.086463587659963e-06, "loss": 0.02638794, "memory(GiB)": 13.7, "step": 48930, "train_speed(iter/s)": 1.534214 }, { "acc": 0.99083338, "epoch": 22.936489336770563, "grad_norm": 2.982593536376953, "learning_rate": 6.0857069698349045e-06, "loss": 0.03578897, "memory(GiB)": 13.7, "step": 48935, "train_speed(iter/s)": 1.534211 }, { "acc": 0.99113092, "epoch": 22.9388329036794, "grad_norm": 3.1493284702301025, "learning_rate": 6.084950325925034e-06, "loss": 0.03770083, "memory(GiB)": 13.7, "step": 48940, "train_speed(iter/s)": 1.534214 }, { "acc": 0.98934526, "epoch": 22.941176470588236, "grad_norm": 4.215174198150635, "learning_rate": 6.08419365594854e-06, "loss": 0.05424, "memory(GiB)": 13.7, "step": 48945, "train_speed(iter/s)": 1.534219 }, { "acc": 0.98972225, "epoch": 22.94352003749707, "grad_norm": 0.027356738224625587, "learning_rate": 6.083436959923612e-06, "loss": 0.05303069, "memory(GiB)": 13.7, "step": 48950, "train_speed(iter/s)": 1.534221 }, { "acc": 0.96379461, "epoch": 22.945863604405904, "grad_norm": 4.407015323638916, "learning_rate": 6.082680237868437e-06, "loss": 0.11630044, "memory(GiB)": 13.7, "step": 48955, "train_speed(iter/s)": 1.534221 }, { "acc": 0.98416662, "epoch": 22.948207171314742, "grad_norm": 6.088019847869873, "learning_rate": 6.0819234898012035e-06, "loss": 0.04827274, "memory(GiB)": 13.7, "step": 48960, "train_speed(iter/s)": 1.534217 }, { "acc": 0.98946428, "epoch": 22.950550738223576, "grad_norm": 3.3653409481048584, "learning_rate": 6.0811667157401035e-06, "loss": 0.03935157, "memory(GiB)": 13.7, "step": 48965, "train_speed(iter/s)": 1.534223 }, { "acc": 0.98370037, "epoch": 22.95289430513241, "grad_norm": 11.067873001098633, "learning_rate": 6.0804099157033236e-06, "loss": 0.0851197, "memory(GiB)": 13.7, "step": 48970, "train_speed(iter/s)": 1.534222 }, { "acc": 0.98567543, "epoch": 22.95523787204125, "grad_norm": 2.9914872646331787, "learning_rate": 6.079653089709059e-06, "loss": 0.04683372, "memory(GiB)": 13.7, "step": 48975, "train_speed(iter/s)": 1.534224 }, { "acc": 0.98145828, "epoch": 22.957581438950083, "grad_norm": 5.351784706115723, "learning_rate": 6.0788962377755e-06, "loss": 0.05331545, "memory(GiB)": 13.7, "step": 48980, "train_speed(iter/s)": 1.534221 }, { "acc": 0.98425598, "epoch": 22.959925005858917, "grad_norm": 1.5245907306671143, "learning_rate": 6.078139359920837e-06, "loss": 0.05771825, "memory(GiB)": 13.7, "step": 48985, "train_speed(iter/s)": 1.534216 }, { "acc": 0.97354164, "epoch": 22.96226857276775, "grad_norm": 3.3822782039642334, "learning_rate": 6.077382456163263e-06, "loss": 0.08741242, "memory(GiB)": 13.7, "step": 48990, "train_speed(iter/s)": 1.534224 }, { "acc": 0.97833338, "epoch": 22.96461213967659, "grad_norm": 0.1763472557067871, "learning_rate": 6.076625526520975e-06, "loss": 0.0780244, "memory(GiB)": 13.7, "step": 48995, "train_speed(iter/s)": 1.534229 }, { "acc": 0.99175596, "epoch": 22.966955706585424, "grad_norm": 2.4777612686157227, "learning_rate": 6.0758685710121636e-06, "loss": 0.03514734, "memory(GiB)": 13.7, "step": 49000, "train_speed(iter/s)": 1.534236 }, { "acc": 0.99375, "epoch": 22.969299273494258, "grad_norm": 4.291346549987793, "learning_rate": 6.075111589655024e-06, "loss": 0.03334411, "memory(GiB)": 13.7, "step": 49005, "train_speed(iter/s)": 1.534242 }, { "acc": 0.98475275, "epoch": 22.971642840403092, "grad_norm": 3.593181848526001, "learning_rate": 6.0743545824677515e-06, "loss": 0.07260647, "memory(GiB)": 13.7, "step": 49010, "train_speed(iter/s)": 1.534248 }, { "acc": 0.978125, "epoch": 22.97398640731193, "grad_norm": 2.472687244415283, "learning_rate": 6.0735975494685465e-06, "loss": 0.05172786, "memory(GiB)": 13.7, "step": 49015, "train_speed(iter/s)": 1.534251 }, { "acc": 0.98670635, "epoch": 22.976329974220764, "grad_norm": 3.267942428588867, "learning_rate": 6.0728404906755975e-06, "loss": 0.06382844, "memory(GiB)": 13.7, "step": 49020, "train_speed(iter/s)": 1.534253 }, { "acc": 0.98839283, "epoch": 22.9786735411296, "grad_norm": 0.547162652015686, "learning_rate": 6.072083406107109e-06, "loss": 0.03609797, "memory(GiB)": 13.7, "step": 49025, "train_speed(iter/s)": 1.534261 }, { "acc": 0.98924675, "epoch": 22.981017108038433, "grad_norm": 1.6723318099975586, "learning_rate": 6.0713262957812735e-06, "loss": 0.04064697, "memory(GiB)": 13.7, "step": 49030, "train_speed(iter/s)": 1.534267 }, { "acc": 0.97997026, "epoch": 22.98336067494727, "grad_norm": 4.0265960693359375, "learning_rate": 6.070569159716293e-06, "loss": 0.09437711, "memory(GiB)": 13.7, "step": 49035, "train_speed(iter/s)": 1.534277 }, { "acc": 0.97474279, "epoch": 22.985704241856105, "grad_norm": 2.8427364826202393, "learning_rate": 6.069811997930365e-06, "loss": 0.14587487, "memory(GiB)": 13.7, "step": 49040, "train_speed(iter/s)": 1.53428 }, { "acc": 0.99125004, "epoch": 22.98804780876494, "grad_norm": 10.35814094543457, "learning_rate": 6.069054810441689e-06, "loss": 0.03732945, "memory(GiB)": 13.7, "step": 49045, "train_speed(iter/s)": 1.534278 }, { "acc": 0.97937498, "epoch": 22.990391375673774, "grad_norm": 5.331010341644287, "learning_rate": 6.068297597268467e-06, "loss": 0.03629689, "memory(GiB)": 13.7, "step": 49050, "train_speed(iter/s)": 1.534284 }, { "acc": 0.97892818, "epoch": 22.99273494258261, "grad_norm": 3.1632864475250244, "learning_rate": 6.0675403584288985e-06, "loss": 0.10369246, "memory(GiB)": 13.7, "step": 49055, "train_speed(iter/s)": 1.534289 }, { "acc": 0.98423615, "epoch": 22.995078509491446, "grad_norm": 2.4910757541656494, "learning_rate": 6.066783093941186e-06, "loss": 0.05051108, "memory(GiB)": 13.7, "step": 49060, "train_speed(iter/s)": 1.534297 }, { "acc": 0.98035717, "epoch": 22.99742207640028, "grad_norm": 0.055595703423023224, "learning_rate": 6.06602580382353e-06, "loss": 0.08511316, "memory(GiB)": 13.7, "step": 49065, "train_speed(iter/s)": 1.534301 }, { "acc": 0.98113098, "epoch": 22.999765643309118, "grad_norm": 209.7084197998047, "learning_rate": 6.065268488094135e-06, "loss": 0.06892704, "memory(GiB)": 13.7, "step": 49070, "train_speed(iter/s)": 1.534303 }, { "acc": 0.98311005, "epoch": 23.002109210217952, "grad_norm": 5.1170220375061035, "learning_rate": 6.064511146771203e-06, "loss": 0.07731268, "memory(GiB)": 13.7, "step": 49075, "train_speed(iter/s)": 1.534276 }, { "acc": 0.96833324, "epoch": 23.004452777126787, "grad_norm": 1.7179831266403198, "learning_rate": 6.063753779872937e-06, "loss": 0.05557207, "memory(GiB)": 13.7, "step": 49080, "train_speed(iter/s)": 1.534284 }, { "acc": 0.98395834, "epoch": 23.00679634403562, "grad_norm": 4.94275426864624, "learning_rate": 6.062996387417545e-06, "loss": 0.05529178, "memory(GiB)": 13.7, "step": 49085, "train_speed(iter/s)": 1.534281 }, { "acc": 0.98708334, "epoch": 23.00913991094446, "grad_norm": 3.830582618713379, "learning_rate": 6.062238969423229e-06, "loss": 0.02935737, "memory(GiB)": 13.7, "step": 49090, "train_speed(iter/s)": 1.534286 }, { "acc": 0.99750004, "epoch": 23.011483477853293, "grad_norm": 2.4611902236938477, "learning_rate": 6.061481525908198e-06, "loss": 0.0186275, "memory(GiB)": 13.7, "step": 49095, "train_speed(iter/s)": 1.534293 }, { "acc": 0.96967258, "epoch": 23.013827044762127, "grad_norm": 2.1014649868011475, "learning_rate": 6.060724056890656e-06, "loss": 0.13238264, "memory(GiB)": 13.7, "step": 49100, "train_speed(iter/s)": 1.534298 }, { "acc": 0.99395828, "epoch": 23.01617061167096, "grad_norm": 3.0817625522613525, "learning_rate": 6.059966562388812e-06, "loss": 0.05576981, "memory(GiB)": 13.7, "step": 49105, "train_speed(iter/s)": 1.534298 }, { "acc": 0.97351189, "epoch": 23.0185141785798, "grad_norm": 2.7799887657165527, "learning_rate": 6.05920904242087e-06, "loss": 0.10081898, "memory(GiB)": 13.7, "step": 49110, "train_speed(iter/s)": 1.5343 }, { "acc": 0.97875004, "epoch": 23.020857745488634, "grad_norm": 3.0006885528564453, "learning_rate": 6.058451497005043e-06, "loss": 0.03783809, "memory(GiB)": 13.7, "step": 49115, "train_speed(iter/s)": 1.534303 }, { "acc": 0.97008009, "epoch": 23.023201312397468, "grad_norm": 10.58925724029541, "learning_rate": 6.057693926159536e-06, "loss": 0.08908509, "memory(GiB)": 13.7, "step": 49120, "train_speed(iter/s)": 1.534308 }, { "acc": 0.98530636, "epoch": 23.025544879306302, "grad_norm": 1.6775603294372559, "learning_rate": 6.0569363299025606e-06, "loss": 0.07555142, "memory(GiB)": 13.7, "step": 49125, "train_speed(iter/s)": 1.534312 }, { "acc": 0.98889427, "epoch": 23.02788844621514, "grad_norm": 0.10573016852140427, "learning_rate": 6.056178708252326e-06, "loss": 0.02216129, "memory(GiB)": 13.7, "step": 49130, "train_speed(iter/s)": 1.534321 }, { "acc": 0.98175592, "epoch": 23.030232013123975, "grad_norm": 36.5482292175293, "learning_rate": 6.055421061227044e-06, "loss": 0.08600116, "memory(GiB)": 13.7, "step": 49135, "train_speed(iter/s)": 1.534327 }, { "acc": 0.96409874, "epoch": 23.03257558003281, "grad_norm": 4.694460391998291, "learning_rate": 6.054663388844927e-06, "loss": 0.10809472, "memory(GiB)": 13.7, "step": 49140, "train_speed(iter/s)": 1.534338 }, { "acc": 0.99404764, "epoch": 23.034919146941647, "grad_norm": 3.564128875732422, "learning_rate": 6.0539056911241845e-06, "loss": 0.02044827, "memory(GiB)": 13.7, "step": 49145, "train_speed(iter/s)": 1.534346 }, { "acc": 0.97853279, "epoch": 23.03726271385048, "grad_norm": 8.13850212097168, "learning_rate": 6.053147968083029e-06, "loss": 0.06394892, "memory(GiB)": 13.7, "step": 49150, "train_speed(iter/s)": 1.534349 }, { "acc": 0.98537779, "epoch": 23.039606280759315, "grad_norm": 5.459612846374512, "learning_rate": 6.052390219739674e-06, "loss": 0.10378646, "memory(GiB)": 13.7, "step": 49155, "train_speed(iter/s)": 1.534354 }, { "acc": 0.98291664, "epoch": 23.04194984766815, "grad_norm": 3.8185126781463623, "learning_rate": 6.051632446112334e-06, "loss": 0.05699805, "memory(GiB)": 13.7, "step": 49160, "train_speed(iter/s)": 1.534365 }, { "acc": 0.98051891, "epoch": 23.044293414576988, "grad_norm": 2.590827465057373, "learning_rate": 6.050874647219224e-06, "loss": 0.0969189, "memory(GiB)": 13.7, "step": 49165, "train_speed(iter/s)": 1.534366 }, { "acc": 0.98194447, "epoch": 23.046636981485822, "grad_norm": 3.143882989883423, "learning_rate": 6.050116823078558e-06, "loss": 0.0557763, "memory(GiB)": 13.7, "step": 49170, "train_speed(iter/s)": 1.534369 }, { "acc": 0.98666668, "epoch": 23.048980548394656, "grad_norm": 5.024936199188232, "learning_rate": 6.0493589737085526e-06, "loss": 0.07508258, "memory(GiB)": 13.7, "step": 49175, "train_speed(iter/s)": 1.534372 }, { "acc": 0.979072, "epoch": 23.05132411530349, "grad_norm": 4.809438705444336, "learning_rate": 6.0486010991274216e-06, "loss": 0.0548329, "memory(GiB)": 13.7, "step": 49180, "train_speed(iter/s)": 1.534372 }, { "acc": 0.990625, "epoch": 23.05366768221233, "grad_norm": 2.396801471710205, "learning_rate": 6.047843199353386e-06, "loss": 0.05475847, "memory(GiB)": 13.7, "step": 49185, "train_speed(iter/s)": 1.534366 }, { "acc": 0.99257278, "epoch": 23.056011249121163, "grad_norm": 2.5104002952575684, "learning_rate": 6.04708527440466e-06, "loss": 0.02535152, "memory(GiB)": 13.7, "step": 49190, "train_speed(iter/s)": 1.53438 }, { "acc": 0.98849201, "epoch": 23.058354816029997, "grad_norm": 1.218098759651184, "learning_rate": 6.0463273242994605e-06, "loss": 0.03519303, "memory(GiB)": 13.7, "step": 49195, "train_speed(iter/s)": 1.534387 }, { "acc": 0.9780304, "epoch": 23.06069838293883, "grad_norm": 2.7643253803253174, "learning_rate": 6.045569349056007e-06, "loss": 0.03844737, "memory(GiB)": 13.7, "step": 49200, "train_speed(iter/s)": 1.534387 }, { "acc": 0.97455807, "epoch": 23.06304194984767, "grad_norm": 5.013428211212158, "learning_rate": 6.044811348692522e-06, "loss": 0.06306672, "memory(GiB)": 13.7, "step": 49205, "train_speed(iter/s)": 1.53439 }, { "acc": 0.98006945, "epoch": 23.065385516756503, "grad_norm": 2.7616257667541504, "learning_rate": 6.044053323227222e-06, "loss": 0.06388911, "memory(GiB)": 13.7, "step": 49210, "train_speed(iter/s)": 1.534397 }, { "acc": 0.98008928, "epoch": 23.067729083665338, "grad_norm": 3.939830780029297, "learning_rate": 6.04329527267833e-06, "loss": 0.08359752, "memory(GiB)": 13.7, "step": 49215, "train_speed(iter/s)": 1.534403 }, { "acc": 0.98842258, "epoch": 23.070072650574176, "grad_norm": 0.048031002283096313, "learning_rate": 6.042537197064063e-06, "loss": 0.04086803, "memory(GiB)": 13.7, "step": 49220, "train_speed(iter/s)": 1.53441 }, { "acc": 0.97250004, "epoch": 23.07241621748301, "grad_norm": 0.2472916692495346, "learning_rate": 6.041779096402648e-06, "loss": 0.06814735, "memory(GiB)": 13.7, "step": 49225, "train_speed(iter/s)": 1.534414 }, { "acc": 0.98760414, "epoch": 23.074759784391844, "grad_norm": 3.8069188594818115, "learning_rate": 6.041020970712304e-06, "loss": 0.08425213, "memory(GiB)": 13.7, "step": 49230, "train_speed(iter/s)": 1.534413 }, { "acc": 0.9838541, "epoch": 23.07710335130068, "grad_norm": 0.010626084171235561, "learning_rate": 6.040262820011253e-06, "loss": 0.06311831, "memory(GiB)": 13.7, "step": 49235, "train_speed(iter/s)": 1.534419 }, { "acc": 0.98552084, "epoch": 23.079446918209516, "grad_norm": 1.7552586793899536, "learning_rate": 6.0395046443177185e-06, "loss": 0.04967667, "memory(GiB)": 13.7, "step": 49240, "train_speed(iter/s)": 1.534426 }, { "acc": 0.99206848, "epoch": 23.08179048511835, "grad_norm": 1.3356698751449585, "learning_rate": 6.038746443649928e-06, "loss": 0.04784228, "memory(GiB)": 13.7, "step": 49245, "train_speed(iter/s)": 1.534426 }, { "acc": 0.98916664, "epoch": 23.084134052027185, "grad_norm": 2.3779454231262207, "learning_rate": 6.037988218026103e-06, "loss": 0.03598873, "memory(GiB)": 13.7, "step": 49250, "train_speed(iter/s)": 1.53443 }, { "acc": 0.98955421, "epoch": 23.08647761893602, "grad_norm": 0.20227794349193573, "learning_rate": 6.03722996746447e-06, "loss": 0.04963802, "memory(GiB)": 13.7, "step": 49255, "train_speed(iter/s)": 1.534427 }, { "acc": 0.97094555, "epoch": 23.088821185844857, "grad_norm": 4.789191246032715, "learning_rate": 6.036471691983253e-06, "loss": 0.14201832, "memory(GiB)": 13.7, "step": 49260, "train_speed(iter/s)": 1.534432 }, { "acc": 0.97621117, "epoch": 23.09116475275369, "grad_norm": 2.469970464706421, "learning_rate": 6.035713391600681e-06, "loss": 0.08101624, "memory(GiB)": 13.7, "step": 49265, "train_speed(iter/s)": 1.534431 }, { "acc": 0.97166672, "epoch": 23.093508319662526, "grad_norm": 4.197612762451172, "learning_rate": 6.034955066334981e-06, "loss": 0.09585626, "memory(GiB)": 13.7, "step": 49270, "train_speed(iter/s)": 1.53445 }, { "acc": 0.96800594, "epoch": 23.09585188657136, "grad_norm": 9.475120544433594, "learning_rate": 6.034196716204379e-06, "loss": 0.08004819, "memory(GiB)": 13.7, "step": 49275, "train_speed(iter/s)": 1.534452 }, { "acc": 0.99437504, "epoch": 23.098195453480198, "grad_norm": 3.770641326904297, "learning_rate": 6.033438341227104e-06, "loss": 0.02166138, "memory(GiB)": 13.7, "step": 49280, "train_speed(iter/s)": 1.53446 }, { "acc": 0.9723011, "epoch": 23.100539020389032, "grad_norm": 0.49302148818969727, "learning_rate": 6.032679941421384e-06, "loss": 0.10280731, "memory(GiB)": 13.7, "step": 49285, "train_speed(iter/s)": 1.53447 }, { "acc": 0.97328129, "epoch": 23.102882587297866, "grad_norm": 4.9665703773498535, "learning_rate": 6.03192151680545e-06, "loss": 0.05574752, "memory(GiB)": 13.7, "step": 49290, "train_speed(iter/s)": 1.534475 }, { "acc": 0.99618053, "epoch": 23.105226154206704, "grad_norm": 1.6570318937301636, "learning_rate": 6.031163067397529e-06, "loss": 0.04340869, "memory(GiB)": 13.7, "step": 49295, "train_speed(iter/s)": 1.534483 }, { "acc": 0.9854166, "epoch": 23.10756972111554, "grad_norm": 4.396740913391113, "learning_rate": 6.030404593215857e-06, "loss": 0.0359356, "memory(GiB)": 13.7, "step": 49300, "train_speed(iter/s)": 1.534493 }, { "acc": 0.9833333, "epoch": 23.109913288024373, "grad_norm": 2.650139093399048, "learning_rate": 6.029646094278661e-06, "loss": 0.07985668, "memory(GiB)": 13.7, "step": 49305, "train_speed(iter/s)": 1.534495 }, { "acc": 0.9958334, "epoch": 23.112256854933207, "grad_norm": 0.9768199324607849, "learning_rate": 6.028887570604175e-06, "loss": 0.01599491, "memory(GiB)": 13.7, "step": 49310, "train_speed(iter/s)": 1.534498 }, { "acc": 0.98696423, "epoch": 23.114600421842045, "grad_norm": 0.25389888882637024, "learning_rate": 6.028129022210628e-06, "loss": 0.03873889, "memory(GiB)": 13.7, "step": 49315, "train_speed(iter/s)": 1.534506 }, { "acc": 0.96997471, "epoch": 23.11694398875088, "grad_norm": 3.4786508083343506, "learning_rate": 6.027370449116257e-06, "loss": 0.09589983, "memory(GiB)": 13.7, "step": 49320, "train_speed(iter/s)": 1.534508 }, { "acc": 0.98113098, "epoch": 23.119287555659714, "grad_norm": 4.061965465545654, "learning_rate": 6.0266118513392925e-06, "loss": 0.07270628, "memory(GiB)": 13.7, "step": 49325, "train_speed(iter/s)": 1.534507 }, { "acc": 0.97678566, "epoch": 23.121631122568548, "grad_norm": 5.128317832946777, "learning_rate": 6.025853228897971e-06, "loss": 0.10100943, "memory(GiB)": 13.7, "step": 49330, "train_speed(iter/s)": 1.534513 }, { "acc": 0.98111115, "epoch": 23.123974689477386, "grad_norm": 0.37630128860473633, "learning_rate": 6.0250945818105265e-06, "loss": 0.04736229, "memory(GiB)": 13.7, "step": 49335, "train_speed(iter/s)": 1.534521 }, { "acc": 0.98062496, "epoch": 23.12631825638622, "grad_norm": 6.397352695465088, "learning_rate": 6.024335910095192e-06, "loss": 0.07380177, "memory(GiB)": 13.7, "step": 49340, "train_speed(iter/s)": 1.534522 }, { "acc": 0.98343143, "epoch": 23.128661823295054, "grad_norm": 1.553267240524292, "learning_rate": 6.02357721377021e-06, "loss": 0.04553368, "memory(GiB)": 13.7, "step": 49345, "train_speed(iter/s)": 1.534533 }, { "acc": 0.97899628, "epoch": 23.13100539020389, "grad_norm": 2.35711669921875, "learning_rate": 6.022818492853812e-06, "loss": 0.06973822, "memory(GiB)": 13.7, "step": 49350, "train_speed(iter/s)": 1.534541 }, { "acc": 0.99030838, "epoch": 23.133348957112727, "grad_norm": 1.166264295578003, "learning_rate": 6.022059747364234e-06, "loss": 0.04269351, "memory(GiB)": 13.7, "step": 49355, "train_speed(iter/s)": 1.534555 }, { "acc": 0.97937498, "epoch": 23.13569252402156, "grad_norm": 0.13895590603351593, "learning_rate": 6.021300977319716e-06, "loss": 0.04963924, "memory(GiB)": 13.7, "step": 49360, "train_speed(iter/s)": 1.534558 }, { "acc": 0.97458334, "epoch": 23.138036090930395, "grad_norm": 5.815946578979492, "learning_rate": 6.020542182738497e-06, "loss": 0.0700957, "memory(GiB)": 13.7, "step": 49365, "train_speed(iter/s)": 1.534567 }, { "acc": 0.97875004, "epoch": 23.14037965783923, "grad_norm": 1.9713095426559448, "learning_rate": 6.019783363638814e-06, "loss": 0.04695762, "memory(GiB)": 13.7, "step": 49370, "train_speed(iter/s)": 1.534576 }, { "acc": 0.99375, "epoch": 23.142723224748067, "grad_norm": 4.969923496246338, "learning_rate": 6.019024520038909e-06, "loss": 0.03638574, "memory(GiB)": 13.7, "step": 49375, "train_speed(iter/s)": 1.534579 }, { "acc": 0.98645287, "epoch": 23.1450667916569, "grad_norm": 2.5581860542297363, "learning_rate": 6.018265651957019e-06, "loss": 0.04707829, "memory(GiB)": 13.7, "step": 49380, "train_speed(iter/s)": 1.534585 }, { "acc": 0.98552084, "epoch": 23.147410358565736, "grad_norm": 4.233582496643066, "learning_rate": 6.017506759411387e-06, "loss": 0.04788491, "memory(GiB)": 13.7, "step": 49385, "train_speed(iter/s)": 1.534584 }, { "acc": 0.99145832, "epoch": 23.149753925474574, "grad_norm": 1.4448846578598022, "learning_rate": 6.016747842420254e-06, "loss": 0.02243444, "memory(GiB)": 13.7, "step": 49390, "train_speed(iter/s)": 1.534587 }, { "acc": 0.97770844, "epoch": 23.152097492383408, "grad_norm": 3.6656434535980225, "learning_rate": 6.0159889010018626e-06, "loss": 0.07087275, "memory(GiB)": 13.7, "step": 49395, "train_speed(iter/s)": 1.534595 }, { "acc": 0.97800598, "epoch": 23.154441059292242, "grad_norm": 0.16309604048728943, "learning_rate": 6.015229935174453e-06, "loss": 0.07487095, "memory(GiB)": 13.7, "step": 49400, "train_speed(iter/s)": 1.534602 }, { "acc": 0.98812504, "epoch": 23.156784626201077, "grad_norm": 3.8352906703948975, "learning_rate": 6.014470944956269e-06, "loss": 0.03278468, "memory(GiB)": 13.7, "step": 49405, "train_speed(iter/s)": 1.5346 }, { "acc": 0.991572, "epoch": 23.159128193109915, "grad_norm": 3.9733080863952637, "learning_rate": 6.013711930365554e-06, "loss": 0.05048244, "memory(GiB)": 13.7, "step": 49410, "train_speed(iter/s)": 1.534607 }, { "acc": 0.9870759, "epoch": 23.16147176001875, "grad_norm": 1.5932612419128418, "learning_rate": 6.012952891420553e-06, "loss": 0.03617635, "memory(GiB)": 13.7, "step": 49415, "train_speed(iter/s)": 1.534615 }, { "acc": 0.98898439, "epoch": 23.163815326927583, "grad_norm": 1.3295401334762573, "learning_rate": 6.012193828139512e-06, "loss": 0.06405706, "memory(GiB)": 13.7, "step": 49420, "train_speed(iter/s)": 1.534615 }, { "acc": 0.99330359, "epoch": 23.166158893836418, "grad_norm": 4.718549728393555, "learning_rate": 6.011434740540675e-06, "loss": 0.04067366, "memory(GiB)": 13.7, "step": 49425, "train_speed(iter/s)": 1.534614 }, { "acc": 0.98791161, "epoch": 23.168502460745255, "grad_norm": 4.329953193664551, "learning_rate": 6.0106756286422865e-06, "loss": 0.04756407, "memory(GiB)": 13.7, "step": 49430, "train_speed(iter/s)": 1.534622 }, { "acc": 0.98240614, "epoch": 23.17084602765409, "grad_norm": 5.910475730895996, "learning_rate": 6.009916492462598e-06, "loss": 0.08079591, "memory(GiB)": 13.7, "step": 49435, "train_speed(iter/s)": 1.534628 }, { "acc": 0.98986111, "epoch": 23.173189594562924, "grad_norm": 2.95080304145813, "learning_rate": 6.009157332019849e-06, "loss": 0.05609481, "memory(GiB)": 13.7, "step": 49440, "train_speed(iter/s)": 1.53463 }, { "acc": 0.97922344, "epoch": 23.17553316147176, "grad_norm": 2.0962400436401367, "learning_rate": 6.008398147332294e-06, "loss": 0.10498092, "memory(GiB)": 13.7, "step": 49445, "train_speed(iter/s)": 1.534626 }, { "acc": 0.99241667, "epoch": 23.177876728380596, "grad_norm": 1.3714675903320312, "learning_rate": 6.007638938418178e-06, "loss": 0.04251318, "memory(GiB)": 13.7, "step": 49450, "train_speed(iter/s)": 1.534629 }, { "acc": 0.99523811, "epoch": 23.18022029528943, "grad_norm": 1.3448586463928223, "learning_rate": 6.0068797052957504e-06, "loss": 0.04995806, "memory(GiB)": 13.7, "step": 49455, "train_speed(iter/s)": 1.534626 }, { "acc": 0.9895834, "epoch": 23.182563862198265, "grad_norm": 4.847211837768555, "learning_rate": 6.0061204479832605e-06, "loss": 0.03112635, "memory(GiB)": 13.7, "step": 49460, "train_speed(iter/s)": 1.534626 }, { "acc": 0.98708334, "epoch": 23.184907429107103, "grad_norm": 0.08757509291172028, "learning_rate": 6.00536116649896e-06, "loss": 0.03341112, "memory(GiB)": 13.7, "step": 49465, "train_speed(iter/s)": 1.53463 }, { "acc": 0.98142357, "epoch": 23.187250996015937, "grad_norm": 2.258991241455078, "learning_rate": 6.004601860861097e-06, "loss": 0.1031135, "memory(GiB)": 13.7, "step": 49470, "train_speed(iter/s)": 1.534641 }, { "acc": 0.9848958, "epoch": 23.18959456292477, "grad_norm": 5.363658905029297, "learning_rate": 6.003842531087923e-06, "loss": 0.0875458, "memory(GiB)": 13.7, "step": 49475, "train_speed(iter/s)": 1.534645 }, { "acc": 0.99373541, "epoch": 23.191938129833606, "grad_norm": 3.064349412918091, "learning_rate": 6.0030831771976946e-06, "loss": 0.05233062, "memory(GiB)": 13.7, "step": 49480, "train_speed(iter/s)": 1.534651 }, { "acc": 0.97390537, "epoch": 23.194281696742443, "grad_norm": 3.5789849758148193, "learning_rate": 6.0023237992086595e-06, "loss": 0.11254064, "memory(GiB)": 13.7, "step": 49485, "train_speed(iter/s)": 1.534656 }, { "acc": 0.97406254, "epoch": 23.196625263651278, "grad_norm": 5.066195964813232, "learning_rate": 6.001564397139068e-06, "loss": 0.10298272, "memory(GiB)": 13.7, "step": 49490, "train_speed(iter/s)": 1.534652 }, { "acc": 0.98708334, "epoch": 23.198968830560112, "grad_norm": 3.706956148147583, "learning_rate": 6.00080497100718e-06, "loss": 0.04271069, "memory(GiB)": 13.7, "step": 49495, "train_speed(iter/s)": 1.534651 }, { "acc": 0.97847595, "epoch": 23.201312397468946, "grad_norm": 0.0852212980389595, "learning_rate": 6.000045520831247e-06, "loss": 0.08430836, "memory(GiB)": 13.7, "step": 49500, "train_speed(iter/s)": 1.53466 }, { "acc": 0.98062496, "epoch": 23.203655964377784, "grad_norm": 3.5400147438049316, "learning_rate": 5.999286046629521e-06, "loss": 0.10192435, "memory(GiB)": 13.7, "step": 49505, "train_speed(iter/s)": 1.534667 }, { "acc": 0.98963737, "epoch": 23.20599953128662, "grad_norm": 2.7610363960266113, "learning_rate": 5.998526548420262e-06, "loss": 0.04165466, "memory(GiB)": 13.7, "step": 49510, "train_speed(iter/s)": 1.534678 }, { "acc": 0.99490128, "epoch": 23.208343098195453, "grad_norm": 2.587207317352295, "learning_rate": 5.997767026221721e-06, "loss": 0.02836989, "memory(GiB)": 13.7, "step": 49515, "train_speed(iter/s)": 1.534688 }, { "acc": 0.9854166, "epoch": 23.210686665104287, "grad_norm": 5.393081188201904, "learning_rate": 5.997007480052161e-06, "loss": 0.04439034, "memory(GiB)": 13.7, "step": 49520, "train_speed(iter/s)": 1.53469 }, { "acc": 0.98270836, "epoch": 23.213030232013125, "grad_norm": 18.080732345581055, "learning_rate": 5.9962479099298324e-06, "loss": 0.14590037, "memory(GiB)": 13.7, "step": 49525, "train_speed(iter/s)": 1.5347 }, { "acc": 0.990625, "epoch": 23.21537379892196, "grad_norm": 0.9410844445228577, "learning_rate": 5.995488315872996e-06, "loss": 0.03544717, "memory(GiB)": 13.7, "step": 49530, "train_speed(iter/s)": 1.534702 }, { "acc": 0.98161707, "epoch": 23.217717365830794, "grad_norm": 3.8237521648406982, "learning_rate": 5.994728697899909e-06, "loss": 0.05026705, "memory(GiB)": 13.7, "step": 49535, "train_speed(iter/s)": 1.534694 }, { "acc": 0.98354168, "epoch": 23.22006093273963, "grad_norm": 2.0391106605529785, "learning_rate": 5.99396905602883e-06, "loss": 0.07520698, "memory(GiB)": 13.7, "step": 49540, "train_speed(iter/s)": 1.5347 }, { "acc": 0.99229164, "epoch": 23.222404499648466, "grad_norm": 1.342856526374817, "learning_rate": 5.993209390278017e-06, "loss": 0.04265898, "memory(GiB)": 13.7, "step": 49545, "train_speed(iter/s)": 1.534711 }, { "acc": 0.98633928, "epoch": 23.2247480665573, "grad_norm": 3.8634395599365234, "learning_rate": 5.992449700665735e-06, "loss": 0.05123239, "memory(GiB)": 13.7, "step": 49550, "train_speed(iter/s)": 1.534711 }, { "acc": 0.97626991, "epoch": 23.227091633466134, "grad_norm": 4.228202819824219, "learning_rate": 5.991689987210239e-06, "loss": 0.07445168, "memory(GiB)": 13.7, "step": 49555, "train_speed(iter/s)": 1.534715 }, { "acc": 0.96823864, "epoch": 23.229435200374972, "grad_norm": 3.2954797744750977, "learning_rate": 5.990930249929794e-06, "loss": 0.10970665, "memory(GiB)": 13.7, "step": 49560, "train_speed(iter/s)": 1.534722 }, { "acc": 0.99807692, "epoch": 23.231778767283807, "grad_norm": 0.005923946388065815, "learning_rate": 5.990170488842658e-06, "loss": 0.00751352, "memory(GiB)": 13.7, "step": 49565, "train_speed(iter/s)": 1.534723 }, { "acc": 0.97974205, "epoch": 23.23412233419264, "grad_norm": 0.07171203941106796, "learning_rate": 5.989410703967095e-06, "loss": 0.05468131, "memory(GiB)": 13.7, "step": 49570, "train_speed(iter/s)": 1.534726 }, { "acc": 0.9854166, "epoch": 23.236465901101475, "grad_norm": 1.0755338668823242, "learning_rate": 5.988650895321366e-06, "loss": 0.05484857, "memory(GiB)": 13.7, "step": 49575, "train_speed(iter/s)": 1.534723 }, { "acc": 0.98537769, "epoch": 23.238809468010313, "grad_norm": 7.217827320098877, "learning_rate": 5.9878910629237376e-06, "loss": 0.08349435, "memory(GiB)": 13.7, "step": 49580, "train_speed(iter/s)": 1.534727 }, { "acc": 0.98290615, "epoch": 23.241153034919147, "grad_norm": 5.301426887512207, "learning_rate": 5.987131206792471e-06, "loss": 0.05587517, "memory(GiB)": 13.7, "step": 49585, "train_speed(iter/s)": 1.534735 }, { "acc": 0.98214283, "epoch": 23.24349660182798, "grad_norm": 3.8435449600219727, "learning_rate": 5.986371326945832e-06, "loss": 0.10479157, "memory(GiB)": 13.7, "step": 49590, "train_speed(iter/s)": 1.534736 }, { "acc": 0.97253857, "epoch": 23.245840168736816, "grad_norm": 4.077508449554443, "learning_rate": 5.985611423402086e-06, "loss": 0.11868265, "memory(GiB)": 13.7, "step": 49595, "train_speed(iter/s)": 1.534754 }, { "acc": 0.98538685, "epoch": 23.248183735645654, "grad_norm": 1.6340670585632324, "learning_rate": 5.984851496179497e-06, "loss": 0.09146644, "memory(GiB)": 13.7, "step": 49600, "train_speed(iter/s)": 1.534753 }, { "acc": 0.98083334, "epoch": 23.250527302554488, "grad_norm": 4.3462910652160645, "learning_rate": 5.984091545296332e-06, "loss": 0.07487674, "memory(GiB)": 13.7, "step": 49605, "train_speed(iter/s)": 1.534757 }, { "acc": 0.9791666, "epoch": 23.252870869463322, "grad_norm": 5.813002586364746, "learning_rate": 5.9833315707708575e-06, "loss": 0.07217214, "memory(GiB)": 13.7, "step": 49610, "train_speed(iter/s)": 1.53476 }, { "acc": 0.98592262, "epoch": 23.255214436372157, "grad_norm": 0.23698601126670837, "learning_rate": 5.982571572621341e-06, "loss": 0.05024883, "memory(GiB)": 13.7, "step": 49615, "train_speed(iter/s)": 1.534763 }, { "acc": 0.9841197, "epoch": 23.257558003280995, "grad_norm": 8.220128059387207, "learning_rate": 5.981811550866049e-06, "loss": 0.10870287, "memory(GiB)": 13.7, "step": 49620, "train_speed(iter/s)": 1.534769 }, { "acc": 0.99437504, "epoch": 23.25990157018983, "grad_norm": 5.213128566741943, "learning_rate": 5.981051505523253e-06, "loss": 0.02449682, "memory(GiB)": 13.7, "step": 49625, "train_speed(iter/s)": 1.534775 }, { "acc": 0.99153843, "epoch": 23.262245137098663, "grad_norm": 0.4038289785385132, "learning_rate": 5.98029143661122e-06, "loss": 0.02571766, "memory(GiB)": 13.7, "step": 49630, "train_speed(iter/s)": 1.534776 }, { "acc": 0.9927083, "epoch": 23.2645887040075, "grad_norm": 5.746804237365723, "learning_rate": 5.979531344148218e-06, "loss": 0.03063253, "memory(GiB)": 13.7, "step": 49635, "train_speed(iter/s)": 1.534776 }, { "acc": 0.9894886, "epoch": 23.266932270916335, "grad_norm": 4.96276330947876, "learning_rate": 5.978771228152519e-06, "loss": 0.02972998, "memory(GiB)": 13.7, "step": 49640, "train_speed(iter/s)": 1.534778 }, { "acc": 0.9794322, "epoch": 23.26927583782517, "grad_norm": 1.6348237991333008, "learning_rate": 5.978011088642397e-06, "loss": 0.07588252, "memory(GiB)": 13.7, "step": 49645, "train_speed(iter/s)": 1.534787 }, { "acc": 0.9727685, "epoch": 23.271619404734004, "grad_norm": 1.9232761859893799, "learning_rate": 5.977250925636116e-06, "loss": 0.10360425, "memory(GiB)": 13.7, "step": 49650, "train_speed(iter/s)": 1.53479 }, { "acc": 0.98282204, "epoch": 23.273962971642842, "grad_norm": 4.666773319244385, "learning_rate": 5.976490739151954e-06, "loss": 0.08285017, "memory(GiB)": 13.7, "step": 49655, "train_speed(iter/s)": 1.534793 }, { "acc": 0.97991667, "epoch": 23.276306538551676, "grad_norm": 1.110801100730896, "learning_rate": 5.975730529208179e-06, "loss": 0.04184803, "memory(GiB)": 13.7, "step": 49660, "train_speed(iter/s)": 1.534791 }, { "acc": 0.97488098, "epoch": 23.27865010546051, "grad_norm": 3.4069526195526123, "learning_rate": 5.974970295823066e-06, "loss": 0.09230939, "memory(GiB)": 13.7, "step": 49665, "train_speed(iter/s)": 1.534792 }, { "acc": 0.99301586, "epoch": 23.280993672369345, "grad_norm": 29.547103881835938, "learning_rate": 5.974210039014891e-06, "loss": 0.06120731, "memory(GiB)": 13.7, "step": 49670, "train_speed(iter/s)": 1.534792 }, { "acc": 0.9875, "epoch": 23.283337239278183, "grad_norm": 3.5699400901794434, "learning_rate": 5.973449758801924e-06, "loss": 0.02704291, "memory(GiB)": 13.7, "step": 49675, "train_speed(iter/s)": 1.5348 }, { "acc": 0.98909225, "epoch": 23.285680806187017, "grad_norm": 2.883408546447754, "learning_rate": 5.972689455202439e-06, "loss": 0.04430214, "memory(GiB)": 13.7, "step": 49680, "train_speed(iter/s)": 1.534807 }, { "acc": 0.9796875, "epoch": 23.28802437309585, "grad_norm": 5.9368486404418945, "learning_rate": 5.971929128234718e-06, "loss": 0.06292514, "memory(GiB)": 13.7, "step": 49685, "train_speed(iter/s)": 1.534812 }, { "acc": 0.98738098, "epoch": 23.290367940004685, "grad_norm": 7.675937652587891, "learning_rate": 5.9711687779170295e-06, "loss": 0.06721017, "memory(GiB)": 13.7, "step": 49690, "train_speed(iter/s)": 1.53482 }, { "acc": 0.97339287, "epoch": 23.292711506913523, "grad_norm": 2.6710383892059326, "learning_rate": 5.9704084042676514e-06, "loss": 0.08670878, "memory(GiB)": 13.7, "step": 49695, "train_speed(iter/s)": 1.534827 }, { "acc": 0.97076397, "epoch": 23.295055073822358, "grad_norm": 8.807618141174316, "learning_rate": 5.969648007304864e-06, "loss": 0.11772108, "memory(GiB)": 13.7, "step": 49700, "train_speed(iter/s)": 1.534824 }, { "acc": 0.98324299, "epoch": 23.297398640731192, "grad_norm": 4.463921546936035, "learning_rate": 5.9688875870469406e-06, "loss": 0.05642996, "memory(GiB)": 13.7, "step": 49705, "train_speed(iter/s)": 1.534829 }, { "acc": 0.9875, "epoch": 23.29974220764003, "grad_norm": 0.07796728610992432, "learning_rate": 5.968127143512161e-06, "loss": 0.05081611, "memory(GiB)": 13.7, "step": 49710, "train_speed(iter/s)": 1.534833 }, { "acc": 0.99298611, "epoch": 23.302085774548864, "grad_norm": 2.3923163414001465, "learning_rate": 5.9673666767188045e-06, "loss": 0.02512632, "memory(GiB)": 13.7, "step": 49715, "train_speed(iter/s)": 1.534833 }, { "acc": 0.98059521, "epoch": 23.3044293414577, "grad_norm": 5.364824295043945, "learning_rate": 5.96660618668515e-06, "loss": 0.04377728, "memory(GiB)": 13.7, "step": 49720, "train_speed(iter/s)": 1.53483 }, { "acc": 0.96520834, "epoch": 23.306772908366533, "grad_norm": 2.7508158683776855, "learning_rate": 5.965845673429476e-06, "loss": 0.06666139, "memory(GiB)": 13.7, "step": 49725, "train_speed(iter/s)": 1.53484 }, { "acc": 0.98947296, "epoch": 23.30911647527537, "grad_norm": 4.503604888916016, "learning_rate": 5.965085136970065e-06, "loss": 0.04913737, "memory(GiB)": 13.7, "step": 49730, "train_speed(iter/s)": 1.534837 }, { "acc": 0.97657204, "epoch": 23.311460042184205, "grad_norm": 4.254638671875, "learning_rate": 5.964324577325195e-06, "loss": 0.06520436, "memory(GiB)": 13.7, "step": 49735, "train_speed(iter/s)": 1.53484 }, { "acc": 0.9953125, "epoch": 23.31380360909304, "grad_norm": 2.1336348056793213, "learning_rate": 5.963563994513147e-06, "loss": 0.03580498, "memory(GiB)": 13.7, "step": 49740, "train_speed(iter/s)": 1.534846 }, { "acc": 0.98187504, "epoch": 23.316147176001873, "grad_norm": 4.410576820373535, "learning_rate": 5.962803388552206e-06, "loss": 0.07939438, "memory(GiB)": 13.7, "step": 49745, "train_speed(iter/s)": 1.534844 }, { "acc": 0.98125, "epoch": 23.31849074291071, "grad_norm": 5.714108943939209, "learning_rate": 5.962042759460651e-06, "loss": 0.05225062, "memory(GiB)": 13.7, "step": 49750, "train_speed(iter/s)": 1.534845 }, { "acc": 0.98764877, "epoch": 23.320834309819546, "grad_norm": 5.540318489074707, "learning_rate": 5.961282107256769e-06, "loss": 0.07857347, "memory(GiB)": 13.7, "step": 49755, "train_speed(iter/s)": 1.534845 }, { "acc": 0.99178028, "epoch": 23.32317787672838, "grad_norm": 1.5328881740570068, "learning_rate": 5.9605214319588424e-06, "loss": 0.05305052, "memory(GiB)": 13.7, "step": 49760, "train_speed(iter/s)": 1.534839 }, { "acc": 0.9734375, "epoch": 23.325521443637214, "grad_norm": 5.351545333862305, "learning_rate": 5.959760733585152e-06, "loss": 0.09370078, "memory(GiB)": 13.7, "step": 49765, "train_speed(iter/s)": 1.534841 }, { "acc": 0.97895832, "epoch": 23.327865010546052, "grad_norm": 7.98917293548584, "learning_rate": 5.9590000121539886e-06, "loss": 0.09407868, "memory(GiB)": 13.7, "step": 49770, "train_speed(iter/s)": 1.534853 }, { "acc": 0.97375994, "epoch": 23.330208577454886, "grad_norm": 2.874612808227539, "learning_rate": 5.958239267683632e-06, "loss": 0.08324097, "memory(GiB)": 13.7, "step": 49775, "train_speed(iter/s)": 1.534855 }, { "acc": 0.99375, "epoch": 23.33255214436372, "grad_norm": 0.07654270529747009, "learning_rate": 5.957478500192369e-06, "loss": 0.01582144, "memory(GiB)": 13.7, "step": 49780, "train_speed(iter/s)": 1.534854 }, { "acc": 0.96437187, "epoch": 23.33489571127256, "grad_norm": 6.784764289855957, "learning_rate": 5.956717709698488e-06, "loss": 0.1015867, "memory(GiB)": 13.7, "step": 49785, "train_speed(iter/s)": 1.534856 }, { "acc": 0.98805923, "epoch": 23.337239278181393, "grad_norm": 0.05688629671931267, "learning_rate": 5.955956896220273e-06, "loss": 0.0539534, "memory(GiB)": 13.7, "step": 49790, "train_speed(iter/s)": 1.534862 }, { "acc": 0.99375, "epoch": 23.339582845090227, "grad_norm": 3.160846471786499, "learning_rate": 5.955196059776013e-06, "loss": 0.04567698, "memory(GiB)": 13.7, "step": 49795, "train_speed(iter/s)": 1.534871 }, { "acc": 0.9828125, "epoch": 23.34192641199906, "grad_norm": 5.437079429626465, "learning_rate": 5.954435200383998e-06, "loss": 0.0527598, "memory(GiB)": 13.7, "step": 49800, "train_speed(iter/s)": 1.53487 }, { "acc": 0.96559525, "epoch": 23.3442699789079, "grad_norm": 4.127551078796387, "learning_rate": 5.953674318062512e-06, "loss": 0.1138135, "memory(GiB)": 13.7, "step": 49805, "train_speed(iter/s)": 1.534872 }, { "acc": 0.97614584, "epoch": 23.346613545816734, "grad_norm": 5.570191860198975, "learning_rate": 5.952913412829849e-06, "loss": 0.10036882, "memory(GiB)": 13.7, "step": 49810, "train_speed(iter/s)": 1.534879 }, { "acc": 0.96422787, "epoch": 23.348957112725568, "grad_norm": 4.184658050537109, "learning_rate": 5.952152484704298e-06, "loss": 0.11422209, "memory(GiB)": 13.7, "step": 49815, "train_speed(iter/s)": 1.534878 }, { "acc": 0.97648811, "epoch": 23.351300679634402, "grad_norm": 10.624975204467773, "learning_rate": 5.951391533704146e-06, "loss": 0.08329102, "memory(GiB)": 13.7, "step": 49820, "train_speed(iter/s)": 1.534885 }, { "acc": 0.9916667, "epoch": 23.35364424654324, "grad_norm": 2.9196670055389404, "learning_rate": 5.950630559847683e-06, "loss": 0.04124438, "memory(GiB)": 13.7, "step": 49825, "train_speed(iter/s)": 1.534894 }, { "acc": 0.98862181, "epoch": 23.355987813452074, "grad_norm": 2.3582684993743896, "learning_rate": 5.9498695631532065e-06, "loss": 0.10218441, "memory(GiB)": 13.7, "step": 49830, "train_speed(iter/s)": 1.534894 }, { "acc": 0.98604164, "epoch": 23.35833138036091, "grad_norm": 4.24210786819458, "learning_rate": 5.949108543639003e-06, "loss": 0.05522457, "memory(GiB)": 13.7, "step": 49835, "train_speed(iter/s)": 1.534893 }, { "acc": 0.9935606, "epoch": 23.360674947269743, "grad_norm": 10.141736030578613, "learning_rate": 5.948347501323367e-06, "loss": 0.04126133, "memory(GiB)": 13.7, "step": 49840, "train_speed(iter/s)": 1.534903 }, { "acc": 0.98416672, "epoch": 23.36301851417858, "grad_norm": 5.1484198570251465, "learning_rate": 5.947586436224589e-06, "loss": 0.05124233, "memory(GiB)": 13.7, "step": 49845, "train_speed(iter/s)": 1.534908 }, { "acc": 0.98770828, "epoch": 23.365362081087415, "grad_norm": 0.018187038600444794, "learning_rate": 5.946825348360969e-06, "loss": 0.05431615, "memory(GiB)": 13.7, "step": 49850, "train_speed(iter/s)": 1.534919 }, { "acc": 0.9794445, "epoch": 23.36770564799625, "grad_norm": 0.13447925448417664, "learning_rate": 5.9460642377507925e-06, "loss": 0.05750977, "memory(GiB)": 13.7, "step": 49855, "train_speed(iter/s)": 1.534928 }, { "acc": 0.984375, "epoch": 23.370049214905084, "grad_norm": 3.8698484897613525, "learning_rate": 5.94530310441236e-06, "loss": 0.09960192, "memory(GiB)": 13.7, "step": 49860, "train_speed(iter/s)": 1.534936 }, { "acc": 0.99375, "epoch": 23.37239278181392, "grad_norm": 0.016296103596687317, "learning_rate": 5.944541948363963e-06, "loss": 0.01558587, "memory(GiB)": 13.7, "step": 49865, "train_speed(iter/s)": 1.534941 }, { "acc": 0.96816425, "epoch": 23.374736348722756, "grad_norm": 5.423569202423096, "learning_rate": 5.9437807696239005e-06, "loss": 0.12371886, "memory(GiB)": 13.7, "step": 49870, "train_speed(iter/s)": 1.534944 }, { "acc": 0.97733593, "epoch": 23.37707991563159, "grad_norm": 3.3545989990234375, "learning_rate": 5.943019568210468e-06, "loss": 0.0782613, "memory(GiB)": 13.7, "step": 49875, "train_speed(iter/s)": 1.534948 }, { "acc": 0.97599211, "epoch": 23.379423482540428, "grad_norm": 1.2056965827941895, "learning_rate": 5.9422583441419605e-06, "loss": 0.07023569, "memory(GiB)": 13.7, "step": 49880, "train_speed(iter/s)": 1.534949 }, { "acc": 0.98478546, "epoch": 23.381767049449262, "grad_norm": 3.509727954864502, "learning_rate": 5.941497097436675e-06, "loss": 0.0593839, "memory(GiB)": 13.7, "step": 49885, "train_speed(iter/s)": 1.534954 }, { "acc": 0.99916668, "epoch": 23.384110616358097, "grad_norm": 0.11841733008623123, "learning_rate": 5.940735828112911e-06, "loss": 0.03002319, "memory(GiB)": 13.7, "step": 49890, "train_speed(iter/s)": 1.534958 }, { "acc": 0.98628473, "epoch": 23.38645418326693, "grad_norm": 5.952407360076904, "learning_rate": 5.939974536188969e-06, "loss": 0.0584696, "memory(GiB)": 13.7, "step": 49895, "train_speed(iter/s)": 1.534975 }, { "acc": 0.98058033, "epoch": 23.38879775017577, "grad_norm": 4.491598129272461, "learning_rate": 5.939213221683143e-06, "loss": 0.10837764, "memory(GiB)": 13.7, "step": 49900, "train_speed(iter/s)": 1.534982 }, { "acc": 0.98472223, "epoch": 23.391141317084603, "grad_norm": 3.7567765712738037, "learning_rate": 5.9384518846137376e-06, "loss": 0.04888887, "memory(GiB)": 13.7, "step": 49905, "train_speed(iter/s)": 1.534983 }, { "acc": 0.97108135, "epoch": 23.393484883993438, "grad_norm": 1.7662123441696167, "learning_rate": 5.937690524999048e-06, "loss": 0.08970853, "memory(GiB)": 13.7, "step": 49910, "train_speed(iter/s)": 1.534987 }, { "acc": 0.97869053, "epoch": 23.395828450902272, "grad_norm": 2.277238607406616, "learning_rate": 5.936929142857377e-06, "loss": 0.05781485, "memory(GiB)": 13.7, "step": 49915, "train_speed(iter/s)": 1.53499 }, { "acc": 0.98038692, "epoch": 23.39817201781111, "grad_norm": 6.078751564025879, "learning_rate": 5.936167738207027e-06, "loss": 0.07194557, "memory(GiB)": 13.7, "step": 49920, "train_speed(iter/s)": 1.535002 }, { "acc": 0.99125004, "epoch": 23.400515584719944, "grad_norm": 2.3083279132843018, "learning_rate": 5.9354063110662995e-06, "loss": 0.04320293, "memory(GiB)": 13.7, "step": 49925, "train_speed(iter/s)": 1.535001 }, { "acc": 0.9770834, "epoch": 23.40285915162878, "grad_norm": 4.300530910491943, "learning_rate": 5.934644861453494e-06, "loss": 0.09462509, "memory(GiB)": 13.7, "step": 49930, "train_speed(iter/s)": 1.535008 }, { "acc": 0.98966351, "epoch": 23.405202718537613, "grad_norm": 2.5894858837127686, "learning_rate": 5.933883389386914e-06, "loss": 0.04044394, "memory(GiB)": 13.7, "step": 49935, "train_speed(iter/s)": 1.535015 }, { "acc": 0.9811554, "epoch": 23.40754628544645, "grad_norm": 5.495959758758545, "learning_rate": 5.9331218948848665e-06, "loss": 0.07642699, "memory(GiB)": 13.7, "step": 49940, "train_speed(iter/s)": 1.535022 }, { "acc": 0.99508934, "epoch": 23.409889852355285, "grad_norm": 2.0629873275756836, "learning_rate": 5.9323603779656505e-06, "loss": 0.0402934, "memory(GiB)": 13.7, "step": 49945, "train_speed(iter/s)": 1.535028 }, { "acc": 0.9708333, "epoch": 23.41223341926412, "grad_norm": 2.7723095417022705, "learning_rate": 5.931598838647573e-06, "loss": 0.1014343, "memory(GiB)": 13.7, "step": 49950, "train_speed(iter/s)": 1.535038 }, { "acc": 0.98028851, "epoch": 23.414576986172957, "grad_norm": 4.036413669586182, "learning_rate": 5.930837276948939e-06, "loss": 0.05242829, "memory(GiB)": 13.7, "step": 49955, "train_speed(iter/s)": 1.53504 }, { "acc": 0.9842803, "epoch": 23.41692055308179, "grad_norm": 0.02660275436937809, "learning_rate": 5.930075692888051e-06, "loss": 0.0496365, "memory(GiB)": 13.7, "step": 49960, "train_speed(iter/s)": 1.535045 }, { "acc": 0.9854167, "epoch": 23.419264119990626, "grad_norm": 3.948176145553589, "learning_rate": 5.9293140864832185e-06, "loss": 0.0629759, "memory(GiB)": 13.7, "step": 49965, "train_speed(iter/s)": 1.535043 }, { "acc": 0.97302494, "epoch": 23.42160768689946, "grad_norm": 3.3120975494384766, "learning_rate": 5.928552457752746e-06, "loss": 0.14185454, "memory(GiB)": 13.7, "step": 49970, "train_speed(iter/s)": 1.535042 }, { "acc": 0.99056807, "epoch": 23.423951253808298, "grad_norm": 2.4522645473480225, "learning_rate": 5.927790806714943e-06, "loss": 0.05278119, "memory(GiB)": 13.7, "step": 49975, "train_speed(iter/s)": 1.53505 }, { "acc": 0.98458328, "epoch": 23.426294820717132, "grad_norm": 3.070608615875244, "learning_rate": 5.9270291333881154e-06, "loss": 0.04656143, "memory(GiB)": 13.7, "step": 49980, "train_speed(iter/s)": 1.535056 }, { "acc": 0.98903847, "epoch": 23.428638387625966, "grad_norm": 0.7779824137687683, "learning_rate": 5.926267437790573e-06, "loss": 0.0353009, "memory(GiB)": 13.7, "step": 49985, "train_speed(iter/s)": 1.53506 }, { "acc": 0.97385406, "epoch": 23.4309819545348, "grad_norm": 1.1838269233703613, "learning_rate": 5.925505719940619e-06, "loss": 0.09958262, "memory(GiB)": 13.7, "step": 49990, "train_speed(iter/s)": 1.535058 }, { "acc": 0.98008928, "epoch": 23.43332552144364, "grad_norm": 5.29646635055542, "learning_rate": 5.924743979856568e-06, "loss": 0.05455989, "memory(GiB)": 13.7, "step": 49995, "train_speed(iter/s)": 1.535063 }, { "acc": 0.9864583, "epoch": 23.435669088352473, "grad_norm": 2.380033254623413, "learning_rate": 5.9239822175567265e-06, "loss": 0.03817323, "memory(GiB)": 13.7, "step": 50000, "train_speed(iter/s)": 1.535072 }, { "epoch": 23.435669088352473, "eval_acc": 0.7751736696567073, "eval_loss": 1.212769865989685, "eval_runtime": 143.3163, "eval_samples_per_second": 56.295, "eval_steps_per_second": 7.04, "step": 50000 }, { "acc": 0.97570515, "epoch": 23.438012655261307, "grad_norm": 3.2191309928894043, "learning_rate": 5.923220433059408e-06, "loss": 0.10188942, "memory(GiB)": 13.7, "step": 50005, "train_speed(iter/s)": 1.526979 }, { "acc": 0.9895834, "epoch": 23.44035622217014, "grad_norm": 3.006854772567749, "learning_rate": 5.922458626382922e-06, "loss": 0.0278396, "memory(GiB)": 13.7, "step": 50010, "train_speed(iter/s)": 1.526968 }, { "acc": 0.99126987, "epoch": 23.44269978907898, "grad_norm": 1.139273762702942, "learning_rate": 5.921696797545579e-06, "loss": 0.05058006, "memory(GiB)": 13.7, "step": 50015, "train_speed(iter/s)": 1.526969 }, { "acc": 0.98916664, "epoch": 23.445043355987814, "grad_norm": 0.17424796521663666, "learning_rate": 5.920934946565691e-06, "loss": 0.02396694, "memory(GiB)": 13.7, "step": 50020, "train_speed(iter/s)": 1.52698 }, { "acc": 0.96729164, "epoch": 23.447386922896648, "grad_norm": 2.9784011840820312, "learning_rate": 5.9201730734615725e-06, "loss": 0.12870351, "memory(GiB)": 13.7, "step": 50025, "train_speed(iter/s)": 1.526985 }, { "acc": 0.99011364, "epoch": 23.449730489805486, "grad_norm": 2.7528114318847656, "learning_rate": 5.9194111782515325e-06, "loss": 0.03383542, "memory(GiB)": 13.7, "step": 50030, "train_speed(iter/s)": 1.526992 }, { "acc": 0.98916664, "epoch": 23.45207405671432, "grad_norm": 3.171616792678833, "learning_rate": 5.918649260953888e-06, "loss": 0.02470923, "memory(GiB)": 13.7, "step": 50035, "train_speed(iter/s)": 1.527002 }, { "acc": 0.98937502, "epoch": 23.454417623623154, "grad_norm": 0.058609798550605774, "learning_rate": 5.91788732158695e-06, "loss": 0.02455516, "memory(GiB)": 13.7, "step": 50040, "train_speed(iter/s)": 1.527 }, { "acc": 0.98708334, "epoch": 23.45676119053199, "grad_norm": 0.18283018469810486, "learning_rate": 5.917125360169035e-06, "loss": 0.02264137, "memory(GiB)": 13.7, "step": 50045, "train_speed(iter/s)": 1.52701 }, { "acc": 0.98883934, "epoch": 23.459104757440826, "grad_norm": 4.554296970367432, "learning_rate": 5.916363376718459e-06, "loss": 0.0391714, "memory(GiB)": 13.7, "step": 50050, "train_speed(iter/s)": 1.527015 }, { "acc": 0.9671875, "epoch": 23.46144832434966, "grad_norm": 7.212414741516113, "learning_rate": 5.915601371253535e-06, "loss": 0.12490865, "memory(GiB)": 13.7, "step": 50055, "train_speed(iter/s)": 1.527024 }, { "acc": 0.9944129, "epoch": 23.463791891258495, "grad_norm": 2.073575973510742, "learning_rate": 5.914839343792581e-06, "loss": 0.01968227, "memory(GiB)": 13.7, "step": 50060, "train_speed(iter/s)": 1.527029 }, { "acc": 0.98798065, "epoch": 23.46613545816733, "grad_norm": 3.8969626426696777, "learning_rate": 5.914077294353914e-06, "loss": 0.04582844, "memory(GiB)": 13.7, "step": 50065, "train_speed(iter/s)": 1.527028 }, { "acc": 0.99375, "epoch": 23.468479025076167, "grad_norm": 0.535880982875824, "learning_rate": 5.91331522295585e-06, "loss": 0.02976741, "memory(GiB)": 13.7, "step": 50070, "train_speed(iter/s)": 1.527035 }, { "acc": 0.96573868, "epoch": 23.470822591985, "grad_norm": 8.759990692138672, "learning_rate": 5.912553129616704e-06, "loss": 0.10990425, "memory(GiB)": 13.7, "step": 50075, "train_speed(iter/s)": 1.527038 }, { "acc": 0.98403845, "epoch": 23.473166158893836, "grad_norm": 4.910464763641357, "learning_rate": 5.9117910143548e-06, "loss": 0.07932456, "memory(GiB)": 13.7, "step": 50080, "train_speed(iter/s)": 1.527038 }, { "acc": 0.97215281, "epoch": 23.47550972580267, "grad_norm": 1.3702296018600464, "learning_rate": 5.911028877188452e-06, "loss": 0.0871958, "memory(GiB)": 13.7, "step": 50085, "train_speed(iter/s)": 1.527041 }, { "acc": 0.97769356, "epoch": 23.477853292711508, "grad_norm": 0.42286568880081177, "learning_rate": 5.910266718135983e-06, "loss": 0.07623069, "memory(GiB)": 13.7, "step": 50090, "train_speed(iter/s)": 1.527051 }, { "acc": 0.98130951, "epoch": 23.480196859620342, "grad_norm": 6.43771505355835, "learning_rate": 5.90950453721571e-06, "loss": 0.07259251, "memory(GiB)": 13.7, "step": 50095, "train_speed(iter/s)": 1.527056 }, { "acc": 0.9927083, "epoch": 23.482540426529177, "grad_norm": 7.760813236236572, "learning_rate": 5.908742334445954e-06, "loss": 0.02830596, "memory(GiB)": 13.7, "step": 50100, "train_speed(iter/s)": 1.527064 }, { "acc": 0.97770834, "epoch": 23.48488399343801, "grad_norm": 6.493192672729492, "learning_rate": 5.907980109845035e-06, "loss": 0.04611445, "memory(GiB)": 13.7, "step": 50105, "train_speed(iter/s)": 1.527071 }, { "acc": 0.99177084, "epoch": 23.48722756034685, "grad_norm": 5.88950777053833, "learning_rate": 5.907217863431278e-06, "loss": 0.05707879, "memory(GiB)": 13.7, "step": 50110, "train_speed(iter/s)": 1.527078 }, { "acc": 0.9864584, "epoch": 23.489571127255683, "grad_norm": 6.681368350982666, "learning_rate": 5.906455595223002e-06, "loss": 0.05489056, "memory(GiB)": 13.7, "step": 50115, "train_speed(iter/s)": 1.527073 }, { "acc": 0.97731152, "epoch": 23.491914694164517, "grad_norm": 2.7306997776031494, "learning_rate": 5.905693305238528e-06, "loss": 0.05817282, "memory(GiB)": 13.7, "step": 50120, "train_speed(iter/s)": 1.527069 }, { "acc": 0.98184528, "epoch": 23.494258261073355, "grad_norm": 7.524394512176514, "learning_rate": 5.904930993496181e-06, "loss": 0.09600468, "memory(GiB)": 13.7, "step": 50125, "train_speed(iter/s)": 1.527071 }, { "acc": 0.97899895, "epoch": 23.49660182798219, "grad_norm": 2.60306978225708, "learning_rate": 5.904168660014286e-06, "loss": 0.0610495, "memory(GiB)": 13.7, "step": 50130, "train_speed(iter/s)": 1.527077 }, { "acc": 0.99653845, "epoch": 23.498945394891024, "grad_norm": 1.5432149171829224, "learning_rate": 5.903406304811164e-06, "loss": 0.01673919, "memory(GiB)": 13.7, "step": 50135, "train_speed(iter/s)": 1.527087 }, { "acc": 0.97014341, "epoch": 23.501288961799858, "grad_norm": 0.7608736157417297, "learning_rate": 5.902643927905139e-06, "loss": 0.08461913, "memory(GiB)": 13.7, "step": 50140, "train_speed(iter/s)": 1.527099 }, { "acc": 0.9640625, "epoch": 23.503632528708696, "grad_norm": 7.360172271728516, "learning_rate": 5.901881529314539e-06, "loss": 0.11890478, "memory(GiB)": 13.7, "step": 50145, "train_speed(iter/s)": 1.527107 }, { "acc": 0.98635426, "epoch": 23.50597609561753, "grad_norm": 2.2097511291503906, "learning_rate": 5.9011191090576905e-06, "loss": 0.06269683, "memory(GiB)": 13.7, "step": 50150, "train_speed(iter/s)": 1.527114 }, { "acc": 0.98287144, "epoch": 23.508319662526365, "grad_norm": 0.029776331037282944, "learning_rate": 5.9003566671529165e-06, "loss": 0.05479947, "memory(GiB)": 13.7, "step": 50155, "train_speed(iter/s)": 1.52712 }, { "acc": 0.9807291, "epoch": 23.5106632294352, "grad_norm": 0.01769895851612091, "learning_rate": 5.899594203618544e-06, "loss": 0.03966053, "memory(GiB)": 13.7, "step": 50160, "train_speed(iter/s)": 1.527131 }, { "acc": 0.98819447, "epoch": 23.513006796344037, "grad_norm": 21.74225425720215, "learning_rate": 5.8988317184729e-06, "loss": 0.02835259, "memory(GiB)": 13.7, "step": 50165, "train_speed(iter/s)": 1.527134 }, { "acc": 0.99109373, "epoch": 23.51535036325287, "grad_norm": 2.5352771282196045, "learning_rate": 5.898069211734312e-06, "loss": 0.04165176, "memory(GiB)": 13.7, "step": 50170, "train_speed(iter/s)": 1.527131 }, { "acc": 0.97711201, "epoch": 23.517693930161705, "grad_norm": 3.782625198364258, "learning_rate": 5.89730668342111e-06, "loss": 0.0653317, "memory(GiB)": 13.7, "step": 50175, "train_speed(iter/s)": 1.52713 }, { "acc": 0.9783123, "epoch": 23.52003749707054, "grad_norm": 6.367305755615234, "learning_rate": 5.89654413355162e-06, "loss": 0.08101118, "memory(GiB)": 13.7, "step": 50180, "train_speed(iter/s)": 1.527136 }, { "acc": 0.96750002, "epoch": 23.522381063979378, "grad_norm": 4.135522365570068, "learning_rate": 5.895781562144175e-06, "loss": 0.06670566, "memory(GiB)": 13.7, "step": 50185, "train_speed(iter/s)": 1.527146 }, { "acc": 0.98104172, "epoch": 23.524724630888212, "grad_norm": 0.08999735862016678, "learning_rate": 5.895018969217103e-06, "loss": 0.04680796, "memory(GiB)": 13.7, "step": 50190, "train_speed(iter/s)": 1.527148 }, { "acc": 0.97800598, "epoch": 23.527068197797046, "grad_norm": 8.59570598602295, "learning_rate": 5.894256354788732e-06, "loss": 0.05822747, "memory(GiB)": 13.7, "step": 50195, "train_speed(iter/s)": 1.527155 }, { "acc": 0.99301472, "epoch": 23.529411764705884, "grad_norm": 2.135286569595337, "learning_rate": 5.8934937188773955e-06, "loss": 0.0406938, "memory(GiB)": 13.7, "step": 50200, "train_speed(iter/s)": 1.527155 }, { "acc": 0.96789141, "epoch": 23.53175533161472, "grad_norm": 1.991441011428833, "learning_rate": 5.892731061501422e-06, "loss": 0.07338641, "memory(GiB)": 13.7, "step": 50205, "train_speed(iter/s)": 1.527163 }, { "acc": 0.97948856, "epoch": 23.534098898523553, "grad_norm": 1.6105756759643555, "learning_rate": 5.891968382679146e-06, "loss": 0.05634915, "memory(GiB)": 13.7, "step": 50210, "train_speed(iter/s)": 1.527163 }, { "acc": 0.97019444, "epoch": 23.536442465432387, "grad_norm": 7.908059120178223, "learning_rate": 5.891205682428897e-06, "loss": 0.13665149, "memory(GiB)": 13.7, "step": 50215, "train_speed(iter/s)": 1.527167 }, { "acc": 0.99487171, "epoch": 23.538786032341225, "grad_norm": 3.0217738151550293, "learning_rate": 5.890442960769013e-06, "loss": 0.0482752, "memory(GiB)": 13.7, "step": 50220, "train_speed(iter/s)": 1.527178 }, { "acc": 0.979072, "epoch": 23.54112959925006, "grad_norm": 30.675386428833008, "learning_rate": 5.889680217717822e-06, "loss": 0.0900346, "memory(GiB)": 13.7, "step": 50225, "train_speed(iter/s)": 1.527183 }, { "acc": 0.9911459, "epoch": 23.543473166158893, "grad_norm": 2.4496281147003174, "learning_rate": 5.888917453293661e-06, "loss": 0.03974819, "memory(GiB)": 13.7, "step": 50230, "train_speed(iter/s)": 1.527197 }, { "acc": 0.98154764, "epoch": 23.545816733067728, "grad_norm": 5.673641681671143, "learning_rate": 5.888154667514863e-06, "loss": 0.03088019, "memory(GiB)": 13.7, "step": 50235, "train_speed(iter/s)": 1.527207 }, { "acc": 0.97663698, "epoch": 23.548160299976566, "grad_norm": 3.100358486175537, "learning_rate": 5.88739186039976e-06, "loss": 0.07668243, "memory(GiB)": 13.7, "step": 50240, "train_speed(iter/s)": 1.527211 }, { "acc": 0.99562502, "epoch": 23.5505038668854, "grad_norm": 0.015696782618761063, "learning_rate": 5.886629031966694e-06, "loss": 0.02120163, "memory(GiB)": 13.7, "step": 50245, "train_speed(iter/s)": 1.527224 }, { "acc": 0.98228807, "epoch": 23.552847433794234, "grad_norm": 4.484205722808838, "learning_rate": 5.885866182233995e-06, "loss": 0.08174686, "memory(GiB)": 13.7, "step": 50250, "train_speed(iter/s)": 1.527234 }, { "acc": 0.97574406, "epoch": 23.55519100070307, "grad_norm": 3.3257763385772705, "learning_rate": 5.885103311220002e-06, "loss": 0.09947904, "memory(GiB)": 13.7, "step": 50255, "train_speed(iter/s)": 1.527238 }, { "acc": 0.98973217, "epoch": 23.557534567611906, "grad_norm": 2.810624361038208, "learning_rate": 5.884340418943052e-06, "loss": 0.03157868, "memory(GiB)": 13.7, "step": 50260, "train_speed(iter/s)": 1.527236 }, { "acc": 0.98556557, "epoch": 23.55987813452074, "grad_norm": 4.489234924316406, "learning_rate": 5.883577505421481e-06, "loss": 0.05933725, "memory(GiB)": 13.7, "step": 50265, "train_speed(iter/s)": 1.527226 }, { "acc": 0.9738637, "epoch": 23.562221701429575, "grad_norm": 5.1044816970825195, "learning_rate": 5.882814570673629e-06, "loss": 0.09828201, "memory(GiB)": 13.7, "step": 50270, "train_speed(iter/s)": 1.52723 }, { "acc": 0.97782202, "epoch": 23.564565268338413, "grad_norm": 7.8099894523620605, "learning_rate": 5.882051614717834e-06, "loss": 0.09487119, "memory(GiB)": 13.7, "step": 50275, "train_speed(iter/s)": 1.527238 }, { "acc": 0.98611107, "epoch": 23.566908835247247, "grad_norm": 6.138639450073242, "learning_rate": 5.8812886375724334e-06, "loss": 0.09330968, "memory(GiB)": 13.7, "step": 50280, "train_speed(iter/s)": 1.527245 }, { "acc": 0.9828125, "epoch": 23.56925240215608, "grad_norm": 11.029707908630371, "learning_rate": 5.880525639255766e-06, "loss": 0.06045432, "memory(GiB)": 13.7, "step": 50285, "train_speed(iter/s)": 1.52725 }, { "acc": 0.98827114, "epoch": 23.571595969064916, "grad_norm": 0.007342529948800802, "learning_rate": 5.879762619786177e-06, "loss": 0.04672126, "memory(GiB)": 13.7, "step": 50290, "train_speed(iter/s)": 1.527251 }, { "acc": 0.990625, "epoch": 23.573939535973754, "grad_norm": 4.665190696716309, "learning_rate": 5.878999579182e-06, "loss": 0.02700973, "memory(GiB)": 13.7, "step": 50295, "train_speed(iter/s)": 1.527255 }, { "acc": 0.9875, "epoch": 23.576283102882588, "grad_norm": 0.03236500918865204, "learning_rate": 5.87823651746158e-06, "loss": 0.08235089, "memory(GiB)": 13.7, "step": 50300, "train_speed(iter/s)": 1.527264 }, { "acc": 0.9708334, "epoch": 23.578626669791422, "grad_norm": 8.789278984069824, "learning_rate": 5.877473434643258e-06, "loss": 0.1600966, "memory(GiB)": 13.7, "step": 50305, "train_speed(iter/s)": 1.527271 }, { "acc": 0.99192715, "epoch": 23.580970236700256, "grad_norm": 0.5777102112770081, "learning_rate": 5.8767103307453754e-06, "loss": 0.0426353, "memory(GiB)": 13.7, "step": 50310, "train_speed(iter/s)": 1.527273 }, { "acc": 0.98217258, "epoch": 23.583313803609094, "grad_norm": 7.125705718994141, "learning_rate": 5.875947205786278e-06, "loss": 0.05787922, "memory(GiB)": 13.7, "step": 50315, "train_speed(iter/s)": 1.527284 }, { "acc": 0.9677084, "epoch": 23.58565737051793, "grad_norm": 6.267518997192383, "learning_rate": 5.8751840597843025e-06, "loss": 0.12375157, "memory(GiB)": 13.7, "step": 50320, "train_speed(iter/s)": 1.527296 }, { "acc": 0.9869792, "epoch": 23.588000937426763, "grad_norm": 2.5910592079162598, "learning_rate": 5.874420892757796e-06, "loss": 0.02966031, "memory(GiB)": 13.7, "step": 50325, "train_speed(iter/s)": 1.527305 }, { "acc": 0.97582788, "epoch": 23.590344504335597, "grad_norm": 4.746623516082764, "learning_rate": 5.873657704725102e-06, "loss": 0.08758091, "memory(GiB)": 13.7, "step": 50330, "train_speed(iter/s)": 1.527322 }, { "acc": 0.96798611, "epoch": 23.592688071244435, "grad_norm": 4.440821170806885, "learning_rate": 5.872894495704567e-06, "loss": 0.10973558, "memory(GiB)": 13.7, "step": 50335, "train_speed(iter/s)": 1.527328 }, { "acc": 0.99750004, "epoch": 23.59503163815327, "grad_norm": 0.031173275783658028, "learning_rate": 5.872131265714533e-06, "loss": 0.0215341, "memory(GiB)": 13.7, "step": 50340, "train_speed(iter/s)": 1.527337 }, { "acc": 0.99028273, "epoch": 23.597375205062104, "grad_norm": 2.043365001678467, "learning_rate": 5.871368014773347e-06, "loss": 0.05393953, "memory(GiB)": 13.7, "step": 50345, "train_speed(iter/s)": 1.527337 }, { "acc": 0.97250004, "epoch": 23.59971877197094, "grad_norm": 2.665618419647217, "learning_rate": 5.8706047428993565e-06, "loss": 0.08163979, "memory(GiB)": 13.7, "step": 50350, "train_speed(iter/s)": 1.527347 }, { "acc": 0.996875, "epoch": 23.602062338879776, "grad_norm": 1.1045160293579102, "learning_rate": 5.869841450110903e-06, "loss": 0.02391827, "memory(GiB)": 13.7, "step": 50355, "train_speed(iter/s)": 1.527349 }, { "acc": 0.97152777, "epoch": 23.60440590578861, "grad_norm": 0.006446284707635641, "learning_rate": 5.869078136426342e-06, "loss": 0.0595177, "memory(GiB)": 13.7, "step": 50360, "train_speed(iter/s)": 1.527359 }, { "acc": 0.99520836, "epoch": 23.606749472697445, "grad_norm": 1.5306347608566284, "learning_rate": 5.868314801864013e-06, "loss": 0.02064364, "memory(GiB)": 13.7, "step": 50365, "train_speed(iter/s)": 1.527356 }, { "acc": 0.98178024, "epoch": 23.609093039606282, "grad_norm": 3.2848780155181885, "learning_rate": 5.867551446442267e-06, "loss": 0.06372001, "memory(GiB)": 13.7, "step": 50370, "train_speed(iter/s)": 1.527358 }, { "acc": 0.98615532, "epoch": 23.611436606515117, "grad_norm": 2.1642940044403076, "learning_rate": 5.866788070179454e-06, "loss": 0.04216711, "memory(GiB)": 13.7, "step": 50375, "train_speed(iter/s)": 1.527358 }, { "acc": 0.99514608, "epoch": 23.61378017342395, "grad_norm": 0.485176682472229, "learning_rate": 5.866024673093921e-06, "loss": 0.03402725, "memory(GiB)": 13.7, "step": 50380, "train_speed(iter/s)": 1.527356 }, { "acc": 0.97510414, "epoch": 23.616123740332785, "grad_norm": 6.081878662109375, "learning_rate": 5.8652612552040165e-06, "loss": 0.08173505, "memory(GiB)": 13.7, "step": 50385, "train_speed(iter/s)": 1.527367 }, { "acc": 0.97636356, "epoch": 23.618467307241623, "grad_norm": 2.6192641258239746, "learning_rate": 5.8644978165280954e-06, "loss": 0.14948792, "memory(GiB)": 13.7, "step": 50390, "train_speed(iter/s)": 1.527373 }, { "acc": 0.97609625, "epoch": 23.620810874150457, "grad_norm": 3.5659477710723877, "learning_rate": 5.863734357084504e-06, "loss": 0.13209412, "memory(GiB)": 13.7, "step": 50395, "train_speed(iter/s)": 1.527374 }, { "acc": 0.9854167, "epoch": 23.62315444105929, "grad_norm": 0.2496299296617508, "learning_rate": 5.862970876891595e-06, "loss": 0.07544951, "memory(GiB)": 13.7, "step": 50400, "train_speed(iter/s)": 1.527375 }, { "acc": 0.99375, "epoch": 23.625498007968126, "grad_norm": 1.381752371788025, "learning_rate": 5.8622073759677205e-06, "loss": 0.03294426, "memory(GiB)": 13.7, "step": 50405, "train_speed(iter/s)": 1.527374 }, { "acc": 0.98160715, "epoch": 23.627841574876964, "grad_norm": 0.06980960071086884, "learning_rate": 5.86144385433123e-06, "loss": 0.06155626, "memory(GiB)": 13.7, "step": 50410, "train_speed(iter/s)": 1.527374 }, { "acc": 0.97416668, "epoch": 23.630185141785798, "grad_norm": 0.9026633501052856, "learning_rate": 5.860680312000477e-06, "loss": 0.0579529, "memory(GiB)": 13.7, "step": 50415, "train_speed(iter/s)": 1.527382 }, { "acc": 0.97734375, "epoch": 23.632528708694633, "grad_norm": 10.404989242553711, "learning_rate": 5.859916748993817e-06, "loss": 0.07398489, "memory(GiB)": 13.7, "step": 50420, "train_speed(iter/s)": 1.527395 }, { "acc": 0.98800936, "epoch": 23.634872275603467, "grad_norm": 3.7602899074554443, "learning_rate": 5.8591531653296e-06, "loss": 0.04314142, "memory(GiB)": 13.7, "step": 50425, "train_speed(iter/s)": 1.527401 }, { "acc": 0.98000002, "epoch": 23.637215842512305, "grad_norm": 1.7372171878814697, "learning_rate": 5.858389561026183e-06, "loss": 0.06844796, "memory(GiB)": 13.7, "step": 50430, "train_speed(iter/s)": 1.527403 }, { "acc": 0.9879014, "epoch": 23.63955940942114, "grad_norm": 1.3389792442321777, "learning_rate": 5.85762593610192e-06, "loss": 0.04634362, "memory(GiB)": 13.7, "step": 50435, "train_speed(iter/s)": 1.527404 }, { "acc": 0.9864584, "epoch": 23.641902976329973, "grad_norm": 2.2230069637298584, "learning_rate": 5.856862290575165e-06, "loss": 0.06228498, "memory(GiB)": 13.7, "step": 50440, "train_speed(iter/s)": 1.52741 }, { "acc": 0.99300594, "epoch": 23.64424654323881, "grad_norm": 3.764719009399414, "learning_rate": 5.856098624464274e-06, "loss": 0.03800606, "memory(GiB)": 13.7, "step": 50445, "train_speed(iter/s)": 1.527419 }, { "acc": 0.97866077, "epoch": 23.646590110147645, "grad_norm": 4.848381042480469, "learning_rate": 5.855334937787602e-06, "loss": 0.06166934, "memory(GiB)": 13.7, "step": 50450, "train_speed(iter/s)": 1.527421 }, { "acc": 0.98217258, "epoch": 23.64893367705648, "grad_norm": 6.0018510818481445, "learning_rate": 5.854571230563507e-06, "loss": 0.03606293, "memory(GiB)": 13.7, "step": 50455, "train_speed(iter/s)": 1.527428 }, { "acc": 0.990625, "epoch": 23.651277243965314, "grad_norm": 0.9338821768760681, "learning_rate": 5.853807502810345e-06, "loss": 0.05273187, "memory(GiB)": 13.7, "step": 50460, "train_speed(iter/s)": 1.527431 }, { "acc": 0.98891029, "epoch": 23.653620810874152, "grad_norm": 0.09912195056676865, "learning_rate": 5.853043754546475e-06, "loss": 0.03136954, "memory(GiB)": 13.7, "step": 50465, "train_speed(iter/s)": 1.527439 }, { "acc": 0.98790751, "epoch": 23.655964377782986, "grad_norm": 0.055013082921504974, "learning_rate": 5.852279985790252e-06, "loss": 0.05010052, "memory(GiB)": 13.7, "step": 50470, "train_speed(iter/s)": 1.527445 }, { "acc": 0.97537785, "epoch": 23.65830794469182, "grad_norm": 5.7796196937561035, "learning_rate": 5.8515161965600375e-06, "loss": 0.05889995, "memory(GiB)": 13.7, "step": 50475, "train_speed(iter/s)": 1.527451 }, { "acc": 0.9842804, "epoch": 23.660651511600655, "grad_norm": 7.136176109313965, "learning_rate": 5.850752386874189e-06, "loss": 0.08605198, "memory(GiB)": 13.7, "step": 50480, "train_speed(iter/s)": 1.527458 }, { "acc": 0.98604164, "epoch": 23.662995078509493, "grad_norm": 4.006430149078369, "learning_rate": 5.849988556751069e-06, "loss": 0.07655405, "memory(GiB)": 13.7, "step": 50485, "train_speed(iter/s)": 1.527448 }, { "acc": 0.9947917, "epoch": 23.665338645418327, "grad_norm": 3.331988573074341, "learning_rate": 5.849224706209031e-06, "loss": 0.00829382, "memory(GiB)": 13.7, "step": 50490, "train_speed(iter/s)": 1.527451 }, { "acc": 0.9916667, "epoch": 23.66768221232716, "grad_norm": 1.240201473236084, "learning_rate": 5.848460835266442e-06, "loss": 0.02280768, "memory(GiB)": 13.7, "step": 50495, "train_speed(iter/s)": 1.527453 }, { "acc": 0.9802084, "epoch": 23.670025779235996, "grad_norm": 14.883179664611816, "learning_rate": 5.847696943941659e-06, "loss": 0.11189551, "memory(GiB)": 13.7, "step": 50500, "train_speed(iter/s)": 1.527455 }, { "acc": 0.98569241, "epoch": 23.672369346144833, "grad_norm": 5.028138160705566, "learning_rate": 5.846933032253046e-06, "loss": 0.06758039, "memory(GiB)": 13.7, "step": 50505, "train_speed(iter/s)": 1.52746 }, { "acc": 0.97166672, "epoch": 23.674712913053668, "grad_norm": 4.608811378479004, "learning_rate": 5.846169100218963e-06, "loss": 0.11580782, "memory(GiB)": 13.7, "step": 50510, "train_speed(iter/s)": 1.527462 }, { "acc": 0.9877449, "epoch": 23.677056479962502, "grad_norm": 0.8536443114280701, "learning_rate": 5.845405147857773e-06, "loss": 0.04355214, "memory(GiB)": 13.7, "step": 50515, "train_speed(iter/s)": 1.527461 }, { "acc": 0.98409224, "epoch": 23.679400046871336, "grad_norm": 1.921624779701233, "learning_rate": 5.844641175187837e-06, "loss": 0.05593664, "memory(GiB)": 13.7, "step": 50520, "train_speed(iter/s)": 1.527459 }, { "acc": 0.98036709, "epoch": 23.681743613780174, "grad_norm": 4.324532508850098, "learning_rate": 5.8438771822275254e-06, "loss": 0.10432042, "memory(GiB)": 13.7, "step": 50525, "train_speed(iter/s)": 1.527459 }, { "acc": 0.9864584, "epoch": 23.68408718068901, "grad_norm": 4.153758525848389, "learning_rate": 5.843113168995193e-06, "loss": 0.01667536, "memory(GiB)": 13.7, "step": 50530, "train_speed(iter/s)": 1.52746 }, { "acc": 0.9697917, "epoch": 23.686430747597843, "grad_norm": 1.4884939193725586, "learning_rate": 5.842349135509209e-06, "loss": 0.06942875, "memory(GiB)": 13.7, "step": 50535, "train_speed(iter/s)": 1.527468 }, { "acc": 0.98041668, "epoch": 23.68877431450668, "grad_norm": 0.4886970520019531, "learning_rate": 5.8415850817879385e-06, "loss": 0.10801873, "memory(GiB)": 13.7, "step": 50540, "train_speed(iter/s)": 1.527472 }, { "acc": 0.97767859, "epoch": 23.691117881415515, "grad_norm": 4.155651092529297, "learning_rate": 5.840821007849744e-06, "loss": 0.08348117, "memory(GiB)": 13.7, "step": 50545, "train_speed(iter/s)": 1.52747 }, { "acc": 0.96153278, "epoch": 23.69346144832435, "grad_norm": 7.2062201499938965, "learning_rate": 5.8400569137129935e-06, "loss": 0.15763417, "memory(GiB)": 13.7, "step": 50550, "train_speed(iter/s)": 1.527475 }, { "acc": 0.99375, "epoch": 23.695805015233184, "grad_norm": 1.8156794309616089, "learning_rate": 5.839292799396055e-06, "loss": 0.03872674, "memory(GiB)": 13.7, "step": 50555, "train_speed(iter/s)": 1.527478 }, { "acc": 0.98410711, "epoch": 23.69814858214202, "grad_norm": 1.3774590492248535, "learning_rate": 5.83852866491729e-06, "loss": 0.085822, "memory(GiB)": 13.7, "step": 50560, "train_speed(iter/s)": 1.527482 }, { "acc": 0.98275852, "epoch": 23.700492149050856, "grad_norm": 5.376256465911865, "learning_rate": 5.837764510295071e-06, "loss": 0.11192638, "memory(GiB)": 13.7, "step": 50565, "train_speed(iter/s)": 1.527485 }, { "acc": 0.97704449, "epoch": 23.70283571595969, "grad_norm": 7.423428058624268, "learning_rate": 5.8370003355477654e-06, "loss": 0.14807781, "memory(GiB)": 13.7, "step": 50570, "train_speed(iter/s)": 1.527493 }, { "acc": 0.99653845, "epoch": 23.705179282868524, "grad_norm": 2.8192851543426514, "learning_rate": 5.836236140693737e-06, "loss": 0.03882362, "memory(GiB)": 13.7, "step": 50575, "train_speed(iter/s)": 1.527494 }, { "acc": 0.9822917, "epoch": 23.707522849777362, "grad_norm": 78.15628814697266, "learning_rate": 5.835471925751359e-06, "loss": 0.05434803, "memory(GiB)": 13.7, "step": 50580, "train_speed(iter/s)": 1.527494 }, { "acc": 0.9875, "epoch": 23.709866416686197, "grad_norm": 5.960597991943359, "learning_rate": 5.834707690738998e-06, "loss": 0.05265442, "memory(GiB)": 13.7, "step": 50585, "train_speed(iter/s)": 1.527504 }, { "acc": 0.99293652, "epoch": 23.71220998359503, "grad_norm": 3.286310911178589, "learning_rate": 5.8339434356750246e-06, "loss": 0.03151342, "memory(GiB)": 13.7, "step": 50590, "train_speed(iter/s)": 1.527512 }, { "acc": 0.98447914, "epoch": 23.714553550503865, "grad_norm": 1.957764983177185, "learning_rate": 5.833179160577811e-06, "loss": 0.04527096, "memory(GiB)": 13.7, "step": 50595, "train_speed(iter/s)": 1.527523 }, { "acc": 0.98904762, "epoch": 23.716897117412703, "grad_norm": 3.360884189605713, "learning_rate": 5.832414865465724e-06, "loss": 0.08167655, "memory(GiB)": 13.7, "step": 50600, "train_speed(iter/s)": 1.527525 }, { "acc": 0.9885416, "epoch": 23.719240684321537, "grad_norm": 2.6590511798858643, "learning_rate": 5.831650550357136e-06, "loss": 0.05272642, "memory(GiB)": 13.7, "step": 50605, "train_speed(iter/s)": 1.527535 }, { "acc": 0.98443184, "epoch": 23.72158425123037, "grad_norm": 4.058681488037109, "learning_rate": 5.830886215270424e-06, "loss": 0.09250606, "memory(GiB)": 13.7, "step": 50610, "train_speed(iter/s)": 1.527539 }, { "acc": 0.9889204, "epoch": 23.72392781813921, "grad_norm": 2.1159565448760986, "learning_rate": 5.830121860223951e-06, "loss": 0.03544314, "memory(GiB)": 13.7, "step": 50615, "train_speed(iter/s)": 1.527548 }, { "acc": 0.97876492, "epoch": 23.726271385048044, "grad_norm": 0.29998764395713806, "learning_rate": 5.829357485236095e-06, "loss": 0.11432924, "memory(GiB)": 13.7, "step": 50620, "train_speed(iter/s)": 1.527561 }, { "acc": 0.99541664, "epoch": 23.728614951956878, "grad_norm": 3.011413097381592, "learning_rate": 5.828593090325227e-06, "loss": 0.04857162, "memory(GiB)": 13.7, "step": 50625, "train_speed(iter/s)": 1.527562 }, { "acc": 0.97637148, "epoch": 23.730958518865712, "grad_norm": 2.5782294273376465, "learning_rate": 5.8278286755097235e-06, "loss": 0.11362724, "memory(GiB)": 13.7, "step": 50630, "train_speed(iter/s)": 1.527565 }, { "acc": 0.97875004, "epoch": 23.73330208577455, "grad_norm": 5.326374530792236, "learning_rate": 5.8270642408079555e-06, "loss": 0.06606963, "memory(GiB)": 13.7, "step": 50635, "train_speed(iter/s)": 1.527575 }, { "acc": 0.975, "epoch": 23.735645652683385, "grad_norm": 6.572592258453369, "learning_rate": 5.826299786238299e-06, "loss": 0.06658185, "memory(GiB)": 13.7, "step": 50640, "train_speed(iter/s)": 1.527577 }, { "acc": 0.98434486, "epoch": 23.73798921959222, "grad_norm": 0.9388589262962341, "learning_rate": 5.825535311819127e-06, "loss": 0.04660442, "memory(GiB)": 13.7, "step": 50645, "train_speed(iter/s)": 1.527579 }, { "acc": 0.98443947, "epoch": 23.740332786501053, "grad_norm": 0.5012564659118652, "learning_rate": 5.82477081756882e-06, "loss": 0.09146976, "memory(GiB)": 13.7, "step": 50650, "train_speed(iter/s)": 1.527576 }, { "acc": 0.9817708, "epoch": 23.74267635340989, "grad_norm": 1.7562105655670166, "learning_rate": 5.824006303505749e-06, "loss": 0.02747164, "memory(GiB)": 13.7, "step": 50655, "train_speed(iter/s)": 1.527567 }, { "acc": 0.99125004, "epoch": 23.745019920318725, "grad_norm": 0.022373080253601074, "learning_rate": 5.8232417696482926e-06, "loss": 0.03210505, "memory(GiB)": 13.7, "step": 50660, "train_speed(iter/s)": 1.527561 }, { "acc": 0.97520218, "epoch": 23.74736348722756, "grad_norm": 4.787998676300049, "learning_rate": 5.822477216014823e-06, "loss": 0.07188817, "memory(GiB)": 13.7, "step": 50665, "train_speed(iter/s)": 1.527566 }, { "acc": 0.990382, "epoch": 23.749707054136394, "grad_norm": 3.443086624145508, "learning_rate": 5.821712642623726e-06, "loss": 0.06378567, "memory(GiB)": 13.7, "step": 50670, "train_speed(iter/s)": 1.527574 }, { "acc": 0.98556557, "epoch": 23.752050621045232, "grad_norm": 1.3022143840789795, "learning_rate": 5.820948049493371e-06, "loss": 0.05820883, "memory(GiB)": 13.7, "step": 50675, "train_speed(iter/s)": 1.527571 }, { "acc": 0.97479172, "epoch": 23.754394187954066, "grad_norm": 3.6647286415100098, "learning_rate": 5.8201834366421436e-06, "loss": 0.10630221, "memory(GiB)": 13.7, "step": 50680, "train_speed(iter/s)": 1.527566 }, { "acc": 0.9770833, "epoch": 23.7567377548629, "grad_norm": 13.030074119567871, "learning_rate": 5.819418804088419e-06, "loss": 0.07940828, "memory(GiB)": 13.7, "step": 50685, "train_speed(iter/s)": 1.527576 }, { "acc": 0.9864584, "epoch": 23.75908132177174, "grad_norm": 3.420429229736328, "learning_rate": 5.8186541518505745e-06, "loss": 0.04605036, "memory(GiB)": 13.7, "step": 50690, "train_speed(iter/s)": 1.527583 }, { "acc": 0.98041668, "epoch": 23.761424888680573, "grad_norm": 2.8639800548553467, "learning_rate": 5.817889479946995e-06, "loss": 0.04234032, "memory(GiB)": 13.7, "step": 50695, "train_speed(iter/s)": 1.527585 }, { "acc": 0.98184528, "epoch": 23.763768455589407, "grad_norm": 4.655384063720703, "learning_rate": 5.817124788396057e-06, "loss": 0.06803277, "memory(GiB)": 13.7, "step": 50700, "train_speed(iter/s)": 1.527592 }, { "acc": 0.98916664, "epoch": 23.76611202249824, "grad_norm": 5.068323135375977, "learning_rate": 5.8163600772161415e-06, "loss": 0.01908399, "memory(GiB)": 13.7, "step": 50705, "train_speed(iter/s)": 1.527595 }, { "acc": 0.98770828, "epoch": 23.76845558940708, "grad_norm": 3.5406510829925537, "learning_rate": 5.815595346425629e-06, "loss": 0.02969787, "memory(GiB)": 13.7, "step": 50710, "train_speed(iter/s)": 1.527599 }, { "acc": 0.9885088, "epoch": 23.770799156315913, "grad_norm": 2.488900661468506, "learning_rate": 5.814830596042903e-06, "loss": 0.08473938, "memory(GiB)": 13.7, "step": 50715, "train_speed(iter/s)": 1.527604 }, { "acc": 0.98395834, "epoch": 23.773142723224748, "grad_norm": 4.638258934020996, "learning_rate": 5.8140658260863434e-06, "loss": 0.04432088, "memory(GiB)": 13.7, "step": 50720, "train_speed(iter/s)": 1.527605 }, { "acc": 0.984375, "epoch": 23.775486290133582, "grad_norm": 2.805039405822754, "learning_rate": 5.813301036574336e-06, "loss": 0.0923733, "memory(GiB)": 13.7, "step": 50725, "train_speed(iter/s)": 1.527607 }, { "acc": 0.98149805, "epoch": 23.77782985704242, "grad_norm": 0.3566790819168091, "learning_rate": 5.812536227525261e-06, "loss": 0.10371733, "memory(GiB)": 13.7, "step": 50730, "train_speed(iter/s)": 1.527619 }, { "acc": 0.98622026, "epoch": 23.780173423951254, "grad_norm": 0.039769113063812256, "learning_rate": 5.8117713989575045e-06, "loss": 0.04654004, "memory(GiB)": 13.7, "step": 50735, "train_speed(iter/s)": 1.527625 }, { "acc": 0.96863098, "epoch": 23.78251699086009, "grad_norm": 4.818794250488281, "learning_rate": 5.8110065508894475e-06, "loss": 0.08921784, "memory(GiB)": 13.7, "step": 50740, "train_speed(iter/s)": 1.52763 }, { "acc": 0.98486614, "epoch": 23.784860557768923, "grad_norm": 3.2727417945861816, "learning_rate": 5.810241683339475e-06, "loss": 0.05854529, "memory(GiB)": 13.7, "step": 50745, "train_speed(iter/s)": 1.527636 }, { "acc": 0.99229164, "epoch": 23.78720412467776, "grad_norm": 1.3479431867599487, "learning_rate": 5.809476796325972e-06, "loss": 0.04452446, "memory(GiB)": 13.7, "step": 50750, "train_speed(iter/s)": 1.527645 }, { "acc": 0.97840281, "epoch": 23.789547691586595, "grad_norm": 3.0087478160858154, "learning_rate": 5.808711889867326e-06, "loss": 0.0874336, "memory(GiB)": 13.7, "step": 50755, "train_speed(iter/s)": 1.527647 }, { "acc": 0.99571428, "epoch": 23.79189125849543, "grad_norm": 0.0038087053690105677, "learning_rate": 5.807946963981922e-06, "loss": 0.01388451, "memory(GiB)": 13.7, "step": 50760, "train_speed(iter/s)": 1.527655 }, { "acc": 0.97677078, "epoch": 23.794234825404267, "grad_norm": 1.120784044265747, "learning_rate": 5.807182018688143e-06, "loss": 0.05421084, "memory(GiB)": 13.7, "step": 50765, "train_speed(iter/s)": 1.527667 }, { "acc": 0.9885416, "epoch": 23.7965783923131, "grad_norm": 0.06918569654226303, "learning_rate": 5.806417054004381e-06, "loss": 0.04846992, "memory(GiB)": 13.7, "step": 50770, "train_speed(iter/s)": 1.52768 }, { "acc": 0.98675594, "epoch": 23.798921959221936, "grad_norm": 2.5566892623901367, "learning_rate": 5.805652069949022e-06, "loss": 0.05519506, "memory(GiB)": 13.7, "step": 50775, "train_speed(iter/s)": 1.527678 }, { "acc": 0.99352903, "epoch": 23.80126552613077, "grad_norm": 5.325674057006836, "learning_rate": 5.804887066540449e-06, "loss": 0.06421821, "memory(GiB)": 13.7, "step": 50780, "train_speed(iter/s)": 1.527683 }, { "acc": 0.98354168, "epoch": 23.803609093039608, "grad_norm": 3.572855234146118, "learning_rate": 5.804122043797055e-06, "loss": 0.07162317, "memory(GiB)": 13.7, "step": 50785, "train_speed(iter/s)": 1.527685 }, { "acc": 0.98586311, "epoch": 23.805952659948442, "grad_norm": 3.734692096710205, "learning_rate": 5.803357001737227e-06, "loss": 0.04395186, "memory(GiB)": 13.7, "step": 50790, "train_speed(iter/s)": 1.527694 }, { "acc": 0.97967262, "epoch": 23.808296226857276, "grad_norm": 6.203829765319824, "learning_rate": 5.802591940379354e-06, "loss": 0.10705955, "memory(GiB)": 13.7, "step": 50795, "train_speed(iter/s)": 1.527701 }, { "acc": 0.96823864, "epoch": 23.81063979376611, "grad_norm": 8.683506965637207, "learning_rate": 5.801826859741826e-06, "loss": 0.09039857, "memory(GiB)": 13.7, "step": 50800, "train_speed(iter/s)": 1.527707 }, { "acc": 0.98310089, "epoch": 23.81298336067495, "grad_norm": 0.015385613776743412, "learning_rate": 5.801061759843033e-06, "loss": 0.07151136, "memory(GiB)": 13.7, "step": 50805, "train_speed(iter/s)": 1.527717 }, { "acc": 0.979072, "epoch": 23.815326927583783, "grad_norm": 3.7754621505737305, "learning_rate": 5.800296640701365e-06, "loss": 0.05603767, "memory(GiB)": 13.7, "step": 50810, "train_speed(iter/s)": 1.527724 }, { "acc": 0.98126354, "epoch": 23.817670494492617, "grad_norm": 2.804293394088745, "learning_rate": 5.7995315023352135e-06, "loss": 0.10472033, "memory(GiB)": 13.7, "step": 50815, "train_speed(iter/s)": 1.527731 }, { "acc": 0.97381945, "epoch": 23.82001406140145, "grad_norm": 5.59134578704834, "learning_rate": 5.798766344762972e-06, "loss": 0.07297589, "memory(GiB)": 13.7, "step": 50820, "train_speed(iter/s)": 1.52774 }, { "acc": 0.97910719, "epoch": 23.82235762831029, "grad_norm": 0.051089610904455185, "learning_rate": 5.798001168003027e-06, "loss": 0.06250177, "memory(GiB)": 13.7, "step": 50825, "train_speed(iter/s)": 1.52775 }, { "acc": 0.98894348, "epoch": 23.824701195219124, "grad_norm": 2.124880313873291, "learning_rate": 5.797235972073775e-06, "loss": 0.03119235, "memory(GiB)": 13.7, "step": 50830, "train_speed(iter/s)": 1.527752 }, { "acc": 0.96854172, "epoch": 23.827044762127958, "grad_norm": 3.9022552967071533, "learning_rate": 5.7964707569936075e-06, "loss": 0.12605394, "memory(GiB)": 13.7, "step": 50835, "train_speed(iter/s)": 1.527756 }, { "acc": 0.98237181, "epoch": 23.829388329036796, "grad_norm": 0.01856887899339199, "learning_rate": 5.795705522780917e-06, "loss": 0.0437305, "memory(GiB)": 13.7, "step": 50840, "train_speed(iter/s)": 1.527757 }, { "acc": 0.99375, "epoch": 23.83173189594563, "grad_norm": 0.00582914287224412, "learning_rate": 5.794940269454099e-06, "loss": 0.0477328, "memory(GiB)": 13.7, "step": 50845, "train_speed(iter/s)": 1.527755 }, { "acc": 0.9854167, "epoch": 23.834075462854464, "grad_norm": 4.210268020629883, "learning_rate": 5.794174997031546e-06, "loss": 0.03599311, "memory(GiB)": 13.7, "step": 50850, "train_speed(iter/s)": 1.527757 }, { "acc": 0.98253212, "epoch": 23.8364190297633, "grad_norm": 0.01715634949505329, "learning_rate": 5.793409705531655e-06, "loss": 0.07539081, "memory(GiB)": 13.7, "step": 50855, "train_speed(iter/s)": 1.527762 }, { "acc": 0.98291664, "epoch": 23.838762596672137, "grad_norm": 4.736157417297363, "learning_rate": 5.792644394972821e-06, "loss": 0.05715495, "memory(GiB)": 13.7, "step": 50860, "train_speed(iter/s)": 1.527764 }, { "acc": 0.98718204, "epoch": 23.84110616358097, "grad_norm": 1.7591094970703125, "learning_rate": 5.791879065373436e-06, "loss": 0.06471758, "memory(GiB)": 13.7, "step": 50865, "train_speed(iter/s)": 1.527773 }, { "acc": 0.97979164, "epoch": 23.843449730489805, "grad_norm": 4.548538684844971, "learning_rate": 5.791113716751897e-06, "loss": 0.10841261, "memory(GiB)": 13.7, "step": 50870, "train_speed(iter/s)": 1.527782 }, { "acc": 0.9833334, "epoch": 23.84579329739864, "grad_norm": 0.9175550937652588, "learning_rate": 5.790348349126603e-06, "loss": 0.05050828, "memory(GiB)": 13.7, "step": 50875, "train_speed(iter/s)": 1.52779 }, { "acc": 0.98807497, "epoch": 23.848136864307477, "grad_norm": 5.149914741516113, "learning_rate": 5.789582962515951e-06, "loss": 0.03363598, "memory(GiB)": 13.7, "step": 50880, "train_speed(iter/s)": 1.527801 }, { "acc": 0.97737246, "epoch": 23.85048043121631, "grad_norm": 7.053438663482666, "learning_rate": 5.788817556938334e-06, "loss": 0.08786252, "memory(GiB)": 13.7, "step": 50885, "train_speed(iter/s)": 1.527804 }, { "acc": 0.98883934, "epoch": 23.852823998125146, "grad_norm": 4.794412612915039, "learning_rate": 5.7880521324121545e-06, "loss": 0.03485243, "memory(GiB)": 13.7, "step": 50890, "train_speed(iter/s)": 1.527813 }, { "acc": 0.9822588, "epoch": 23.85516756503398, "grad_norm": 1.6623996496200562, "learning_rate": 5.787286688955808e-06, "loss": 0.04594037, "memory(GiB)": 13.7, "step": 50895, "train_speed(iter/s)": 1.527819 }, { "acc": 0.97770844, "epoch": 23.857511131942818, "grad_norm": 3.384021043777466, "learning_rate": 5.786521226587696e-06, "loss": 0.08332324, "memory(GiB)": 13.7, "step": 50900, "train_speed(iter/s)": 1.527816 }, { "acc": 0.96645298, "epoch": 23.859854698851652, "grad_norm": 3.8055050373077393, "learning_rate": 5.785755745326216e-06, "loss": 0.1437652, "memory(GiB)": 13.7, "step": 50905, "train_speed(iter/s)": 1.52782 }, { "acc": 0.97870045, "epoch": 23.862198265760487, "grad_norm": 4.283801555633545, "learning_rate": 5.7849902451897665e-06, "loss": 0.04268962, "memory(GiB)": 13.7, "step": 50910, "train_speed(iter/s)": 1.527822 }, { "acc": 0.98258934, "epoch": 23.86454183266932, "grad_norm": 343.20587158203125, "learning_rate": 5.784224726196748e-06, "loss": 0.09814395, "memory(GiB)": 13.7, "step": 50915, "train_speed(iter/s)": 1.527826 }, { "acc": 0.98461304, "epoch": 23.86688539957816, "grad_norm": 5.578333854675293, "learning_rate": 5.783459188365564e-06, "loss": 0.04696192, "memory(GiB)": 13.7, "step": 50920, "train_speed(iter/s)": 1.527832 }, { "acc": 0.96520824, "epoch": 23.869228966486993, "grad_norm": 4.21998929977417, "learning_rate": 5.7826936317146095e-06, "loss": 0.14669108, "memory(GiB)": 13.7, "step": 50925, "train_speed(iter/s)": 1.527845 }, { "acc": 0.98625002, "epoch": 23.871572533395828, "grad_norm": 2.893183469772339, "learning_rate": 5.781928056262294e-06, "loss": 0.04678804, "memory(GiB)": 13.7, "step": 50930, "train_speed(iter/s)": 1.527853 }, { "acc": 0.98708324, "epoch": 23.873916100304665, "grad_norm": 2.687941551208496, "learning_rate": 5.781162462027014e-06, "loss": 0.04639213, "memory(GiB)": 13.7, "step": 50935, "train_speed(iter/s)": 1.52786 }, { "acc": 0.98395824, "epoch": 23.8762596672135, "grad_norm": 4.881972312927246, "learning_rate": 5.780396849027171e-06, "loss": 0.05896832, "memory(GiB)": 13.7, "step": 50940, "train_speed(iter/s)": 1.527869 }, { "acc": 0.9802084, "epoch": 23.878603234122334, "grad_norm": 3.2989890575408936, "learning_rate": 5.779631217281173e-06, "loss": 0.07464359, "memory(GiB)": 13.7, "step": 50945, "train_speed(iter/s)": 1.527874 }, { "acc": 0.98657198, "epoch": 23.88094680103117, "grad_norm": 2.8199799060821533, "learning_rate": 5.7788655668074186e-06, "loss": 0.05353664, "memory(GiB)": 13.7, "step": 50950, "train_speed(iter/s)": 1.527878 }, { "acc": 0.9875, "epoch": 23.883290367940006, "grad_norm": 2.802713632583618, "learning_rate": 5.778099897624313e-06, "loss": 0.03983931, "memory(GiB)": 13.7, "step": 50955, "train_speed(iter/s)": 1.527876 }, { "acc": 0.97883015, "epoch": 23.88563393484884, "grad_norm": 3.5343494415283203, "learning_rate": 5.777334209750262e-06, "loss": 0.06258017, "memory(GiB)": 13.7, "step": 50960, "train_speed(iter/s)": 1.527878 }, { "acc": 0.97211313, "epoch": 23.887977501757675, "grad_norm": 2.010122060775757, "learning_rate": 5.776568503203667e-06, "loss": 0.09055348, "memory(GiB)": 13.7, "step": 50965, "train_speed(iter/s)": 1.527879 }, { "acc": 0.98812504, "epoch": 23.89032106866651, "grad_norm": 2.764254570007324, "learning_rate": 5.7758027780029345e-06, "loss": 0.05115424, "memory(GiB)": 13.7, "step": 50970, "train_speed(iter/s)": 1.527883 }, { "acc": 0.9916666, "epoch": 23.892664635575347, "grad_norm": 1.7569273710250854, "learning_rate": 5.775037034166471e-06, "loss": 0.03395912, "memory(GiB)": 13.7, "step": 50975, "train_speed(iter/s)": 1.52789 }, { "acc": 0.99193182, "epoch": 23.89500820248418, "grad_norm": 0.5379294753074646, "learning_rate": 5.774271271712683e-06, "loss": 0.04289381, "memory(GiB)": 13.7, "step": 50980, "train_speed(iter/s)": 1.527898 }, { "acc": 0.975, "epoch": 23.897351769393016, "grad_norm": 4.188783168792725, "learning_rate": 5.773505490659974e-06, "loss": 0.09679459, "memory(GiB)": 13.7, "step": 50985, "train_speed(iter/s)": 1.527903 }, { "acc": 0.97317533, "epoch": 23.89969533630185, "grad_norm": 6.742992877960205, "learning_rate": 5.7727396910267534e-06, "loss": 0.09450188, "memory(GiB)": 13.7, "step": 50990, "train_speed(iter/s)": 1.527899 }, { "acc": 0.98625002, "epoch": 23.902038903210688, "grad_norm": 4.561243057250977, "learning_rate": 5.771973872831426e-06, "loss": 0.07342046, "memory(GiB)": 13.7, "step": 50995, "train_speed(iter/s)": 1.5279 }, { "acc": 0.9854167, "epoch": 23.904382470119522, "grad_norm": 3.167583465576172, "learning_rate": 5.771208036092403e-06, "loss": 0.03200497, "memory(GiB)": 13.7, "step": 51000, "train_speed(iter/s)": 1.527901 }, { "acc": 0.99229164, "epoch": 23.906726037028356, "grad_norm": 2.125514268875122, "learning_rate": 5.77044218082809e-06, "loss": 0.0241163, "memory(GiB)": 13.7, "step": 51005, "train_speed(iter/s)": 1.527916 }, { "acc": 0.98308535, "epoch": 23.90906960393719, "grad_norm": 5.35009241104126, "learning_rate": 5.769676307056897e-06, "loss": 0.0639535, "memory(GiB)": 13.7, "step": 51010, "train_speed(iter/s)": 1.527922 }, { "acc": 0.99004402, "epoch": 23.91141317084603, "grad_norm": 2.5748519897460938, "learning_rate": 5.768910414797231e-06, "loss": 0.07621412, "memory(GiB)": 13.7, "step": 51015, "train_speed(iter/s)": 1.527926 }, { "acc": 0.99627972, "epoch": 23.913756737754863, "grad_norm": 0.929726779460907, "learning_rate": 5.768144504067505e-06, "loss": 0.03400516, "memory(GiB)": 13.7, "step": 51020, "train_speed(iter/s)": 1.527934 }, { "acc": 0.97553034, "epoch": 23.916100304663697, "grad_norm": 6.706120491027832, "learning_rate": 5.767378574886126e-06, "loss": 0.06258971, "memory(GiB)": 13.7, "step": 51025, "train_speed(iter/s)": 1.527937 }, { "acc": 0.9859375, "epoch": 23.918443871572535, "grad_norm": 1.202452301979065, "learning_rate": 5.766612627271506e-06, "loss": 0.06729766, "memory(GiB)": 13.7, "step": 51030, "train_speed(iter/s)": 1.527938 }, { "acc": 0.98413696, "epoch": 23.92078743848137, "grad_norm": 6.579031944274902, "learning_rate": 5.765846661242055e-06, "loss": 0.05592121, "memory(GiB)": 13.7, "step": 51035, "train_speed(iter/s)": 1.527944 }, { "acc": 0.9864584, "epoch": 23.923131005390204, "grad_norm": 0.808594286441803, "learning_rate": 5.765080676816185e-06, "loss": 0.05363017, "memory(GiB)": 13.7, "step": 51040, "train_speed(iter/s)": 1.527945 }, { "acc": 0.9837122, "epoch": 23.925474572299038, "grad_norm": 0.006442543119192123, "learning_rate": 5.764314674012305e-06, "loss": 0.06450896, "memory(GiB)": 13.7, "step": 51045, "train_speed(iter/s)": 1.527949 }, { "acc": 0.97842264, "epoch": 23.927818139207876, "grad_norm": 2.7004141807556152, "learning_rate": 5.763548652848833e-06, "loss": 0.07617137, "memory(GiB)": 13.7, "step": 51050, "train_speed(iter/s)": 1.527958 }, { "acc": 0.9833334, "epoch": 23.93016170611671, "grad_norm": 6.535305500030518, "learning_rate": 5.762782613344177e-06, "loss": 0.04105585, "memory(GiB)": 13.7, "step": 51055, "train_speed(iter/s)": 1.527961 }, { "acc": 0.9734046, "epoch": 23.932505273025544, "grad_norm": 2.601818799972534, "learning_rate": 5.7620165555167495e-06, "loss": 0.1027948, "memory(GiB)": 13.7, "step": 51060, "train_speed(iter/s)": 1.527962 }, { "acc": 0.98758011, "epoch": 23.93484883993438, "grad_norm": 3.354597330093384, "learning_rate": 5.761250479384968e-06, "loss": 0.0407184, "memory(GiB)": 13.7, "step": 51065, "train_speed(iter/s)": 1.52797 }, { "acc": 0.97778845, "epoch": 23.937192406843216, "grad_norm": 3.677767753601074, "learning_rate": 5.760484384967244e-06, "loss": 0.07720457, "memory(GiB)": 13.7, "step": 51070, "train_speed(iter/s)": 1.527976 }, { "acc": 0.97979174, "epoch": 23.93953597375205, "grad_norm": 4.813439846038818, "learning_rate": 5.759718272281991e-06, "loss": 0.06962596, "memory(GiB)": 13.7, "step": 51075, "train_speed(iter/s)": 1.527974 }, { "acc": 0.98988094, "epoch": 23.941879540660885, "grad_norm": 2.981431007385254, "learning_rate": 5.758952141347626e-06, "loss": 0.03520246, "memory(GiB)": 13.7, "step": 51080, "train_speed(iter/s)": 1.527976 }, { "acc": 0.98222876, "epoch": 23.94422310756972, "grad_norm": 5.554328441619873, "learning_rate": 5.758185992182564e-06, "loss": 0.0976839, "memory(GiB)": 13.7, "step": 51085, "train_speed(iter/s)": 1.527982 }, { "acc": 0.97724094, "epoch": 23.946566674478557, "grad_norm": 0.8164895176887512, "learning_rate": 5.7574198248052175e-06, "loss": 0.06548978, "memory(GiB)": 13.7, "step": 51090, "train_speed(iter/s)": 1.527992 }, { "acc": 0.97526512, "epoch": 23.94891024138739, "grad_norm": 3.7949867248535156, "learning_rate": 5.756653639234007e-06, "loss": 0.03963104, "memory(GiB)": 13.7, "step": 51095, "train_speed(iter/s)": 1.528001 }, { "acc": 0.97783155, "epoch": 23.951253808296226, "grad_norm": 2.186976909637451, "learning_rate": 5.755887435487345e-06, "loss": 0.04352748, "memory(GiB)": 13.7, "step": 51100, "train_speed(iter/s)": 1.528002 }, { "acc": 0.97759876, "epoch": 23.953597375205064, "grad_norm": 3.978644371032715, "learning_rate": 5.755121213583652e-06, "loss": 0.04574847, "memory(GiB)": 13.7, "step": 51105, "train_speed(iter/s)": 1.528011 }, { "acc": 0.97041664, "epoch": 23.955940942113898, "grad_norm": 1.1504523754119873, "learning_rate": 5.754354973541344e-06, "loss": 0.11098335, "memory(GiB)": 13.7, "step": 51110, "train_speed(iter/s)": 1.528017 }, { "acc": 0.9786232, "epoch": 23.958284509022732, "grad_norm": 3.331167697906494, "learning_rate": 5.753588715378841e-06, "loss": 0.07405244, "memory(GiB)": 13.7, "step": 51115, "train_speed(iter/s)": 1.528017 }, { "acc": 0.99020834, "epoch": 23.960628075931567, "grad_norm": 1.6102365255355835, "learning_rate": 5.752822439114557e-06, "loss": 0.03612372, "memory(GiB)": 13.7, "step": 51120, "train_speed(iter/s)": 1.528021 }, { "acc": 0.97937498, "epoch": 23.962971642840404, "grad_norm": 2.679596185684204, "learning_rate": 5.752056144766914e-06, "loss": 0.09543752, "memory(GiB)": 13.7, "step": 51125, "train_speed(iter/s)": 1.528032 }, { "acc": 0.990625, "epoch": 23.96531520974924, "grad_norm": 4.046420574188232, "learning_rate": 5.751289832354331e-06, "loss": 0.03708666, "memory(GiB)": 13.7, "step": 51130, "train_speed(iter/s)": 1.528032 }, { "acc": 0.97140875, "epoch": 23.967658776658073, "grad_norm": 5.500876426696777, "learning_rate": 5.750523501895226e-06, "loss": 0.09550154, "memory(GiB)": 13.7, "step": 51135, "train_speed(iter/s)": 1.528033 }, { "acc": 0.99264889, "epoch": 23.970002343566907, "grad_norm": 1.0068761110305786, "learning_rate": 5.7497571534080195e-06, "loss": 0.04102527, "memory(GiB)": 13.7, "step": 51140, "train_speed(iter/s)": 1.528038 }, { "acc": 0.98959179, "epoch": 23.972345910475745, "grad_norm": 2.9796714782714844, "learning_rate": 5.748990786911133e-06, "loss": 0.10203743, "memory(GiB)": 13.7, "step": 51145, "train_speed(iter/s)": 1.528041 }, { "acc": 0.98809528, "epoch": 23.97468947738458, "grad_norm": 4.501254081726074, "learning_rate": 5.748224402422988e-06, "loss": 0.02696069, "memory(GiB)": 13.7, "step": 51150, "train_speed(iter/s)": 1.528043 }, { "acc": 0.9840476, "epoch": 23.977033044293414, "grad_norm": 1.6778227090835571, "learning_rate": 5.747457999962005e-06, "loss": 0.0518815, "memory(GiB)": 13.7, "step": 51155, "train_speed(iter/s)": 1.528045 }, { "acc": 0.9927083, "epoch": 23.979376611202248, "grad_norm": 1.8543636798858643, "learning_rate": 5.746691579546605e-06, "loss": 0.01858084, "memory(GiB)": 13.7, "step": 51160, "train_speed(iter/s)": 1.528041 }, { "acc": 0.9739583, "epoch": 23.981720178111086, "grad_norm": 5.096701145172119, "learning_rate": 5.745925141195209e-06, "loss": 0.11042922, "memory(GiB)": 13.7, "step": 51165, "train_speed(iter/s)": 1.528043 }, { "acc": 0.98708334, "epoch": 23.98406374501992, "grad_norm": 0.01572365313768387, "learning_rate": 5.745158684926243e-06, "loss": 0.04192586, "memory(GiB)": 13.7, "step": 51170, "train_speed(iter/s)": 1.528051 }, { "acc": 0.9859375, "epoch": 23.986407311928755, "grad_norm": 0.03902977332472801, "learning_rate": 5.744392210758128e-06, "loss": 0.09102889, "memory(GiB)": 13.7, "step": 51175, "train_speed(iter/s)": 1.528045 }, { "acc": 0.97508926, "epoch": 23.988750878837592, "grad_norm": 4.4831061363220215, "learning_rate": 5.743625718709289e-06, "loss": 0.10789881, "memory(GiB)": 13.7, "step": 51180, "train_speed(iter/s)": 1.52805 }, { "acc": 0.98500004, "epoch": 23.991094445746427, "grad_norm": 5.415291786193848, "learning_rate": 5.74285920879815e-06, "loss": 0.03741381, "memory(GiB)": 13.7, "step": 51185, "train_speed(iter/s)": 1.528065 }, { "acc": 0.9902647, "epoch": 23.99343801265526, "grad_norm": 0.6496368646621704, "learning_rate": 5.742092681043132e-06, "loss": 0.02404174, "memory(GiB)": 13.7, "step": 51190, "train_speed(iter/s)": 1.528075 }, { "acc": 0.97729168, "epoch": 23.995781579564095, "grad_norm": 3.0524685382843018, "learning_rate": 5.741326135462665e-06, "loss": 0.08383563, "memory(GiB)": 13.7, "step": 51195, "train_speed(iter/s)": 1.52808 }, { "acc": 0.96758928, "epoch": 23.998125146472933, "grad_norm": 7.813044548034668, "learning_rate": 5.740559572075171e-06, "loss": 0.1166775, "memory(GiB)": 13.7, "step": 51200, "train_speed(iter/s)": 1.528093 }, { "acc": 0.9802084, "epoch": 24.000468713381768, "grad_norm": 1.9663493633270264, "learning_rate": 5.739792990899074e-06, "loss": 0.06121356, "memory(GiB)": 13.7, "step": 51205, "train_speed(iter/s)": 1.528061 }, { "acc": 0.98479166, "epoch": 24.002812280290602, "grad_norm": 3.189215898513794, "learning_rate": 5.739026391952803e-06, "loss": 0.06189548, "memory(GiB)": 13.7, "step": 51210, "train_speed(iter/s)": 1.528063 }, { "acc": 0.98246527, "epoch": 24.005155847199436, "grad_norm": 1.6854807138442993, "learning_rate": 5.738259775254785e-06, "loss": 0.06826195, "memory(GiB)": 13.7, "step": 51215, "train_speed(iter/s)": 1.528073 }, { "acc": 0.98488102, "epoch": 24.007499414108274, "grad_norm": 2.2556426525115967, "learning_rate": 5.737493140823444e-06, "loss": 0.07363689, "memory(GiB)": 13.7, "step": 51220, "train_speed(iter/s)": 1.528076 }, { "acc": 0.99437504, "epoch": 24.00984298101711, "grad_norm": 2.72420072555542, "learning_rate": 5.736726488677211e-06, "loss": 0.03654358, "memory(GiB)": 13.7, "step": 51225, "train_speed(iter/s)": 1.528085 }, { "acc": 0.96958332, "epoch": 24.012186547925943, "grad_norm": 6.586002349853516, "learning_rate": 5.73595981883451e-06, "loss": 0.07889659, "memory(GiB)": 13.7, "step": 51230, "train_speed(iter/s)": 1.528085 }, { "acc": 0.98770828, "epoch": 24.014530114834777, "grad_norm": 5.967533111572266, "learning_rate": 5.735193131313771e-06, "loss": 0.06582301, "memory(GiB)": 13.7, "step": 51235, "train_speed(iter/s)": 1.528088 }, { "acc": 0.98511362, "epoch": 24.016873681743615, "grad_norm": 2.222393274307251, "learning_rate": 5.734426426133426e-06, "loss": 0.05737734, "memory(GiB)": 13.7, "step": 51240, "train_speed(iter/s)": 1.528091 }, { "acc": 0.97904758, "epoch": 24.01921724865245, "grad_norm": 9.70168685913086, "learning_rate": 5.7336597033118994e-06, "loss": 0.09503844, "memory(GiB)": 13.7, "step": 51245, "train_speed(iter/s)": 1.528094 }, { "acc": 0.98115082, "epoch": 24.021560815561283, "grad_norm": 3.9425487518310547, "learning_rate": 5.732892962867621e-06, "loss": 0.05102906, "memory(GiB)": 13.7, "step": 51250, "train_speed(iter/s)": 1.528095 }, { "acc": 0.97749462, "epoch": 24.02390438247012, "grad_norm": 4.310434818267822, "learning_rate": 5.732126204819023e-06, "loss": 0.10016658, "memory(GiB)": 13.7, "step": 51255, "train_speed(iter/s)": 1.528103 }, { "acc": 0.9832386, "epoch": 24.026247949378956, "grad_norm": 5.3708109855651855, "learning_rate": 5.731359429184533e-06, "loss": 0.05291319, "memory(GiB)": 13.7, "step": 51260, "train_speed(iter/s)": 1.528103 }, { "acc": 0.97847233, "epoch": 24.02859151628779, "grad_norm": 1.961329698562622, "learning_rate": 5.730592635982584e-06, "loss": 0.14916964, "memory(GiB)": 13.7, "step": 51265, "train_speed(iter/s)": 1.528116 }, { "acc": 0.97020836, "epoch": 24.030935083196624, "grad_norm": 0.43453705310821533, "learning_rate": 5.729825825231607e-06, "loss": 0.08508344, "memory(GiB)": 13.7, "step": 51270, "train_speed(iter/s)": 1.528116 }, { "acc": 0.98104162, "epoch": 24.033278650105462, "grad_norm": 6.968115329742432, "learning_rate": 5.7290589969500345e-06, "loss": 0.05276331, "memory(GiB)": 13.7, "step": 51275, "train_speed(iter/s)": 1.528119 }, { "acc": 0.97422581, "epoch": 24.035622217014296, "grad_norm": 4.423694133758545, "learning_rate": 5.728292151156295e-06, "loss": 0.07271289, "memory(GiB)": 13.7, "step": 51280, "train_speed(iter/s)": 1.528123 }, { "acc": 0.9869792, "epoch": 24.03796578392313, "grad_norm": 0.01919642463326454, "learning_rate": 5.727525287868825e-06, "loss": 0.02944242, "memory(GiB)": 13.7, "step": 51285, "train_speed(iter/s)": 1.528124 }, { "acc": 0.97071419, "epoch": 24.040309350831965, "grad_norm": 5.431568622589111, "learning_rate": 5.726758407106055e-06, "loss": 0.08157684, "memory(GiB)": 13.7, "step": 51290, "train_speed(iter/s)": 1.528127 }, { "acc": 0.98001308, "epoch": 24.042652917740803, "grad_norm": 1.8290462493896484, "learning_rate": 5.7259915088864174e-06, "loss": 0.06108902, "memory(GiB)": 13.7, "step": 51295, "train_speed(iter/s)": 1.52813 }, { "acc": 0.98410931, "epoch": 24.044996484649637, "grad_norm": 1.6594715118408203, "learning_rate": 5.725224593228349e-06, "loss": 0.12198716, "memory(GiB)": 13.7, "step": 51300, "train_speed(iter/s)": 1.52813 }, { "acc": 0.98299675, "epoch": 24.04734005155847, "grad_norm": 0.9495365023612976, "learning_rate": 5.724457660150281e-06, "loss": 0.05361457, "memory(GiB)": 13.7, "step": 51305, "train_speed(iter/s)": 1.528137 }, { "acc": 0.98270836, "epoch": 24.049683618467306, "grad_norm": 7.651695251464844, "learning_rate": 5.723690709670649e-06, "loss": 0.06136897, "memory(GiB)": 13.7, "step": 51310, "train_speed(iter/s)": 1.528138 }, { "acc": 0.97113094, "epoch": 24.052027185376144, "grad_norm": 7.460803508758545, "learning_rate": 5.72292374180789e-06, "loss": 0.15581732, "memory(GiB)": 13.7, "step": 51315, "train_speed(iter/s)": 1.528141 }, { "acc": 0.98540173, "epoch": 24.054370752284978, "grad_norm": 2.09454607963562, "learning_rate": 5.722156756580438e-06, "loss": 0.06484048, "memory(GiB)": 13.7, "step": 51320, "train_speed(iter/s)": 1.528148 }, { "acc": 0.98738098, "epoch": 24.056714319193812, "grad_norm": 3.768026113510132, "learning_rate": 5.721389754006726e-06, "loss": 0.05543876, "memory(GiB)": 13.7, "step": 51325, "train_speed(iter/s)": 1.528153 }, { "acc": 0.98842258, "epoch": 24.059057886102647, "grad_norm": 0.02365564927458763, "learning_rate": 5.720622734105194e-06, "loss": 0.04727744, "memory(GiB)": 13.7, "step": 51330, "train_speed(iter/s)": 1.528161 }, { "acc": 0.98416672, "epoch": 24.061401453011484, "grad_norm": 5.420666694641113, "learning_rate": 5.719855696894277e-06, "loss": 0.06783175, "memory(GiB)": 13.7, "step": 51335, "train_speed(iter/s)": 1.528165 }, { "acc": 0.97493057, "epoch": 24.06374501992032, "grad_norm": 3.1239466667175293, "learning_rate": 5.719088642392412e-06, "loss": 0.0926043, "memory(GiB)": 13.7, "step": 51340, "train_speed(iter/s)": 1.528166 }, { "acc": 0.97701387, "epoch": 24.066088586829153, "grad_norm": 1.338991403579712, "learning_rate": 5.718321570618038e-06, "loss": 0.1287035, "memory(GiB)": 13.7, "step": 51345, "train_speed(iter/s)": 1.528174 }, { "acc": 0.97930803, "epoch": 24.06843215373799, "grad_norm": 4.126535415649414, "learning_rate": 5.71755448158959e-06, "loss": 0.12262199, "memory(GiB)": 13.7, "step": 51350, "train_speed(iter/s)": 1.528178 }, { "acc": 0.9890625, "epoch": 24.070775720646825, "grad_norm": 4.422082901000977, "learning_rate": 5.7167873753255085e-06, "loss": 0.06040366, "memory(GiB)": 13.7, "step": 51355, "train_speed(iter/s)": 1.528171 }, { "acc": 0.97892857, "epoch": 24.07311928755566, "grad_norm": 2.8496382236480713, "learning_rate": 5.716020251844232e-06, "loss": 0.07526799, "memory(GiB)": 13.7, "step": 51360, "train_speed(iter/s)": 1.528176 }, { "acc": 0.97416668, "epoch": 24.075462854464494, "grad_norm": 0.7719688415527344, "learning_rate": 5.7152531111642e-06, "loss": 0.1053562, "memory(GiB)": 13.7, "step": 51365, "train_speed(iter/s)": 1.528172 }, { "acc": 0.97766943, "epoch": 24.07780642137333, "grad_norm": 0.8243600726127625, "learning_rate": 5.71448595330385e-06, "loss": 0.06672576, "memory(GiB)": 13.7, "step": 51370, "train_speed(iter/s)": 1.528172 }, { "acc": 0.98913689, "epoch": 24.080149988282166, "grad_norm": 2.494678497314453, "learning_rate": 5.713718778281625e-06, "loss": 0.04389482, "memory(GiB)": 13.7, "step": 51375, "train_speed(iter/s)": 1.528171 }, { "acc": 0.98359375, "epoch": 24.082493555191, "grad_norm": 7.080751895904541, "learning_rate": 5.712951586115961e-06, "loss": 0.04629329, "memory(GiB)": 13.7, "step": 51380, "train_speed(iter/s)": 1.528171 }, { "acc": 0.97270298, "epoch": 24.084837122099835, "grad_norm": 1.104815125465393, "learning_rate": 5.7121843768253025e-06, "loss": 0.08969438, "memory(GiB)": 13.7, "step": 51385, "train_speed(iter/s)": 1.528172 }, { "acc": 0.99312496, "epoch": 24.087180689008672, "grad_norm": 7.077499866485596, "learning_rate": 5.711417150428091e-06, "loss": 0.02185357, "memory(GiB)": 13.7, "step": 51390, "train_speed(iter/s)": 1.528175 }, { "acc": 0.98903503, "epoch": 24.089524255917507, "grad_norm": 3.8932485580444336, "learning_rate": 5.710649906942766e-06, "loss": 0.0582162, "memory(GiB)": 13.7, "step": 51395, "train_speed(iter/s)": 1.52818 }, { "acc": 0.978125, "epoch": 24.09186782282634, "grad_norm": 3.3101277351379395, "learning_rate": 5.70988264638777e-06, "loss": 0.09999156, "memory(GiB)": 13.7, "step": 51400, "train_speed(iter/s)": 1.528188 }, { "acc": 0.98555546, "epoch": 24.094211389735175, "grad_norm": 3.9770519733428955, "learning_rate": 5.709115368781547e-06, "loss": 0.04537407, "memory(GiB)": 13.7, "step": 51405, "train_speed(iter/s)": 1.528192 }, { "acc": 0.98038692, "epoch": 24.096554956644013, "grad_norm": 5.12003755569458, "learning_rate": 5.708348074142538e-06, "loss": 0.06994241, "memory(GiB)": 13.7, "step": 51410, "train_speed(iter/s)": 1.528199 }, { "acc": 0.99562502, "epoch": 24.098898523552847, "grad_norm": 0.4873141944408417, "learning_rate": 5.707580762489184e-06, "loss": 0.05487686, "memory(GiB)": 13.7, "step": 51415, "train_speed(iter/s)": 1.528209 }, { "acc": 0.98551292, "epoch": 24.10124209046168, "grad_norm": 4.314334392547607, "learning_rate": 5.706813433839935e-06, "loss": 0.0402211, "memory(GiB)": 13.7, "step": 51420, "train_speed(iter/s)": 1.52821 }, { "acc": 0.97261362, "epoch": 24.10358565737052, "grad_norm": 1.6679728031158447, "learning_rate": 5.70604608821323e-06, "loss": 0.06797519, "memory(GiB)": 13.7, "step": 51425, "train_speed(iter/s)": 1.528213 }, { "acc": 0.98010416, "epoch": 24.105929224279354, "grad_norm": 1.5023167133331299, "learning_rate": 5.705278725627513e-06, "loss": 0.08646169, "memory(GiB)": 13.7, "step": 51430, "train_speed(iter/s)": 1.528227 }, { "acc": 0.98412781, "epoch": 24.108272791188188, "grad_norm": 1.193476915359497, "learning_rate": 5.7045113461012345e-06, "loss": 0.04445499, "memory(GiB)": 13.7, "step": 51435, "train_speed(iter/s)": 1.528231 }, { "acc": 0.97904768, "epoch": 24.110616358097023, "grad_norm": 0.007004653103649616, "learning_rate": 5.703743949652834e-06, "loss": 0.08882431, "memory(GiB)": 13.7, "step": 51440, "train_speed(iter/s)": 1.528234 }, { "acc": 0.97094021, "epoch": 24.11295992500586, "grad_norm": 6.32432746887207, "learning_rate": 5.702976536300759e-06, "loss": 0.07519062, "memory(GiB)": 13.7, "step": 51445, "train_speed(iter/s)": 1.528238 }, { "acc": 0.98604164, "epoch": 24.115303491914695, "grad_norm": 4.017223358154297, "learning_rate": 5.702209106063458e-06, "loss": 0.04154102, "memory(GiB)": 13.7, "step": 51450, "train_speed(iter/s)": 1.528242 }, { "acc": 0.9697917, "epoch": 24.11764705882353, "grad_norm": 6.325743198394775, "learning_rate": 5.701441658959373e-06, "loss": 0.14817743, "memory(GiB)": 13.7, "step": 51455, "train_speed(iter/s)": 1.528247 }, { "acc": 0.99082794, "epoch": 24.119990625732363, "grad_norm": 5.4738054275512695, "learning_rate": 5.700674195006954e-06, "loss": 0.04416616, "memory(GiB)": 13.7, "step": 51460, "train_speed(iter/s)": 1.52825 }, { "acc": 0.98291664, "epoch": 24.1223341926412, "grad_norm": 1.3238465785980225, "learning_rate": 5.699906714224649e-06, "loss": 0.07429724, "memory(GiB)": 13.7, "step": 51465, "train_speed(iter/s)": 1.528263 }, { "acc": 0.97887402, "epoch": 24.124677759550035, "grad_norm": 13.402280807495117, "learning_rate": 5.699139216630901e-06, "loss": 0.08883554, "memory(GiB)": 13.7, "step": 51470, "train_speed(iter/s)": 1.528263 }, { "acc": 0.990625, "epoch": 24.12702132645887, "grad_norm": 0.8150342702865601, "learning_rate": 5.6983717022441644e-06, "loss": 0.03243021, "memory(GiB)": 13.7, "step": 51475, "train_speed(iter/s)": 1.52827 }, { "acc": 0.96895838, "epoch": 24.129364893367704, "grad_norm": 5.038337707519531, "learning_rate": 5.697604171082884e-06, "loss": 0.05799484, "memory(GiB)": 13.7, "step": 51480, "train_speed(iter/s)": 1.528268 }, { "acc": 0.9859375, "epoch": 24.131708460276542, "grad_norm": 0.04624699056148529, "learning_rate": 5.696836623165509e-06, "loss": 0.03858795, "memory(GiB)": 13.7, "step": 51485, "train_speed(iter/s)": 1.528281 }, { "acc": 0.97458334, "epoch": 24.134052027185376, "grad_norm": 6.742818832397461, "learning_rate": 5.69606905851049e-06, "loss": 0.09929588, "memory(GiB)": 13.7, "step": 51490, "train_speed(iter/s)": 1.528292 }, { "acc": 0.98646774, "epoch": 24.13639559409421, "grad_norm": 2.1325926780700684, "learning_rate": 5.695301477136277e-06, "loss": 0.05328234, "memory(GiB)": 13.7, "step": 51495, "train_speed(iter/s)": 1.528296 }, { "acc": 0.98261366, "epoch": 24.13873916100305, "grad_norm": 1.1680188179016113, "learning_rate": 5.694533879061317e-06, "loss": 0.05647384, "memory(GiB)": 13.7, "step": 51500, "train_speed(iter/s)": 1.528299 }, { "acc": 0.97250004, "epoch": 24.141082727911883, "grad_norm": 1.698893666267395, "learning_rate": 5.693766264304063e-06, "loss": 0.04909775, "memory(GiB)": 13.7, "step": 51505, "train_speed(iter/s)": 1.528308 }, { "acc": 0.9908617, "epoch": 24.143426294820717, "grad_norm": 2.906675338745117, "learning_rate": 5.692998632882967e-06, "loss": 0.07772448, "memory(GiB)": 13.7, "step": 51510, "train_speed(iter/s)": 1.528305 }, { "acc": 0.98453808, "epoch": 24.14576986172955, "grad_norm": 1.1168264150619507, "learning_rate": 5.692230984816477e-06, "loss": 0.06675255, "memory(GiB)": 13.7, "step": 51515, "train_speed(iter/s)": 1.5283 }, { "acc": 0.990625, "epoch": 24.14811342863839, "grad_norm": 0.0393088273704052, "learning_rate": 5.691463320123047e-06, "loss": 0.04757456, "memory(GiB)": 13.7, "step": 51520, "train_speed(iter/s)": 1.528301 }, { "acc": 0.98101196, "epoch": 24.150456995547223, "grad_norm": 5.814981460571289, "learning_rate": 5.690695638821127e-06, "loss": 0.12351921, "memory(GiB)": 13.7, "step": 51525, "train_speed(iter/s)": 1.528302 }, { "acc": 0.98293562, "epoch": 24.152800562456058, "grad_norm": 1.444369912147522, "learning_rate": 5.689927940929176e-06, "loss": 0.06566851, "memory(GiB)": 13.7, "step": 51530, "train_speed(iter/s)": 1.52831 }, { "acc": 0.98009806, "epoch": 24.155144129364892, "grad_norm": 0.013610230758786201, "learning_rate": 5.689160226465641e-06, "loss": 0.07389214, "memory(GiB)": 13.7, "step": 51535, "train_speed(iter/s)": 1.528309 }, { "acc": 0.9895834, "epoch": 24.15748769627373, "grad_norm": 3.148270606994629, "learning_rate": 5.688392495448974e-06, "loss": 0.02033288, "memory(GiB)": 13.7, "step": 51540, "train_speed(iter/s)": 1.528317 }, { "acc": 0.98479166, "epoch": 24.159831263182564, "grad_norm": 2.6670403480529785, "learning_rate": 5.6876247478976335e-06, "loss": 0.04975197, "memory(GiB)": 13.7, "step": 51545, "train_speed(iter/s)": 1.528324 }, { "acc": 0.98165188, "epoch": 24.1621748300914, "grad_norm": 0.005811466835439205, "learning_rate": 5.6868569838300695e-06, "loss": 0.03601196, "memory(GiB)": 13.7, "step": 51550, "train_speed(iter/s)": 1.528332 }, { "acc": 0.98676472, "epoch": 24.164518397000233, "grad_norm": 3.665297746658325, "learning_rate": 5.6860892032647405e-06, "loss": 0.04861043, "memory(GiB)": 13.7, "step": 51555, "train_speed(iter/s)": 1.528336 }, { "acc": 0.98282204, "epoch": 24.16686196390907, "grad_norm": 3.935481309890747, "learning_rate": 5.685321406220098e-06, "loss": 0.08906401, "memory(GiB)": 13.7, "step": 51560, "train_speed(iter/s)": 1.52834 }, { "acc": 0.98738976, "epoch": 24.169205530817905, "grad_norm": 0.9771220088005066, "learning_rate": 5.684553592714599e-06, "loss": 0.12460189, "memory(GiB)": 13.7, "step": 51565, "train_speed(iter/s)": 1.528338 }, { "acc": 0.98217258, "epoch": 24.17154909772674, "grad_norm": 2.0777294635772705, "learning_rate": 5.683785762766698e-06, "loss": 0.04372293, "memory(GiB)": 13.7, "step": 51570, "train_speed(iter/s)": 1.528346 }, { "acc": 0.97821426, "epoch": 24.173892664635574, "grad_norm": 0.04306410625576973, "learning_rate": 5.683017916394854e-06, "loss": 0.09018564, "memory(GiB)": 13.7, "step": 51575, "train_speed(iter/s)": 1.528343 }, { "acc": 0.99229164, "epoch": 24.17623623154441, "grad_norm": 0.9949743151664734, "learning_rate": 5.6822500536175215e-06, "loss": 0.02715287, "memory(GiB)": 13.7, "step": 51580, "train_speed(iter/s)": 1.528346 }, { "acc": 0.97166119, "epoch": 24.178579798453246, "grad_norm": 5.541585445404053, "learning_rate": 5.681482174453156e-06, "loss": 0.1561831, "memory(GiB)": 13.7, "step": 51585, "train_speed(iter/s)": 1.528356 }, { "acc": 0.9916338, "epoch": 24.18092336536208, "grad_norm": 3.9871206283569336, "learning_rate": 5.680714278920217e-06, "loss": 0.05842434, "memory(GiB)": 13.7, "step": 51590, "train_speed(iter/s)": 1.528356 }, { "acc": 0.98425598, "epoch": 24.183266932270918, "grad_norm": 4.669477939605713, "learning_rate": 5.679946367037162e-06, "loss": 0.05082228, "memory(GiB)": 13.7, "step": 51595, "train_speed(iter/s)": 1.528369 }, { "acc": 0.97270832, "epoch": 24.185610499179752, "grad_norm": 3.777381181716919, "learning_rate": 5.679178438822446e-06, "loss": 0.0688578, "memory(GiB)": 13.7, "step": 51600, "train_speed(iter/s)": 1.528375 }, { "acc": 0.98425598, "epoch": 24.187954066088587, "grad_norm": 3.2855114936828613, "learning_rate": 5.678410494294532e-06, "loss": 0.08361849, "memory(GiB)": 13.7, "step": 51605, "train_speed(iter/s)": 1.528377 }, { "acc": 0.98625336, "epoch": 24.19029763299742, "grad_norm": 1.37787926197052, "learning_rate": 5.677642533471877e-06, "loss": 0.04418515, "memory(GiB)": 13.7, "step": 51610, "train_speed(iter/s)": 1.528388 }, { "acc": 0.97997026, "epoch": 24.19264119990626, "grad_norm": 4.22723388671875, "learning_rate": 5.6768745563729395e-06, "loss": 0.11847737, "memory(GiB)": 13.7, "step": 51615, "train_speed(iter/s)": 1.528393 }, { "acc": 0.98788691, "epoch": 24.194984766815093, "grad_norm": 2.81992769241333, "learning_rate": 5.676106563016181e-06, "loss": 0.06602125, "memory(GiB)": 13.7, "step": 51620, "train_speed(iter/s)": 1.5284 }, { "acc": 0.98458328, "epoch": 24.197328333723927, "grad_norm": 1.4821312427520752, "learning_rate": 5.675338553420058e-06, "loss": 0.03566675, "memory(GiB)": 13.7, "step": 51625, "train_speed(iter/s)": 1.528402 }, { "acc": 0.98520298, "epoch": 24.19967190063276, "grad_norm": 4.003605365753174, "learning_rate": 5.6745705276030334e-06, "loss": 0.04206541, "memory(GiB)": 13.7, "step": 51630, "train_speed(iter/s)": 1.528398 }, { "acc": 0.98519344, "epoch": 24.2020154675416, "grad_norm": 4.265238285064697, "learning_rate": 5.673802485583569e-06, "loss": 0.04536514, "memory(GiB)": 13.7, "step": 51635, "train_speed(iter/s)": 1.528398 }, { "acc": 0.97687492, "epoch": 24.204359034450434, "grad_norm": 3.625030279159546, "learning_rate": 5.673034427380125e-06, "loss": 0.07281801, "memory(GiB)": 13.7, "step": 51640, "train_speed(iter/s)": 1.5284 }, { "acc": 0.98625002, "epoch": 24.206702601359268, "grad_norm": 3.057006359100342, "learning_rate": 5.672266353011161e-06, "loss": 0.05495344, "memory(GiB)": 13.7, "step": 51645, "train_speed(iter/s)": 1.528397 }, { "acc": 0.97791672, "epoch": 24.209046168268102, "grad_norm": 3.069688081741333, "learning_rate": 5.671498262495143e-06, "loss": 0.07470671, "memory(GiB)": 13.7, "step": 51650, "train_speed(iter/s)": 1.528411 }, { "acc": 0.98812504, "epoch": 24.21138973517694, "grad_norm": 1.1638351678848267, "learning_rate": 5.67073015585053e-06, "loss": 0.08165943, "memory(GiB)": 13.7, "step": 51655, "train_speed(iter/s)": 1.528416 }, { "acc": 0.97987175, "epoch": 24.213733302085775, "grad_norm": 6.012457370758057, "learning_rate": 5.669962033095787e-06, "loss": 0.09705596, "memory(GiB)": 13.7, "step": 51660, "train_speed(iter/s)": 1.528423 }, { "acc": 0.98111115, "epoch": 24.21607686899461, "grad_norm": 3.092160940170288, "learning_rate": 5.669193894249375e-06, "loss": 0.05938601, "memory(GiB)": 13.7, "step": 51665, "train_speed(iter/s)": 1.528416 }, { "acc": 0.98611107, "epoch": 24.218420435903447, "grad_norm": 5.924511909484863, "learning_rate": 5.668425739329759e-06, "loss": 0.0556707, "memory(GiB)": 13.7, "step": 51670, "train_speed(iter/s)": 1.52842 }, { "acc": 0.97979164, "epoch": 24.22076400281228, "grad_norm": 1.6587413549423218, "learning_rate": 5.667657568355402e-06, "loss": 0.04084833, "memory(GiB)": 13.7, "step": 51675, "train_speed(iter/s)": 1.528427 }, { "acc": 0.99434528, "epoch": 24.223107569721115, "grad_norm": 3.7226696014404297, "learning_rate": 5.66688938134477e-06, "loss": 0.03645923, "memory(GiB)": 13.7, "step": 51680, "train_speed(iter/s)": 1.528437 }, { "acc": 0.98997478, "epoch": 24.22545113662995, "grad_norm": 2.409231424331665, "learning_rate": 5.666121178316325e-06, "loss": 0.04612128, "memory(GiB)": 13.7, "step": 51685, "train_speed(iter/s)": 1.528444 }, { "acc": 0.98779764, "epoch": 24.227794703538788, "grad_norm": 6.460410118103027, "learning_rate": 5.6653529592885334e-06, "loss": 0.07936525, "memory(GiB)": 13.7, "step": 51690, "train_speed(iter/s)": 1.528449 }, { "acc": 0.98032532, "epoch": 24.230138270447622, "grad_norm": 3.803924560546875, "learning_rate": 5.664584724279862e-06, "loss": 0.09141625, "memory(GiB)": 13.7, "step": 51695, "train_speed(iter/s)": 1.528454 }, { "acc": 0.98154764, "epoch": 24.232481837356456, "grad_norm": 6.720951080322266, "learning_rate": 5.663816473308776e-06, "loss": 0.04515789, "memory(GiB)": 13.7, "step": 51700, "train_speed(iter/s)": 1.528454 }, { "acc": 0.98625002, "epoch": 24.23482540426529, "grad_norm": 4.79845666885376, "learning_rate": 5.663048206393741e-06, "loss": 0.06075563, "memory(GiB)": 13.7, "step": 51705, "train_speed(iter/s)": 1.528458 }, { "acc": 0.98056545, "epoch": 24.23716897117413, "grad_norm": 3.1389236450195312, "learning_rate": 5.662279923553224e-06, "loss": 0.06974736, "memory(GiB)": 13.7, "step": 51710, "train_speed(iter/s)": 1.528455 }, { "acc": 0.98692703, "epoch": 24.239512538082963, "grad_norm": 3.038383960723877, "learning_rate": 5.66151162480569e-06, "loss": 0.05128812, "memory(GiB)": 13.7, "step": 51715, "train_speed(iter/s)": 1.528462 }, { "acc": 0.97833328, "epoch": 24.241856104991797, "grad_norm": 0.003917430993169546, "learning_rate": 5.660743310169609e-06, "loss": 0.05245498, "memory(GiB)": 13.7, "step": 51720, "train_speed(iter/s)": 1.52847 }, { "acc": 0.98255205, "epoch": 24.24419967190063, "grad_norm": 0.6986363530158997, "learning_rate": 5.659974979663449e-06, "loss": 0.05187352, "memory(GiB)": 13.7, "step": 51725, "train_speed(iter/s)": 1.528468 }, { "acc": 0.98374462, "epoch": 24.24654323880947, "grad_norm": 3.564819812774658, "learning_rate": 5.659206633305676e-06, "loss": 0.09800118, "memory(GiB)": 13.7, "step": 51730, "train_speed(iter/s)": 1.528475 }, { "acc": 0.97364578, "epoch": 24.248886805718303, "grad_norm": 1.9842522144317627, "learning_rate": 5.658438271114759e-06, "loss": 0.0724223, "memory(GiB)": 13.7, "step": 51735, "train_speed(iter/s)": 1.528491 }, { "acc": 0.99022827, "epoch": 24.251230372627138, "grad_norm": 2.6912662982940674, "learning_rate": 5.657669893109168e-06, "loss": 0.05884596, "memory(GiB)": 13.7, "step": 51740, "train_speed(iter/s)": 1.528498 }, { "acc": 0.95911465, "epoch": 24.253573939535976, "grad_norm": 8.225907325744629, "learning_rate": 5.6569014993073735e-06, "loss": 0.1461167, "memory(GiB)": 13.7, "step": 51745, "train_speed(iter/s)": 1.528507 }, { "acc": 0.98481407, "epoch": 24.25591750644481, "grad_norm": 1.1109694242477417, "learning_rate": 5.656133089727841e-06, "loss": 0.05069895, "memory(GiB)": 13.7, "step": 51750, "train_speed(iter/s)": 1.528521 }, { "acc": 0.97328129, "epoch": 24.258261073353644, "grad_norm": 3.4360153675079346, "learning_rate": 5.655364664389043e-06, "loss": 0.12518938, "memory(GiB)": 13.7, "step": 51755, "train_speed(iter/s)": 1.528525 }, { "acc": 0.9780304, "epoch": 24.26060464026248, "grad_norm": 10.60788345336914, "learning_rate": 5.654596223309452e-06, "loss": 0.07723606, "memory(GiB)": 13.7, "step": 51760, "train_speed(iter/s)": 1.528526 }, { "acc": 0.98812504, "epoch": 24.262948207171316, "grad_norm": 3.543855667114258, "learning_rate": 5.653827766507532e-06, "loss": 0.05748907, "memory(GiB)": 13.7, "step": 51765, "train_speed(iter/s)": 1.528528 }, { "acc": 0.97020836, "epoch": 24.26529177408015, "grad_norm": 5.775482177734375, "learning_rate": 5.653059294001764e-06, "loss": 0.10205909, "memory(GiB)": 13.7, "step": 51770, "train_speed(iter/s)": 1.528531 }, { "acc": 0.99258928, "epoch": 24.267635340988985, "grad_norm": 1.9457781314849854, "learning_rate": 5.652290805810612e-06, "loss": 0.03958083, "memory(GiB)": 13.7, "step": 51775, "train_speed(iter/s)": 1.528534 }, { "acc": 0.96791668, "epoch": 24.26997890789782, "grad_norm": 3.0083189010620117, "learning_rate": 5.65152230195255e-06, "loss": 0.08860171, "memory(GiB)": 13.7, "step": 51780, "train_speed(iter/s)": 1.528534 }, { "acc": 0.9848958, "epoch": 24.272322474806657, "grad_norm": 4.834697246551514, "learning_rate": 5.650753782446052e-06, "loss": 0.05365041, "memory(GiB)": 13.7, "step": 51785, "train_speed(iter/s)": 1.528533 }, { "acc": 0.97436886, "epoch": 24.27466604171549, "grad_norm": 3.4775278568267822, "learning_rate": 5.649985247309591e-06, "loss": 0.07864313, "memory(GiB)": 13.7, "step": 51790, "train_speed(iter/s)": 1.52854 }, { "acc": 0.984375, "epoch": 24.277009608624326, "grad_norm": 0.005816129967570305, "learning_rate": 5.649216696561635e-06, "loss": 0.05201381, "memory(GiB)": 13.7, "step": 51795, "train_speed(iter/s)": 1.528546 }, { "acc": 0.98633928, "epoch": 24.27935317553316, "grad_norm": 2.4632577896118164, "learning_rate": 5.648448130220662e-06, "loss": 0.03655486, "memory(GiB)": 13.7, "step": 51800, "train_speed(iter/s)": 1.528541 }, { "acc": 0.98604164, "epoch": 24.281696742441998, "grad_norm": 0.43021151423454285, "learning_rate": 5.647679548305144e-06, "loss": 0.04757594, "memory(GiB)": 13.7, "step": 51805, "train_speed(iter/s)": 1.528541 }, { "acc": 0.97328529, "epoch": 24.284040309350832, "grad_norm": 3.9495749473571777, "learning_rate": 5.646910950833555e-06, "loss": 0.06316361, "memory(GiB)": 13.7, "step": 51810, "train_speed(iter/s)": 1.528542 }, { "acc": 0.98230162, "epoch": 24.286383876259666, "grad_norm": 1.4225895404815674, "learning_rate": 5.646142337824372e-06, "loss": 0.0683768, "memory(GiB)": 13.7, "step": 51815, "train_speed(iter/s)": 1.528544 }, { "acc": 0.98716345, "epoch": 24.2887274431685, "grad_norm": 3.8509714603424072, "learning_rate": 5.645373709296067e-06, "loss": 0.05435169, "memory(GiB)": 13.7, "step": 51820, "train_speed(iter/s)": 1.528542 }, { "acc": 0.97284908, "epoch": 24.29107101007734, "grad_norm": 2.8794310092926025, "learning_rate": 5.6446050652671205e-06, "loss": 0.12724167, "memory(GiB)": 13.7, "step": 51825, "train_speed(iter/s)": 1.528544 }, { "acc": 0.989188, "epoch": 24.293414576986173, "grad_norm": 3.0619559288024902, "learning_rate": 5.643836405756001e-06, "loss": 0.05981987, "memory(GiB)": 13.7, "step": 51830, "train_speed(iter/s)": 1.528539 }, { "acc": 0.990625, "epoch": 24.295758143895007, "grad_norm": 3.918384313583374, "learning_rate": 5.643067730781188e-06, "loss": 0.02678739, "memory(GiB)": 13.7, "step": 51835, "train_speed(iter/s)": 1.528537 }, { "acc": 0.96717262, "epoch": 24.298101710803845, "grad_norm": 6.22844934463501, "learning_rate": 5.642299040361158e-06, "loss": 0.06887616, "memory(GiB)": 13.7, "step": 51840, "train_speed(iter/s)": 1.528542 }, { "acc": 0.96427078, "epoch": 24.30044527771268, "grad_norm": 3.810758113861084, "learning_rate": 5.641530334514388e-06, "loss": 0.08909066, "memory(GiB)": 13.7, "step": 51845, "train_speed(iter/s)": 1.52855 }, { "acc": 0.96368055, "epoch": 24.302788844621514, "grad_norm": 3.8690409660339355, "learning_rate": 5.640761613259352e-06, "loss": 0.12302021, "memory(GiB)": 13.7, "step": 51850, "train_speed(iter/s)": 1.528551 }, { "acc": 0.98479176, "epoch": 24.305132411530348, "grad_norm": 2.7049448490142822, "learning_rate": 5.639992876614534e-06, "loss": 0.03408071, "memory(GiB)": 13.7, "step": 51855, "train_speed(iter/s)": 1.528552 }, { "acc": 0.96541672, "epoch": 24.307475978439186, "grad_norm": 5.40048885345459, "learning_rate": 5.639224124598406e-06, "loss": 0.08655391, "memory(GiB)": 13.7, "step": 51860, "train_speed(iter/s)": 1.528553 }, { "acc": 0.97562504, "epoch": 24.30981954534802, "grad_norm": 6.040224552154541, "learning_rate": 5.638455357229449e-06, "loss": 0.07744873, "memory(GiB)": 13.7, "step": 51865, "train_speed(iter/s)": 1.528547 }, { "acc": 0.98048611, "epoch": 24.312163112256854, "grad_norm": 2.63299560546875, "learning_rate": 5.637686574526142e-06, "loss": 0.11415267, "memory(GiB)": 13.7, "step": 51870, "train_speed(iter/s)": 1.528549 }, { "acc": 0.98833332, "epoch": 24.31450667916569, "grad_norm": 0.012657935731112957, "learning_rate": 5.636917776506961e-06, "loss": 0.08883411, "memory(GiB)": 13.7, "step": 51875, "train_speed(iter/s)": 1.52855 }, { "acc": 0.966572, "epoch": 24.316850246074527, "grad_norm": 9.25549602508545, "learning_rate": 5.636148963190388e-06, "loss": 0.09541311, "memory(GiB)": 13.7, "step": 51880, "train_speed(iter/s)": 1.528551 }, { "acc": 0.9864584, "epoch": 24.31919381298336, "grad_norm": 3.31646728515625, "learning_rate": 5.635380134594903e-06, "loss": 0.04608436, "memory(GiB)": 13.7, "step": 51885, "train_speed(iter/s)": 1.528553 }, { "acc": 0.98000002, "epoch": 24.321537379892195, "grad_norm": 0.12365718930959702, "learning_rate": 5.634611290738986e-06, "loss": 0.05440834, "memory(GiB)": 13.7, "step": 51890, "train_speed(iter/s)": 1.528555 }, { "acc": 0.97967262, "epoch": 24.32388094680103, "grad_norm": 3.867758274078369, "learning_rate": 5.6338424316411155e-06, "loss": 0.09149072, "memory(GiB)": 13.7, "step": 51895, "train_speed(iter/s)": 1.528561 }, { "acc": 0.98520832, "epoch": 24.326224513709867, "grad_norm": 7.523586750030518, "learning_rate": 5.6330735573197744e-06, "loss": 0.07375724, "memory(GiB)": 13.7, "step": 51900, "train_speed(iter/s)": 1.528565 }, { "acc": 0.9895834, "epoch": 24.3285680806187, "grad_norm": 2.8008196353912354, "learning_rate": 5.632304667793442e-06, "loss": 0.03964305, "memory(GiB)": 13.7, "step": 51905, "train_speed(iter/s)": 1.528567 }, { "acc": 0.97035923, "epoch": 24.330911647527536, "grad_norm": 3.733579635620117, "learning_rate": 5.631535763080603e-06, "loss": 0.08097736, "memory(GiB)": 13.7, "step": 51910, "train_speed(iter/s)": 1.528578 }, { "acc": 0.9885417, "epoch": 24.333255214436374, "grad_norm": 3.098619222640991, "learning_rate": 5.630766843199737e-06, "loss": 0.02995966, "memory(GiB)": 13.7, "step": 51915, "train_speed(iter/s)": 1.528588 }, { "acc": 0.98618774, "epoch": 24.335598781345208, "grad_norm": 1.8265830278396606, "learning_rate": 5.629997908169326e-06, "loss": 0.05527899, "memory(GiB)": 13.7, "step": 51920, "train_speed(iter/s)": 1.528588 }, { "acc": 0.98803024, "epoch": 24.337942348254042, "grad_norm": 4.018911838531494, "learning_rate": 5.629228958007853e-06, "loss": 0.03109233, "memory(GiB)": 13.7, "step": 51925, "train_speed(iter/s)": 1.528596 }, { "acc": 0.9947917, "epoch": 24.340285915162877, "grad_norm": 0.016501879319548607, "learning_rate": 5.628459992733802e-06, "loss": 0.01209871, "memory(GiB)": 13.7, "step": 51930, "train_speed(iter/s)": 1.528596 }, { "acc": 0.98738098, "epoch": 24.342629482071715, "grad_norm": 0.10295789688825607, "learning_rate": 5.627691012365655e-06, "loss": 0.03586183, "memory(GiB)": 13.7, "step": 51935, "train_speed(iter/s)": 1.528595 }, { "acc": 0.97423611, "epoch": 24.34497304898055, "grad_norm": 12.882025718688965, "learning_rate": 5.626922016921895e-06, "loss": 0.0645161, "memory(GiB)": 13.7, "step": 51940, "train_speed(iter/s)": 1.5286 }, { "acc": 0.98571434, "epoch": 24.347316615889383, "grad_norm": 0.017437756061553955, "learning_rate": 5.626153006421011e-06, "loss": 0.06585614, "memory(GiB)": 13.7, "step": 51945, "train_speed(iter/s)": 1.528601 }, { "acc": 0.9822916, "epoch": 24.349660182798218, "grad_norm": 0.5042937994003296, "learning_rate": 5.625383980881484e-06, "loss": 0.0458915, "memory(GiB)": 13.7, "step": 51950, "train_speed(iter/s)": 1.528601 }, { "acc": 0.98311253, "epoch": 24.352003749707055, "grad_norm": 5.278584003448486, "learning_rate": 5.624614940321796e-06, "loss": 0.09888715, "memory(GiB)": 13.7, "step": 51955, "train_speed(iter/s)": 1.528598 }, { "acc": 0.99030704, "epoch": 24.35434731661589, "grad_norm": 0.015417368151247501, "learning_rate": 5.623845884760437e-06, "loss": 0.03887391, "memory(GiB)": 13.7, "step": 51960, "train_speed(iter/s)": 1.528601 }, { "acc": 0.98500004, "epoch": 24.356690883524724, "grad_norm": 0.13397003710269928, "learning_rate": 5.623076814215892e-06, "loss": 0.02899257, "memory(GiB)": 13.7, "step": 51965, "train_speed(iter/s)": 1.528603 }, { "acc": 0.98701391, "epoch": 24.35903445043356, "grad_norm": 1.7541382312774658, "learning_rate": 5.622307728706643e-06, "loss": 0.059829, "memory(GiB)": 13.7, "step": 51970, "train_speed(iter/s)": 1.528604 }, { "acc": 0.97907734, "epoch": 24.361378017342396, "grad_norm": 3.334592819213867, "learning_rate": 5.621538628251181e-06, "loss": 0.06819286, "memory(GiB)": 13.7, "step": 51975, "train_speed(iter/s)": 1.528611 }, { "acc": 0.98810091, "epoch": 24.36372158425123, "grad_norm": 2.5535004138946533, "learning_rate": 5.620769512867991e-06, "loss": 0.03310112, "memory(GiB)": 13.7, "step": 51980, "train_speed(iter/s)": 1.528611 }, { "acc": 0.99300594, "epoch": 24.366065151160065, "grad_norm": 1.807796597480774, "learning_rate": 5.620000382575557e-06, "loss": 0.01049244, "memory(GiB)": 13.7, "step": 51985, "train_speed(iter/s)": 1.528622 }, { "acc": 0.97369785, "epoch": 24.3684087180689, "grad_norm": 4.901346206665039, "learning_rate": 5.619231237392371e-06, "loss": 0.04636789, "memory(GiB)": 13.7, "step": 51990, "train_speed(iter/s)": 1.528631 }, { "acc": 0.9802084, "epoch": 24.370752284977737, "grad_norm": 4.726987361907959, "learning_rate": 5.618462077336919e-06, "loss": 0.1156774, "memory(GiB)": 13.7, "step": 51995, "train_speed(iter/s)": 1.528636 }, { "acc": 0.97383928, "epoch": 24.37309585188657, "grad_norm": 6.455972194671631, "learning_rate": 5.6176929024276884e-06, "loss": 0.07182137, "memory(GiB)": 13.7, "step": 52000, "train_speed(iter/s)": 1.528642 }, { "acc": 0.98395834, "epoch": 24.375439418795406, "grad_norm": 3.3252289295196533, "learning_rate": 5.6169237126831685e-06, "loss": 0.03453719, "memory(GiB)": 13.7, "step": 52005, "train_speed(iter/s)": 1.528644 }, { "acc": 0.98145828, "epoch": 24.377782985704243, "grad_norm": 6.704325199127197, "learning_rate": 5.616154508121847e-06, "loss": 0.0539398, "memory(GiB)": 13.7, "step": 52010, "train_speed(iter/s)": 1.528644 }, { "acc": 0.97711306, "epoch": 24.380126552613078, "grad_norm": 8.973315238952637, "learning_rate": 5.615385288762213e-06, "loss": 0.09199717, "memory(GiB)": 13.7, "step": 52015, "train_speed(iter/s)": 1.528654 }, { "acc": 0.9791667, "epoch": 24.382470119521912, "grad_norm": 2.5312137603759766, "learning_rate": 5.614616054622758e-06, "loss": 0.12549436, "memory(GiB)": 13.7, "step": 52020, "train_speed(iter/s)": 1.528662 }, { "acc": 0.98175602, "epoch": 24.384813686430746, "grad_norm": 3.8639962673187256, "learning_rate": 5.61384680572197e-06, "loss": 0.07129999, "memory(GiB)": 13.7, "step": 52025, "train_speed(iter/s)": 1.528661 }, { "acc": 0.98249998, "epoch": 24.387157253339584, "grad_norm": 1.8537970781326294, "learning_rate": 5.613077542078339e-06, "loss": 0.02610607, "memory(GiB)": 13.7, "step": 52030, "train_speed(iter/s)": 1.528657 }, { "acc": 0.97420635, "epoch": 24.38950082024842, "grad_norm": 2.4127423763275146, "learning_rate": 5.6123082637103596e-06, "loss": 0.07564138, "memory(GiB)": 13.7, "step": 52035, "train_speed(iter/s)": 1.528665 }, { "acc": 0.98840828, "epoch": 24.391844387157253, "grad_norm": 3.1059281826019287, "learning_rate": 5.6115389706365166e-06, "loss": 0.05745131, "memory(GiB)": 13.7, "step": 52040, "train_speed(iter/s)": 1.528664 }, { "acc": 0.99160843, "epoch": 24.394187954066087, "grad_norm": 1.6984739303588867, "learning_rate": 5.6107696628753035e-06, "loss": 0.03981162, "memory(GiB)": 13.7, "step": 52045, "train_speed(iter/s)": 1.528672 }, { "acc": 0.9848959, "epoch": 24.396531520974925, "grad_norm": 5.900670051574707, "learning_rate": 5.610000340445215e-06, "loss": 0.06946477, "memory(GiB)": 13.7, "step": 52050, "train_speed(iter/s)": 1.528681 }, { "acc": 0.99360571, "epoch": 24.39887508788376, "grad_norm": 3.144541025161743, "learning_rate": 5.609231003364739e-06, "loss": 0.03179613, "memory(GiB)": 13.7, "step": 52055, "train_speed(iter/s)": 1.528682 }, { "acc": 0.990625, "epoch": 24.401218654792594, "grad_norm": 3.812532424926758, "learning_rate": 5.608461651652369e-06, "loss": 0.05722778, "memory(GiB)": 13.7, "step": 52060, "train_speed(iter/s)": 1.528682 }, { "acc": 0.97569447, "epoch": 24.403562221701428, "grad_norm": 2.594519853591919, "learning_rate": 5.607692285326599e-06, "loss": 0.1174777, "memory(GiB)": 13.7, "step": 52065, "train_speed(iter/s)": 1.52869 }, { "acc": 0.990625, "epoch": 24.405905788610266, "grad_norm": 1.6945561170578003, "learning_rate": 5.6069229044059225e-06, "loss": 0.06060289, "memory(GiB)": 13.7, "step": 52070, "train_speed(iter/s)": 1.528697 }, { "acc": 0.97202377, "epoch": 24.4082493555191, "grad_norm": 4.515036582946777, "learning_rate": 5.606153508908832e-06, "loss": 0.09374555, "memory(GiB)": 13.7, "step": 52075, "train_speed(iter/s)": 1.528711 }, { "acc": 0.98393736, "epoch": 24.410592922427934, "grad_norm": 1.1237356662750244, "learning_rate": 5.60538409885382e-06, "loss": 0.04818635, "memory(GiB)": 13.7, "step": 52080, "train_speed(iter/s)": 1.528714 }, { "acc": 0.97614584, "epoch": 24.412936489336772, "grad_norm": 0.9803817272186279, "learning_rate": 5.60461467425938e-06, "loss": 0.08200299, "memory(GiB)": 13.7, "step": 52085, "train_speed(iter/s)": 1.528715 }, { "acc": 0.99403839, "epoch": 24.415280056245606, "grad_norm": 1.8679468631744385, "learning_rate": 5.603845235144008e-06, "loss": 0.02088769, "memory(GiB)": 13.7, "step": 52090, "train_speed(iter/s)": 1.528717 }, { "acc": 0.96999998, "epoch": 24.41762362315444, "grad_norm": 3.483905553817749, "learning_rate": 5.6030757815262e-06, "loss": 0.06858967, "memory(GiB)": 13.7, "step": 52095, "train_speed(iter/s)": 1.528719 }, { "acc": 0.97470245, "epoch": 24.419967190063275, "grad_norm": 3.888688802719116, "learning_rate": 5.602306313424448e-06, "loss": 0.08905123, "memory(GiB)": 13.7, "step": 52100, "train_speed(iter/s)": 1.528723 }, { "acc": 0.97145834, "epoch": 24.422310756972113, "grad_norm": 4.505045413970947, "learning_rate": 5.60153683085725e-06, "loss": 0.07832767, "memory(GiB)": 13.7, "step": 52105, "train_speed(iter/s)": 1.528728 }, { "acc": 0.97573318, "epoch": 24.424654323880947, "grad_norm": 0.011781686916947365, "learning_rate": 5.6007673338431025e-06, "loss": 0.09115744, "memory(GiB)": 13.7, "step": 52110, "train_speed(iter/s)": 1.528734 }, { "acc": 0.9854166, "epoch": 24.42699789078978, "grad_norm": 2.5791819095611572, "learning_rate": 5.599997822400499e-06, "loss": 0.04292372, "memory(GiB)": 13.7, "step": 52115, "train_speed(iter/s)": 1.528736 }, { "acc": 0.97633934, "epoch": 24.429341457698616, "grad_norm": 5.9383039474487305, "learning_rate": 5.59922829654794e-06, "loss": 0.11706823, "memory(GiB)": 13.7, "step": 52120, "train_speed(iter/s)": 1.528745 }, { "acc": 0.98849211, "epoch": 24.431685024607454, "grad_norm": 4.056149959564209, "learning_rate": 5.5984587563039185e-06, "loss": 0.0514034, "memory(GiB)": 13.7, "step": 52125, "train_speed(iter/s)": 1.528752 }, { "acc": 0.97979164, "epoch": 24.434028591516288, "grad_norm": 4.818658351898193, "learning_rate": 5.597689201686931e-06, "loss": 0.10604421, "memory(GiB)": 13.7, "step": 52130, "train_speed(iter/s)": 1.528764 }, { "acc": 1.0, "epoch": 24.436372158425122, "grad_norm": 4.218371391296387, "learning_rate": 5.596919632715478e-06, "loss": 0.02546823, "memory(GiB)": 13.7, "step": 52135, "train_speed(iter/s)": 1.528773 }, { "acc": 0.98687496, "epoch": 24.438715725333957, "grad_norm": 3.776376485824585, "learning_rate": 5.5961500494080575e-06, "loss": 0.05496863, "memory(GiB)": 13.7, "step": 52140, "train_speed(iter/s)": 1.528774 }, { "acc": 0.97531252, "epoch": 24.441059292242794, "grad_norm": 0.766767680644989, "learning_rate": 5.595380451783164e-06, "loss": 0.05728248, "memory(GiB)": 13.7, "step": 52145, "train_speed(iter/s)": 1.528779 }, { "acc": 0.99092264, "epoch": 24.44340285915163, "grad_norm": 5.205904006958008, "learning_rate": 5.594610839859301e-06, "loss": 0.03078758, "memory(GiB)": 13.7, "step": 52150, "train_speed(iter/s)": 1.528783 }, { "acc": 0.98296127, "epoch": 24.445746426060463, "grad_norm": 0.02135051041841507, "learning_rate": 5.593841213654966e-06, "loss": 0.07154294, "memory(GiB)": 13.7, "step": 52155, "train_speed(iter/s)": 1.528782 }, { "acc": 0.9979167, "epoch": 24.4480899929693, "grad_norm": 0.19598324596881866, "learning_rate": 5.593071573188656e-06, "loss": 0.01447023, "memory(GiB)": 13.7, "step": 52160, "train_speed(iter/s)": 1.528786 }, { "acc": 0.990625, "epoch": 24.450433559878135, "grad_norm": 5.901139259338379, "learning_rate": 5.592301918478872e-06, "loss": 0.0271204, "memory(GiB)": 13.7, "step": 52165, "train_speed(iter/s)": 1.528797 }, { "acc": 0.97041664, "epoch": 24.45277712678697, "grad_norm": 3.5172975063323975, "learning_rate": 5.591532249544115e-06, "loss": 0.05063992, "memory(GiB)": 13.7, "step": 52170, "train_speed(iter/s)": 1.528803 }, { "acc": 0.97967262, "epoch": 24.455120693695804, "grad_norm": 2.094623565673828, "learning_rate": 5.590762566402884e-06, "loss": 0.1330493, "memory(GiB)": 13.7, "step": 52175, "train_speed(iter/s)": 1.528812 }, { "acc": 0.98274002, "epoch": 24.45746426060464, "grad_norm": 1.2250735759735107, "learning_rate": 5.589992869073679e-06, "loss": 0.09912716, "memory(GiB)": 13.7, "step": 52180, "train_speed(iter/s)": 1.528821 }, { "acc": 0.99399033, "epoch": 24.459807827513476, "grad_norm": 3.754617214202881, "learning_rate": 5.589223157575003e-06, "loss": 0.03631166, "memory(GiB)": 13.7, "step": 52185, "train_speed(iter/s)": 1.528827 }, { "acc": 0.97851219, "epoch": 24.46215139442231, "grad_norm": 16.596843719482422, "learning_rate": 5.588453431925356e-06, "loss": 0.08319085, "memory(GiB)": 13.7, "step": 52190, "train_speed(iter/s)": 1.528831 }, { "acc": 0.98797045, "epoch": 24.464494961331145, "grad_norm": 0.920404314994812, "learning_rate": 5.587683692143241e-06, "loss": 0.08176181, "memory(GiB)": 13.7, "step": 52195, "train_speed(iter/s)": 1.528826 }, { "acc": 0.98354168, "epoch": 24.466838528239983, "grad_norm": 2.6683602333068848, "learning_rate": 5.5869139382471585e-06, "loss": 0.0825123, "memory(GiB)": 13.7, "step": 52200, "train_speed(iter/s)": 1.528829 }, { "acc": 0.97541656, "epoch": 24.469182095148817, "grad_norm": 1.1473909616470337, "learning_rate": 5.586144170255612e-06, "loss": 0.08710924, "memory(GiB)": 13.7, "step": 52205, "train_speed(iter/s)": 1.528835 }, { "acc": 0.97099209, "epoch": 24.47152566205765, "grad_norm": 3.441572427749634, "learning_rate": 5.585374388187103e-06, "loss": 0.07878312, "memory(GiB)": 13.7, "step": 52210, "train_speed(iter/s)": 1.528839 }, { "acc": 0.97927084, "epoch": 24.473869228966485, "grad_norm": 5.17175817489624, "learning_rate": 5.5846045920601355e-06, "loss": 0.06003453, "memory(GiB)": 13.7, "step": 52215, "train_speed(iter/s)": 1.528846 }, { "acc": 0.98113098, "epoch": 24.476212795875323, "grad_norm": 2.7658467292785645, "learning_rate": 5.583834781893213e-06, "loss": 0.07179945, "memory(GiB)": 13.7, "step": 52220, "train_speed(iter/s)": 1.528847 }, { "acc": 0.99154758, "epoch": 24.478556362784158, "grad_norm": 6.300023078918457, "learning_rate": 5.583064957704838e-06, "loss": 0.06848111, "memory(GiB)": 13.7, "step": 52225, "train_speed(iter/s)": 1.528848 }, { "acc": 0.97708912, "epoch": 24.480899929692992, "grad_norm": 7.804565906524658, "learning_rate": 5.582295119513514e-06, "loss": 0.09324235, "memory(GiB)": 13.7, "step": 52230, "train_speed(iter/s)": 1.528851 }, { "acc": 0.98732376, "epoch": 24.48324349660183, "grad_norm": 1.832038164138794, "learning_rate": 5.581525267337749e-06, "loss": 0.04343884, "memory(GiB)": 13.7, "step": 52235, "train_speed(iter/s)": 1.528849 }, { "acc": 0.97830353, "epoch": 24.485587063510664, "grad_norm": 4.8049492835998535, "learning_rate": 5.580755401196044e-06, "loss": 0.0883957, "memory(GiB)": 13.7, "step": 52240, "train_speed(iter/s)": 1.528851 }, { "acc": 0.99181557, "epoch": 24.4879306304195, "grad_norm": 3.5945205688476562, "learning_rate": 5.579985521106908e-06, "loss": 0.02783897, "memory(GiB)": 13.7, "step": 52245, "train_speed(iter/s)": 1.528854 }, { "acc": 0.98363094, "epoch": 24.490274197328333, "grad_norm": 3.8174328804016113, "learning_rate": 5.579215627088841e-06, "loss": 0.09213024, "memory(GiB)": 13.7, "step": 52250, "train_speed(iter/s)": 1.528854 }, { "acc": 0.99676476, "epoch": 24.49261776423717, "grad_norm": 0.0005308022373355925, "learning_rate": 5.578445719160351e-06, "loss": 0.02345781, "memory(GiB)": 13.7, "step": 52255, "train_speed(iter/s)": 1.528846 }, { "acc": 0.9947916, "epoch": 24.494961331146005, "grad_norm": 3.540024995803833, "learning_rate": 5.5776757973399475e-06, "loss": 0.03349057, "memory(GiB)": 13.7, "step": 52260, "train_speed(iter/s)": 1.528855 }, { "acc": 0.9852891, "epoch": 24.49730489805484, "grad_norm": 1.5098975896835327, "learning_rate": 5.576905861646131e-06, "loss": 0.04947906, "memory(GiB)": 13.7, "step": 52265, "train_speed(iter/s)": 1.52886 }, { "acc": 0.9958333, "epoch": 24.499648464963673, "grad_norm": 3.867558002471924, "learning_rate": 5.5761359120974136e-06, "loss": 0.02891743, "memory(GiB)": 13.7, "step": 52270, "train_speed(iter/s)": 1.528864 }, { "acc": 0.97925596, "epoch": 24.50199203187251, "grad_norm": 8.785422325134277, "learning_rate": 5.575365948712298e-06, "loss": 0.05797071, "memory(GiB)": 13.7, "step": 52275, "train_speed(iter/s)": 1.528866 }, { "acc": 0.98290176, "epoch": 24.504335598781346, "grad_norm": 9.80848217010498, "learning_rate": 5.5745959715092945e-06, "loss": 0.06660973, "memory(GiB)": 13.7, "step": 52280, "train_speed(iter/s)": 1.528868 }, { "acc": 0.9829464, "epoch": 24.50667916569018, "grad_norm": 7.270085334777832, "learning_rate": 5.573825980506912e-06, "loss": 0.07292984, "memory(GiB)": 13.7, "step": 52285, "train_speed(iter/s)": 1.528867 }, { "acc": 0.98732796, "epoch": 24.509022732599014, "grad_norm": 4.98598051071167, "learning_rate": 5.573055975723654e-06, "loss": 0.09940088, "memory(GiB)": 13.7, "step": 52290, "train_speed(iter/s)": 1.528872 }, { "acc": 0.98937187, "epoch": 24.511366299507852, "grad_norm": 0.0011816192418336868, "learning_rate": 5.572285957178031e-06, "loss": 0.02378043, "memory(GiB)": 13.7, "step": 52295, "train_speed(iter/s)": 1.528876 }, { "acc": 0.98798065, "epoch": 24.513709866416686, "grad_norm": 1.2057088613510132, "learning_rate": 5.571515924888553e-06, "loss": 0.05393001, "memory(GiB)": 13.7, "step": 52300, "train_speed(iter/s)": 1.528876 }, { "acc": 0.98234386, "epoch": 24.51605343332552, "grad_norm": 0.005135348532348871, "learning_rate": 5.570745878873728e-06, "loss": 0.10747586, "memory(GiB)": 13.7, "step": 52305, "train_speed(iter/s)": 1.528882 }, { "acc": 0.96741076, "epoch": 24.518397000234355, "grad_norm": 9.058098793029785, "learning_rate": 5.569975819152065e-06, "loss": 0.09570347, "memory(GiB)": 13.7, "step": 52310, "train_speed(iter/s)": 1.52889 }, { "acc": 0.99343748, "epoch": 24.520740567143193, "grad_norm": 2.018468141555786, "learning_rate": 5.569205745742075e-06, "loss": 0.01935391, "memory(GiB)": 13.7, "step": 52315, "train_speed(iter/s)": 1.528889 }, { "acc": 0.97663994, "epoch": 24.523084134052027, "grad_norm": 5.2466654777526855, "learning_rate": 5.568435658662266e-06, "loss": 0.08942893, "memory(GiB)": 13.7, "step": 52320, "train_speed(iter/s)": 1.528887 }, { "acc": 0.97833328, "epoch": 24.52542770096086, "grad_norm": 7.143250465393066, "learning_rate": 5.567665557931151e-06, "loss": 0.05202605, "memory(GiB)": 13.7, "step": 52325, "train_speed(iter/s)": 1.528892 }, { "acc": 0.97739582, "epoch": 24.5277712678697, "grad_norm": 2.4564900398254395, "learning_rate": 5.56689544356724e-06, "loss": 0.05722624, "memory(GiB)": 13.7, "step": 52330, "train_speed(iter/s)": 1.528891 }, { "acc": 0.9864583, "epoch": 24.530114834778534, "grad_norm": 4.740619659423828, "learning_rate": 5.5661253155890415e-06, "loss": 0.03162881, "memory(GiB)": 13.7, "step": 52335, "train_speed(iter/s)": 1.528888 }, { "acc": 0.98125, "epoch": 24.532458401687368, "grad_norm": 8.169164657592773, "learning_rate": 5.565355174015069e-06, "loss": 0.11207201, "memory(GiB)": 13.7, "step": 52340, "train_speed(iter/s)": 1.528885 }, { "acc": 0.98029766, "epoch": 24.534801968596202, "grad_norm": 1.8475964069366455, "learning_rate": 5.5645850188638335e-06, "loss": 0.04805937, "memory(GiB)": 13.7, "step": 52345, "train_speed(iter/s)": 1.528882 }, { "acc": 0.97061014, "epoch": 24.53714553550504, "grad_norm": 7.835888862609863, "learning_rate": 5.563814850153847e-06, "loss": 0.07955636, "memory(GiB)": 13.7, "step": 52350, "train_speed(iter/s)": 1.528888 }, { "acc": 0.98050594, "epoch": 24.539489102413874, "grad_norm": 0.10106109827756882, "learning_rate": 5.563044667903622e-06, "loss": 0.0605894, "memory(GiB)": 13.7, "step": 52355, "train_speed(iter/s)": 1.5289 }, { "acc": 0.97286701, "epoch": 24.54183266932271, "grad_norm": 1.962156057357788, "learning_rate": 5.562274472131674e-06, "loss": 0.07108725, "memory(GiB)": 13.7, "step": 52360, "train_speed(iter/s)": 1.528909 }, { "acc": 0.97805557, "epoch": 24.544176236231543, "grad_norm": 4.0400800704956055, "learning_rate": 5.56150426285651e-06, "loss": 0.09731776, "memory(GiB)": 13.7, "step": 52365, "train_speed(iter/s)": 1.528907 }, { "acc": 0.98581238, "epoch": 24.54651980314038, "grad_norm": 2.55533504486084, "learning_rate": 5.560734040096649e-06, "loss": 0.05310538, "memory(GiB)": 13.7, "step": 52370, "train_speed(iter/s)": 1.52891 }, { "acc": 0.96622019, "epoch": 24.548863370049215, "grad_norm": 8.587119102478027, "learning_rate": 5.559963803870602e-06, "loss": 0.10769162, "memory(GiB)": 13.7, "step": 52375, "train_speed(iter/s)": 1.528917 }, { "acc": 1.0, "epoch": 24.55120693695805, "grad_norm": 1.2716645002365112, "learning_rate": 5.559193554196881e-06, "loss": 0.00467933, "memory(GiB)": 13.7, "step": 52380, "train_speed(iter/s)": 1.52892 }, { "acc": 0.9677084, "epoch": 24.553550503866884, "grad_norm": 0.3665330111980438, "learning_rate": 5.558423291094004e-06, "loss": 0.11820483, "memory(GiB)": 13.7, "step": 52385, "train_speed(iter/s)": 1.528921 }, { "acc": 0.99375, "epoch": 24.55589407077572, "grad_norm": 2.2028238773345947, "learning_rate": 5.557653014580484e-06, "loss": 0.02922768, "memory(GiB)": 13.7, "step": 52390, "train_speed(iter/s)": 1.528928 }, { "acc": 0.98178911, "epoch": 24.558237637684556, "grad_norm": 5.999290943145752, "learning_rate": 5.556882724674835e-06, "loss": 0.06805279, "memory(GiB)": 13.7, "step": 52395, "train_speed(iter/s)": 1.528934 }, { "acc": 0.9900815, "epoch": 24.56058120459339, "grad_norm": 3.5822415351867676, "learning_rate": 5.556112421395574e-06, "loss": 0.0338078, "memory(GiB)": 13.7, "step": 52400, "train_speed(iter/s)": 1.528936 }, { "acc": 0.9822916, "epoch": 24.562924771502228, "grad_norm": 1.3040848970413208, "learning_rate": 5.555342104761215e-06, "loss": 0.04190343, "memory(GiB)": 13.7, "step": 52405, "train_speed(iter/s)": 1.528935 }, { "acc": 0.99385414, "epoch": 24.565268338411062, "grad_norm": 1.4255543947219849, "learning_rate": 5.554571774790275e-06, "loss": 0.06104596, "memory(GiB)": 13.7, "step": 52410, "train_speed(iter/s)": 1.528939 }, { "acc": 0.98803034, "epoch": 24.567611905319897, "grad_norm": 4.652883052825928, "learning_rate": 5.5538014315012714e-06, "loss": 0.05929853, "memory(GiB)": 13.7, "step": 52415, "train_speed(iter/s)": 1.528946 }, { "acc": 0.97894354, "epoch": 24.56995547222873, "grad_norm": 5.458637714385986, "learning_rate": 5.553031074912717e-06, "loss": 0.05732148, "memory(GiB)": 13.7, "step": 52420, "train_speed(iter/s)": 1.528947 }, { "acc": 0.98104162, "epoch": 24.57229903913757, "grad_norm": 2.258516550064087, "learning_rate": 5.552260705043131e-06, "loss": 0.05078266, "memory(GiB)": 13.7, "step": 52425, "train_speed(iter/s)": 1.528949 }, { "acc": 0.98795786, "epoch": 24.574642606046403, "grad_norm": 3.5647189617156982, "learning_rate": 5.551490321911031e-06, "loss": 0.04378108, "memory(GiB)": 13.7, "step": 52430, "train_speed(iter/s)": 1.528947 }, { "acc": 0.98583336, "epoch": 24.576986172955237, "grad_norm": 4.020650863647461, "learning_rate": 5.550719925534935e-06, "loss": 0.04951019, "memory(GiB)": 13.7, "step": 52435, "train_speed(iter/s)": 1.528952 }, { "acc": 0.98062496, "epoch": 24.57932973986407, "grad_norm": 0.9805629849433899, "learning_rate": 5.549949515933355e-06, "loss": 0.04424128, "memory(GiB)": 13.7, "step": 52440, "train_speed(iter/s)": 1.528951 }, { "acc": 0.99020834, "epoch": 24.58167330677291, "grad_norm": 2.079080820083618, "learning_rate": 5.5491790931248175e-06, "loss": 0.02292685, "memory(GiB)": 13.7, "step": 52445, "train_speed(iter/s)": 1.528957 }, { "acc": 0.9777029, "epoch": 24.584016873681744, "grad_norm": 5.17478084564209, "learning_rate": 5.548408657127836e-06, "loss": 0.1177161, "memory(GiB)": 13.7, "step": 52450, "train_speed(iter/s)": 1.528961 }, { "acc": 0.98583336, "epoch": 24.58636044059058, "grad_norm": 2.573417901992798, "learning_rate": 5.547638207960931e-06, "loss": 0.06236067, "memory(GiB)": 13.7, "step": 52455, "train_speed(iter/s)": 1.52897 }, { "acc": 0.96314487, "epoch": 24.588704007499413, "grad_norm": 3.8149938583374023, "learning_rate": 5.546867745642622e-06, "loss": 0.09226432, "memory(GiB)": 13.7, "step": 52460, "train_speed(iter/s)": 1.528968 }, { "acc": 0.99354172, "epoch": 24.59104757440825, "grad_norm": 1.5622786283493042, "learning_rate": 5.5460972701914275e-06, "loss": 0.02620537, "memory(GiB)": 13.7, "step": 52465, "train_speed(iter/s)": 1.528968 }, { "acc": 0.9885417, "epoch": 24.593391141317085, "grad_norm": 2.7076992988586426, "learning_rate": 5.545326781625865e-06, "loss": 0.03696822, "memory(GiB)": 13.7, "step": 52470, "train_speed(iter/s)": 1.528974 }, { "acc": 0.99333334, "epoch": 24.59573470822592, "grad_norm": 3.8032338619232178, "learning_rate": 5.544556279964456e-06, "loss": 0.03081563, "memory(GiB)": 13.7, "step": 52475, "train_speed(iter/s)": 1.528978 }, { "acc": 0.98549681, "epoch": 24.598078275134753, "grad_norm": 0.22155816853046417, "learning_rate": 5.543785765225723e-06, "loss": 0.05525506, "memory(GiB)": 13.7, "step": 52480, "train_speed(iter/s)": 1.528983 }, { "acc": 0.97998505, "epoch": 24.60042184204359, "grad_norm": 3.4993653297424316, "learning_rate": 5.543015237428184e-06, "loss": 0.06789781, "memory(GiB)": 13.7, "step": 52485, "train_speed(iter/s)": 1.528985 }, { "acc": 0.98819447, "epoch": 24.602765408952425, "grad_norm": 3.059452533721924, "learning_rate": 5.542244696590363e-06, "loss": 0.02314302, "memory(GiB)": 13.7, "step": 52490, "train_speed(iter/s)": 1.528993 }, { "acc": 0.99333334, "epoch": 24.60510897586126, "grad_norm": 1.4963163137435913, "learning_rate": 5.541474142730778e-06, "loss": 0.01581147, "memory(GiB)": 13.7, "step": 52495, "train_speed(iter/s)": 1.528996 }, { "acc": 0.97797623, "epoch": 24.607452542770098, "grad_norm": 9.8631591796875, "learning_rate": 5.540703575867951e-06, "loss": 0.08370148, "memory(GiB)": 13.7, "step": 52500, "train_speed(iter/s)": 1.528998 }, { "acc": 0.96919641, "epoch": 24.609796109678932, "grad_norm": 2.4339311122894287, "learning_rate": 5.539932996020407e-06, "loss": 0.09087912, "memory(GiB)": 13.7, "step": 52505, "train_speed(iter/s)": 1.529009 }, { "acc": 0.98819447, "epoch": 24.612139676587766, "grad_norm": 2.1099400520324707, "learning_rate": 5.5391624032066645e-06, "loss": 0.07573495, "memory(GiB)": 13.7, "step": 52510, "train_speed(iter/s)": 1.529003 }, { "acc": 0.98311882, "epoch": 24.6144832434966, "grad_norm": 3.4026451110839844, "learning_rate": 5.538391797445246e-06, "loss": 0.07079913, "memory(GiB)": 13.7, "step": 52515, "train_speed(iter/s)": 1.529001 }, { "acc": 0.9666666, "epoch": 24.61682681040544, "grad_norm": 5.971985340118408, "learning_rate": 5.537621178754679e-06, "loss": 0.08560258, "memory(GiB)": 13.7, "step": 52520, "train_speed(iter/s)": 1.529006 }, { "acc": 0.98237171, "epoch": 24.619170377314273, "grad_norm": 1.6038883924484253, "learning_rate": 5.53685054715348e-06, "loss": 0.05219334, "memory(GiB)": 13.7, "step": 52525, "train_speed(iter/s)": 1.529015 }, { "acc": 0.97678165, "epoch": 24.621513944223107, "grad_norm": 3.8139443397521973, "learning_rate": 5.536079902660179e-06, "loss": 0.08124288, "memory(GiB)": 13.7, "step": 52530, "train_speed(iter/s)": 1.529017 }, { "acc": 0.97053566, "epoch": 24.62385751113194, "grad_norm": 3.3040754795074463, "learning_rate": 5.535309245293294e-06, "loss": 0.07148553, "memory(GiB)": 13.7, "step": 52535, "train_speed(iter/s)": 1.529027 }, { "acc": 0.9833334, "epoch": 24.62620107804078, "grad_norm": 1.4752967357635498, "learning_rate": 5.534538575071354e-06, "loss": 0.05136179, "memory(GiB)": 13.7, "step": 52540, "train_speed(iter/s)": 1.529028 }, { "acc": 0.97390881, "epoch": 24.628544644949613, "grad_norm": 4.453414440155029, "learning_rate": 5.53376789201288e-06, "loss": 0.06904884, "memory(GiB)": 13.7, "step": 52545, "train_speed(iter/s)": 1.529038 }, { "acc": 0.96496868, "epoch": 24.630888211858448, "grad_norm": 1.413407802581787, "learning_rate": 5.532997196136398e-06, "loss": 0.11325469, "memory(GiB)": 13.7, "step": 52550, "train_speed(iter/s)": 1.529038 }, { "acc": 0.98064394, "epoch": 24.633231778767282, "grad_norm": 2.3959107398986816, "learning_rate": 5.532226487460433e-06, "loss": 0.06892292, "memory(GiB)": 13.7, "step": 52555, "train_speed(iter/s)": 1.529051 }, { "acc": 0.99375, "epoch": 24.63557534567612, "grad_norm": 3.5661025047302246, "learning_rate": 5.531455766003509e-06, "loss": 0.02278271, "memory(GiB)": 13.7, "step": 52560, "train_speed(iter/s)": 1.529051 }, { "acc": 0.98571434, "epoch": 24.637918912584954, "grad_norm": 4.324667453765869, "learning_rate": 5.530685031784154e-06, "loss": 0.05473534, "memory(GiB)": 13.7, "step": 52565, "train_speed(iter/s)": 1.52906 }, { "acc": 0.97390881, "epoch": 24.64026247949379, "grad_norm": 3.411571502685547, "learning_rate": 5.529914284820891e-06, "loss": 0.08633964, "memory(GiB)": 13.7, "step": 52570, "train_speed(iter/s)": 1.529072 }, { "acc": 0.99333334, "epoch": 24.642606046402626, "grad_norm": 5.255230903625488, "learning_rate": 5.529143525132249e-06, "loss": 0.02388633, "memory(GiB)": 13.7, "step": 52575, "train_speed(iter/s)": 1.529073 }, { "acc": 0.99258928, "epoch": 24.64494961331146, "grad_norm": 3.493683338165283, "learning_rate": 5.528372752736756e-06, "loss": 0.04822191, "memory(GiB)": 13.7, "step": 52580, "train_speed(iter/s)": 1.529087 }, { "acc": 0.98386364, "epoch": 24.647293180220295, "grad_norm": 3.2222325801849365, "learning_rate": 5.527601967652932e-06, "loss": 0.08784029, "memory(GiB)": 13.7, "step": 52585, "train_speed(iter/s)": 1.529089 }, { "acc": 0.98134899, "epoch": 24.64963674712913, "grad_norm": 2.95285964012146, "learning_rate": 5.52683116989931e-06, "loss": 0.07768775, "memory(GiB)": 13.7, "step": 52590, "train_speed(iter/s)": 1.529093 }, { "acc": 0.97468748, "epoch": 24.651980314037967, "grad_norm": 6.637959003448486, "learning_rate": 5.526060359494416e-06, "loss": 0.06247395, "memory(GiB)": 13.7, "step": 52595, "train_speed(iter/s)": 1.529099 }, { "acc": 0.98583336, "epoch": 24.6543238809468, "grad_norm": 3.5207202434539795, "learning_rate": 5.525289536456777e-06, "loss": 0.03971863, "memory(GiB)": 13.7, "step": 52600, "train_speed(iter/s)": 1.5291 }, { "acc": 0.97800598, "epoch": 24.656667447855636, "grad_norm": 4.742751121520996, "learning_rate": 5.5245187008049226e-06, "loss": 0.05750039, "memory(GiB)": 13.7, "step": 52605, "train_speed(iter/s)": 1.529099 }, { "acc": 0.9796875, "epoch": 24.65901101476447, "grad_norm": 1.940826177597046, "learning_rate": 5.5237478525573795e-06, "loss": 0.11823053, "memory(GiB)": 13.7, "step": 52610, "train_speed(iter/s)": 1.529104 }, { "acc": 0.99529409, "epoch": 24.661354581673308, "grad_norm": 4.644652366638184, "learning_rate": 5.522976991732676e-06, "loss": 0.03511308, "memory(GiB)": 13.7, "step": 52615, "train_speed(iter/s)": 1.529116 }, { "acc": 0.99386368, "epoch": 24.663698148582142, "grad_norm": 3.1276583671569824, "learning_rate": 5.522206118349345e-06, "loss": 0.04604749, "memory(GiB)": 13.7, "step": 52620, "train_speed(iter/s)": 1.529112 }, { "acc": 0.97672157, "epoch": 24.666041715490977, "grad_norm": 3.346135139465332, "learning_rate": 5.521435232425911e-06, "loss": 0.0624594, "memory(GiB)": 13.7, "step": 52625, "train_speed(iter/s)": 1.529111 }, { "acc": 0.98008928, "epoch": 24.66838528239981, "grad_norm": 3.478100538253784, "learning_rate": 5.5206643339809044e-06, "loss": 0.04931223, "memory(GiB)": 13.7, "step": 52630, "train_speed(iter/s)": 1.529118 }, { "acc": 0.98946428, "epoch": 24.67072884930865, "grad_norm": 0.5398991703987122, "learning_rate": 5.519893423032857e-06, "loss": 0.02516761, "memory(GiB)": 13.7, "step": 52635, "train_speed(iter/s)": 1.52912 }, { "acc": 0.98019352, "epoch": 24.673072416217483, "grad_norm": 4.047401428222656, "learning_rate": 5.519122499600299e-06, "loss": 0.06718785, "memory(GiB)": 13.7, "step": 52640, "train_speed(iter/s)": 1.529122 }, { "acc": 0.96812172, "epoch": 24.675415983126317, "grad_norm": 5.9623613357543945, "learning_rate": 5.5183515637017585e-06, "loss": 0.08022428, "memory(GiB)": 13.7, "step": 52645, "train_speed(iter/s)": 1.529127 }, { "acc": 0.98311958, "epoch": 24.677759550035155, "grad_norm": 0.045073047280311584, "learning_rate": 5.517580615355767e-06, "loss": 0.05103543, "memory(GiB)": 13.7, "step": 52650, "train_speed(iter/s)": 1.529125 }, { "acc": 0.98944054, "epoch": 24.68010311694399, "grad_norm": 4.076014518737793, "learning_rate": 5.516809654580858e-06, "loss": 0.04389668, "memory(GiB)": 13.7, "step": 52655, "train_speed(iter/s)": 1.529129 }, { "acc": 0.984375, "epoch": 24.682446683852824, "grad_norm": 2.653160810470581, "learning_rate": 5.51603868139556e-06, "loss": 0.05311341, "memory(GiB)": 13.7, "step": 52660, "train_speed(iter/s)": 1.529132 }, { "acc": 0.98520832, "epoch": 24.684790250761658, "grad_norm": 6.472113132476807, "learning_rate": 5.515267695818408e-06, "loss": 0.05392758, "memory(GiB)": 13.7, "step": 52665, "train_speed(iter/s)": 1.529135 }, { "acc": 0.98807468, "epoch": 24.687133817670496, "grad_norm": 1.205888032913208, "learning_rate": 5.514496697867931e-06, "loss": 0.03955302, "memory(GiB)": 13.7, "step": 52670, "train_speed(iter/s)": 1.529135 }, { "acc": 0.97587309, "epoch": 24.68947738457933, "grad_norm": 0.0618726909160614, "learning_rate": 5.5137256875626585e-06, "loss": 0.09732514, "memory(GiB)": 13.7, "step": 52675, "train_speed(iter/s)": 1.529136 }, { "acc": 0.98843746, "epoch": 24.691820951488165, "grad_norm": 2.608510732650757, "learning_rate": 5.51295466492113e-06, "loss": 0.07704872, "memory(GiB)": 13.7, "step": 52680, "train_speed(iter/s)": 1.529144 }, { "acc": 0.9916666, "epoch": 24.694164518397, "grad_norm": 3.3917791843414307, "learning_rate": 5.512183629961875e-06, "loss": 0.01813445, "memory(GiB)": 13.7, "step": 52685, "train_speed(iter/s)": 1.52914 }, { "acc": 0.978125, "epoch": 24.696508085305837, "grad_norm": 3.084784507751465, "learning_rate": 5.511412582703425e-06, "loss": 0.07250988, "memory(GiB)": 13.7, "step": 52690, "train_speed(iter/s)": 1.529144 }, { "acc": 0.97687502, "epoch": 24.69885165221467, "grad_norm": 3.3195652961730957, "learning_rate": 5.510641523164316e-06, "loss": 0.06608043, "memory(GiB)": 13.7, "step": 52695, "train_speed(iter/s)": 1.529149 }, { "acc": 0.9927084, "epoch": 24.701195219123505, "grad_norm": 0.011848627589643002, "learning_rate": 5.50987045136308e-06, "loss": 0.04753124, "memory(GiB)": 13.7, "step": 52700, "train_speed(iter/s)": 1.529145 }, { "acc": 0.98447914, "epoch": 24.70353878603234, "grad_norm": 1.0022979974746704, "learning_rate": 5.5090993673182526e-06, "loss": 0.04697656, "memory(GiB)": 13.7, "step": 52705, "train_speed(iter/s)": 1.529146 }, { "acc": 0.97807541, "epoch": 24.705882352941178, "grad_norm": 2.6647064685821533, "learning_rate": 5.5083282710483664e-06, "loss": 0.0456659, "memory(GiB)": 13.7, "step": 52710, "train_speed(iter/s)": 1.52915 }, { "acc": 0.9875, "epoch": 24.708225919850012, "grad_norm": 2.2891008853912354, "learning_rate": 5.507557162571958e-06, "loss": 0.09323276, "memory(GiB)": 13.7, "step": 52715, "train_speed(iter/s)": 1.529154 }, { "acc": 0.96786079, "epoch": 24.710569486758846, "grad_norm": 8.655227661132812, "learning_rate": 5.506786041907559e-06, "loss": 0.12920936, "memory(GiB)": 13.7, "step": 52720, "train_speed(iter/s)": 1.529146 }, { "acc": 0.98881941, "epoch": 24.712913053667684, "grad_norm": 2.1907153129577637, "learning_rate": 5.506014909073709e-06, "loss": 0.024336, "memory(GiB)": 13.7, "step": 52725, "train_speed(iter/s)": 1.529151 }, { "acc": 0.97999458, "epoch": 24.71525662057652, "grad_norm": 0.04275425523519516, "learning_rate": 5.50524376408894e-06, "loss": 0.09672987, "memory(GiB)": 13.7, "step": 52730, "train_speed(iter/s)": 1.529151 }, { "acc": 0.98529758, "epoch": 24.717600187485353, "grad_norm": 2.4113986492156982, "learning_rate": 5.50447260697179e-06, "loss": 0.04053723, "memory(GiB)": 13.7, "step": 52735, "train_speed(iter/s)": 1.52915 }, { "acc": 0.99154758, "epoch": 24.719943754394187, "grad_norm": 3.158996343612671, "learning_rate": 5.503701437740794e-06, "loss": 0.02618068, "memory(GiB)": 13.7, "step": 52740, "train_speed(iter/s)": 1.529161 }, { "acc": 0.99508018, "epoch": 24.722287321303025, "grad_norm": 0.05125446245074272, "learning_rate": 5.502930256414488e-06, "loss": 0.02368101, "memory(GiB)": 13.7, "step": 52745, "train_speed(iter/s)": 1.529163 }, { "acc": 0.96171284, "epoch": 24.72463088821186, "grad_norm": 4.375463008880615, "learning_rate": 5.502159063011411e-06, "loss": 0.14933087, "memory(GiB)": 13.7, "step": 52750, "train_speed(iter/s)": 1.529164 }, { "acc": 0.98923607, "epoch": 24.726974455120693, "grad_norm": 4.9607625007629395, "learning_rate": 5.501387857550098e-06, "loss": 0.07927605, "memory(GiB)": 13.7, "step": 52755, "train_speed(iter/s)": 1.529167 }, { "acc": 0.97766371, "epoch": 24.729318022029528, "grad_norm": 4.218732833862305, "learning_rate": 5.500616640049086e-06, "loss": 0.05774323, "memory(GiB)": 13.7, "step": 52760, "train_speed(iter/s)": 1.52917 }, { "acc": 0.97123919, "epoch": 24.731661588938366, "grad_norm": 5.581195831298828, "learning_rate": 5.499845410526911e-06, "loss": 0.12073927, "memory(GiB)": 13.7, "step": 52765, "train_speed(iter/s)": 1.52917 }, { "acc": 0.97868061, "epoch": 24.7340051558472, "grad_norm": 2.969587564468384, "learning_rate": 5.499074169002114e-06, "loss": 0.10733386, "memory(GiB)": 13.7, "step": 52770, "train_speed(iter/s)": 1.529175 }, { "acc": 0.98080807, "epoch": 24.736348722756034, "grad_norm": 3.376394748687744, "learning_rate": 5.498302915493232e-06, "loss": 0.05807275, "memory(GiB)": 13.7, "step": 52775, "train_speed(iter/s)": 1.529176 }, { "acc": 0.98208332, "epoch": 24.73869228966487, "grad_norm": 3.159794330596924, "learning_rate": 5.497531650018804e-06, "loss": 0.05432702, "memory(GiB)": 13.7, "step": 52780, "train_speed(iter/s)": 1.529177 }, { "acc": 0.96937504, "epoch": 24.741035856573706, "grad_norm": 6.303130626678467, "learning_rate": 5.496760372597367e-06, "loss": 0.05857736, "memory(GiB)": 13.7, "step": 52785, "train_speed(iter/s)": 1.529188 }, { "acc": 0.97354164, "epoch": 24.74337942348254, "grad_norm": 5.984554290771484, "learning_rate": 5.495989083247463e-06, "loss": 0.07115369, "memory(GiB)": 13.7, "step": 52790, "train_speed(iter/s)": 1.529196 }, { "acc": 0.9739583, "epoch": 24.745722990391375, "grad_norm": 5.820163726806641, "learning_rate": 5.495217781987626e-06, "loss": 0.07461212, "memory(GiB)": 13.7, "step": 52795, "train_speed(iter/s)": 1.529206 }, { "acc": 0.98378468, "epoch": 24.74806655730021, "grad_norm": 4.863652229309082, "learning_rate": 5.494446468836402e-06, "loss": 0.06145263, "memory(GiB)": 13.7, "step": 52800, "train_speed(iter/s)": 1.529217 }, { "acc": 0.99627972, "epoch": 24.750410124209047, "grad_norm": 0.8988105058670044, "learning_rate": 5.493675143812325e-06, "loss": 0.0501664, "memory(GiB)": 13.7, "step": 52805, "train_speed(iter/s)": 1.529216 }, { "acc": 0.99201927, "epoch": 24.75275369111788, "grad_norm": 0.8321720361709595, "learning_rate": 5.492903806933939e-06, "loss": 0.02755119, "memory(GiB)": 13.7, "step": 52810, "train_speed(iter/s)": 1.529228 }, { "acc": 0.97633934, "epoch": 24.755097258026716, "grad_norm": 6.546091556549072, "learning_rate": 5.492132458219784e-06, "loss": 0.08151804, "memory(GiB)": 13.7, "step": 52815, "train_speed(iter/s)": 1.529238 }, { "acc": 0.98194447, "epoch": 24.757440824935554, "grad_norm": 2.1123251914978027, "learning_rate": 5.491361097688397e-06, "loss": 0.1275275, "memory(GiB)": 13.7, "step": 52820, "train_speed(iter/s)": 1.529242 }, { "acc": 0.97145824, "epoch": 24.759784391844388, "grad_norm": 0.034060705453157425, "learning_rate": 5.490589725358325e-06, "loss": 0.13356627, "memory(GiB)": 13.7, "step": 52825, "train_speed(iter/s)": 1.52924 }, { "acc": 0.99375, "epoch": 24.762127958753222, "grad_norm": 4.16618537902832, "learning_rate": 5.489818341248105e-06, "loss": 0.02418116, "memory(GiB)": 13.7, "step": 52830, "train_speed(iter/s)": 1.529246 }, { "acc": 0.98041668, "epoch": 24.764471525662056, "grad_norm": 5.178986549377441, "learning_rate": 5.48904694537628e-06, "loss": 0.13405514, "memory(GiB)": 13.7, "step": 52835, "train_speed(iter/s)": 1.529259 }, { "acc": 0.98847218, "epoch": 24.766815092570894, "grad_norm": 5.180561065673828, "learning_rate": 5.48827553776139e-06, "loss": 0.0966323, "memory(GiB)": 13.7, "step": 52840, "train_speed(iter/s)": 1.529263 }, { "acc": 0.98214283, "epoch": 24.76915865947973, "grad_norm": 5.790075302124023, "learning_rate": 5.48750411842198e-06, "loss": 0.07591415, "memory(GiB)": 13.7, "step": 52845, "train_speed(iter/s)": 1.529269 }, { "acc": 0.990625, "epoch": 24.771502226388563, "grad_norm": 4.496615886688232, "learning_rate": 5.48673268737659e-06, "loss": 0.03835966, "memory(GiB)": 13.7, "step": 52850, "train_speed(iter/s)": 1.529267 }, { "acc": 0.99027777, "epoch": 24.773845793297397, "grad_norm": 5.065796375274658, "learning_rate": 5.485961244643764e-06, "loss": 0.03329791, "memory(GiB)": 13.7, "step": 52855, "train_speed(iter/s)": 1.529273 }, { "acc": 0.98298607, "epoch": 24.776189360206235, "grad_norm": 2.949387311935425, "learning_rate": 5.485189790242044e-06, "loss": 0.02589256, "memory(GiB)": 13.7, "step": 52860, "train_speed(iter/s)": 1.529276 }, { "acc": 0.9708334, "epoch": 24.77853292711507, "grad_norm": 3.014913558959961, "learning_rate": 5.484418324189974e-06, "loss": 0.09367981, "memory(GiB)": 13.7, "step": 52865, "train_speed(iter/s)": 1.529274 }, { "acc": 0.9979166, "epoch": 24.780876494023904, "grad_norm": 1.9243543148040771, "learning_rate": 5.483646846506097e-06, "loss": 0.00918662, "memory(GiB)": 13.7, "step": 52870, "train_speed(iter/s)": 1.529271 }, { "acc": 0.98529758, "epoch": 24.783220060932738, "grad_norm": 3.8928627967834473, "learning_rate": 5.4828753572089596e-06, "loss": 0.04156859, "memory(GiB)": 13.7, "step": 52875, "train_speed(iter/s)": 1.529277 }, { "acc": 0.97894917, "epoch": 24.785563627841576, "grad_norm": 5.150589942932129, "learning_rate": 5.4821038563171e-06, "loss": 0.06274139, "memory(GiB)": 13.7, "step": 52880, "train_speed(iter/s)": 1.529283 }, { "acc": 0.99802017, "epoch": 24.78790719475041, "grad_norm": 0.8227632641792297, "learning_rate": 5.481332343849067e-06, "loss": 0.04750344, "memory(GiB)": 13.7, "step": 52885, "train_speed(iter/s)": 1.529284 }, { "acc": 0.97458344, "epoch": 24.790250761659244, "grad_norm": 4.278988361358643, "learning_rate": 5.480560819823406e-06, "loss": 0.06875921, "memory(GiB)": 13.7, "step": 52890, "train_speed(iter/s)": 1.529296 }, { "acc": 0.98208332, "epoch": 24.792594328568082, "grad_norm": 10.22209358215332, "learning_rate": 5.479789284258656e-06, "loss": 0.07201476, "memory(GiB)": 13.7, "step": 52895, "train_speed(iter/s)": 1.529309 }, { "acc": 0.9822917, "epoch": 24.794937895476917, "grad_norm": 3.881627082824707, "learning_rate": 5.47901773717337e-06, "loss": 0.06507161, "memory(GiB)": 13.7, "step": 52900, "train_speed(iter/s)": 1.529307 }, { "acc": 0.97588978, "epoch": 24.79728146238575, "grad_norm": 0.7193773984909058, "learning_rate": 5.4782461785860874e-06, "loss": 0.06961663, "memory(GiB)": 13.7, "step": 52905, "train_speed(iter/s)": 1.529306 }, { "acc": 0.98043098, "epoch": 24.799625029294585, "grad_norm": 2.8764874935150146, "learning_rate": 5.477474608515358e-06, "loss": 0.0816106, "memory(GiB)": 13.7, "step": 52910, "train_speed(iter/s)": 1.529303 }, { "acc": 0.98312492, "epoch": 24.801968596203423, "grad_norm": 4.095782279968262, "learning_rate": 5.476703026979725e-06, "loss": 0.0474299, "memory(GiB)": 13.7, "step": 52915, "train_speed(iter/s)": 1.529304 }, { "acc": 0.98957796, "epoch": 24.804312163112257, "grad_norm": 0.03151758387684822, "learning_rate": 5.475931433997736e-06, "loss": 0.05187171, "memory(GiB)": 13.7, "step": 52920, "train_speed(iter/s)": 1.529306 }, { "acc": 0.975, "epoch": 24.80665573002109, "grad_norm": 2.716679573059082, "learning_rate": 5.475159829587937e-06, "loss": 0.09557716, "memory(GiB)": 13.7, "step": 52925, "train_speed(iter/s)": 1.529307 }, { "acc": 0.98773441, "epoch": 24.808999296929926, "grad_norm": 2.9854843616485596, "learning_rate": 5.474388213768874e-06, "loss": 0.03337334, "memory(GiB)": 13.7, "step": 52930, "train_speed(iter/s)": 1.529312 }, { "acc": 0.98623505, "epoch": 24.811342863838764, "grad_norm": 1.730725884437561, "learning_rate": 5.473616586559097e-06, "loss": 0.05257356, "memory(GiB)": 13.7, "step": 52935, "train_speed(iter/s)": 1.529315 }, { "acc": 0.98812504, "epoch": 24.813686430747598, "grad_norm": 0.17633917927742004, "learning_rate": 5.472844947977149e-06, "loss": 0.05458971, "memory(GiB)": 13.7, "step": 52940, "train_speed(iter/s)": 1.529321 }, { "acc": 0.97979164, "epoch": 24.816029997656432, "grad_norm": 1.2515696287155151, "learning_rate": 5.4720732980415825e-06, "loss": 0.07323232, "memory(GiB)": 13.7, "step": 52945, "train_speed(iter/s)": 1.529326 }, { "acc": 0.98940983, "epoch": 24.818373564565267, "grad_norm": 0.883449912071228, "learning_rate": 5.4713016367709415e-06, "loss": 0.07345444, "memory(GiB)": 13.7, "step": 52950, "train_speed(iter/s)": 1.529334 }, { "acc": 0.98394346, "epoch": 24.820717131474105, "grad_norm": 3.195894956588745, "learning_rate": 5.4705299641837784e-06, "loss": 0.05105717, "memory(GiB)": 13.7, "step": 52955, "train_speed(iter/s)": 1.529337 }, { "acc": 0.97534723, "epoch": 24.82306069838294, "grad_norm": 10.371476173400879, "learning_rate": 5.469758280298638e-06, "loss": 0.06751269, "memory(GiB)": 13.7, "step": 52960, "train_speed(iter/s)": 1.529334 }, { "acc": 0.9864583, "epoch": 24.825404265291773, "grad_norm": 5.601569175720215, "learning_rate": 5.4689865851340705e-06, "loss": 0.06847191, "memory(GiB)": 13.7, "step": 52965, "train_speed(iter/s)": 1.529339 }, { "acc": 0.97822914, "epoch": 24.827747832200608, "grad_norm": 4.772167682647705, "learning_rate": 5.468214878708623e-06, "loss": 0.03900873, "memory(GiB)": 13.7, "step": 52970, "train_speed(iter/s)": 1.529338 }, { "acc": 0.98104162, "epoch": 24.830091399109445, "grad_norm": 6.0376176834106445, "learning_rate": 5.467443161040847e-06, "loss": 0.05565773, "memory(GiB)": 13.7, "step": 52975, "train_speed(iter/s)": 1.529345 }, { "acc": 0.98354168, "epoch": 24.83243496601828, "grad_norm": 3.078522205352783, "learning_rate": 5.466671432149292e-06, "loss": 0.06317987, "memory(GiB)": 13.7, "step": 52980, "train_speed(iter/s)": 1.529351 }, { "acc": 0.97877979, "epoch": 24.834778532927114, "grad_norm": 3.2870709896087646, "learning_rate": 5.4658996920525065e-06, "loss": 0.03152527, "memory(GiB)": 13.7, "step": 52985, "train_speed(iter/s)": 1.529356 }, { "acc": 0.98416624, "epoch": 24.837122099835952, "grad_norm": 1.974971055984497, "learning_rate": 5.465127940769043e-06, "loss": 0.05624856, "memory(GiB)": 13.7, "step": 52990, "train_speed(iter/s)": 1.529362 }, { "acc": 0.9856945, "epoch": 24.839465666744786, "grad_norm": 2.346574544906616, "learning_rate": 5.464356178317448e-06, "loss": 0.02944604, "memory(GiB)": 13.7, "step": 52995, "train_speed(iter/s)": 1.529367 }, { "acc": 0.9859375, "epoch": 24.84180923365362, "grad_norm": 2.719900131225586, "learning_rate": 5.463584404716277e-06, "loss": 0.02615633, "memory(GiB)": 13.7, "step": 53000, "train_speed(iter/s)": 1.529366 }, { "acc": 0.98601189, "epoch": 24.844152800562455, "grad_norm": 1.0584462881088257, "learning_rate": 5.462812619984076e-06, "loss": 0.08321033, "memory(GiB)": 13.7, "step": 53005, "train_speed(iter/s)": 1.529375 }, { "acc": 0.98318453, "epoch": 24.846496367471293, "grad_norm": 7.8593058586120605, "learning_rate": 5.4620408241394e-06, "loss": 0.06147652, "memory(GiB)": 13.7, "step": 53010, "train_speed(iter/s)": 1.529373 }, { "acc": 0.97946434, "epoch": 24.848839934380127, "grad_norm": 5.65551233291626, "learning_rate": 5.461269017200796e-06, "loss": 0.06093166, "memory(GiB)": 13.7, "step": 53015, "train_speed(iter/s)": 1.529382 }, { "acc": 0.97451382, "epoch": 24.85118350128896, "grad_norm": 3.3062567710876465, "learning_rate": 5.460497199186822e-06, "loss": 0.08310622, "memory(GiB)": 13.7, "step": 53020, "train_speed(iter/s)": 1.529387 }, { "acc": 0.98576393, "epoch": 24.853527068197796, "grad_norm": 2.83217716217041, "learning_rate": 5.459725370116024e-06, "loss": 0.06913089, "memory(GiB)": 13.7, "step": 53025, "train_speed(iter/s)": 1.529388 }, { "acc": 0.97833338, "epoch": 24.855870635106633, "grad_norm": 2.1664786338806152, "learning_rate": 5.458953530006959e-06, "loss": 0.0591186, "memory(GiB)": 13.7, "step": 53030, "train_speed(iter/s)": 1.529405 }, { "acc": 0.9958334, "epoch": 24.858214202015468, "grad_norm": 0.0018345331773161888, "learning_rate": 5.458181678878176e-06, "loss": 0.0104142, "memory(GiB)": 13.7, "step": 53035, "train_speed(iter/s)": 1.529405 }, { "acc": 0.98142366, "epoch": 24.860557768924302, "grad_norm": 3.645191192626953, "learning_rate": 5.4574098167482304e-06, "loss": 0.0445879, "memory(GiB)": 13.7, "step": 53040, "train_speed(iter/s)": 1.529409 }, { "acc": 0.99142361, "epoch": 24.862901335833136, "grad_norm": 0.0587523952126503, "learning_rate": 5.456637943635671e-06, "loss": 0.04171126, "memory(GiB)": 13.7, "step": 53045, "train_speed(iter/s)": 1.529411 }, { "acc": 0.9822588, "epoch": 24.865244902741974, "grad_norm": 1.297432780265808, "learning_rate": 5.4558660595590565e-06, "loss": 0.03926677, "memory(GiB)": 13.7, "step": 53050, "train_speed(iter/s)": 1.529409 }, { "acc": 0.98345833, "epoch": 24.86758846965081, "grad_norm": 3.0996768474578857, "learning_rate": 5.455094164536936e-06, "loss": 0.05633154, "memory(GiB)": 13.7, "step": 53055, "train_speed(iter/s)": 1.529414 }, { "acc": 0.9802084, "epoch": 24.869932036559643, "grad_norm": 0.1982259303331375, "learning_rate": 5.454322258587866e-06, "loss": 0.05766223, "memory(GiB)": 13.7, "step": 53060, "train_speed(iter/s)": 1.52941 }, { "acc": 0.97229166, "epoch": 24.87227560346848, "grad_norm": 1.418073058128357, "learning_rate": 5.453550341730401e-06, "loss": 0.11133475, "memory(GiB)": 13.7, "step": 53065, "train_speed(iter/s)": 1.529413 }, { "acc": 0.99777775, "epoch": 24.874619170377315, "grad_norm": 1.4482896327972412, "learning_rate": 5.452778413983092e-06, "loss": 0.02850616, "memory(GiB)": 13.7, "step": 53070, "train_speed(iter/s)": 1.529419 }, { "acc": 0.97842264, "epoch": 24.87696273728615, "grad_norm": 0.31697195768356323, "learning_rate": 5.452006475364497e-06, "loss": 0.0575462, "memory(GiB)": 13.7, "step": 53075, "train_speed(iter/s)": 1.529433 }, { "acc": 0.97875004, "epoch": 24.879306304194984, "grad_norm": 3.9195661544799805, "learning_rate": 5.451234525893171e-06, "loss": 0.09339397, "memory(GiB)": 13.7, "step": 53080, "train_speed(iter/s)": 1.529437 }, { "acc": 0.96986113, "epoch": 24.88164987110382, "grad_norm": 5.147198677062988, "learning_rate": 5.450462565587667e-06, "loss": 0.14324424, "memory(GiB)": 13.7, "step": 53085, "train_speed(iter/s)": 1.529438 }, { "acc": 0.9874053, "epoch": 24.883993438012656, "grad_norm": 1.8630263805389404, "learning_rate": 5.44969059446654e-06, "loss": 0.06223999, "memory(GiB)": 13.7, "step": 53090, "train_speed(iter/s)": 1.529433 }, { "acc": 0.98747025, "epoch": 24.88633700492149, "grad_norm": 5.149849891662598, "learning_rate": 5.448918612548349e-06, "loss": 0.03625166, "memory(GiB)": 13.7, "step": 53095, "train_speed(iter/s)": 1.529435 }, { "acc": 0.984375, "epoch": 24.888680571830324, "grad_norm": 3.2886874675750732, "learning_rate": 5.448146619851644e-06, "loss": 0.07787425, "memory(GiB)": 13.7, "step": 53100, "train_speed(iter/s)": 1.529442 }, { "acc": 0.98812504, "epoch": 24.891024138739162, "grad_norm": 49.88248825073242, "learning_rate": 5.447374616394988e-06, "loss": 0.04343839, "memory(GiB)": 13.7, "step": 53105, "train_speed(iter/s)": 1.529449 }, { "acc": 0.97250004, "epoch": 24.893367705647997, "grad_norm": 2.3928308486938477, "learning_rate": 5.446602602196934e-06, "loss": 0.08205147, "memory(GiB)": 13.7, "step": 53110, "train_speed(iter/s)": 1.529449 }, { "acc": 0.97999992, "epoch": 24.89571127255683, "grad_norm": 5.3597869873046875, "learning_rate": 5.445830577276037e-06, "loss": 0.08776026, "memory(GiB)": 13.7, "step": 53115, "train_speed(iter/s)": 1.529455 }, { "acc": 0.97881479, "epoch": 24.898054839465665, "grad_norm": 1.9390168190002441, "learning_rate": 5.4450585416508585e-06, "loss": 0.06040163, "memory(GiB)": 13.7, "step": 53120, "train_speed(iter/s)": 1.529461 }, { "acc": 0.99219694, "epoch": 24.900398406374503, "grad_norm": 6.534180641174316, "learning_rate": 5.444286495339952e-06, "loss": 0.06083441, "memory(GiB)": 13.7, "step": 53125, "train_speed(iter/s)": 1.529465 }, { "acc": 0.97709322, "epoch": 24.902741973283337, "grad_norm": 3.8276782035827637, "learning_rate": 5.443514438361875e-06, "loss": 0.05253963, "memory(GiB)": 13.7, "step": 53130, "train_speed(iter/s)": 1.529478 }, { "acc": 0.98988094, "epoch": 24.90508554019217, "grad_norm": 5.634547710418701, "learning_rate": 5.442742370735188e-06, "loss": 0.07810017, "memory(GiB)": 13.7, "step": 53135, "train_speed(iter/s)": 1.529481 }, { "acc": 0.9703001, "epoch": 24.90742910710101, "grad_norm": 1.2992804050445557, "learning_rate": 5.4419702924784465e-06, "loss": 0.09209665, "memory(GiB)": 13.7, "step": 53140, "train_speed(iter/s)": 1.529486 }, { "acc": 0.97875004, "epoch": 24.909772674009844, "grad_norm": 9.246699333190918, "learning_rate": 5.441198203610209e-06, "loss": 0.06229317, "memory(GiB)": 13.7, "step": 53145, "train_speed(iter/s)": 1.529487 }, { "acc": 0.98362179, "epoch": 24.912116240918678, "grad_norm": 4.921943664550781, "learning_rate": 5.440426104149036e-06, "loss": 0.06209641, "memory(GiB)": 13.7, "step": 53150, "train_speed(iter/s)": 1.529502 }, { "acc": 0.97694445, "epoch": 24.914459807827512, "grad_norm": 5.072829723358154, "learning_rate": 5.439653994113484e-06, "loss": 0.09227633, "memory(GiB)": 13.7, "step": 53155, "train_speed(iter/s)": 1.529508 }, { "acc": 0.98782196, "epoch": 24.91680337473635, "grad_norm": 4.4624738693237305, "learning_rate": 5.4388818735221115e-06, "loss": 0.04718115, "memory(GiB)": 13.7, "step": 53160, "train_speed(iter/s)": 1.529512 }, { "acc": 0.98187504, "epoch": 24.919146941645185, "grad_norm": 4.548845291137695, "learning_rate": 5.438109742393483e-06, "loss": 0.05771147, "memory(GiB)": 13.7, "step": 53165, "train_speed(iter/s)": 1.529517 }, { "acc": 0.99092264, "epoch": 24.92149050855402, "grad_norm": 0.13777387142181396, "learning_rate": 5.43733760074615e-06, "loss": 0.02971944, "memory(GiB)": 13.7, "step": 53170, "train_speed(iter/s)": 1.529521 }, { "acc": 0.97437496, "epoch": 24.923834075462853, "grad_norm": 3.7132019996643066, "learning_rate": 5.436565448598676e-06, "loss": 0.06613097, "memory(GiB)": 13.7, "step": 53175, "train_speed(iter/s)": 1.529525 }, { "acc": 0.97849636, "epoch": 24.92617764237169, "grad_norm": 3.82444429397583, "learning_rate": 5.435793285969624e-06, "loss": 0.06204842, "memory(GiB)": 13.7, "step": 53180, "train_speed(iter/s)": 1.529527 }, { "acc": 0.98485994, "epoch": 24.928521209280525, "grad_norm": 0.6236667037010193, "learning_rate": 5.435021112877551e-06, "loss": 0.04742938, "memory(GiB)": 13.7, "step": 53185, "train_speed(iter/s)": 1.529538 }, { "acc": 0.99875002, "epoch": 24.93086477618936, "grad_norm": 1.3803894519805908, "learning_rate": 5.434248929341015e-06, "loss": 0.0172347, "memory(GiB)": 13.7, "step": 53190, "train_speed(iter/s)": 1.529534 }, { "acc": 0.98187504, "epoch": 24.933208343098194, "grad_norm": 4.226160526275635, "learning_rate": 5.433476735378582e-06, "loss": 0.09191192, "memory(GiB)": 13.7, "step": 53195, "train_speed(iter/s)": 1.529523 }, { "acc": 0.97979164, "epoch": 24.93555191000703, "grad_norm": 3.5351004600524902, "learning_rate": 5.432704531008809e-06, "loss": 0.09841063, "memory(GiB)": 13.7, "step": 53200, "train_speed(iter/s)": 1.529525 }, { "acc": 0.98586311, "epoch": 24.937895476915866, "grad_norm": 1.971272587776184, "learning_rate": 5.431932316250261e-06, "loss": 0.08256015, "memory(GiB)": 13.7, "step": 53205, "train_speed(iter/s)": 1.529528 }, { "acc": 0.9875, "epoch": 24.9402390438247, "grad_norm": 5.561758041381836, "learning_rate": 5.4311600911214975e-06, "loss": 0.03522943, "memory(GiB)": 13.7, "step": 53210, "train_speed(iter/s)": 1.529538 }, { "acc": 0.97645836, "epoch": 24.942582610733538, "grad_norm": 4.929532527923584, "learning_rate": 5.4303878556410805e-06, "loss": 0.05867654, "memory(GiB)": 13.7, "step": 53215, "train_speed(iter/s)": 1.529551 }, { "acc": 0.98407793, "epoch": 24.944926177642373, "grad_norm": 4.853440284729004, "learning_rate": 5.42961560982757e-06, "loss": 0.04462116, "memory(GiB)": 13.7, "step": 53220, "train_speed(iter/s)": 1.52956 }, { "acc": 0.98520832, "epoch": 24.947269744551207, "grad_norm": 4.905760765075684, "learning_rate": 5.428843353699531e-06, "loss": 0.06149722, "memory(GiB)": 13.7, "step": 53225, "train_speed(iter/s)": 1.529569 }, { "acc": 0.9739584, "epoch": 24.94961331146004, "grad_norm": 6.419871807098389, "learning_rate": 5.4280710872755225e-06, "loss": 0.08593174, "memory(GiB)": 13.7, "step": 53230, "train_speed(iter/s)": 1.529571 }, { "acc": 0.99258928, "epoch": 24.95195687836888, "grad_norm": 2.145648956298828, "learning_rate": 5.427298810574113e-06, "loss": 0.051816, "memory(GiB)": 13.7, "step": 53235, "train_speed(iter/s)": 1.529571 }, { "acc": 0.98552084, "epoch": 24.954300445277713, "grad_norm": 0.008735359646379948, "learning_rate": 5.426526523613862e-06, "loss": 0.07198652, "memory(GiB)": 13.7, "step": 53240, "train_speed(iter/s)": 1.529574 }, { "acc": 0.97718754, "epoch": 24.956644012186548, "grad_norm": 4.577766418457031, "learning_rate": 5.425754226413331e-06, "loss": 0.07563583, "memory(GiB)": 13.7, "step": 53245, "train_speed(iter/s)": 1.529576 }, { "acc": 0.97907734, "epoch": 24.958987579095382, "grad_norm": 4.557933330535889, "learning_rate": 5.424981918991089e-06, "loss": 0.07444999, "memory(GiB)": 13.7, "step": 53250, "train_speed(iter/s)": 1.529592 }, { "acc": 0.96673613, "epoch": 24.96133114600422, "grad_norm": 7.386350154876709, "learning_rate": 5.424209601365693e-06, "loss": 0.09903351, "memory(GiB)": 13.7, "step": 53255, "train_speed(iter/s)": 1.529587 }, { "acc": 0.99134922, "epoch": 24.963674712913054, "grad_norm": 2.8231048583984375, "learning_rate": 5.423437273555711e-06, "loss": 0.07656456, "memory(GiB)": 13.7, "step": 53260, "train_speed(iter/s)": 1.529591 }, { "acc": 0.97784967, "epoch": 24.96601827982189, "grad_norm": 1.1211241483688354, "learning_rate": 5.422664935579707e-06, "loss": 0.08244777, "memory(GiB)": 13.7, "step": 53265, "train_speed(iter/s)": 1.529589 }, { "acc": 0.97458334, "epoch": 24.968361846730723, "grad_norm": 2.746270179748535, "learning_rate": 5.421892587456244e-06, "loss": 0.0861633, "memory(GiB)": 13.7, "step": 53270, "train_speed(iter/s)": 1.529588 }, { "acc": 0.98208332, "epoch": 24.97070541363956, "grad_norm": 2.630406379699707, "learning_rate": 5.421120229203887e-06, "loss": 0.06658735, "memory(GiB)": 13.7, "step": 53275, "train_speed(iter/s)": 1.529597 }, { "acc": 0.975947, "epoch": 24.973048980548395, "grad_norm": 3.601449728012085, "learning_rate": 5.420347860841203e-06, "loss": 0.05598389, "memory(GiB)": 13.7, "step": 53280, "train_speed(iter/s)": 1.529587 }, { "acc": 0.98666668, "epoch": 24.97539254745723, "grad_norm": 5.023185729980469, "learning_rate": 5.419575482386756e-06, "loss": 0.06077893, "memory(GiB)": 13.7, "step": 53285, "train_speed(iter/s)": 1.529591 }, { "acc": 0.97534723, "epoch": 24.977736114366063, "grad_norm": 4.578331470489502, "learning_rate": 5.418803093859109e-06, "loss": 0.07666961, "memory(GiB)": 13.7, "step": 53290, "train_speed(iter/s)": 1.5296 }, { "acc": 0.97873564, "epoch": 24.9800796812749, "grad_norm": 4.193734645843506, "learning_rate": 5.418030695276831e-06, "loss": 0.0537527, "memory(GiB)": 13.7, "step": 53295, "train_speed(iter/s)": 1.529608 }, { "acc": 0.98182535, "epoch": 24.982423248183736, "grad_norm": 0.051664989441633224, "learning_rate": 5.417258286658489e-06, "loss": 0.05456869, "memory(GiB)": 13.7, "step": 53300, "train_speed(iter/s)": 1.529617 }, { "acc": 0.98354168, "epoch": 24.98476681509257, "grad_norm": 3.09574031829834, "learning_rate": 5.416485868022643e-06, "loss": 0.05581349, "memory(GiB)": 13.7, "step": 53305, "train_speed(iter/s)": 1.529623 }, { "acc": 0.97967262, "epoch": 24.987110382001408, "grad_norm": 2.222168445587158, "learning_rate": 5.415713439387866e-06, "loss": 0.058934, "memory(GiB)": 13.7, "step": 53310, "train_speed(iter/s)": 1.529621 }, { "acc": 0.98073864, "epoch": 24.989453948910242, "grad_norm": 5.087361812591553, "learning_rate": 5.414941000772721e-06, "loss": 0.04747448, "memory(GiB)": 13.7, "step": 53315, "train_speed(iter/s)": 1.529618 }, { "acc": 0.99288197, "epoch": 24.991797515819076, "grad_norm": 0.01149769127368927, "learning_rate": 5.414168552195776e-06, "loss": 0.02286602, "memory(GiB)": 13.7, "step": 53320, "train_speed(iter/s)": 1.52962 }, { "acc": 0.97800598, "epoch": 24.99414108272791, "grad_norm": 2.3603873252868652, "learning_rate": 5.413396093675598e-06, "loss": 0.0448112, "memory(GiB)": 13.7, "step": 53325, "train_speed(iter/s)": 1.529625 }, { "acc": 0.97228622, "epoch": 24.99648464963675, "grad_norm": 7.56012487411499, "learning_rate": 5.412623625230755e-06, "loss": 0.07950066, "memory(GiB)": 13.7, "step": 53330, "train_speed(iter/s)": 1.529625 }, { "acc": 0.99333334, "epoch": 24.998828216545583, "grad_norm": 0.09810356795787811, "learning_rate": 5.411851146879814e-06, "loss": 0.01176793, "memory(GiB)": 13.7, "step": 53335, "train_speed(iter/s)": 1.52963 }, { "acc": 0.9916667, "epoch": 25.001171783454417, "grad_norm": 3.4544122219085693, "learning_rate": 5.411078658641342e-06, "loss": 0.02579958, "memory(GiB)": 13.7, "step": 53340, "train_speed(iter/s)": 1.529611 }, { "acc": 0.98705359, "epoch": 25.00351535036325, "grad_norm": 0.16722629964351654, "learning_rate": 5.4103061605339094e-06, "loss": 0.03126442, "memory(GiB)": 13.7, "step": 53345, "train_speed(iter/s)": 1.529615 }, { "acc": 0.97792664, "epoch": 25.00585891727209, "grad_norm": 8.660343170166016, "learning_rate": 5.40953365257608e-06, "loss": 0.04241173, "memory(GiB)": 13.7, "step": 53350, "train_speed(iter/s)": 1.529623 }, { "acc": 0.98716345, "epoch": 25.008202484180924, "grad_norm": 1.110972285270691, "learning_rate": 5.408761134786427e-06, "loss": 0.04558445, "memory(GiB)": 13.7, "step": 53355, "train_speed(iter/s)": 1.529624 }, { "acc": 0.971875, "epoch": 25.010546051089758, "grad_norm": 4.205687999725342, "learning_rate": 5.407988607183519e-06, "loss": 0.0513668, "memory(GiB)": 13.7, "step": 53360, "train_speed(iter/s)": 1.529632 }, { "acc": 0.97309523, "epoch": 25.012889617998592, "grad_norm": 0.7201886773109436, "learning_rate": 5.4072160697859215e-06, "loss": 0.09700939, "memory(GiB)": 13.7, "step": 53365, "train_speed(iter/s)": 1.529624 }, { "acc": 0.9953125, "epoch": 25.01523318490743, "grad_norm": 0.2226077914237976, "learning_rate": 5.406443522612207e-06, "loss": 0.0235153, "memory(GiB)": 13.7, "step": 53370, "train_speed(iter/s)": 1.529629 }, { "acc": 0.99375, "epoch": 25.017576751816264, "grad_norm": 3.298125743865967, "learning_rate": 5.405670965680944e-06, "loss": 0.01685427, "memory(GiB)": 13.7, "step": 53375, "train_speed(iter/s)": 1.529633 }, { "acc": 0.98833332, "epoch": 25.0199203187251, "grad_norm": 6.170043468475342, "learning_rate": 5.404898399010702e-06, "loss": 0.04361951, "memory(GiB)": 13.7, "step": 53380, "train_speed(iter/s)": 1.529635 }, { "acc": 0.98270836, "epoch": 25.022263885633937, "grad_norm": 6.001566410064697, "learning_rate": 5.40412582262005e-06, "loss": 0.0579239, "memory(GiB)": 13.7, "step": 53385, "train_speed(iter/s)": 1.529644 }, { "acc": 0.97088747, "epoch": 25.02460745254277, "grad_norm": 7.116540431976318, "learning_rate": 5.403353236527561e-06, "loss": 0.06404084, "memory(GiB)": 13.7, "step": 53390, "train_speed(iter/s)": 1.529641 }, { "acc": 0.96958332, "epoch": 25.026951019451605, "grad_norm": 0.9274370670318604, "learning_rate": 5.402580640751801e-06, "loss": 0.0802389, "memory(GiB)": 13.7, "step": 53395, "train_speed(iter/s)": 1.529644 }, { "acc": 0.9885416, "epoch": 25.02929458636044, "grad_norm": 4.084650039672852, "learning_rate": 5.401808035311346e-06, "loss": 0.04491539, "memory(GiB)": 13.7, "step": 53400, "train_speed(iter/s)": 1.529649 }, { "acc": 0.99104176, "epoch": 25.031638153269277, "grad_norm": 0.41638118028640747, "learning_rate": 5.401035420224762e-06, "loss": 0.03812703, "memory(GiB)": 13.7, "step": 53405, "train_speed(iter/s)": 1.52965 }, { "acc": 0.98258324, "epoch": 25.03398172017811, "grad_norm": 1.1054075956344604, "learning_rate": 5.400262795510623e-06, "loss": 0.05064009, "memory(GiB)": 13.7, "step": 53410, "train_speed(iter/s)": 1.529654 }, { "acc": 0.97947407, "epoch": 25.036325287086946, "grad_norm": 5.132745265960693, "learning_rate": 5.399490161187502e-06, "loss": 0.05382885, "memory(GiB)": 13.7, "step": 53415, "train_speed(iter/s)": 1.52965 }, { "acc": 0.97717266, "epoch": 25.03866885399578, "grad_norm": 4.855981349945068, "learning_rate": 5.398717517273968e-06, "loss": 0.05304184, "memory(GiB)": 13.7, "step": 53420, "train_speed(iter/s)": 1.529654 }, { "acc": 0.9822916, "epoch": 25.041012420904618, "grad_norm": 9.800128936767578, "learning_rate": 5.3979448637885905e-06, "loss": 0.12739538, "memory(GiB)": 13.7, "step": 53425, "train_speed(iter/s)": 1.529663 }, { "acc": 0.98349361, "epoch": 25.043355987813452, "grad_norm": 1.7942228317260742, "learning_rate": 5.397172200749945e-06, "loss": 0.03419186, "memory(GiB)": 13.7, "step": 53430, "train_speed(iter/s)": 1.529669 }, { "acc": 0.96744041, "epoch": 25.045699554722287, "grad_norm": 3.2546257972717285, "learning_rate": 5.3963995281766035e-06, "loss": 0.09569442, "memory(GiB)": 13.7, "step": 53435, "train_speed(iter/s)": 1.52967 }, { "acc": 0.98520832, "epoch": 25.04804312163112, "grad_norm": 3.938056468963623, "learning_rate": 5.3956268460871375e-06, "loss": 0.06787601, "memory(GiB)": 13.7, "step": 53440, "train_speed(iter/s)": 1.529674 }, { "acc": 0.9776041, "epoch": 25.05038668853996, "grad_norm": 0.010989823378622532, "learning_rate": 5.39485415450012e-06, "loss": 0.08171184, "memory(GiB)": 13.7, "step": 53445, "train_speed(iter/s)": 1.529685 }, { "acc": 0.97625694, "epoch": 25.052730255448793, "grad_norm": 6.332248210906982, "learning_rate": 5.394081453434123e-06, "loss": 0.06027044, "memory(GiB)": 13.7, "step": 53450, "train_speed(iter/s)": 1.52969 }, { "acc": 0.9927083, "epoch": 25.055073822357627, "grad_norm": 1.7064146995544434, "learning_rate": 5.393308742907725e-06, "loss": 0.02292799, "memory(GiB)": 13.7, "step": 53455, "train_speed(iter/s)": 1.529688 }, { "acc": 0.98135414, "epoch": 25.057417389266462, "grad_norm": 2.2804596424102783, "learning_rate": 5.392536022939493e-06, "loss": 0.05017861, "memory(GiB)": 13.7, "step": 53460, "train_speed(iter/s)": 1.529692 }, { "acc": 0.97977676, "epoch": 25.0597609561753, "grad_norm": 3.0722572803497314, "learning_rate": 5.3917632935480036e-06, "loss": 0.07372423, "memory(GiB)": 13.7, "step": 53465, "train_speed(iter/s)": 1.529701 }, { "acc": 0.98343754, "epoch": 25.062104523084134, "grad_norm": 12.923873901367188, "learning_rate": 5.390990554751828e-06, "loss": 0.08194197, "memory(GiB)": 13.7, "step": 53470, "train_speed(iter/s)": 1.529704 }, { "acc": 0.99146824, "epoch": 25.06444808999297, "grad_norm": 4.07189416885376, "learning_rate": 5.390217806569544e-06, "loss": 0.03482414, "memory(GiB)": 13.7, "step": 53475, "train_speed(iter/s)": 1.52971 }, { "acc": 0.98675594, "epoch": 25.066791656901806, "grad_norm": 4.077461242675781, "learning_rate": 5.389445049019722e-06, "loss": 0.03429354, "memory(GiB)": 13.7, "step": 53480, "train_speed(iter/s)": 1.529717 }, { "acc": 0.97666664, "epoch": 25.06913522381064, "grad_norm": 4.910597324371338, "learning_rate": 5.388672282120942e-06, "loss": 0.05995801, "memory(GiB)": 13.7, "step": 53485, "train_speed(iter/s)": 1.529725 }, { "acc": 0.98604164, "epoch": 25.071478790719475, "grad_norm": 3.246114492416382, "learning_rate": 5.387899505891774e-06, "loss": 0.05609908, "memory(GiB)": 13.7, "step": 53490, "train_speed(iter/s)": 1.52973 }, { "acc": 0.97979164, "epoch": 25.07382235762831, "grad_norm": 4.4378485679626465, "learning_rate": 5.3871267203507945e-06, "loss": 0.10998166, "memory(GiB)": 13.7, "step": 53495, "train_speed(iter/s)": 1.529737 }, { "acc": 0.990625, "epoch": 25.076165924537147, "grad_norm": 0.0026566514279693365, "learning_rate": 5.386353925516581e-06, "loss": 0.0283896, "memory(GiB)": 13.7, "step": 53500, "train_speed(iter/s)": 1.529737 }, { "acc": 0.98529758, "epoch": 25.07850949144598, "grad_norm": 3.0810399055480957, "learning_rate": 5.385581121407703e-06, "loss": 0.02650901, "memory(GiB)": 13.7, "step": 53505, "train_speed(iter/s)": 1.529743 }, { "acc": 0.98063002, "epoch": 25.080853058354815, "grad_norm": 0.0015728514408692718, "learning_rate": 5.384808308042742e-06, "loss": 0.08905045, "memory(GiB)": 13.7, "step": 53510, "train_speed(iter/s)": 1.529742 }, { "acc": 0.98485126, "epoch": 25.08319662526365, "grad_norm": 3.6935179233551025, "learning_rate": 5.384035485440272e-06, "loss": 0.06643807, "memory(GiB)": 13.7, "step": 53515, "train_speed(iter/s)": 1.529742 }, { "acc": 0.98520832, "epoch": 25.085540192172488, "grad_norm": 3.4224917888641357, "learning_rate": 5.383262653618869e-06, "loss": 0.05531585, "memory(GiB)": 13.7, "step": 53520, "train_speed(iter/s)": 1.529742 }, { "acc": 0.98779764, "epoch": 25.087883759081322, "grad_norm": 4.968926906585693, "learning_rate": 5.382489812597108e-06, "loss": 0.06401412, "memory(GiB)": 13.7, "step": 53525, "train_speed(iter/s)": 1.52975 }, { "acc": 0.97706842, "epoch": 25.090227325990156, "grad_norm": 2.30177640914917, "learning_rate": 5.381716962393568e-06, "loss": 0.07797305, "memory(GiB)": 13.7, "step": 53530, "train_speed(iter/s)": 1.529761 }, { "acc": 0.98708334, "epoch": 25.09257089289899, "grad_norm": 3.5507867336273193, "learning_rate": 5.380944103026824e-06, "loss": 0.04651849, "memory(GiB)": 13.7, "step": 53535, "train_speed(iter/s)": 1.52976 }, { "acc": 0.98812504, "epoch": 25.09491445980783, "grad_norm": 42.39436721801758, "learning_rate": 5.380171234515454e-06, "loss": 0.02998908, "memory(GiB)": 13.7, "step": 53540, "train_speed(iter/s)": 1.529763 }, { "acc": 0.97052078, "epoch": 25.097258026716663, "grad_norm": 5.486382007598877, "learning_rate": 5.379398356878036e-06, "loss": 0.09074702, "memory(GiB)": 13.7, "step": 53545, "train_speed(iter/s)": 1.529767 }, { "acc": 0.98994045, "epoch": 25.099601593625497, "grad_norm": 2.5598971843719482, "learning_rate": 5.378625470133144e-06, "loss": 0.04914763, "memory(GiB)": 13.7, "step": 53550, "train_speed(iter/s)": 1.529764 }, { "acc": 0.99071426, "epoch": 25.101945160534335, "grad_norm": 4.549767017364502, "learning_rate": 5.377852574299358e-06, "loss": 0.03154835, "memory(GiB)": 13.7, "step": 53555, "train_speed(iter/s)": 1.529764 }, { "acc": 0.96798611, "epoch": 25.10428872744317, "grad_norm": 2.4546854496002197, "learning_rate": 5.377079669395257e-06, "loss": 0.09016336, "memory(GiB)": 13.7, "step": 53560, "train_speed(iter/s)": 1.529762 }, { "acc": 0.99122906, "epoch": 25.106632294352003, "grad_norm": 3.9458980560302734, "learning_rate": 5.376306755439419e-06, "loss": 0.03502377, "memory(GiB)": 13.7, "step": 53565, "train_speed(iter/s)": 1.529764 }, { "acc": 0.9869565, "epoch": 25.108975861260838, "grad_norm": 3.9214673042297363, "learning_rate": 5.375533832450418e-06, "loss": 0.03177777, "memory(GiB)": 13.7, "step": 53570, "train_speed(iter/s)": 1.529769 }, { "acc": 0.96437492, "epoch": 25.111319428169676, "grad_norm": 6.226067066192627, "learning_rate": 5.374760900446839e-06, "loss": 0.11384132, "memory(GiB)": 13.7, "step": 53575, "train_speed(iter/s)": 1.529779 }, { "acc": 0.98633928, "epoch": 25.11366299507851, "grad_norm": 4.328761100769043, "learning_rate": 5.373987959447255e-06, "loss": 0.05529199, "memory(GiB)": 13.7, "step": 53580, "train_speed(iter/s)": 1.529781 }, { "acc": 0.99224644, "epoch": 25.116006561987344, "grad_norm": 4.298207759857178, "learning_rate": 5.373215009470251e-06, "loss": 0.02047048, "memory(GiB)": 13.7, "step": 53585, "train_speed(iter/s)": 1.52979 }, { "acc": 0.98584452, "epoch": 25.11835012889618, "grad_norm": 3.15666127204895, "learning_rate": 5.3724420505344e-06, "loss": 0.03750136, "memory(GiB)": 13.7, "step": 53590, "train_speed(iter/s)": 1.529798 }, { "acc": 0.99409723, "epoch": 25.120693695805016, "grad_norm": 0.12100483477115631, "learning_rate": 5.371669082658284e-06, "loss": 0.03129609, "memory(GiB)": 13.7, "step": 53595, "train_speed(iter/s)": 1.529798 }, { "acc": 0.98478556, "epoch": 25.12303726271385, "grad_norm": 1.9069275856018066, "learning_rate": 5.3708961058604816e-06, "loss": 0.04768623, "memory(GiB)": 13.7, "step": 53600, "train_speed(iter/s)": 1.529808 }, { "acc": 0.98916664, "epoch": 25.125380829622685, "grad_norm": 0.801129162311554, "learning_rate": 5.370123120159576e-06, "loss": 0.01671453, "memory(GiB)": 13.7, "step": 53605, "train_speed(iter/s)": 1.529815 }, { "acc": 0.98673611, "epoch": 25.12772439653152, "grad_norm": 2.2202188968658447, "learning_rate": 5.3693501255741435e-06, "loss": 0.05927683, "memory(GiB)": 13.7, "step": 53610, "train_speed(iter/s)": 1.529824 }, { "acc": 0.9880209, "epoch": 25.130067963440357, "grad_norm": 2.212707996368408, "learning_rate": 5.368577122122766e-06, "loss": 0.03802892, "memory(GiB)": 13.7, "step": 53615, "train_speed(iter/s)": 1.529824 }, { "acc": 0.99375, "epoch": 25.13241153034919, "grad_norm": 0.18686045706272125, "learning_rate": 5.367804109824023e-06, "loss": 0.01120816, "memory(GiB)": 13.7, "step": 53620, "train_speed(iter/s)": 1.529828 }, { "acc": 0.97887783, "epoch": 25.134755097258026, "grad_norm": 5.498000621795654, "learning_rate": 5.367031088696497e-06, "loss": 0.04677864, "memory(GiB)": 13.7, "step": 53625, "train_speed(iter/s)": 1.529829 }, { "acc": 0.98794374, "epoch": 25.137098664166864, "grad_norm": 1.671539068222046, "learning_rate": 5.366258058758768e-06, "loss": 0.07630339, "memory(GiB)": 13.7, "step": 53630, "train_speed(iter/s)": 1.529836 }, { "acc": 0.98517361, "epoch": 25.139442231075698, "grad_norm": 2.139221668243408, "learning_rate": 5.365485020029416e-06, "loss": 0.05485667, "memory(GiB)": 13.7, "step": 53635, "train_speed(iter/s)": 1.52984 }, { "acc": 0.98321428, "epoch": 25.141785797984532, "grad_norm": 2.109959602355957, "learning_rate": 5.364711972527025e-06, "loss": 0.06236778, "memory(GiB)": 13.7, "step": 53640, "train_speed(iter/s)": 1.529848 }, { "acc": 0.99216347, "epoch": 25.144129364893367, "grad_norm": 1.3161516189575195, "learning_rate": 5.363938916270172e-06, "loss": 0.03380644, "memory(GiB)": 13.7, "step": 53645, "train_speed(iter/s)": 1.529854 }, { "acc": 0.98107634, "epoch": 25.146472931802204, "grad_norm": 1.821707010269165, "learning_rate": 5.363165851277443e-06, "loss": 0.0918193, "memory(GiB)": 13.7, "step": 53650, "train_speed(iter/s)": 1.529857 }, { "acc": 0.99499998, "epoch": 25.14881649871104, "grad_norm": 0.004976694472134113, "learning_rate": 5.362392777567418e-06, "loss": 0.02806054, "memory(GiB)": 13.7, "step": 53655, "train_speed(iter/s)": 1.529859 }, { "acc": 0.97156248, "epoch": 25.151160065619873, "grad_norm": 4.749586582183838, "learning_rate": 5.361619695158679e-06, "loss": 0.07078563, "memory(GiB)": 13.7, "step": 53660, "train_speed(iter/s)": 1.529866 }, { "acc": 0.99196434, "epoch": 25.153503632528707, "grad_norm": 2.1604926586151123, "learning_rate": 5.3608466040698096e-06, "loss": 0.02178828, "memory(GiB)": 13.7, "step": 53665, "train_speed(iter/s)": 1.529873 }, { "acc": 0.97854166, "epoch": 25.155847199437545, "grad_norm": 2.8794233798980713, "learning_rate": 5.360073504319394e-06, "loss": 0.07391097, "memory(GiB)": 13.7, "step": 53670, "train_speed(iter/s)": 1.529878 }, { "acc": 0.9842803, "epoch": 25.15819076634638, "grad_norm": 0.026090404018759727, "learning_rate": 5.359300395926008e-06, "loss": 0.03659051, "memory(GiB)": 13.7, "step": 53675, "train_speed(iter/s)": 1.529873 }, { "acc": 0.98003654, "epoch": 25.160534333255214, "grad_norm": 7.737207889556885, "learning_rate": 5.3585272789082434e-06, "loss": 0.11798567, "memory(GiB)": 13.7, "step": 53680, "train_speed(iter/s)": 1.529874 }, { "acc": 0.98630209, "epoch": 25.162877900164048, "grad_norm": 4.535336017608643, "learning_rate": 5.357754153284676e-06, "loss": 0.05280287, "memory(GiB)": 13.7, "step": 53685, "train_speed(iter/s)": 1.529879 }, { "acc": 0.9817708, "epoch": 25.165221467072886, "grad_norm": 3.0217981338500977, "learning_rate": 5.356981019073894e-06, "loss": 0.07472861, "memory(GiB)": 13.7, "step": 53690, "train_speed(iter/s)": 1.52988 }, { "acc": 0.97927084, "epoch": 25.16756503398172, "grad_norm": 6.273250102996826, "learning_rate": 5.356207876294479e-06, "loss": 0.07431667, "memory(GiB)": 13.7, "step": 53695, "train_speed(iter/s)": 1.529892 }, { "acc": 0.97538195, "epoch": 25.169908600890555, "grad_norm": 1.3121546506881714, "learning_rate": 5.355434724965015e-06, "loss": 0.07839251, "memory(GiB)": 13.7, "step": 53700, "train_speed(iter/s)": 1.529902 }, { "acc": 0.98705807, "epoch": 25.172252167799392, "grad_norm": 0.005289793945848942, "learning_rate": 5.354661565104087e-06, "loss": 0.0366271, "memory(GiB)": 13.7, "step": 53705, "train_speed(iter/s)": 1.529897 }, { "acc": 0.98988094, "epoch": 25.174595734708227, "grad_norm": 3.251403570175171, "learning_rate": 5.353888396730279e-06, "loss": 0.02717995, "memory(GiB)": 13.7, "step": 53710, "train_speed(iter/s)": 1.529904 }, { "acc": 0.99458332, "epoch": 25.17693930161706, "grad_norm": 2.5532491207122803, "learning_rate": 5.3531152198621735e-06, "loss": 0.03902611, "memory(GiB)": 13.7, "step": 53715, "train_speed(iter/s)": 1.529906 }, { "acc": 0.97279758, "epoch": 25.179282868525895, "grad_norm": 4.083198070526123, "learning_rate": 5.352342034518357e-06, "loss": 0.07574148, "memory(GiB)": 13.7, "step": 53720, "train_speed(iter/s)": 1.529904 }, { "acc": 0.9840476, "epoch": 25.181626435434733, "grad_norm": 5.6435418128967285, "learning_rate": 5.351568840717413e-06, "loss": 0.06340752, "memory(GiB)": 13.7, "step": 53725, "train_speed(iter/s)": 1.529912 }, { "acc": 0.95874996, "epoch": 25.183970002343568, "grad_norm": 10.035847663879395, "learning_rate": 5.350795638477928e-06, "loss": 0.13922091, "memory(GiB)": 13.7, "step": 53730, "train_speed(iter/s)": 1.529918 }, { "acc": 0.9859375, "epoch": 25.186313569252402, "grad_norm": 0.12259627878665924, "learning_rate": 5.350022427818488e-06, "loss": 0.02801247, "memory(GiB)": 13.7, "step": 53735, "train_speed(iter/s)": 1.529917 }, { "acc": 0.98008928, "epoch": 25.188657136161236, "grad_norm": 3.954308271408081, "learning_rate": 5.349249208757676e-06, "loss": 0.0672771, "memory(GiB)": 13.7, "step": 53740, "train_speed(iter/s)": 1.529906 }, { "acc": 0.99125004, "epoch": 25.191000703070074, "grad_norm": 6.6744704246521, "learning_rate": 5.348475981314079e-06, "loss": 0.08493029, "memory(GiB)": 13.7, "step": 53745, "train_speed(iter/s)": 1.529904 }, { "acc": 0.98258934, "epoch": 25.19334426997891, "grad_norm": 2.9623944759368896, "learning_rate": 5.347702745506282e-06, "loss": 0.06077895, "memory(GiB)": 13.7, "step": 53750, "train_speed(iter/s)": 1.529913 }, { "acc": 0.97897873, "epoch": 25.195687836887743, "grad_norm": 11.685359001159668, "learning_rate": 5.346929501352873e-06, "loss": 0.06863083, "memory(GiB)": 13.7, "step": 53755, "train_speed(iter/s)": 1.529916 }, { "acc": 0.98508015, "epoch": 25.198031403796577, "grad_norm": 3.3578567504882812, "learning_rate": 5.3461562488724374e-06, "loss": 0.04822719, "memory(GiB)": 13.7, "step": 53760, "train_speed(iter/s)": 1.529921 }, { "acc": 0.97250004, "epoch": 25.200374970705415, "grad_norm": 5.965860366821289, "learning_rate": 5.345382988083558e-06, "loss": 0.05583006, "memory(GiB)": 13.7, "step": 53765, "train_speed(iter/s)": 1.529921 }, { "acc": 0.9807044, "epoch": 25.20271853761425, "grad_norm": 4.001852035522461, "learning_rate": 5.344609719004829e-06, "loss": 0.07742306, "memory(GiB)": 13.7, "step": 53770, "train_speed(iter/s)": 1.52993 }, { "acc": 0.98708324, "epoch": 25.205062104523083, "grad_norm": 2.458070755004883, "learning_rate": 5.343836441654828e-06, "loss": 0.03908567, "memory(GiB)": 13.7, "step": 53775, "train_speed(iter/s)": 1.529935 }, { "acc": 0.9958333, "epoch": 25.207405671431918, "grad_norm": 2.5527470111846924, "learning_rate": 5.34306315605215e-06, "loss": 0.03511063, "memory(GiB)": 13.7, "step": 53780, "train_speed(iter/s)": 1.529939 }, { "acc": 0.99680557, "epoch": 25.209749238340756, "grad_norm": 2.7000417709350586, "learning_rate": 5.342289862215377e-06, "loss": 0.02084727, "memory(GiB)": 13.7, "step": 53785, "train_speed(iter/s)": 1.529944 }, { "acc": 0.97927084, "epoch": 25.21209280524959, "grad_norm": 4.452778339385986, "learning_rate": 5.341516560163099e-06, "loss": 0.06086226, "memory(GiB)": 13.7, "step": 53790, "train_speed(iter/s)": 1.529954 }, { "acc": 0.98458328, "epoch": 25.214436372158424, "grad_norm": 0.0029117546509951353, "learning_rate": 5.340743249913905e-06, "loss": 0.04362776, "memory(GiB)": 13.7, "step": 53795, "train_speed(iter/s)": 1.529956 }, { "acc": 0.98708334, "epoch": 25.216779939067262, "grad_norm": 0.11516119539737701, "learning_rate": 5.3399699314863816e-06, "loss": 0.04585304, "memory(GiB)": 13.7, "step": 53800, "train_speed(iter/s)": 1.529954 }, { "acc": 0.97081852, "epoch": 25.219123505976096, "grad_norm": 2.6531078815460205, "learning_rate": 5.339196604899114e-06, "loss": 0.0899395, "memory(GiB)": 13.7, "step": 53805, "train_speed(iter/s)": 1.529962 }, { "acc": 0.99437504, "epoch": 25.22146707288493, "grad_norm": 1.1663146018981934, "learning_rate": 5.3384232701706936e-06, "loss": 0.03003626, "memory(GiB)": 13.7, "step": 53810, "train_speed(iter/s)": 1.529973 }, { "acc": 0.97939491, "epoch": 25.223810639793765, "grad_norm": 4.558999538421631, "learning_rate": 5.337649927319709e-06, "loss": 0.11451335, "memory(GiB)": 13.7, "step": 53815, "train_speed(iter/s)": 1.52998 }, { "acc": 0.99226761, "epoch": 25.226154206702603, "grad_norm": 2.099818706512451, "learning_rate": 5.336876576364745e-06, "loss": 0.02920868, "memory(GiB)": 13.7, "step": 53820, "train_speed(iter/s)": 1.529979 }, { "acc": 0.97159729, "epoch": 25.228497773611437, "grad_norm": 4.822593688964844, "learning_rate": 5.3361032173243956e-06, "loss": 0.07989954, "memory(GiB)": 13.7, "step": 53825, "train_speed(iter/s)": 1.52998 }, { "acc": 0.98187504, "epoch": 25.23084134052027, "grad_norm": 5.437323570251465, "learning_rate": 5.335329850217245e-06, "loss": 0.04468907, "memory(GiB)": 13.7, "step": 53830, "train_speed(iter/s)": 1.529981 }, { "acc": 0.98812504, "epoch": 25.233184907429106, "grad_norm": 2.6242496967315674, "learning_rate": 5.334556475061887e-06, "loss": 0.04016067, "memory(GiB)": 13.7, "step": 53835, "train_speed(iter/s)": 1.529982 }, { "acc": 0.97579985, "epoch": 25.235528474337944, "grad_norm": 3.493506908416748, "learning_rate": 5.33378309187691e-06, "loss": 0.04462389, "memory(GiB)": 13.7, "step": 53840, "train_speed(iter/s)": 1.52999 }, { "acc": 0.98154764, "epoch": 25.237872041246778, "grad_norm": 7.821070671081543, "learning_rate": 5.3330097006809015e-06, "loss": 0.04599046, "memory(GiB)": 13.7, "step": 53845, "train_speed(iter/s)": 1.529989 }, { "acc": 0.97468748, "epoch": 25.240215608155612, "grad_norm": 4.059437274932861, "learning_rate": 5.3322363014924495e-06, "loss": 0.1384746, "memory(GiB)": 13.7, "step": 53850, "train_speed(iter/s)": 1.529981 }, { "acc": 0.99250002, "epoch": 25.242559175064446, "grad_norm": 2.926114559173584, "learning_rate": 5.33146289433015e-06, "loss": 0.02687485, "memory(GiB)": 13.7, "step": 53855, "train_speed(iter/s)": 1.529985 }, { "acc": 0.97871437, "epoch": 25.244902741973284, "grad_norm": 1.9901626110076904, "learning_rate": 5.330689479212588e-06, "loss": 0.09773347, "memory(GiB)": 13.7, "step": 53860, "train_speed(iter/s)": 1.529991 }, { "acc": 0.97352676, "epoch": 25.24724630888212, "grad_norm": 2.479219913482666, "learning_rate": 5.3299160561583556e-06, "loss": 0.09057765, "memory(GiB)": 13.7, "step": 53865, "train_speed(iter/s)": 1.529988 }, { "acc": 0.98735123, "epoch": 25.249589875790953, "grad_norm": 0.5790925621986389, "learning_rate": 5.3291426251860435e-06, "loss": 0.10314448, "memory(GiB)": 13.7, "step": 53870, "train_speed(iter/s)": 1.529985 }, { "acc": 0.98183212, "epoch": 25.25193344269979, "grad_norm": 8.696605682373047, "learning_rate": 5.32836918631424e-06, "loss": 0.06649439, "memory(GiB)": 13.7, "step": 53875, "train_speed(iter/s)": 1.529993 }, { "acc": 0.97434483, "epoch": 25.254277009608625, "grad_norm": 4.007535457611084, "learning_rate": 5.327595739561544e-06, "loss": 0.07009548, "memory(GiB)": 13.7, "step": 53880, "train_speed(iter/s)": 1.529994 }, { "acc": 0.97770834, "epoch": 25.25662057651746, "grad_norm": 4.005455493927002, "learning_rate": 5.326822284946538e-06, "loss": 0.07027999, "memory(GiB)": 13.7, "step": 53885, "train_speed(iter/s)": 1.529999 }, { "acc": 0.9833334, "epoch": 25.258964143426294, "grad_norm": 4.558670520782471, "learning_rate": 5.326048822487817e-06, "loss": 0.02675034, "memory(GiB)": 13.7, "step": 53890, "train_speed(iter/s)": 1.530006 }, { "acc": 0.984375, "epoch": 25.26130771033513, "grad_norm": 2.778510093688965, "learning_rate": 5.32527535220397e-06, "loss": 0.05383469, "memory(GiB)": 13.7, "step": 53895, "train_speed(iter/s)": 1.530002 }, { "acc": 0.9895834, "epoch": 25.263651277243966, "grad_norm": 0.07436991482973099, "learning_rate": 5.3245018741135924e-06, "loss": 0.02919639, "memory(GiB)": 13.7, "step": 53900, "train_speed(iter/s)": 1.529997 }, { "acc": 0.98986111, "epoch": 25.2659948441528, "grad_norm": 3.593118906021118, "learning_rate": 5.323728388235272e-06, "loss": 0.02315225, "memory(GiB)": 13.7, "step": 53905, "train_speed(iter/s)": 1.530007 }, { "acc": 0.9746726, "epoch": 25.268338411061634, "grad_norm": 1.8363115787506104, "learning_rate": 5.322954894587605e-06, "loss": 0.08399556, "memory(GiB)": 13.7, "step": 53910, "train_speed(iter/s)": 1.53001 }, { "acc": 0.98604174, "epoch": 25.270681977970472, "grad_norm": 1.3406434059143066, "learning_rate": 5.322181393189182e-06, "loss": 0.04972438, "memory(GiB)": 13.7, "step": 53915, "train_speed(iter/s)": 1.530016 }, { "acc": 0.98288689, "epoch": 25.273025544879307, "grad_norm": 2.6941285133361816, "learning_rate": 5.321407884058595e-06, "loss": 0.05539287, "memory(GiB)": 13.7, "step": 53920, "train_speed(iter/s)": 1.530026 }, { "acc": 0.97729168, "epoch": 25.27536911178814, "grad_norm": 6.298923969268799, "learning_rate": 5.320634367214436e-06, "loss": 0.07383041, "memory(GiB)": 13.7, "step": 53925, "train_speed(iter/s)": 1.53004 }, { "acc": 0.97453375, "epoch": 25.277712678696975, "grad_norm": 4.93612003326416, "learning_rate": 5.319860842675298e-06, "loss": 0.1019845, "memory(GiB)": 13.7, "step": 53930, "train_speed(iter/s)": 1.530046 }, { "acc": 0.98125, "epoch": 25.280056245605813, "grad_norm": 5.280198574066162, "learning_rate": 5.319087310459774e-06, "loss": 0.06022646, "memory(GiB)": 13.7, "step": 53935, "train_speed(iter/s)": 1.530045 }, { "acc": 0.98395834, "epoch": 25.282399812514647, "grad_norm": 3.0518548488616943, "learning_rate": 5.3183137705864595e-06, "loss": 0.03086425, "memory(GiB)": 13.7, "step": 53940, "train_speed(iter/s)": 1.530051 }, { "acc": 0.98842258, "epoch": 25.28474337942348, "grad_norm": 1.7698888778686523, "learning_rate": 5.317540223073946e-06, "loss": 0.02962216, "memory(GiB)": 13.7, "step": 53945, "train_speed(iter/s)": 1.530062 }, { "acc": 0.984375, "epoch": 25.287086946332316, "grad_norm": 2.8827431201934814, "learning_rate": 5.316766667940825e-06, "loss": 0.061893, "memory(GiB)": 13.7, "step": 53950, "train_speed(iter/s)": 1.530068 }, { "acc": 0.97999992, "epoch": 25.289430513241154, "grad_norm": 2.8925795555114746, "learning_rate": 5.315993105205695e-06, "loss": 0.08359517, "memory(GiB)": 13.7, "step": 53955, "train_speed(iter/s)": 1.530073 }, { "acc": 0.97134876, "epoch": 25.291774080149988, "grad_norm": 3.0854365825653076, "learning_rate": 5.315219534887146e-06, "loss": 0.06687415, "memory(GiB)": 13.7, "step": 53960, "train_speed(iter/s)": 1.530076 }, { "acc": 0.98506403, "epoch": 25.294117647058822, "grad_norm": 34.05414962768555, "learning_rate": 5.3144459570037744e-06, "loss": 0.08899945, "memory(GiB)": 13.7, "step": 53965, "train_speed(iter/s)": 1.530077 }, { "acc": 0.9947917, "epoch": 25.29646121396766, "grad_norm": 3.202857255935669, "learning_rate": 5.313672371574171e-06, "loss": 0.03226989, "memory(GiB)": 13.7, "step": 53970, "train_speed(iter/s)": 1.530085 }, { "acc": 0.96580353, "epoch": 25.298804780876495, "grad_norm": 4.57783842086792, "learning_rate": 5.312898778616934e-06, "loss": 0.10263374, "memory(GiB)": 13.7, "step": 53975, "train_speed(iter/s)": 1.53009 }, { "acc": 0.98779755, "epoch": 25.30114834778533, "grad_norm": 1.9270775318145752, "learning_rate": 5.312125178150656e-06, "loss": 0.03918009, "memory(GiB)": 13.7, "step": 53980, "train_speed(iter/s)": 1.53009 }, { "acc": 0.97645836, "epoch": 25.303491914694163, "grad_norm": 13.650701522827148, "learning_rate": 5.311351570193933e-06, "loss": 0.09137908, "memory(GiB)": 13.7, "step": 53985, "train_speed(iter/s)": 1.530097 }, { "acc": 0.9871726, "epoch": 25.305835481603, "grad_norm": 2.807032346725464, "learning_rate": 5.31057795476536e-06, "loss": 0.0421524, "memory(GiB)": 13.7, "step": 53990, "train_speed(iter/s)": 1.530097 }, { "acc": 0.99404764, "epoch": 25.308179048511835, "grad_norm": 0.02916126698255539, "learning_rate": 5.30980433188353e-06, "loss": 0.01880614, "memory(GiB)": 13.7, "step": 53995, "train_speed(iter/s)": 1.530112 }, { "acc": 0.96635418, "epoch": 25.31052261542067, "grad_norm": 4.076182842254639, "learning_rate": 5.309030701567042e-06, "loss": 0.05657754, "memory(GiB)": 13.7, "step": 54000, "train_speed(iter/s)": 1.530117 }, { "acc": 0.98585567, "epoch": 25.312866182329504, "grad_norm": 1.3080552816390991, "learning_rate": 5.3082570638344895e-06, "loss": 0.08321201, "memory(GiB)": 13.7, "step": 54005, "train_speed(iter/s)": 1.530115 }, { "acc": 0.98465281, "epoch": 25.315209749238342, "grad_norm": 0.08106791228055954, "learning_rate": 5.307483418704467e-06, "loss": 0.07301334, "memory(GiB)": 13.7, "step": 54010, "train_speed(iter/s)": 1.530111 }, { "acc": 0.98447914, "epoch": 25.317553316147176, "grad_norm": 5.781754970550537, "learning_rate": 5.306709766195573e-06, "loss": 0.06762887, "memory(GiB)": 13.7, "step": 54015, "train_speed(iter/s)": 1.530117 }, { "acc": 0.99365177, "epoch": 25.31989688305601, "grad_norm": 2.754408836364746, "learning_rate": 5.305936106326401e-06, "loss": 0.03200455, "memory(GiB)": 13.7, "step": 54020, "train_speed(iter/s)": 1.530123 }, { "acc": 0.99291668, "epoch": 25.322240449964845, "grad_norm": 1.7574485540390015, "learning_rate": 5.305162439115549e-06, "loss": 0.02142078, "memory(GiB)": 13.7, "step": 54025, "train_speed(iter/s)": 1.53013 }, { "acc": 0.97279758, "epoch": 25.324584016873683, "grad_norm": 5.49758243560791, "learning_rate": 5.3043887645816125e-06, "loss": 0.10202628, "memory(GiB)": 13.7, "step": 54030, "train_speed(iter/s)": 1.530127 }, { "acc": 0.97967262, "epoch": 25.326927583782517, "grad_norm": 3.511622190475464, "learning_rate": 5.303615082743189e-06, "loss": 0.06470077, "memory(GiB)": 13.7, "step": 54035, "train_speed(iter/s)": 1.530127 }, { "acc": 0.9812171, "epoch": 25.32927115069135, "grad_norm": 0.660516619682312, "learning_rate": 5.302841393618874e-06, "loss": 0.03861169, "memory(GiB)": 13.7, "step": 54040, "train_speed(iter/s)": 1.530136 }, { "acc": 0.97220383, "epoch": 25.33161471760019, "grad_norm": 6.2368292808532715, "learning_rate": 5.302067697227267e-06, "loss": 0.15253164, "memory(GiB)": 13.7, "step": 54045, "train_speed(iter/s)": 1.530145 }, { "acc": 0.98265877, "epoch": 25.333958284509023, "grad_norm": 2.847501277923584, "learning_rate": 5.301293993586961e-06, "loss": 0.05731536, "memory(GiB)": 13.7, "step": 54050, "train_speed(iter/s)": 1.530146 }, { "acc": 0.99444447, "epoch": 25.336301851417858, "grad_norm": 5.431369304656982, "learning_rate": 5.300520282716556e-06, "loss": 0.03895047, "memory(GiB)": 13.7, "step": 54055, "train_speed(iter/s)": 1.530154 }, { "acc": 0.996875, "epoch": 25.338645418326692, "grad_norm": 1.315219521522522, "learning_rate": 5.299746564634649e-06, "loss": 0.0171609, "memory(GiB)": 13.7, "step": 54060, "train_speed(iter/s)": 1.53016 }, { "acc": 0.99642859, "epoch": 25.34098898523553, "grad_norm": 4.0636491775512695, "learning_rate": 5.298972839359838e-06, "loss": 0.03826753, "memory(GiB)": 13.7, "step": 54065, "train_speed(iter/s)": 1.530154 }, { "acc": 0.9697917, "epoch": 25.343332552144364, "grad_norm": 0.020157339051365852, "learning_rate": 5.298199106910721e-06, "loss": 0.0813682, "memory(GiB)": 13.7, "step": 54070, "train_speed(iter/s)": 1.530163 }, { "acc": 0.9916667, "epoch": 25.3456761190532, "grad_norm": 0.5743891596794128, "learning_rate": 5.297425367305894e-06, "loss": 0.03133779, "memory(GiB)": 13.7, "step": 54075, "train_speed(iter/s)": 1.530165 }, { "acc": 0.98536854, "epoch": 25.348019685962033, "grad_norm": 0.9412806034088135, "learning_rate": 5.296651620563957e-06, "loss": 0.039949, "memory(GiB)": 13.7, "step": 54080, "train_speed(iter/s)": 1.53017 }, { "acc": 0.9739584, "epoch": 25.35036325287087, "grad_norm": 6.229441165924072, "learning_rate": 5.2958778667035095e-06, "loss": 0.06587076, "memory(GiB)": 13.7, "step": 54085, "train_speed(iter/s)": 1.53018 }, { "acc": 0.98718138, "epoch": 25.352706819779705, "grad_norm": 3.126089334487915, "learning_rate": 5.29510410574315e-06, "loss": 0.03486857, "memory(GiB)": 13.7, "step": 54090, "train_speed(iter/s)": 1.530178 }, { "acc": 0.9866518, "epoch": 25.35505038668854, "grad_norm": 3.0191662311553955, "learning_rate": 5.294330337701472e-06, "loss": 0.06945821, "memory(GiB)": 13.7, "step": 54095, "train_speed(iter/s)": 1.530175 }, { "acc": 0.98560762, "epoch": 25.357393953597374, "grad_norm": 0.020173205062747, "learning_rate": 5.293556562597079e-06, "loss": 0.05291088, "memory(GiB)": 13.7, "step": 54100, "train_speed(iter/s)": 1.530185 }, { "acc": 0.98475275, "epoch": 25.35973752050621, "grad_norm": 2.3813345432281494, "learning_rate": 5.29278278044857e-06, "loss": 0.10372894, "memory(GiB)": 13.7, "step": 54105, "train_speed(iter/s)": 1.530187 }, { "acc": 0.98883934, "epoch": 25.362081087415046, "grad_norm": 2.829789876937866, "learning_rate": 5.2920089912745424e-06, "loss": 0.03329177, "memory(GiB)": 13.7, "step": 54110, "train_speed(iter/s)": 1.53019 }, { "acc": 0.97279758, "epoch": 25.36442465432388, "grad_norm": 4.430632591247559, "learning_rate": 5.291235195093596e-06, "loss": 0.12188532, "memory(GiB)": 13.7, "step": 54115, "train_speed(iter/s)": 1.530201 }, { "acc": 0.9832387, "epoch": 25.366768221232718, "grad_norm": 2.5238311290740967, "learning_rate": 5.2904613919243304e-06, "loss": 0.04455722, "memory(GiB)": 13.7, "step": 54120, "train_speed(iter/s)": 1.530201 }, { "acc": 0.97946434, "epoch": 25.369111788141552, "grad_norm": 3.9418914318084717, "learning_rate": 5.289687581785345e-06, "loss": 0.08938378, "memory(GiB)": 13.7, "step": 54125, "train_speed(iter/s)": 1.530208 }, { "acc": 0.97053032, "epoch": 25.371455355050387, "grad_norm": 5.055664539337158, "learning_rate": 5.288913764695244e-06, "loss": 0.10085289, "memory(GiB)": 13.7, "step": 54130, "train_speed(iter/s)": 1.530214 }, { "acc": 0.97781792, "epoch": 25.37379892195922, "grad_norm": 1.4856327772140503, "learning_rate": 5.288139940672621e-06, "loss": 0.06603174, "memory(GiB)": 13.7, "step": 54135, "train_speed(iter/s)": 1.530219 }, { "acc": 0.97895832, "epoch": 25.37614248886806, "grad_norm": 4.948823928833008, "learning_rate": 5.287366109736079e-06, "loss": 0.04751856, "memory(GiB)": 13.7, "step": 54140, "train_speed(iter/s)": 1.530218 }, { "acc": 0.98142862, "epoch": 25.378486055776893, "grad_norm": 1.5494967699050903, "learning_rate": 5.286592271904216e-06, "loss": 0.05417313, "memory(GiB)": 13.7, "step": 54145, "train_speed(iter/s)": 1.530223 }, { "acc": 0.97351189, "epoch": 25.380829622685727, "grad_norm": 2.2759242057800293, "learning_rate": 5.285818427195638e-06, "loss": 0.11190219, "memory(GiB)": 13.7, "step": 54150, "train_speed(iter/s)": 1.530224 }, { "acc": 0.98604164, "epoch": 25.38317318959456, "grad_norm": 2.484724521636963, "learning_rate": 5.28504457562894e-06, "loss": 0.04734313, "memory(GiB)": 13.7, "step": 54155, "train_speed(iter/s)": 1.530232 }, { "acc": 0.9717803, "epoch": 25.3855167565034, "grad_norm": 2.124816417694092, "learning_rate": 5.2842707172227255e-06, "loss": 0.09380884, "memory(GiB)": 13.7, "step": 54160, "train_speed(iter/s)": 1.530234 }, { "acc": 0.96758928, "epoch": 25.387860323412234, "grad_norm": 2.869624137878418, "learning_rate": 5.283496851995597e-06, "loss": 0.07877923, "memory(GiB)": 13.7, "step": 54165, "train_speed(iter/s)": 1.530239 }, { "acc": 0.98425598, "epoch": 25.390203890321068, "grad_norm": 4.050869941711426, "learning_rate": 5.282722979966152e-06, "loss": 0.04654828, "memory(GiB)": 13.7, "step": 54170, "train_speed(iter/s)": 1.530242 }, { "acc": 0.9765625, "epoch": 25.392547457229902, "grad_norm": 2.6367995738983154, "learning_rate": 5.281949101152996e-06, "loss": 0.05578825, "memory(GiB)": 13.7, "step": 54175, "train_speed(iter/s)": 1.530247 }, { "acc": 0.9742857, "epoch": 25.39489102413874, "grad_norm": 5.103669166564941, "learning_rate": 5.281175215574726e-06, "loss": 0.10170583, "memory(GiB)": 13.7, "step": 54180, "train_speed(iter/s)": 1.530248 }, { "acc": 0.98249998, "epoch": 25.397234591047575, "grad_norm": 4.717672348022461, "learning_rate": 5.280401323249946e-06, "loss": 0.03112385, "memory(GiB)": 13.7, "step": 54185, "train_speed(iter/s)": 1.530253 }, { "acc": 0.98790188, "epoch": 25.39957815795641, "grad_norm": 0.05362805724143982, "learning_rate": 5.279627424197259e-06, "loss": 0.04082201, "memory(GiB)": 13.7, "step": 54190, "train_speed(iter/s)": 1.530254 }, { "acc": 0.97634811, "epoch": 25.401921724865247, "grad_norm": 7.594893455505371, "learning_rate": 5.278853518435266e-06, "loss": 0.06608552, "memory(GiB)": 13.7, "step": 54195, "train_speed(iter/s)": 1.530262 }, { "acc": 0.97937498, "epoch": 25.40426529177408, "grad_norm": 3.484187126159668, "learning_rate": 5.278079605982567e-06, "loss": 0.03784322, "memory(GiB)": 13.7, "step": 54200, "train_speed(iter/s)": 1.530267 }, { "acc": 0.97562504, "epoch": 25.406608858682915, "grad_norm": 36.10005187988281, "learning_rate": 5.277305686857767e-06, "loss": 0.11798782, "memory(GiB)": 13.7, "step": 54205, "train_speed(iter/s)": 1.530268 }, { "acc": 0.99213715, "epoch": 25.40895242559175, "grad_norm": 3.089975118637085, "learning_rate": 5.2765317610794675e-06, "loss": 0.04280194, "memory(GiB)": 13.7, "step": 54210, "train_speed(iter/s)": 1.530265 }, { "acc": 0.98668594, "epoch": 25.411295992500587, "grad_norm": 4.310754776000977, "learning_rate": 5.275757828666275e-06, "loss": 0.05158001, "memory(GiB)": 13.7, "step": 54215, "train_speed(iter/s)": 1.530266 }, { "acc": 0.98696423, "epoch": 25.41363955940942, "grad_norm": 3.6743173599243164, "learning_rate": 5.274983889636783e-06, "loss": 0.03897784, "memory(GiB)": 13.7, "step": 54220, "train_speed(iter/s)": 1.530268 }, { "acc": 0.97791672, "epoch": 25.415983126318256, "grad_norm": 5.944795608520508, "learning_rate": 5.274209944009603e-06, "loss": 0.09963428, "memory(GiB)": 13.7, "step": 54225, "train_speed(iter/s)": 1.530265 }, { "acc": 0.9833334, "epoch": 25.41832669322709, "grad_norm": 1.4010212421417236, "learning_rate": 5.273435991803334e-06, "loss": 0.03939328, "memory(GiB)": 13.7, "step": 54230, "train_speed(iter/s)": 1.530284 }, { "acc": 0.99375, "epoch": 25.42067026013593, "grad_norm": 2.8125221729278564, "learning_rate": 5.272662033036581e-06, "loss": 0.03523487, "memory(GiB)": 13.7, "step": 54235, "train_speed(iter/s)": 1.530285 }, { "acc": 0.97877979, "epoch": 25.423013827044763, "grad_norm": 0.537567138671875, "learning_rate": 5.271888067727946e-06, "loss": 0.07766554, "memory(GiB)": 13.7, "step": 54240, "train_speed(iter/s)": 1.530279 }, { "acc": 0.96427078, "epoch": 25.425357393953597, "grad_norm": 7.715771198272705, "learning_rate": 5.271114095896034e-06, "loss": 0.08741363, "memory(GiB)": 13.7, "step": 54245, "train_speed(iter/s)": 1.530282 }, { "acc": 0.99080925, "epoch": 25.42770096086243, "grad_norm": 1.995605230331421, "learning_rate": 5.270340117559448e-06, "loss": 0.03255305, "memory(GiB)": 13.7, "step": 54250, "train_speed(iter/s)": 1.530283 }, { "acc": 0.99109211, "epoch": 25.43004452777127, "grad_norm": 3.5341341495513916, "learning_rate": 5.269566132736794e-06, "loss": 0.08053199, "memory(GiB)": 13.7, "step": 54255, "train_speed(iter/s)": 1.530287 }, { "acc": 0.984375, "epoch": 25.432388094680103, "grad_norm": 0.035687658935785294, "learning_rate": 5.268792141446672e-06, "loss": 0.04301596, "memory(GiB)": 13.7, "step": 54260, "train_speed(iter/s)": 1.530291 }, { "acc": 0.9861805, "epoch": 25.434731661588938, "grad_norm": 2.05989670753479, "learning_rate": 5.268018143707688e-06, "loss": 0.05626248, "memory(GiB)": 13.7, "step": 54265, "train_speed(iter/s)": 1.530293 }, { "acc": 0.98195515, "epoch": 25.437075228497772, "grad_norm": 0.6472309827804565, "learning_rate": 5.267244139538449e-06, "loss": 0.05086118, "memory(GiB)": 13.7, "step": 54270, "train_speed(iter/s)": 1.5303 }, { "acc": 0.98856249, "epoch": 25.43941879540661, "grad_norm": 3.799018144607544, "learning_rate": 5.266470128957555e-06, "loss": 0.05112824, "memory(GiB)": 13.7, "step": 54275, "train_speed(iter/s)": 1.530305 }, { "acc": 0.98549681, "epoch": 25.441762362315444, "grad_norm": 1.6345157623291016, "learning_rate": 5.265696111983614e-06, "loss": 0.0424472, "memory(GiB)": 13.7, "step": 54280, "train_speed(iter/s)": 1.530312 }, { "acc": 0.9854166, "epoch": 25.44410592922428, "grad_norm": 2.035609483718872, "learning_rate": 5.264922088635228e-06, "loss": 0.0820502, "memory(GiB)": 13.7, "step": 54285, "train_speed(iter/s)": 1.530319 }, { "acc": 0.96126986, "epoch": 25.446449496133116, "grad_norm": 9.432055473327637, "learning_rate": 5.264148058931006e-06, "loss": 0.10285296, "memory(GiB)": 13.7, "step": 54290, "train_speed(iter/s)": 1.530325 }, { "acc": 0.97989578, "epoch": 25.44879306304195, "grad_norm": 4.71055269241333, "learning_rate": 5.263374022889549e-06, "loss": 0.06927284, "memory(GiB)": 13.7, "step": 54295, "train_speed(iter/s)": 1.530333 }, { "acc": 0.98865528, "epoch": 25.451136629950785, "grad_norm": 2.9594268798828125, "learning_rate": 5.262599980529466e-06, "loss": 0.02493145, "memory(GiB)": 13.7, "step": 54300, "train_speed(iter/s)": 1.530332 }, { "acc": 0.98467255, "epoch": 25.45348019685962, "grad_norm": 1.5227367877960205, "learning_rate": 5.2618259318693585e-06, "loss": 0.05347962, "memory(GiB)": 13.7, "step": 54305, "train_speed(iter/s)": 1.530329 }, { "acc": 0.96930637, "epoch": 25.455823763768457, "grad_norm": 3.6481070518493652, "learning_rate": 5.261051876927836e-06, "loss": 0.07237367, "memory(GiB)": 13.7, "step": 54310, "train_speed(iter/s)": 1.530334 }, { "acc": 0.99080353, "epoch": 25.45816733067729, "grad_norm": 2.3111629486083984, "learning_rate": 5.2602778157235025e-06, "loss": 0.02748236, "memory(GiB)": 13.7, "step": 54315, "train_speed(iter/s)": 1.530341 }, { "acc": 0.98708334, "epoch": 25.460510897586126, "grad_norm": 3.862966775894165, "learning_rate": 5.259503748274961e-06, "loss": 0.03762057, "memory(GiB)": 13.7, "step": 54320, "train_speed(iter/s)": 1.530347 }, { "acc": 0.9911293, "epoch": 25.46285446449496, "grad_norm": 5.042097091674805, "learning_rate": 5.258729674600822e-06, "loss": 0.06533458, "memory(GiB)": 13.7, "step": 54325, "train_speed(iter/s)": 1.530359 }, { "acc": 0.98619785, "epoch": 25.465198031403798, "grad_norm": 0.033993910998106, "learning_rate": 5.257955594719689e-06, "loss": 0.04971251, "memory(GiB)": 13.7, "step": 54330, "train_speed(iter/s)": 1.530359 }, { "acc": 0.98529758, "epoch": 25.467541598312632, "grad_norm": 3.8166146278381348, "learning_rate": 5.257181508650171e-06, "loss": 0.05719433, "memory(GiB)": 13.7, "step": 54335, "train_speed(iter/s)": 1.53037 }, { "acc": 0.9677084, "epoch": 25.469885165221466, "grad_norm": 7.113099575042725, "learning_rate": 5.256407416410874e-06, "loss": 0.0839955, "memory(GiB)": 13.7, "step": 54340, "train_speed(iter/s)": 1.530386 }, { "acc": 0.97250004, "epoch": 25.4722287321303, "grad_norm": 0.7498279213905334, "learning_rate": 5.255633318020401e-06, "loss": 0.05492536, "memory(GiB)": 13.7, "step": 54345, "train_speed(iter/s)": 1.530387 }, { "acc": 0.99154758, "epoch": 25.47457229903914, "grad_norm": 3.876256227493286, "learning_rate": 5.25485921349736e-06, "loss": 0.04426589, "memory(GiB)": 13.7, "step": 54350, "train_speed(iter/s)": 1.530392 }, { "acc": 0.98354168, "epoch": 25.476915865947973, "grad_norm": 3.106459617614746, "learning_rate": 5.254085102860362e-06, "loss": 0.04252883, "memory(GiB)": 13.7, "step": 54355, "train_speed(iter/s)": 1.530387 }, { "acc": 0.9520834, "epoch": 25.479259432856807, "grad_norm": 6.670534133911133, "learning_rate": 5.253310986128009e-06, "loss": 0.09452935, "memory(GiB)": 13.7, "step": 54360, "train_speed(iter/s)": 1.530393 }, { "acc": 0.984375, "epoch": 25.481602999765645, "grad_norm": 4.0219268798828125, "learning_rate": 5.252536863318911e-06, "loss": 0.07920785, "memory(GiB)": 13.7, "step": 54365, "train_speed(iter/s)": 1.530393 }, { "acc": 0.98266029, "epoch": 25.48394656667448, "grad_norm": 5.0493245124816895, "learning_rate": 5.251762734451676e-06, "loss": 0.06583966, "memory(GiB)": 13.7, "step": 54370, "train_speed(iter/s)": 1.530393 }, { "acc": 0.97559528, "epoch": 25.486290133583314, "grad_norm": 6.668914794921875, "learning_rate": 5.250988599544908e-06, "loss": 0.07985146, "memory(GiB)": 13.7, "step": 54375, "train_speed(iter/s)": 1.530386 }, { "acc": 0.95914764, "epoch": 25.488633700492148, "grad_norm": 5.281929016113281, "learning_rate": 5.25021445861722e-06, "loss": 0.10513631, "memory(GiB)": 13.7, "step": 54380, "train_speed(iter/s)": 1.530391 }, { "acc": 0.98476191, "epoch": 25.490977267400986, "grad_norm": 4.986842632293701, "learning_rate": 5.249440311687218e-06, "loss": 0.05086374, "memory(GiB)": 13.7, "step": 54385, "train_speed(iter/s)": 1.530394 }, { "acc": 0.99296875, "epoch": 25.49332083430982, "grad_norm": 1.7369717359542847, "learning_rate": 5.248666158773505e-06, "loss": 0.0277994, "memory(GiB)": 13.7, "step": 54390, "train_speed(iter/s)": 1.530407 }, { "acc": 0.98874998, "epoch": 25.495664401218654, "grad_norm": 4.288358211517334, "learning_rate": 5.2478919998946934e-06, "loss": 0.05432501, "memory(GiB)": 13.7, "step": 54395, "train_speed(iter/s)": 1.530413 }, { "acc": 0.99020834, "epoch": 25.49800796812749, "grad_norm": 0.006977526005357504, "learning_rate": 5.24711783506939e-06, "loss": 0.02550695, "memory(GiB)": 13.7, "step": 54400, "train_speed(iter/s)": 1.530417 }, { "acc": 0.99070511, "epoch": 25.500351535036327, "grad_norm": 0.9477009177207947, "learning_rate": 5.246343664316207e-06, "loss": 0.03230525, "memory(GiB)": 13.7, "step": 54405, "train_speed(iter/s)": 1.530422 }, { "acc": 0.98589287, "epoch": 25.50269510194516, "grad_norm": 1.7176309823989868, "learning_rate": 5.245569487653748e-06, "loss": 0.04413872, "memory(GiB)": 13.7, "step": 54410, "train_speed(iter/s)": 1.530422 }, { "acc": 0.9669445, "epoch": 25.505038668853995, "grad_norm": 2.947805881500244, "learning_rate": 5.244795305100624e-06, "loss": 0.08592105, "memory(GiB)": 13.7, "step": 54415, "train_speed(iter/s)": 1.530433 }, { "acc": 0.9697917, "epoch": 25.50738223576283, "grad_norm": 4.713466644287109, "learning_rate": 5.244021116675443e-06, "loss": 0.09387648, "memory(GiB)": 13.7, "step": 54420, "train_speed(iter/s)": 1.530438 }, { "acc": 0.99456062, "epoch": 25.509725802671667, "grad_norm": 1.7575058937072754, "learning_rate": 5.243246922396815e-06, "loss": 0.02899829, "memory(GiB)": 13.7, "step": 54425, "train_speed(iter/s)": 1.530441 }, { "acc": 0.99352932, "epoch": 25.5120693695805, "grad_norm": 8.70131778717041, "learning_rate": 5.24247272228335e-06, "loss": 0.02116783, "memory(GiB)": 13.7, "step": 54430, "train_speed(iter/s)": 1.530447 }, { "acc": 0.98552084, "epoch": 25.514412936489336, "grad_norm": 0.9989373087882996, "learning_rate": 5.241698516353653e-06, "loss": 0.04501854, "memory(GiB)": 13.7, "step": 54435, "train_speed(iter/s)": 1.530461 }, { "acc": 0.99196434, "epoch": 25.51675650339817, "grad_norm": 4.279117584228516, "learning_rate": 5.240924304626337e-06, "loss": 0.02713627, "memory(GiB)": 13.7, "step": 54440, "train_speed(iter/s)": 1.530461 }, { "acc": 0.98675594, "epoch": 25.519100070307008, "grad_norm": 4.065451145172119, "learning_rate": 5.240150087120012e-06, "loss": 0.03782741, "memory(GiB)": 13.7, "step": 54445, "train_speed(iter/s)": 1.53047 }, { "acc": 0.98356724, "epoch": 25.521443637215842, "grad_norm": 3.4539952278137207, "learning_rate": 5.239375863853284e-06, "loss": 0.06403306, "memory(GiB)": 13.7, "step": 54450, "train_speed(iter/s)": 1.530474 }, { "acc": 0.9802084, "epoch": 25.523787204124677, "grad_norm": 2.6204042434692383, "learning_rate": 5.238601634844765e-06, "loss": 0.06595532, "memory(GiB)": 13.7, "step": 54455, "train_speed(iter/s)": 1.530473 }, { "acc": 0.990625, "epoch": 25.526130771033515, "grad_norm": 6.082847595214844, "learning_rate": 5.2378274001130665e-06, "loss": 0.05255928, "memory(GiB)": 13.7, "step": 54460, "train_speed(iter/s)": 1.53048 }, { "acc": 0.975, "epoch": 25.52847433794235, "grad_norm": 6.64654016494751, "learning_rate": 5.237053159676797e-06, "loss": 0.06670488, "memory(GiB)": 13.7, "step": 54465, "train_speed(iter/s)": 1.530486 }, { "acc": 0.971875, "epoch": 25.530817904851183, "grad_norm": 3.4245009422302246, "learning_rate": 5.236278913554565e-06, "loss": 0.06137903, "memory(GiB)": 13.7, "step": 54470, "train_speed(iter/s)": 1.530497 }, { "acc": 0.9842803, "epoch": 25.533161471760017, "grad_norm": 6.215526103973389, "learning_rate": 5.235504661764984e-06, "loss": 0.04139698, "memory(GiB)": 13.7, "step": 54475, "train_speed(iter/s)": 1.530498 }, { "acc": 0.97865524, "epoch": 25.535505038668855, "grad_norm": 15.106588363647461, "learning_rate": 5.234730404326662e-06, "loss": 0.06949699, "memory(GiB)": 13.7, "step": 54480, "train_speed(iter/s)": 1.530503 }, { "acc": 0.98104162, "epoch": 25.53784860557769, "grad_norm": 4.3134331703186035, "learning_rate": 5.2339561412582115e-06, "loss": 0.08411987, "memory(GiB)": 13.7, "step": 54485, "train_speed(iter/s)": 1.530505 }, { "acc": 0.98484631, "epoch": 25.540192172486524, "grad_norm": 1.3085873126983643, "learning_rate": 5.233181872578243e-06, "loss": 0.0583682, "memory(GiB)": 13.7, "step": 54490, "train_speed(iter/s)": 1.530507 }, { "acc": 0.99750004, "epoch": 25.54253573939536, "grad_norm": 0.7394360303878784, "learning_rate": 5.232407598305365e-06, "loss": 0.01228397, "memory(GiB)": 13.7, "step": 54495, "train_speed(iter/s)": 1.530515 }, { "acc": 0.97645521, "epoch": 25.544879306304196, "grad_norm": 4.572603702545166, "learning_rate": 5.231633318458191e-06, "loss": 0.13981413, "memory(GiB)": 13.7, "step": 54500, "train_speed(iter/s)": 1.530516 }, { "acc": 0.98671703, "epoch": 25.54722287321303, "grad_norm": 0.07850705832242966, "learning_rate": 5.23085903305533e-06, "loss": 0.03829467, "memory(GiB)": 13.7, "step": 54505, "train_speed(iter/s)": 1.530513 }, { "acc": 0.9739583, "epoch": 25.549566440121865, "grad_norm": 7.03624153137207, "learning_rate": 5.230084742115396e-06, "loss": 0.05355939, "memory(GiB)": 13.7, "step": 54510, "train_speed(iter/s)": 1.530516 }, { "acc": 0.977882, "epoch": 25.5519100070307, "grad_norm": 1.4779025316238403, "learning_rate": 5.229310445657e-06, "loss": 0.06562026, "memory(GiB)": 13.7, "step": 54515, "train_speed(iter/s)": 1.530524 }, { "acc": 0.97890873, "epoch": 25.554253573939537, "grad_norm": 4.0698981285095215, "learning_rate": 5.228536143698752e-06, "loss": 0.07927127, "memory(GiB)": 13.7, "step": 54520, "train_speed(iter/s)": 1.530526 }, { "acc": 0.98149958, "epoch": 25.55659714084837, "grad_norm": 4.693898677825928, "learning_rate": 5.2277618362592634e-06, "loss": 0.11624275, "memory(GiB)": 13.7, "step": 54525, "train_speed(iter/s)": 1.530527 }, { "acc": 0.98819447, "epoch": 25.558940707757206, "grad_norm": 3.382916212081909, "learning_rate": 5.226987523357146e-06, "loss": 0.03987255, "memory(GiB)": 13.7, "step": 54530, "train_speed(iter/s)": 1.530532 }, { "acc": 0.96790209, "epoch": 25.561284274666043, "grad_norm": 2.3840858936309814, "learning_rate": 5.226213205011015e-06, "loss": 0.10477431, "memory(GiB)": 13.7, "step": 54535, "train_speed(iter/s)": 1.530535 }, { "acc": 0.98571424, "epoch": 25.563627841574878, "grad_norm": 2.498194694519043, "learning_rate": 5.225438881239477e-06, "loss": 0.02874093, "memory(GiB)": 13.7, "step": 54540, "train_speed(iter/s)": 1.530544 }, { "acc": 0.979461, "epoch": 25.565971408483712, "grad_norm": 4.021547317504883, "learning_rate": 5.22466455206115e-06, "loss": 0.07518903, "memory(GiB)": 13.7, "step": 54545, "train_speed(iter/s)": 1.530547 }, { "acc": 0.97437496, "epoch": 25.568314975392546, "grad_norm": 6.55288553237915, "learning_rate": 5.223890217494644e-06, "loss": 0.12628782, "memory(GiB)": 13.7, "step": 54550, "train_speed(iter/s)": 1.530545 }, { "acc": 0.98300591, "epoch": 25.570658542301384, "grad_norm": 1.1677364110946655, "learning_rate": 5.22311587755857e-06, "loss": 0.04277092, "memory(GiB)": 13.7, "step": 54555, "train_speed(iter/s)": 1.530546 }, { "acc": 0.98363094, "epoch": 25.57300210921022, "grad_norm": 0.9460127353668213, "learning_rate": 5.222341532271541e-06, "loss": 0.04407322, "memory(GiB)": 13.7, "step": 54560, "train_speed(iter/s)": 1.530544 }, { "acc": 0.97875004, "epoch": 25.575345676119053, "grad_norm": 5.285552501678467, "learning_rate": 5.221567181652171e-06, "loss": 0.05027868, "memory(GiB)": 13.7, "step": 54565, "train_speed(iter/s)": 1.530546 }, { "acc": 0.97770824, "epoch": 25.577689243027887, "grad_norm": 3.5267751216888428, "learning_rate": 5.220792825719071e-06, "loss": 0.04846089, "memory(GiB)": 13.7, "step": 54570, "train_speed(iter/s)": 1.530551 }, { "acc": 0.98988094, "epoch": 25.580032809936725, "grad_norm": 0.6265528798103333, "learning_rate": 5.2200184644908565e-06, "loss": 0.04839299, "memory(GiB)": 13.7, "step": 54575, "train_speed(iter/s)": 1.530559 }, { "acc": 0.96791668, "epoch": 25.58237637684556, "grad_norm": 4.161551475524902, "learning_rate": 5.21924409798614e-06, "loss": 0.08397487, "memory(GiB)": 13.7, "step": 54580, "train_speed(iter/s)": 1.530558 }, { "acc": 0.98236609, "epoch": 25.584719943754394, "grad_norm": 7.49808931350708, "learning_rate": 5.218469726223532e-06, "loss": 0.07955111, "memory(GiB)": 13.7, "step": 54585, "train_speed(iter/s)": 1.530565 }, { "acc": 0.97570887, "epoch": 25.587063510663228, "grad_norm": 2.7294609546661377, "learning_rate": 5.21769534922165e-06, "loss": 0.09319003, "memory(GiB)": 13.7, "step": 54590, "train_speed(iter/s)": 1.530569 }, { "acc": 0.98038692, "epoch": 25.589407077572066, "grad_norm": 4.3503642082214355, "learning_rate": 5.216920966999105e-06, "loss": 0.06830302, "memory(GiB)": 13.7, "step": 54595, "train_speed(iter/s)": 1.530574 }, { "acc": 0.9825695, "epoch": 25.5917506444809, "grad_norm": 3.6271233558654785, "learning_rate": 5.216146579574509e-06, "loss": 0.06152275, "memory(GiB)": 13.7, "step": 54600, "train_speed(iter/s)": 1.53058 }, { "acc": 0.97520294, "epoch": 25.594094211389734, "grad_norm": 2.614550828933716, "learning_rate": 5.21537218696648e-06, "loss": 0.13543807, "memory(GiB)": 13.7, "step": 54605, "train_speed(iter/s)": 1.530587 }, { "acc": 0.98666668, "epoch": 25.596437778298572, "grad_norm": 2.248955011367798, "learning_rate": 5.214597789193626e-06, "loss": 0.04866704, "memory(GiB)": 13.7, "step": 54610, "train_speed(iter/s)": 1.530585 }, { "acc": 0.98125, "epoch": 25.598781345207406, "grad_norm": 4.483426094055176, "learning_rate": 5.213823386274568e-06, "loss": 0.07256433, "memory(GiB)": 13.7, "step": 54615, "train_speed(iter/s)": 1.530582 }, { "acc": 0.9843689, "epoch": 25.60112491211624, "grad_norm": 5.655995845794678, "learning_rate": 5.213048978227916e-06, "loss": 0.0623514, "memory(GiB)": 13.7, "step": 54620, "train_speed(iter/s)": 1.530584 }, { "acc": 0.98416662, "epoch": 25.603468479025075, "grad_norm": 3.538099527359009, "learning_rate": 5.212274565072284e-06, "loss": 0.03050414, "memory(GiB)": 13.7, "step": 54625, "train_speed(iter/s)": 1.530586 }, { "acc": 0.99131947, "epoch": 25.605812045933913, "grad_norm": 3.867481231689453, "learning_rate": 5.211500146826287e-06, "loss": 0.0359424, "memory(GiB)": 13.7, "step": 54630, "train_speed(iter/s)": 1.530592 }, { "acc": 0.98506947, "epoch": 25.608155612842747, "grad_norm": 3.0395593643188477, "learning_rate": 5.210725723508542e-06, "loss": 0.04273728, "memory(GiB)": 13.7, "step": 54635, "train_speed(iter/s)": 1.530596 }, { "acc": 0.98571434, "epoch": 25.61049917975158, "grad_norm": 2.361999273300171, "learning_rate": 5.20995129513766e-06, "loss": 0.04598641, "memory(GiB)": 13.7, "step": 54640, "train_speed(iter/s)": 1.530593 }, { "acc": 0.98397818, "epoch": 25.612842746660416, "grad_norm": 45.04206466674805, "learning_rate": 5.209176861732255e-06, "loss": 0.07943826, "memory(GiB)": 13.7, "step": 54645, "train_speed(iter/s)": 1.530597 }, { "acc": 0.9825695, "epoch": 25.615186313569254, "grad_norm": 2.4654505252838135, "learning_rate": 5.208402423310946e-06, "loss": 0.05461038, "memory(GiB)": 13.7, "step": 54650, "train_speed(iter/s)": 1.530603 }, { "acc": 0.98083334, "epoch": 25.617529880478088, "grad_norm": 0.036411117762327194, "learning_rate": 5.207627979892343e-06, "loss": 0.06923366, "memory(GiB)": 13.7, "step": 54655, "train_speed(iter/s)": 1.530609 }, { "acc": 0.98545523, "epoch": 25.619873447386922, "grad_norm": 5.892541408538818, "learning_rate": 5.206853531495067e-06, "loss": 0.03267786, "memory(GiB)": 13.7, "step": 54660, "train_speed(iter/s)": 1.530613 }, { "acc": 0.990625, "epoch": 25.622217014295757, "grad_norm": 1.0554877519607544, "learning_rate": 5.206079078137729e-06, "loss": 0.02953869, "memory(GiB)": 13.7, "step": 54665, "train_speed(iter/s)": 1.530625 }, { "acc": 0.97754421, "epoch": 25.624560581204594, "grad_norm": 1.448703646659851, "learning_rate": 5.2053046198389445e-06, "loss": 0.06990535, "memory(GiB)": 13.7, "step": 54670, "train_speed(iter/s)": 1.530629 }, { "acc": 0.9942708, "epoch": 25.62690414811343, "grad_norm": 2.907559394836426, "learning_rate": 5.2045301566173324e-06, "loss": 0.04383912, "memory(GiB)": 13.7, "step": 54675, "train_speed(iter/s)": 1.530636 }, { "acc": 0.99181547, "epoch": 25.629247715022263, "grad_norm": 0.011815582402050495, "learning_rate": 5.203755688491503e-06, "loss": 0.03778318, "memory(GiB)": 13.7, "step": 54680, "train_speed(iter/s)": 1.530637 }, { "acc": 0.98611107, "epoch": 25.6315912819311, "grad_norm": 4.06525993347168, "learning_rate": 5.202981215480074e-06, "loss": 0.06611505, "memory(GiB)": 13.7, "step": 54685, "train_speed(iter/s)": 1.530636 }, { "acc": 0.97833328, "epoch": 25.633934848839935, "grad_norm": 4.537609577178955, "learning_rate": 5.2022067376016646e-06, "loss": 0.05273679, "memory(GiB)": 13.7, "step": 54690, "train_speed(iter/s)": 1.530642 }, { "acc": 0.98403845, "epoch": 25.63627841574877, "grad_norm": 2.072328805923462, "learning_rate": 5.201432254874886e-06, "loss": 0.03287542, "memory(GiB)": 13.7, "step": 54695, "train_speed(iter/s)": 1.530649 }, { "acc": 0.98113098, "epoch": 25.638621982657604, "grad_norm": 1.2903481721878052, "learning_rate": 5.2006577673183555e-06, "loss": 0.10555704, "memory(GiB)": 13.7, "step": 54700, "train_speed(iter/s)": 1.530655 }, { "acc": 0.98083344, "epoch": 25.64096554956644, "grad_norm": 4.567513465881348, "learning_rate": 5.199883274950691e-06, "loss": 0.03876854, "memory(GiB)": 13.7, "step": 54705, "train_speed(iter/s)": 1.530655 }, { "acc": 0.97057762, "epoch": 25.643309116475276, "grad_norm": 4.789950370788574, "learning_rate": 5.199108777790508e-06, "loss": 0.11162467, "memory(GiB)": 13.7, "step": 54710, "train_speed(iter/s)": 1.530659 }, { "acc": 0.995788, "epoch": 25.64565268338411, "grad_norm": 0.05517493188381195, "learning_rate": 5.19833427585642e-06, "loss": 0.03779534, "memory(GiB)": 13.7, "step": 54715, "train_speed(iter/s)": 1.530671 }, { "acc": 0.98916664, "epoch": 25.647996250292945, "grad_norm": 3.7338194847106934, "learning_rate": 5.19755976916705e-06, "loss": 0.02088924, "memory(GiB)": 13.7, "step": 54720, "train_speed(iter/s)": 1.530671 }, { "acc": 0.98431015, "epoch": 25.650339817201782, "grad_norm": 1.7439440488815308, "learning_rate": 5.196785257741008e-06, "loss": 0.03417832, "memory(GiB)": 13.7, "step": 54725, "train_speed(iter/s)": 1.530674 }, { "acc": 0.97696428, "epoch": 25.652683384110617, "grad_norm": 4.5151753425598145, "learning_rate": 5.196010741596914e-06, "loss": 0.09517465, "memory(GiB)": 13.7, "step": 54730, "train_speed(iter/s)": 1.530686 }, { "acc": 0.98395824, "epoch": 25.65502695101945, "grad_norm": 3.5267527103424072, "learning_rate": 5.195236220753384e-06, "loss": 0.06858479, "memory(GiB)": 13.7, "step": 54735, "train_speed(iter/s)": 1.530693 }, { "acc": 0.9927084, "epoch": 25.657370517928285, "grad_norm": 1.2723255157470703, "learning_rate": 5.194461695229034e-06, "loss": 0.05903486, "memory(GiB)": 13.7, "step": 54740, "train_speed(iter/s)": 1.530698 }, { "acc": 0.98729172, "epoch": 25.659714084837123, "grad_norm": 3.78804349899292, "learning_rate": 5.193687165042483e-06, "loss": 0.0445554, "memory(GiB)": 13.7, "step": 54745, "train_speed(iter/s)": 1.53071 }, { "acc": 0.9594162, "epoch": 25.662057651745958, "grad_norm": 4.614376068115234, "learning_rate": 5.192912630212347e-06, "loss": 0.14274986, "memory(GiB)": 13.7, "step": 54750, "train_speed(iter/s)": 1.530713 }, { "acc": 0.98761368, "epoch": 25.664401218654792, "grad_norm": 1.227204442024231, "learning_rate": 5.192138090757241e-06, "loss": 0.05511569, "memory(GiB)": 13.7, "step": 54755, "train_speed(iter/s)": 1.530724 }, { "acc": 0.98770828, "epoch": 25.666744785563626, "grad_norm": 0.14743496477603912, "learning_rate": 5.1913635466957915e-06, "loss": 0.02126851, "memory(GiB)": 13.7, "step": 54760, "train_speed(iter/s)": 1.530726 }, { "acc": 0.98361111, "epoch": 25.669088352472464, "grad_norm": 5.884965419769287, "learning_rate": 5.190588998046604e-06, "loss": 0.05405819, "memory(GiB)": 13.7, "step": 54765, "train_speed(iter/s)": 1.530731 }, { "acc": 0.97618427, "epoch": 25.6714319193813, "grad_norm": 7.468116760253906, "learning_rate": 5.189814444828303e-06, "loss": 0.08512825, "memory(GiB)": 13.7, "step": 54770, "train_speed(iter/s)": 1.530736 }, { "acc": 0.98374996, "epoch": 25.673775486290133, "grad_norm": 2.366013288497925, "learning_rate": 5.189039887059503e-06, "loss": 0.06398985, "memory(GiB)": 13.7, "step": 54775, "train_speed(iter/s)": 1.530743 }, { "acc": 0.98694944, "epoch": 25.67611905319897, "grad_norm": 4.667212963104248, "learning_rate": 5.188265324758825e-06, "loss": 0.04890087, "memory(GiB)": 13.7, "step": 54780, "train_speed(iter/s)": 1.530753 }, { "acc": 0.99048796, "epoch": 25.678462620107805, "grad_norm": 2.2231011390686035, "learning_rate": 5.1874907579448866e-06, "loss": 0.04157321, "memory(GiB)": 13.7, "step": 54785, "train_speed(iter/s)": 1.530752 }, { "acc": 0.98073864, "epoch": 25.68080618701664, "grad_norm": 1.6990609169006348, "learning_rate": 5.186716186636303e-06, "loss": 0.04278756, "memory(GiB)": 13.7, "step": 54790, "train_speed(iter/s)": 1.530757 }, { "acc": 0.98249998, "epoch": 25.683149753925473, "grad_norm": 4.078701972961426, "learning_rate": 5.185941610851695e-06, "loss": 0.05938285, "memory(GiB)": 13.7, "step": 54795, "train_speed(iter/s)": 1.530761 }, { "acc": 0.96551247, "epoch": 25.68549332083431, "grad_norm": 4.600831508636475, "learning_rate": 5.18516703060968e-06, "loss": 0.10828342, "memory(GiB)": 13.7, "step": 54800, "train_speed(iter/s)": 1.530762 }, { "acc": 0.9697588, "epoch": 25.687836887743146, "grad_norm": 1.2628576755523682, "learning_rate": 5.184392445928874e-06, "loss": 0.1004723, "memory(GiB)": 13.7, "step": 54805, "train_speed(iter/s)": 1.530768 }, { "acc": 0.99020834, "epoch": 25.69018045465198, "grad_norm": 3.5878777503967285, "learning_rate": 5.1836178568279e-06, "loss": 0.03449568, "memory(GiB)": 13.7, "step": 54810, "train_speed(iter/s)": 1.530768 }, { "acc": 0.99032192, "epoch": 25.692524021560814, "grad_norm": 5.9217424392700195, "learning_rate": 5.182843263325375e-06, "loss": 0.03065517, "memory(GiB)": 13.7, "step": 54815, "train_speed(iter/s)": 1.530771 }, { "acc": 0.98738098, "epoch": 25.694867588469652, "grad_norm": 3.053218364715576, "learning_rate": 5.182068665439914e-06, "loss": 0.03490925, "memory(GiB)": 13.7, "step": 54820, "train_speed(iter/s)": 1.530777 }, { "acc": 0.99375, "epoch": 25.697211155378486, "grad_norm": 0.7240152955055237, "learning_rate": 5.181294063190141e-06, "loss": 0.03718393, "memory(GiB)": 13.7, "step": 54825, "train_speed(iter/s)": 1.530782 }, { "acc": 0.9683279, "epoch": 25.69955472228732, "grad_norm": 2.9000449180603027, "learning_rate": 5.180519456594671e-06, "loss": 0.09454201, "memory(GiB)": 13.7, "step": 54830, "train_speed(iter/s)": 1.530788 }, { "acc": 0.98728628, "epoch": 25.701898289196155, "grad_norm": 1.0100836753845215, "learning_rate": 5.179744845672126e-06, "loss": 0.0315687, "memory(GiB)": 13.7, "step": 54835, "train_speed(iter/s)": 1.53079 }, { "acc": 0.99375, "epoch": 25.704241856104993, "grad_norm": 2.5654430389404297, "learning_rate": 5.178970230441123e-06, "loss": 0.02204301, "memory(GiB)": 13.7, "step": 54840, "train_speed(iter/s)": 1.530793 }, { "acc": 0.97548075, "epoch": 25.706585423013827, "grad_norm": 3.785513162612915, "learning_rate": 5.178195610920286e-06, "loss": 0.0980459, "memory(GiB)": 13.7, "step": 54845, "train_speed(iter/s)": 1.530791 }, { "acc": 0.97729168, "epoch": 25.70892898992266, "grad_norm": 0.6891834735870361, "learning_rate": 5.177420987128225e-06, "loss": 0.09146763, "memory(GiB)": 13.7, "step": 54850, "train_speed(iter/s)": 1.530796 }, { "acc": 0.97673607, "epoch": 25.7112725568315, "grad_norm": 4.710383415222168, "learning_rate": 5.176646359083567e-06, "loss": 0.0426858, "memory(GiB)": 13.7, "step": 54855, "train_speed(iter/s)": 1.5308 }, { "acc": 0.98770294, "epoch": 25.713616123740334, "grad_norm": 1.3921411037445068, "learning_rate": 5.175871726804929e-06, "loss": 0.05415967, "memory(GiB)": 13.7, "step": 54860, "train_speed(iter/s)": 1.530797 }, { "acc": 0.9794445, "epoch": 25.715959690649168, "grad_norm": 7.545900344848633, "learning_rate": 5.175097090310931e-06, "loss": 0.04271127, "memory(GiB)": 13.7, "step": 54865, "train_speed(iter/s)": 1.530796 }, { "acc": 0.97270832, "epoch": 25.718303257558002, "grad_norm": 4.524825572967529, "learning_rate": 5.174322449620192e-06, "loss": 0.06683352, "memory(GiB)": 13.7, "step": 54870, "train_speed(iter/s)": 1.530806 }, { "acc": 0.97608566, "epoch": 25.72064682446684, "grad_norm": 4.85882568359375, "learning_rate": 5.173547804751331e-06, "loss": 0.09182155, "memory(GiB)": 13.7, "step": 54875, "train_speed(iter/s)": 1.530812 }, { "acc": 0.97694445, "epoch": 25.722990391375674, "grad_norm": 8.090649604797363, "learning_rate": 5.172773155722971e-06, "loss": 0.06571519, "memory(GiB)": 13.7, "step": 54880, "train_speed(iter/s)": 1.530814 }, { "acc": 0.96965771, "epoch": 25.72533395828451, "grad_norm": 4.217362880706787, "learning_rate": 5.171998502553731e-06, "loss": 0.09919297, "memory(GiB)": 13.7, "step": 54885, "train_speed(iter/s)": 1.530811 }, { "acc": 0.99020834, "epoch": 25.727677525193343, "grad_norm": 3.5153627395629883, "learning_rate": 5.171223845262228e-06, "loss": 0.03550796, "memory(GiB)": 13.7, "step": 54890, "train_speed(iter/s)": 1.530814 }, { "acc": 0.97863102, "epoch": 25.73002109210218, "grad_norm": 4.595149517059326, "learning_rate": 5.170449183867088e-06, "loss": 0.03307565, "memory(GiB)": 13.7, "step": 54895, "train_speed(iter/s)": 1.530816 }, { "acc": 0.98676472, "epoch": 25.732364659011015, "grad_norm": 0.8759205341339111, "learning_rate": 5.169674518386927e-06, "loss": 0.03128552, "memory(GiB)": 13.7, "step": 54900, "train_speed(iter/s)": 1.530817 }, { "acc": 0.98249998, "epoch": 25.73470822591985, "grad_norm": 4.475148677825928, "learning_rate": 5.168899848840364e-06, "loss": 0.06568581, "memory(GiB)": 13.7, "step": 54905, "train_speed(iter/s)": 1.53082 }, { "acc": 0.9947917, "epoch": 25.737051792828684, "grad_norm": 2.8490729331970215, "learning_rate": 5.168125175246023e-06, "loss": 0.03352938, "memory(GiB)": 13.7, "step": 54910, "train_speed(iter/s)": 1.530821 }, { "acc": 0.9901041, "epoch": 25.73939535973752, "grad_norm": 2.257591962814331, "learning_rate": 5.167350497622525e-06, "loss": 0.0382791, "memory(GiB)": 13.7, "step": 54915, "train_speed(iter/s)": 1.530828 }, { "acc": 0.98423071, "epoch": 25.741738926646356, "grad_norm": 3.7971251010894775, "learning_rate": 5.166575815988486e-06, "loss": 0.0518822, "memory(GiB)": 13.7, "step": 54920, "train_speed(iter/s)": 1.530839 }, { "acc": 0.98311892, "epoch": 25.74408249355519, "grad_norm": 4.171044826507568, "learning_rate": 5.165801130362533e-06, "loss": 0.07749661, "memory(GiB)": 13.7, "step": 54925, "train_speed(iter/s)": 1.530845 }, { "acc": 0.98572302, "epoch": 25.746426060464024, "grad_norm": 3.7680118083953857, "learning_rate": 5.165026440763284e-06, "loss": 0.04018266, "memory(GiB)": 13.7, "step": 54930, "train_speed(iter/s)": 1.530848 }, { "acc": 0.9864583, "epoch": 25.748769627372862, "grad_norm": 5.696200370788574, "learning_rate": 5.164251747209358e-06, "loss": 0.06074356, "memory(GiB)": 13.7, "step": 54935, "train_speed(iter/s)": 1.530854 }, { "acc": 0.97351646, "epoch": 25.751113194281697, "grad_norm": 8.318832397460938, "learning_rate": 5.163477049719379e-06, "loss": 0.08405508, "memory(GiB)": 13.7, "step": 54940, "train_speed(iter/s)": 1.530855 }, { "acc": 0.96497021, "epoch": 25.75345676119053, "grad_norm": 6.910599708557129, "learning_rate": 5.1627023483119675e-06, "loss": 0.11470535, "memory(GiB)": 13.7, "step": 54945, "train_speed(iter/s)": 1.530858 }, { "acc": 0.98637657, "epoch": 25.75580032809937, "grad_norm": 2.3737852573394775, "learning_rate": 5.161927643005743e-06, "loss": 0.07565682, "memory(GiB)": 13.7, "step": 54950, "train_speed(iter/s)": 1.530864 }, { "acc": 0.98395834, "epoch": 25.758143895008203, "grad_norm": 4.641714572906494, "learning_rate": 5.161152933819329e-06, "loss": 0.03361319, "memory(GiB)": 13.7, "step": 54955, "train_speed(iter/s)": 1.530862 }, { "acc": 0.9901042, "epoch": 25.760487461917037, "grad_norm": 0.01807483099400997, "learning_rate": 5.160378220771346e-06, "loss": 0.03230494, "memory(GiB)": 13.7, "step": 54960, "train_speed(iter/s)": 1.530874 }, { "acc": 0.9645834, "epoch": 25.76283102882587, "grad_norm": 3.567108392715454, "learning_rate": 5.159603503880416e-06, "loss": 0.07374581, "memory(GiB)": 13.7, "step": 54965, "train_speed(iter/s)": 1.530883 }, { "acc": 0.984375, "epoch": 25.76517459573471, "grad_norm": 0.6813390254974365, "learning_rate": 5.158828783165162e-06, "loss": 0.06097479, "memory(GiB)": 13.7, "step": 54970, "train_speed(iter/s)": 1.530889 }, { "acc": 0.98571434, "epoch": 25.767518162643544, "grad_norm": 6.882678985595703, "learning_rate": 5.158054058644202e-06, "loss": 0.03460765, "memory(GiB)": 13.7, "step": 54975, "train_speed(iter/s)": 1.530893 }, { "acc": 0.9760416, "epoch": 25.769861729552378, "grad_norm": 0.7271504402160645, "learning_rate": 5.15727933033616e-06, "loss": 0.08643678, "memory(GiB)": 13.7, "step": 54980, "train_speed(iter/s)": 1.530895 }, { "acc": 0.98812504, "epoch": 25.772205296461212, "grad_norm": 4.688937664031982, "learning_rate": 5.156504598259659e-06, "loss": 0.03520174, "memory(GiB)": 13.7, "step": 54985, "train_speed(iter/s)": 1.530893 }, { "acc": 0.99187498, "epoch": 25.77454886337005, "grad_norm": 2.417726516723633, "learning_rate": 5.155729862433321e-06, "loss": 0.06206664, "memory(GiB)": 13.7, "step": 54990, "train_speed(iter/s)": 1.530896 }, { "acc": 0.96885414, "epoch": 25.776892430278885, "grad_norm": 4.2319440841674805, "learning_rate": 5.1549551228757645e-06, "loss": 0.13804278, "memory(GiB)": 13.7, "step": 54995, "train_speed(iter/s)": 1.530893 }, { "acc": 0.98261909, "epoch": 25.77923599718772, "grad_norm": 4.675045490264893, "learning_rate": 5.154180379605616e-06, "loss": 0.06419705, "memory(GiB)": 13.7, "step": 55000, "train_speed(iter/s)": 1.530894 }, { "acc": 0.9848115, "epoch": 25.781579564096553, "grad_norm": 0.8808902502059937, "learning_rate": 5.153405632641495e-06, "loss": 0.05244263, "memory(GiB)": 13.7, "step": 55005, "train_speed(iter/s)": 1.530901 }, { "acc": 0.97165184, "epoch": 25.78392313100539, "grad_norm": 4.366044521331787, "learning_rate": 5.1526308820020265e-06, "loss": 0.06478395, "memory(GiB)": 13.7, "step": 55010, "train_speed(iter/s)": 1.53091 }, { "acc": 0.97915182, "epoch": 25.786266697914225, "grad_norm": 1.1838687658309937, "learning_rate": 5.151856127705831e-06, "loss": 0.07713807, "memory(GiB)": 13.7, "step": 55015, "train_speed(iter/s)": 1.530916 }, { "acc": 0.97696428, "epoch": 25.78861026482306, "grad_norm": 7.254245758056641, "learning_rate": 5.151081369771531e-06, "loss": 0.04970033, "memory(GiB)": 13.7, "step": 55020, "train_speed(iter/s)": 1.530922 }, { "acc": 0.99401941, "epoch": 25.790953831731898, "grad_norm": 1.8095227479934692, "learning_rate": 5.150306608217749e-06, "loss": 0.04805646, "memory(GiB)": 13.7, "step": 55025, "train_speed(iter/s)": 1.530922 }, { "acc": 0.9864583, "epoch": 25.793297398640732, "grad_norm": 0.06312829256057739, "learning_rate": 5.149531843063109e-06, "loss": 0.04490199, "memory(GiB)": 13.7, "step": 55030, "train_speed(iter/s)": 1.530924 }, { "acc": 0.96854172, "epoch": 25.795640965549566, "grad_norm": 1.4816511869430542, "learning_rate": 5.148757074326233e-06, "loss": 0.12495168, "memory(GiB)": 13.7, "step": 55035, "train_speed(iter/s)": 1.53092 }, { "acc": 0.97645836, "epoch": 25.7979845324584, "grad_norm": 5.98737907409668, "learning_rate": 5.147982302025744e-06, "loss": 0.06264833, "memory(GiB)": 13.7, "step": 55040, "train_speed(iter/s)": 1.530921 }, { "acc": 0.97104168, "epoch": 25.80032809936724, "grad_norm": 3.805957078933716, "learning_rate": 5.147207526180263e-06, "loss": 0.075458, "memory(GiB)": 13.7, "step": 55045, "train_speed(iter/s)": 1.530925 }, { "acc": 0.97635269, "epoch": 25.802671666276073, "grad_norm": 5.095722198486328, "learning_rate": 5.146432746808416e-06, "loss": 0.08957187, "memory(GiB)": 13.7, "step": 55050, "train_speed(iter/s)": 1.530933 }, { "acc": 0.98729172, "epoch": 25.805015233184907, "grad_norm": 4.215420246124268, "learning_rate": 5.145657963928828e-06, "loss": 0.04071209, "memory(GiB)": 13.7, "step": 55055, "train_speed(iter/s)": 1.530938 }, { "acc": 0.97657194, "epoch": 25.80735880009374, "grad_norm": 4.936279296875, "learning_rate": 5.144883177560118e-06, "loss": 0.06619556, "memory(GiB)": 13.7, "step": 55060, "train_speed(iter/s)": 1.530942 }, { "acc": 0.99020834, "epoch": 25.80970236700258, "grad_norm": 2.948240280151367, "learning_rate": 5.144108387720911e-06, "loss": 0.03846278, "memory(GiB)": 13.7, "step": 55065, "train_speed(iter/s)": 1.530945 }, { "acc": 0.97312498, "epoch": 25.812045933911413, "grad_norm": 1.9521161317825317, "learning_rate": 5.1433335944298295e-06, "loss": 0.06755609, "memory(GiB)": 13.7, "step": 55070, "train_speed(iter/s)": 1.530946 }, { "acc": 1.0, "epoch": 25.814389500820248, "grad_norm": 0.46966519951820374, "learning_rate": 5.142558797705497e-06, "loss": 0.03979679, "memory(GiB)": 13.7, "step": 55075, "train_speed(iter/s)": 1.530954 }, { "acc": 0.98446331, "epoch": 25.816733067729082, "grad_norm": 2.818889856338501, "learning_rate": 5.141783997566537e-06, "loss": 0.03533808, "memory(GiB)": 13.7, "step": 55080, "train_speed(iter/s)": 1.530966 }, { "acc": 0.97261362, "epoch": 25.81907663463792, "grad_norm": 3.7615818977355957, "learning_rate": 5.141009194031575e-06, "loss": 0.06993587, "memory(GiB)": 13.7, "step": 55085, "train_speed(iter/s)": 1.530976 }, { "acc": 0.99636364, "epoch": 25.821420201546754, "grad_norm": 1.1282087564468384, "learning_rate": 5.140234387119234e-06, "loss": 0.03601376, "memory(GiB)": 13.7, "step": 55090, "train_speed(iter/s)": 1.530981 }, { "acc": 0.98555803, "epoch": 25.82376376845559, "grad_norm": 1.0762702226638794, "learning_rate": 5.13945957684814e-06, "loss": 0.04536867, "memory(GiB)": 13.7, "step": 55095, "train_speed(iter/s)": 1.530987 }, { "acc": 0.98886366, "epoch": 25.826107335364426, "grad_norm": 1.276158094406128, "learning_rate": 5.138684763236909e-06, "loss": 0.04262311, "memory(GiB)": 13.7, "step": 55100, "train_speed(iter/s)": 1.530987 }, { "acc": 0.98864584, "epoch": 25.82845090227326, "grad_norm": 0.6080090999603271, "learning_rate": 5.137909946304174e-06, "loss": 0.03251515, "memory(GiB)": 13.7, "step": 55105, "train_speed(iter/s)": 1.530995 }, { "acc": 0.97559214, "epoch": 25.830794469182095, "grad_norm": 2.5628066062927246, "learning_rate": 5.137135126068552e-06, "loss": 0.07376732, "memory(GiB)": 13.7, "step": 55110, "train_speed(iter/s)": 1.530989 }, { "acc": 0.9902462, "epoch": 25.83313803609093, "grad_norm": 3.9202070236206055, "learning_rate": 5.136360302548673e-06, "loss": 0.08354346, "memory(GiB)": 13.7, "step": 55115, "train_speed(iter/s)": 1.530991 }, { "acc": 0.98133163, "epoch": 25.835481602999767, "grad_norm": 0.010609854944050312, "learning_rate": 5.1355854757631576e-06, "loss": 0.04337973, "memory(GiB)": 13.7, "step": 55120, "train_speed(iter/s)": 1.530995 }, { "acc": 0.99333324, "epoch": 25.8378251699086, "grad_norm": 3.9134521484375, "learning_rate": 5.134810645730631e-06, "loss": 0.05608083, "memory(GiB)": 13.7, "step": 55125, "train_speed(iter/s)": 1.530993 }, { "acc": 0.98029757, "epoch": 25.840168736817436, "grad_norm": 2.547429084777832, "learning_rate": 5.134035812469719e-06, "loss": 0.06915773, "memory(GiB)": 13.7, "step": 55130, "train_speed(iter/s)": 1.530995 }, { "acc": 0.98660717, "epoch": 25.84251230372627, "grad_norm": 1.234189510345459, "learning_rate": 5.133260975999046e-06, "loss": 0.04788, "memory(GiB)": 13.7, "step": 55135, "train_speed(iter/s)": 1.530994 }, { "acc": 0.99375, "epoch": 25.844855870635108, "grad_norm": 0.808857262134552, "learning_rate": 5.132486136337231e-06, "loss": 0.02188821, "memory(GiB)": 13.7, "step": 55140, "train_speed(iter/s)": 1.531 }, { "acc": 0.98447914, "epoch": 25.847199437543942, "grad_norm": 0.9809412360191345, "learning_rate": 5.131711293502907e-06, "loss": 0.0446691, "memory(GiB)": 13.7, "step": 55145, "train_speed(iter/s)": 1.531005 }, { "acc": 0.99323864, "epoch": 25.849543004452777, "grad_norm": 1.4352678060531616, "learning_rate": 5.130936447514693e-06, "loss": 0.03660684, "memory(GiB)": 13.7, "step": 55150, "train_speed(iter/s)": 1.531 }, { "acc": 0.98791666, "epoch": 25.85188657136161, "grad_norm": 4.854159355163574, "learning_rate": 5.1301615983912135e-06, "loss": 0.03102919, "memory(GiB)": 13.7, "step": 55155, "train_speed(iter/s)": 1.531 }, { "acc": 0.98663692, "epoch": 25.85423013827045, "grad_norm": 0.011845733039081097, "learning_rate": 5.129386746151098e-06, "loss": 0.0648573, "memory(GiB)": 13.7, "step": 55160, "train_speed(iter/s)": 1.53101 }, { "acc": 0.97385416, "epoch": 25.856573705179283, "grad_norm": 4.198494911193848, "learning_rate": 5.128611890812968e-06, "loss": 0.07844573, "memory(GiB)": 13.7, "step": 55165, "train_speed(iter/s)": 1.531015 }, { "acc": 0.98310709, "epoch": 25.858917272088117, "grad_norm": 1.6416552066802979, "learning_rate": 5.1278370323954466e-06, "loss": 0.07121757, "memory(GiB)": 13.7, "step": 55170, "train_speed(iter/s)": 1.531017 }, { "acc": 0.9927083, "epoch": 25.861260838996955, "grad_norm": 0.4641493856906891, "learning_rate": 5.127062170917164e-06, "loss": 0.04520862, "memory(GiB)": 13.7, "step": 55175, "train_speed(iter/s)": 1.531023 }, { "acc": 0.9739583, "epoch": 25.86360440590579, "grad_norm": 1.752198338508606, "learning_rate": 5.1262873063967425e-06, "loss": 0.04865817, "memory(GiB)": 13.7, "step": 55180, "train_speed(iter/s)": 1.531031 }, { "acc": 0.98154764, "epoch": 25.865947972814624, "grad_norm": 1.8921953439712524, "learning_rate": 5.125512438852805e-06, "loss": 0.04028162, "memory(GiB)": 13.7, "step": 55185, "train_speed(iter/s)": 1.531039 }, { "acc": 0.96154766, "epoch": 25.868291539723458, "grad_norm": 8.366002082824707, "learning_rate": 5.124737568303982e-06, "loss": 0.1855144, "memory(GiB)": 13.7, "step": 55190, "train_speed(iter/s)": 1.531047 }, { "acc": 0.9875, "epoch": 25.870635106632296, "grad_norm": 7.823328495025635, "learning_rate": 5.123962694768895e-06, "loss": 0.05225441, "memory(GiB)": 13.7, "step": 55195, "train_speed(iter/s)": 1.531057 }, { "acc": 0.98571434, "epoch": 25.87297867354113, "grad_norm": 5.688075542449951, "learning_rate": 5.123187818266169e-06, "loss": 0.05034389, "memory(GiB)": 13.7, "step": 55200, "train_speed(iter/s)": 1.53106 }, { "acc": 0.98760414, "epoch": 25.875322240449965, "grad_norm": 2.6991424560546875, "learning_rate": 5.122412938814433e-06, "loss": 0.05520195, "memory(GiB)": 13.7, "step": 55205, "train_speed(iter/s)": 1.531066 }, { "acc": 0.97758932, "epoch": 25.8776658073588, "grad_norm": 3.2963128089904785, "learning_rate": 5.121638056432309e-06, "loss": 0.06194074, "memory(GiB)": 13.7, "step": 55210, "train_speed(iter/s)": 1.531076 }, { "acc": 0.97520828, "epoch": 25.880009374267637, "grad_norm": 4.816860675811768, "learning_rate": 5.120863171138424e-06, "loss": 0.0875916, "memory(GiB)": 13.7, "step": 55215, "train_speed(iter/s)": 1.531076 }, { "acc": 0.98291664, "epoch": 25.88235294117647, "grad_norm": 3.5247275829315186, "learning_rate": 5.120088282951405e-06, "loss": 0.03716006, "memory(GiB)": 13.7, "step": 55220, "train_speed(iter/s)": 1.531075 }, { "acc": 0.98225269, "epoch": 25.884696508085305, "grad_norm": 2.059809446334839, "learning_rate": 5.119313391889876e-06, "loss": 0.0749326, "memory(GiB)": 13.7, "step": 55225, "train_speed(iter/s)": 1.531079 }, { "acc": 0.98113098, "epoch": 25.88704007499414, "grad_norm": 5.787732124328613, "learning_rate": 5.118538497972464e-06, "loss": 0.04342285, "memory(GiB)": 13.7, "step": 55230, "train_speed(iter/s)": 1.531086 }, { "acc": 0.99302082, "epoch": 25.889383641902977, "grad_norm": 3.247591733932495, "learning_rate": 5.117763601217793e-06, "loss": 0.03025934, "memory(GiB)": 13.7, "step": 55235, "train_speed(iter/s)": 1.53109 }, { "acc": 0.97104168, "epoch": 25.891727208811812, "grad_norm": 5.103538990020752, "learning_rate": 5.116988701644492e-06, "loss": 0.09593646, "memory(GiB)": 13.7, "step": 55240, "train_speed(iter/s)": 1.531098 }, { "acc": 0.98944445, "epoch": 25.894070775720646, "grad_norm": 0.014476745389401913, "learning_rate": 5.116213799271183e-06, "loss": 0.02353051, "memory(GiB)": 13.7, "step": 55245, "train_speed(iter/s)": 1.531105 }, { "acc": 0.9868845, "epoch": 25.89641434262948, "grad_norm": 5.198982238769531, "learning_rate": 5.115438894116496e-06, "loss": 0.04664505, "memory(GiB)": 13.7, "step": 55250, "train_speed(iter/s)": 1.531114 }, { "acc": 0.98187504, "epoch": 25.89875790953832, "grad_norm": 3.6244795322418213, "learning_rate": 5.1146639861990535e-06, "loss": 0.04407719, "memory(GiB)": 13.7, "step": 55255, "train_speed(iter/s)": 1.531119 }, { "acc": 0.98601189, "epoch": 25.901101476447153, "grad_norm": 3.5561325550079346, "learning_rate": 5.113889075537486e-06, "loss": 0.06517674, "memory(GiB)": 13.7, "step": 55260, "train_speed(iter/s)": 1.531125 }, { "acc": 0.9942709, "epoch": 25.903445043355987, "grad_norm": 0.0635947734117508, "learning_rate": 5.113114162150418e-06, "loss": 0.02907608, "memory(GiB)": 13.7, "step": 55265, "train_speed(iter/s)": 1.531126 }, { "acc": 0.98497601, "epoch": 25.905788610264825, "grad_norm": 7.746126174926758, "learning_rate": 5.112339246056477e-06, "loss": 0.07292643, "memory(GiB)": 13.7, "step": 55270, "train_speed(iter/s)": 1.531118 }, { "acc": 0.984375, "epoch": 25.90813217717366, "grad_norm": 0.28983932733535767, "learning_rate": 5.111564327274285e-06, "loss": 0.05305524, "memory(GiB)": 13.7, "step": 55275, "train_speed(iter/s)": 1.531119 }, { "acc": 0.99333334, "epoch": 25.910475744082493, "grad_norm": 1.3423054218292236, "learning_rate": 5.110789405822471e-06, "loss": 0.01583832, "memory(GiB)": 13.7, "step": 55280, "train_speed(iter/s)": 1.531122 }, { "acc": 0.98073864, "epoch": 25.912819310991328, "grad_norm": 0.8812344074249268, "learning_rate": 5.110014481719662e-06, "loss": 0.0732024, "memory(GiB)": 13.7, "step": 55285, "train_speed(iter/s)": 1.531119 }, { "acc": 0.97958336, "epoch": 25.915162877900165, "grad_norm": 2.6475086212158203, "learning_rate": 5.109239554984486e-06, "loss": 0.07695712, "memory(GiB)": 13.7, "step": 55290, "train_speed(iter/s)": 1.531124 }, { "acc": 0.99092264, "epoch": 25.917506444809, "grad_norm": 4.140876293182373, "learning_rate": 5.108464625635569e-06, "loss": 0.04613759, "memory(GiB)": 13.7, "step": 55295, "train_speed(iter/s)": 1.531129 }, { "acc": 0.97713795, "epoch": 25.919850011717834, "grad_norm": 5.502337455749512, "learning_rate": 5.1076896936915355e-06, "loss": 0.0792927, "memory(GiB)": 13.7, "step": 55300, "train_speed(iter/s)": 1.531141 }, { "acc": 0.98104172, "epoch": 25.92219357862667, "grad_norm": 2.4992480278015137, "learning_rate": 5.106914759171015e-06, "loss": 0.04905286, "memory(GiB)": 13.7, "step": 55305, "train_speed(iter/s)": 1.53115 }, { "acc": 0.98127975, "epoch": 25.924537145535506, "grad_norm": 3.8532626628875732, "learning_rate": 5.106139822092634e-06, "loss": 0.06044818, "memory(GiB)": 13.7, "step": 55310, "train_speed(iter/s)": 1.531143 }, { "acc": 0.97946434, "epoch": 25.92688071244434, "grad_norm": 6.744946479797363, "learning_rate": 5.105364882475018e-06, "loss": 0.03581787, "memory(GiB)": 13.7, "step": 55315, "train_speed(iter/s)": 1.531135 }, { "acc": 0.96833334, "epoch": 25.929224279353175, "grad_norm": 6.2986741065979, "learning_rate": 5.104589940336793e-06, "loss": 0.10580455, "memory(GiB)": 13.7, "step": 55320, "train_speed(iter/s)": 1.531143 }, { "acc": 0.98687496, "epoch": 25.93156784626201, "grad_norm": 3.3739964962005615, "learning_rate": 5.10381499569659e-06, "loss": 0.04277219, "memory(GiB)": 13.7, "step": 55325, "train_speed(iter/s)": 1.531149 }, { "acc": 0.96780634, "epoch": 25.933911413170847, "grad_norm": 2.743941068649292, "learning_rate": 5.103040048573031e-06, "loss": 0.10147473, "memory(GiB)": 13.7, "step": 55330, "train_speed(iter/s)": 1.531154 }, { "acc": 0.97458334, "epoch": 25.93625498007968, "grad_norm": 7.172299385070801, "learning_rate": 5.102265098984749e-06, "loss": 0.05088764, "memory(GiB)": 13.7, "step": 55335, "train_speed(iter/s)": 1.531161 }, { "acc": 0.98071423, "epoch": 25.938598546988516, "grad_norm": 4.694273948669434, "learning_rate": 5.101490146950367e-06, "loss": 0.07751448, "memory(GiB)": 13.7, "step": 55340, "train_speed(iter/s)": 1.531155 }, { "acc": 0.98165751, "epoch": 25.94094211389735, "grad_norm": 1.0477705001831055, "learning_rate": 5.100715192488517e-06, "loss": 0.05583184, "memory(GiB)": 13.7, "step": 55345, "train_speed(iter/s)": 1.531155 }, { "acc": 0.9875, "epoch": 25.943285680806188, "grad_norm": 1.721422553062439, "learning_rate": 5.099940235617819e-06, "loss": 0.06538371, "memory(GiB)": 13.7, "step": 55350, "train_speed(iter/s)": 1.531155 }, { "acc": 0.97865524, "epoch": 25.945629247715022, "grad_norm": 0.0021902616135776043, "learning_rate": 5.099165276356906e-06, "loss": 0.04510202, "memory(GiB)": 13.7, "step": 55355, "train_speed(iter/s)": 1.531158 }, { "acc": 0.97210684, "epoch": 25.947972814623856, "grad_norm": 0.009577853605151176, "learning_rate": 5.098390314724402e-06, "loss": 0.08874341, "memory(GiB)": 13.7, "step": 55360, "train_speed(iter/s)": 1.531165 }, { "acc": 0.9822916, "epoch": 25.950316381532694, "grad_norm": 3.082235813140869, "learning_rate": 5.097615350738938e-06, "loss": 0.04104837, "memory(GiB)": 13.7, "step": 55365, "train_speed(iter/s)": 1.531172 }, { "acc": 0.97508926, "epoch": 25.95265994844153, "grad_norm": 3.882211923599243, "learning_rate": 5.096840384419141e-06, "loss": 0.0651743, "memory(GiB)": 13.7, "step": 55370, "train_speed(iter/s)": 1.531177 }, { "acc": 0.99131947, "epoch": 25.955003515350363, "grad_norm": 1.945570468902588, "learning_rate": 5.096065415783636e-06, "loss": 0.02670718, "memory(GiB)": 13.7, "step": 55375, "train_speed(iter/s)": 1.531175 }, { "acc": 0.97895832, "epoch": 25.957347082259197, "grad_norm": 6.167553901672363, "learning_rate": 5.095290444851052e-06, "loss": 0.06580941, "memory(GiB)": 13.7, "step": 55380, "train_speed(iter/s)": 1.531178 }, { "acc": 0.99224539, "epoch": 25.959690649168035, "grad_norm": 6.481282711029053, "learning_rate": 5.094515471640018e-06, "loss": 0.03621287, "memory(GiB)": 13.7, "step": 55385, "train_speed(iter/s)": 1.531178 }, { "acc": 0.97911701, "epoch": 25.96203421607687, "grad_norm": 4.1417131423950195, "learning_rate": 5.093740496169161e-06, "loss": 0.06736739, "memory(GiB)": 13.7, "step": 55390, "train_speed(iter/s)": 1.53119 }, { "acc": 0.97069445, "epoch": 25.964377782985704, "grad_norm": 8.249030113220215, "learning_rate": 5.0929655184571105e-06, "loss": 0.12157972, "memory(GiB)": 13.7, "step": 55395, "train_speed(iter/s)": 1.531194 }, { "acc": 0.9933712, "epoch": 25.966721349894538, "grad_norm": 5.327763080596924, "learning_rate": 5.092190538522491e-06, "loss": 0.03170379, "memory(GiB)": 13.7, "step": 55400, "train_speed(iter/s)": 1.531194 }, { "acc": 0.99167614, "epoch": 25.969064916803376, "grad_norm": 0.09388355910778046, "learning_rate": 5.091415556383931e-06, "loss": 0.0410235, "memory(GiB)": 13.7, "step": 55405, "train_speed(iter/s)": 1.531192 }, { "acc": 0.98283329, "epoch": 25.97140848371221, "grad_norm": 7.848978519439697, "learning_rate": 5.090640572060062e-06, "loss": 0.06183211, "memory(GiB)": 13.7, "step": 55410, "train_speed(iter/s)": 1.5312 }, { "acc": 0.97945156, "epoch": 25.973752050621044, "grad_norm": 1.5346860885620117, "learning_rate": 5.089865585569508e-06, "loss": 0.07830352, "memory(GiB)": 13.7, "step": 55415, "train_speed(iter/s)": 1.531198 }, { "acc": 0.98145828, "epoch": 25.97609561752988, "grad_norm": 3.200139045715332, "learning_rate": 5.089090596930899e-06, "loss": 0.04275811, "memory(GiB)": 13.7, "step": 55420, "train_speed(iter/s)": 1.531204 }, { "acc": 0.97885418, "epoch": 25.978439184438717, "grad_norm": 5.368070125579834, "learning_rate": 5.088315606162863e-06, "loss": 0.06339418, "memory(GiB)": 13.7, "step": 55425, "train_speed(iter/s)": 1.531201 }, { "acc": 0.97830811, "epoch": 25.98078275134755, "grad_norm": 6.554027557373047, "learning_rate": 5.087540613284031e-06, "loss": 0.05845818, "memory(GiB)": 13.7, "step": 55430, "train_speed(iter/s)": 1.531204 }, { "acc": 0.97541056, "epoch": 25.983126318256385, "grad_norm": 3.360640048980713, "learning_rate": 5.086765618313027e-06, "loss": 0.06107374, "memory(GiB)": 13.7, "step": 55435, "train_speed(iter/s)": 1.53121 }, { "acc": 0.98701391, "epoch": 25.985469885165223, "grad_norm": 3.2124102115631104, "learning_rate": 5.08599062126848e-06, "loss": 0.05396936, "memory(GiB)": 13.7, "step": 55440, "train_speed(iter/s)": 1.531214 }, { "acc": 0.99217262, "epoch": 25.987813452074057, "grad_norm": 0.8810589909553528, "learning_rate": 5.0852156221690205e-06, "loss": 0.04700495, "memory(GiB)": 13.7, "step": 55445, "train_speed(iter/s)": 1.531222 }, { "acc": 0.97696438, "epoch": 25.99015701898289, "grad_norm": 5.4402289390563965, "learning_rate": 5.084440621033273e-06, "loss": 0.09711166, "memory(GiB)": 13.7, "step": 55450, "train_speed(iter/s)": 1.531234 }, { "acc": 0.9864584, "epoch": 25.992500585891726, "grad_norm": 0.9449849724769592, "learning_rate": 5.083665617879872e-06, "loss": 0.04878558, "memory(GiB)": 13.7, "step": 55455, "train_speed(iter/s)": 1.531241 }, { "acc": 0.9746726, "epoch": 25.994844152800564, "grad_norm": 4.805382251739502, "learning_rate": 5.0828906127274415e-06, "loss": 0.07162002, "memory(GiB)": 13.7, "step": 55460, "train_speed(iter/s)": 1.531248 }, { "acc": 0.9793849, "epoch": 25.997187719709398, "grad_norm": 1.304509162902832, "learning_rate": 5.082115605594611e-06, "loss": 0.11588497, "memory(GiB)": 13.7, "step": 55465, "train_speed(iter/s)": 1.531252 }, { "acc": 0.97465286, "epoch": 25.999531286618232, "grad_norm": 0.14120714366436005, "learning_rate": 5.08134059650001e-06, "loss": 0.05631751, "memory(GiB)": 13.7, "step": 55470, "train_speed(iter/s)": 1.531253 }, { "acc": 0.98364582, "epoch": 26.001874853527067, "grad_norm": 2.577867031097412, "learning_rate": 5.080565585462268e-06, "loss": 0.03535943, "memory(GiB)": 13.7, "step": 55475, "train_speed(iter/s)": 1.53124 }, { "acc": 0.97923613, "epoch": 26.004218420435905, "grad_norm": 4.729174613952637, "learning_rate": 5.07979057250001e-06, "loss": 0.04266282, "memory(GiB)": 13.7, "step": 55480, "train_speed(iter/s)": 1.531252 }, { "acc": 0.98770828, "epoch": 26.00656198734474, "grad_norm": 1.5134729146957397, "learning_rate": 5.0790155576318675e-06, "loss": 0.03864202, "memory(GiB)": 13.7, "step": 55485, "train_speed(iter/s)": 1.531258 }, { "acc": 0.98803024, "epoch": 26.008905554253573, "grad_norm": 0.8374656438827515, "learning_rate": 5.07824054087647e-06, "loss": 0.03870873, "memory(GiB)": 13.7, "step": 55490, "train_speed(iter/s)": 1.531265 }, { "acc": 0.98298607, "epoch": 26.011249121162408, "grad_norm": 4.152735233306885, "learning_rate": 5.077465522252444e-06, "loss": 0.07166973, "memory(GiB)": 13.7, "step": 55495, "train_speed(iter/s)": 1.531269 }, { "acc": 0.99250002, "epoch": 26.013592688071245, "grad_norm": 3.418224334716797, "learning_rate": 5.076690501778422e-06, "loss": 0.02454257, "memory(GiB)": 13.7, "step": 55500, "train_speed(iter/s)": 1.531273 }, { "acc": 0.97790184, "epoch": 26.01593625498008, "grad_norm": 2.6893298625946045, "learning_rate": 5.075915479473028e-06, "loss": 0.07036066, "memory(GiB)": 13.7, "step": 55505, "train_speed(iter/s)": 1.531269 }, { "acc": 0.9760417, "epoch": 26.018279821888914, "grad_norm": 4.144068717956543, "learning_rate": 5.075140455354895e-06, "loss": 0.06231643, "memory(GiB)": 13.7, "step": 55510, "train_speed(iter/s)": 1.531279 }, { "acc": 0.98815479, "epoch": 26.020623388797752, "grad_norm": 2.462728977203369, "learning_rate": 5.074365429442652e-06, "loss": 0.04145564, "memory(GiB)": 13.7, "step": 55515, "train_speed(iter/s)": 1.531278 }, { "acc": 0.9895834, "epoch": 26.022966955706586, "grad_norm": 4.921077728271484, "learning_rate": 5.073590401754924e-06, "loss": 0.04039674, "memory(GiB)": 13.7, "step": 55520, "train_speed(iter/s)": 1.531281 }, { "acc": 0.97479172, "epoch": 26.02531052261542, "grad_norm": 4.248554229736328, "learning_rate": 5.072815372310344e-06, "loss": 0.05412191, "memory(GiB)": 13.7, "step": 55525, "train_speed(iter/s)": 1.531287 }, { "acc": 0.9739584, "epoch": 26.027654089524255, "grad_norm": 2.8128767013549805, "learning_rate": 5.072040341127541e-06, "loss": 0.09157348, "memory(GiB)": 13.7, "step": 55530, "train_speed(iter/s)": 1.531287 }, { "acc": 0.98431549, "epoch": 26.029997656433093, "grad_norm": 2.69352126121521, "learning_rate": 5.0712653082251405e-06, "loss": 0.10446844, "memory(GiB)": 13.7, "step": 55535, "train_speed(iter/s)": 1.531291 }, { "acc": 0.99743309, "epoch": 26.032341223341927, "grad_norm": 0.6711422801017761, "learning_rate": 5.070490273621778e-06, "loss": 0.02583051, "memory(GiB)": 13.7, "step": 55540, "train_speed(iter/s)": 1.531292 }, { "acc": 0.96726189, "epoch": 26.03468479025076, "grad_norm": 2.395051956176758, "learning_rate": 5.069715237336077e-06, "loss": 0.12322948, "memory(GiB)": 13.7, "step": 55545, "train_speed(iter/s)": 1.53129 }, { "acc": 0.96175594, "epoch": 26.037028357159596, "grad_norm": 5.657266139984131, "learning_rate": 5.0689401993866685e-06, "loss": 0.08463914, "memory(GiB)": 13.7, "step": 55550, "train_speed(iter/s)": 1.53129 }, { "acc": 0.99151039, "epoch": 26.039371924068433, "grad_norm": 3.6413073539733887, "learning_rate": 5.0681651597921846e-06, "loss": 0.03262803, "memory(GiB)": 13.7, "step": 55555, "train_speed(iter/s)": 1.531297 }, { "acc": 0.9875, "epoch": 26.041715490977268, "grad_norm": 0.024672025814652443, "learning_rate": 5.0673901185712525e-06, "loss": 0.04020553, "memory(GiB)": 13.7, "step": 55560, "train_speed(iter/s)": 1.531297 }, { "acc": 0.99027777, "epoch": 26.044059057886102, "grad_norm": 0.9649462103843689, "learning_rate": 5.066615075742499e-06, "loss": 0.03847372, "memory(GiB)": 13.7, "step": 55565, "train_speed(iter/s)": 1.531298 }, { "acc": 0.9802083, "epoch": 26.046402624794936, "grad_norm": 3.3958115577697754, "learning_rate": 5.065840031324558e-06, "loss": 0.06383993, "memory(GiB)": 13.7, "step": 55570, "train_speed(iter/s)": 1.531302 }, { "acc": 0.97592258, "epoch": 26.048746191703774, "grad_norm": 5.98708963394165, "learning_rate": 5.065064985336058e-06, "loss": 0.07091476, "memory(GiB)": 13.7, "step": 55575, "train_speed(iter/s)": 1.531306 }, { "acc": 0.97941523, "epoch": 26.05108975861261, "grad_norm": 2.428713083267212, "learning_rate": 5.064289937795627e-06, "loss": 0.06163825, "memory(GiB)": 13.7, "step": 55580, "train_speed(iter/s)": 1.531314 }, { "acc": 0.9797267, "epoch": 26.053433325521443, "grad_norm": 3.608752727508545, "learning_rate": 5.063514888721895e-06, "loss": 0.07495345, "memory(GiB)": 13.7, "step": 55585, "train_speed(iter/s)": 1.531315 }, { "acc": 0.97091351, "epoch": 26.05577689243028, "grad_norm": 5.896875858306885, "learning_rate": 5.062739838133494e-06, "loss": 0.16017234, "memory(GiB)": 13.7, "step": 55590, "train_speed(iter/s)": 1.531308 }, { "acc": 0.98760414, "epoch": 26.058120459339115, "grad_norm": 0.33830398321151733, "learning_rate": 5.061964786049048e-06, "loss": 0.03982317, "memory(GiB)": 13.7, "step": 55595, "train_speed(iter/s)": 1.531307 }, { "acc": 0.98604164, "epoch": 26.06046402624795, "grad_norm": 0.7792412042617798, "learning_rate": 5.061189732487196e-06, "loss": 0.05143273, "memory(GiB)": 13.7, "step": 55600, "train_speed(iter/s)": 1.531314 }, { "acc": 0.9856945, "epoch": 26.062807593156784, "grad_norm": 2.973773717880249, "learning_rate": 5.060414677466559e-06, "loss": 0.03469543, "memory(GiB)": 13.7, "step": 55605, "train_speed(iter/s)": 1.531314 }, { "acc": 0.982197, "epoch": 26.06515116006562, "grad_norm": 0.07731296122074127, "learning_rate": 5.0596396210057686e-06, "loss": 0.03348312, "memory(GiB)": 13.7, "step": 55610, "train_speed(iter/s)": 1.531315 }, { "acc": 0.97758923, "epoch": 26.067494726974456, "grad_norm": 5.9004974365234375, "learning_rate": 5.058864563123458e-06, "loss": 0.04442341, "memory(GiB)": 13.7, "step": 55615, "train_speed(iter/s)": 1.531317 }, { "acc": 0.98010416, "epoch": 26.06983829388329, "grad_norm": 3.0370354652404785, "learning_rate": 5.058089503838255e-06, "loss": 0.07168864, "memory(GiB)": 13.7, "step": 55620, "train_speed(iter/s)": 1.531324 }, { "acc": 0.98125, "epoch": 26.072181860792124, "grad_norm": 4.434406757354736, "learning_rate": 5.057314443168788e-06, "loss": 0.07048842, "memory(GiB)": 13.7, "step": 55625, "train_speed(iter/s)": 1.531334 }, { "acc": 0.98715286, "epoch": 26.074525427700962, "grad_norm": 0.9284629821777344, "learning_rate": 5.056539381133691e-06, "loss": 0.02356543, "memory(GiB)": 13.7, "step": 55630, "train_speed(iter/s)": 1.531335 }, { "acc": 0.98812504, "epoch": 26.076868994609796, "grad_norm": 4.540231704711914, "learning_rate": 5.055764317751589e-06, "loss": 0.04996482, "memory(GiB)": 13.7, "step": 55635, "train_speed(iter/s)": 1.531337 }, { "acc": 0.9791667, "epoch": 26.07921256151863, "grad_norm": 2.296825408935547, "learning_rate": 5.054989253041116e-06, "loss": 0.09942622, "memory(GiB)": 13.7, "step": 55640, "train_speed(iter/s)": 1.531343 }, { "acc": 0.98673611, "epoch": 26.081556128427465, "grad_norm": 3.3439955711364746, "learning_rate": 5.054214187020899e-06, "loss": 0.08104148, "memory(GiB)": 13.7, "step": 55645, "train_speed(iter/s)": 1.531355 }, { "acc": 0.9822917, "epoch": 26.083899695336303, "grad_norm": 1.5984748601913452, "learning_rate": 5.053439119709571e-06, "loss": 0.05925368, "memory(GiB)": 13.7, "step": 55650, "train_speed(iter/s)": 1.531364 }, { "acc": 0.97784729, "epoch": 26.086243262245137, "grad_norm": 4.168075084686279, "learning_rate": 5.05266405112576e-06, "loss": 0.06721955, "memory(GiB)": 13.7, "step": 55655, "train_speed(iter/s)": 1.531359 }, { "acc": 0.99105234, "epoch": 26.08858682915397, "grad_norm": 2.5230164527893066, "learning_rate": 5.051888981288094e-06, "loss": 0.03585893, "memory(GiB)": 13.7, "step": 55660, "train_speed(iter/s)": 1.531358 }, { "acc": 0.98487587, "epoch": 26.090930396062806, "grad_norm": 3.4498960971832275, "learning_rate": 5.0511139102152095e-06, "loss": 0.06256438, "memory(GiB)": 13.7, "step": 55665, "train_speed(iter/s)": 1.531357 }, { "acc": 0.9874855, "epoch": 26.093273962971644, "grad_norm": 0.017901020124554634, "learning_rate": 5.0503388379257315e-06, "loss": 0.04188622, "memory(GiB)": 13.7, "step": 55670, "train_speed(iter/s)": 1.531363 }, { "acc": 0.99090281, "epoch": 26.095617529880478, "grad_norm": 3.1011803150177, "learning_rate": 5.04956376443829e-06, "loss": 0.07323301, "memory(GiB)": 13.7, "step": 55675, "train_speed(iter/s)": 1.531365 }, { "acc": 0.97865534, "epoch": 26.097961096789312, "grad_norm": 3.042905807495117, "learning_rate": 5.048788689771519e-06, "loss": 0.07659135, "memory(GiB)": 13.7, "step": 55680, "train_speed(iter/s)": 1.531368 }, { "acc": 0.99430437, "epoch": 26.10030466369815, "grad_norm": 4.520633220672607, "learning_rate": 5.048013613944046e-06, "loss": 0.02791095, "memory(GiB)": 13.7, "step": 55685, "train_speed(iter/s)": 1.531368 }, { "acc": 0.98294182, "epoch": 26.102648230606984, "grad_norm": 4.028314590454102, "learning_rate": 5.047238536974501e-06, "loss": 0.08474874, "memory(GiB)": 13.7, "step": 55690, "train_speed(iter/s)": 1.531363 }, { "acc": 0.97854166, "epoch": 26.10499179751582, "grad_norm": 0.10502196848392487, "learning_rate": 5.046463458881517e-06, "loss": 0.07509683, "memory(GiB)": 13.7, "step": 55695, "train_speed(iter/s)": 1.531367 }, { "acc": 0.97937498, "epoch": 26.107335364424653, "grad_norm": 3.4165666103363037, "learning_rate": 5.045688379683718e-06, "loss": 0.09115933, "memory(GiB)": 13.7, "step": 55700, "train_speed(iter/s)": 1.531363 }, { "acc": 0.98228626, "epoch": 26.10967893133349, "grad_norm": 2.7385408878326416, "learning_rate": 5.044913299399743e-06, "loss": 0.04836559, "memory(GiB)": 13.7, "step": 55705, "train_speed(iter/s)": 1.531356 }, { "acc": 0.97693186, "epoch": 26.112022498242325, "grad_norm": 3.3627748489379883, "learning_rate": 5.0441382180482166e-06, "loss": 0.10521591, "memory(GiB)": 13.7, "step": 55710, "train_speed(iter/s)": 1.531355 }, { "acc": 0.98473625, "epoch": 26.11436606515116, "grad_norm": 1.9088542461395264, "learning_rate": 5.043363135647772e-06, "loss": 0.07055938, "memory(GiB)": 13.7, "step": 55715, "train_speed(iter/s)": 1.531361 }, { "acc": 0.99457798, "epoch": 26.116709632059994, "grad_norm": 1.8237619400024414, "learning_rate": 5.042588052217038e-06, "loss": 0.05488264, "memory(GiB)": 13.7, "step": 55720, "train_speed(iter/s)": 1.531365 }, { "acc": 0.99240742, "epoch": 26.11905319896883, "grad_norm": 4.320920944213867, "learning_rate": 5.041812967774645e-06, "loss": 0.04276174, "memory(GiB)": 13.7, "step": 55725, "train_speed(iter/s)": 1.531365 }, { "acc": 0.9875, "epoch": 26.121396765877666, "grad_norm": 2.598407506942749, "learning_rate": 5.0410378823392245e-06, "loss": 0.02807754, "memory(GiB)": 13.7, "step": 55730, "train_speed(iter/s)": 1.531369 }, { "acc": 0.99071035, "epoch": 26.1237403327865, "grad_norm": 0.9641579985618591, "learning_rate": 5.040262795929406e-06, "loss": 0.03395852, "memory(GiB)": 13.7, "step": 55735, "train_speed(iter/s)": 1.53137 }, { "acc": 0.98633928, "epoch": 26.126083899695335, "grad_norm": 3.3789947032928467, "learning_rate": 5.039487708563822e-06, "loss": 0.09232442, "memory(GiB)": 13.7, "step": 55740, "train_speed(iter/s)": 1.531374 }, { "acc": 0.98988094, "epoch": 26.128427466604172, "grad_norm": 2.9112136363983154, "learning_rate": 5.038712620261099e-06, "loss": 0.06148722, "memory(GiB)": 13.7, "step": 55745, "train_speed(iter/s)": 1.531378 }, { "acc": 0.97381945, "epoch": 26.130771033513007, "grad_norm": 1.7166398763656616, "learning_rate": 5.03793753103987e-06, "loss": 0.05360798, "memory(GiB)": 13.7, "step": 55750, "train_speed(iter/s)": 1.531389 }, { "acc": 0.98062506, "epoch": 26.13311460042184, "grad_norm": 4.856975555419922, "learning_rate": 5.037162440918767e-06, "loss": 0.04100661, "memory(GiB)": 13.7, "step": 55755, "train_speed(iter/s)": 1.531391 }, { "acc": 0.96869049, "epoch": 26.13545816733068, "grad_norm": 6.86010217666626, "learning_rate": 5.03638734991642e-06, "loss": 0.06257329, "memory(GiB)": 13.7, "step": 55760, "train_speed(iter/s)": 1.531397 }, { "acc": 0.97114582, "epoch": 26.137801734239513, "grad_norm": 4.346581935882568, "learning_rate": 5.03561225805146e-06, "loss": 0.04374873, "memory(GiB)": 13.7, "step": 55765, "train_speed(iter/s)": 1.531404 }, { "acc": 0.9947917, "epoch": 26.140145301148348, "grad_norm": 3.3522207736968994, "learning_rate": 5.034837165342515e-06, "loss": 0.04802485, "memory(GiB)": 13.7, "step": 55770, "train_speed(iter/s)": 1.531407 }, { "acc": 0.98217258, "epoch": 26.142488868057182, "grad_norm": 2.946873903274536, "learning_rate": 5.034062071808217e-06, "loss": 0.0549792, "memory(GiB)": 13.7, "step": 55775, "train_speed(iter/s)": 1.531415 }, { "acc": 0.9880209, "epoch": 26.14483243496602, "grad_norm": 4.464176654815674, "learning_rate": 5.033286977467197e-06, "loss": 0.05274501, "memory(GiB)": 13.7, "step": 55780, "train_speed(iter/s)": 1.531413 }, { "acc": 0.96446428, "epoch": 26.147176001874854, "grad_norm": 6.8272528648376465, "learning_rate": 5.032511882338086e-06, "loss": 0.08478396, "memory(GiB)": 13.7, "step": 55785, "train_speed(iter/s)": 1.53142 }, { "acc": 0.98666668, "epoch": 26.14951956878369, "grad_norm": 0.04617232084274292, "learning_rate": 5.031736786439515e-06, "loss": 0.04440519, "memory(GiB)": 13.7, "step": 55790, "train_speed(iter/s)": 1.531425 }, { "acc": 0.98157196, "epoch": 26.151863135692523, "grad_norm": 5.690769672393799, "learning_rate": 5.030961689790114e-06, "loss": 0.06620921, "memory(GiB)": 13.7, "step": 55795, "train_speed(iter/s)": 1.531428 }, { "acc": 0.98481064, "epoch": 26.15420670260136, "grad_norm": 1.752089500427246, "learning_rate": 5.030186592408513e-06, "loss": 0.05855863, "memory(GiB)": 13.7, "step": 55800, "train_speed(iter/s)": 1.531436 }, { "acc": 0.98666668, "epoch": 26.156550269510195, "grad_norm": 0.9504814743995667, "learning_rate": 5.0294114943133455e-06, "loss": 0.05718341, "memory(GiB)": 13.7, "step": 55805, "train_speed(iter/s)": 1.531444 }, { "acc": 0.97821426, "epoch": 26.15889383641903, "grad_norm": 3.1205923557281494, "learning_rate": 5.028636395523242e-06, "loss": 0.04517653, "memory(GiB)": 13.7, "step": 55810, "train_speed(iter/s)": 1.531447 }, { "acc": 0.98125, "epoch": 26.161237403327863, "grad_norm": 3.9264349937438965, "learning_rate": 5.02786129605683e-06, "loss": 0.04280912, "memory(GiB)": 13.7, "step": 55815, "train_speed(iter/s)": 1.531442 }, { "acc": 0.98363094, "epoch": 26.1635809702367, "grad_norm": 3.487703561782837, "learning_rate": 5.027086195932742e-06, "loss": 0.06017178, "memory(GiB)": 13.7, "step": 55820, "train_speed(iter/s)": 1.531443 }, { "acc": 0.97256947, "epoch": 26.165924537145536, "grad_norm": 0.06899956613779068, "learning_rate": 5.02631109516961e-06, "loss": 0.05857781, "memory(GiB)": 13.7, "step": 55825, "train_speed(iter/s)": 1.531451 }, { "acc": 0.98980112, "epoch": 26.16826810405437, "grad_norm": 2.917628765106201, "learning_rate": 5.0255359937860635e-06, "loss": 0.03048496, "memory(GiB)": 13.7, "step": 55830, "train_speed(iter/s)": 1.531457 }, { "acc": 0.99445648, "epoch": 26.170611670963208, "grad_norm": 3.415543794631958, "learning_rate": 5.024760891800734e-06, "loss": 0.02216882, "memory(GiB)": 13.7, "step": 55835, "train_speed(iter/s)": 1.531457 }, { "acc": 0.98460388, "epoch": 26.172955237872042, "grad_norm": 1.2630999088287354, "learning_rate": 5.023985789232253e-06, "loss": 0.04614779, "memory(GiB)": 13.7, "step": 55840, "train_speed(iter/s)": 1.531456 }, { "acc": 0.98194447, "epoch": 26.175298804780876, "grad_norm": 5.84366512298584, "learning_rate": 5.023210686099249e-06, "loss": 0.06619943, "memory(GiB)": 13.7, "step": 55845, "train_speed(iter/s)": 1.531461 }, { "acc": 0.97425594, "epoch": 26.17764237168971, "grad_norm": 0.815962016582489, "learning_rate": 5.022435582420357e-06, "loss": 0.10104117, "memory(GiB)": 13.7, "step": 55850, "train_speed(iter/s)": 1.531465 }, { "acc": 0.9875, "epoch": 26.17998593859855, "grad_norm": 4.60995626449585, "learning_rate": 5.021660478214205e-06, "loss": 0.03093077, "memory(GiB)": 13.7, "step": 55855, "train_speed(iter/s)": 1.531461 }, { "acc": 0.96833334, "epoch": 26.182329505507383, "grad_norm": 2.520944595336914, "learning_rate": 5.020885373499425e-06, "loss": 0.06512455, "memory(GiB)": 13.7, "step": 55860, "train_speed(iter/s)": 1.531461 }, { "acc": 0.99375, "epoch": 26.184673072416217, "grad_norm": 2.148031711578369, "learning_rate": 5.020110268294647e-06, "loss": 0.02546646, "memory(GiB)": 13.7, "step": 55865, "train_speed(iter/s)": 1.531466 }, { "acc": 0.97796879, "epoch": 26.18701663932505, "grad_norm": 5.150055408477783, "learning_rate": 5.019335162618502e-06, "loss": 0.05941638, "memory(GiB)": 13.7, "step": 55870, "train_speed(iter/s)": 1.53147 }, { "acc": 0.97785091, "epoch": 26.18936020623389, "grad_norm": 4.257625579833984, "learning_rate": 5.018560056489622e-06, "loss": 0.12100985, "memory(GiB)": 13.7, "step": 55875, "train_speed(iter/s)": 1.531478 }, { "acc": 0.98883934, "epoch": 26.191703773142724, "grad_norm": 3.2646701335906982, "learning_rate": 5.017784949926636e-06, "loss": 0.05956302, "memory(GiB)": 13.7, "step": 55880, "train_speed(iter/s)": 1.531481 }, { "acc": 0.97947311, "epoch": 26.194047340051558, "grad_norm": 5.608457565307617, "learning_rate": 5.017009842948179e-06, "loss": 0.05873, "memory(GiB)": 13.7, "step": 55885, "train_speed(iter/s)": 1.531482 }, { "acc": 0.9895834, "epoch": 26.196390906960392, "grad_norm": 1.8400228023529053, "learning_rate": 5.01623473557288e-06, "loss": 0.01916854, "memory(GiB)": 13.7, "step": 55890, "train_speed(iter/s)": 1.531488 }, { "acc": 0.97835321, "epoch": 26.19873447386923, "grad_norm": 1.9784317016601562, "learning_rate": 5.015459627819369e-06, "loss": 0.08733965, "memory(GiB)": 13.7, "step": 55895, "train_speed(iter/s)": 1.531496 }, { "acc": 0.98467264, "epoch": 26.201078040778064, "grad_norm": 5.330080986022949, "learning_rate": 5.014684519706276e-06, "loss": 0.03879458, "memory(GiB)": 13.7, "step": 55900, "train_speed(iter/s)": 1.531494 }, { "acc": 0.98777771, "epoch": 26.2034216076869, "grad_norm": 1.4967689514160156, "learning_rate": 5.0139094112522345e-06, "loss": 0.04692883, "memory(GiB)": 13.7, "step": 55905, "train_speed(iter/s)": 1.531498 }, { "acc": 0.990625, "epoch": 26.205765174595733, "grad_norm": 3.620065212249756, "learning_rate": 5.013134302475876e-06, "loss": 0.02989308, "memory(GiB)": 13.7, "step": 55910, "train_speed(iter/s)": 1.531498 }, { "acc": 0.97736111, "epoch": 26.20810874150457, "grad_norm": 4.862449645996094, "learning_rate": 5.0123591933958305e-06, "loss": 0.09822683, "memory(GiB)": 13.7, "step": 55915, "train_speed(iter/s)": 1.531502 }, { "acc": 0.98050594, "epoch": 26.210452308413405, "grad_norm": 4.580451488494873, "learning_rate": 5.011584084030726e-06, "loss": 0.05553899, "memory(GiB)": 13.7, "step": 55920, "train_speed(iter/s)": 1.531504 }, { "acc": 0.98321428, "epoch": 26.21279587532224, "grad_norm": 4.4149556159973145, "learning_rate": 5.010808974399199e-06, "loss": 0.03686614, "memory(GiB)": 13.7, "step": 55925, "train_speed(iter/s)": 1.531505 }, { "acc": 0.98245535, "epoch": 26.215139442231077, "grad_norm": 4.039949417114258, "learning_rate": 5.010033864519878e-06, "loss": 0.11141009, "memory(GiB)": 13.7, "step": 55930, "train_speed(iter/s)": 1.531505 }, { "acc": 0.97038698, "epoch": 26.21748300913991, "grad_norm": 4.223409652709961, "learning_rate": 5.009258754411394e-06, "loss": 0.14197432, "memory(GiB)": 13.7, "step": 55935, "train_speed(iter/s)": 1.531508 }, { "acc": 0.98708324, "epoch": 26.219826576048746, "grad_norm": 0.022204481065273285, "learning_rate": 5.008483644092379e-06, "loss": 0.04248776, "memory(GiB)": 13.7, "step": 55940, "train_speed(iter/s)": 1.531508 }, { "acc": 0.97979164, "epoch": 26.22217014295758, "grad_norm": 3.4500162601470947, "learning_rate": 5.007708533581463e-06, "loss": 0.03400818, "memory(GiB)": 13.7, "step": 55945, "train_speed(iter/s)": 1.531508 }, { "acc": 0.99020834, "epoch": 26.224513709866418, "grad_norm": 1.4425510168075562, "learning_rate": 5.006933422897276e-06, "loss": 0.04093167, "memory(GiB)": 13.7, "step": 55950, "train_speed(iter/s)": 1.53151 }, { "acc": 0.98458328, "epoch": 26.226857276775252, "grad_norm": 35.18463134765625, "learning_rate": 5.006158312058454e-06, "loss": 0.05462511, "memory(GiB)": 13.7, "step": 55955, "train_speed(iter/s)": 1.531515 }, { "acc": 0.99245043, "epoch": 26.229200843684087, "grad_norm": 2.9663612842559814, "learning_rate": 5.0053832010836215e-06, "loss": 0.03333231, "memory(GiB)": 13.7, "step": 55960, "train_speed(iter/s)": 1.531525 }, { "acc": 0.97833338, "epoch": 26.23154441059292, "grad_norm": 9.23434829711914, "learning_rate": 5.004608089991414e-06, "loss": 0.10156982, "memory(GiB)": 13.7, "step": 55965, "train_speed(iter/s)": 1.531528 }, { "acc": 0.98562498, "epoch": 26.23388797750176, "grad_norm": 3.92620587348938, "learning_rate": 5.003832978800462e-06, "loss": 0.06036181, "memory(GiB)": 13.7, "step": 55970, "train_speed(iter/s)": 1.531535 }, { "acc": 0.99499998, "epoch": 26.236231544410593, "grad_norm": 10.404608726501465, "learning_rate": 5.003057867529396e-06, "loss": 0.03735763, "memory(GiB)": 13.7, "step": 55975, "train_speed(iter/s)": 1.531539 }, { "acc": 0.98488092, "epoch": 26.238575111319427, "grad_norm": 2.351057529449463, "learning_rate": 5.002282756196847e-06, "loss": 0.04605889, "memory(GiB)": 13.7, "step": 55980, "train_speed(iter/s)": 1.531534 }, { "acc": 0.99177084, "epoch": 26.24091867822826, "grad_norm": 3.1748926639556885, "learning_rate": 5.001507644821446e-06, "loss": 0.0574722, "memory(GiB)": 13.7, "step": 55985, "train_speed(iter/s)": 1.531535 }, { "acc": 0.98549671, "epoch": 26.2432622451371, "grad_norm": 5.337093830108643, "learning_rate": 5.0007325334218245e-06, "loss": 0.07561493, "memory(GiB)": 13.7, "step": 55990, "train_speed(iter/s)": 1.531548 }, { "acc": 0.98020296, "epoch": 26.245605812045934, "grad_norm": 5.72808313369751, "learning_rate": 4.999957422016613e-06, "loss": 0.09628961, "memory(GiB)": 13.7, "step": 55995, "train_speed(iter/s)": 1.531555 }, { "acc": 0.97307549, "epoch": 26.247949378954768, "grad_norm": 6.275268077850342, "learning_rate": 4.999182310624446e-06, "loss": 0.11200068, "memory(GiB)": 13.7, "step": 56000, "train_speed(iter/s)": 1.531556 }, { "acc": 0.99131947, "epoch": 26.250292945863606, "grad_norm": 3.3001599311828613, "learning_rate": 4.99840719926395e-06, "loss": 0.04825462, "memory(GiB)": 13.7, "step": 56005, "train_speed(iter/s)": 1.531561 }, { "acc": 0.98105516, "epoch": 26.25263651277244, "grad_norm": 5.824695110321045, "learning_rate": 4.997632087953758e-06, "loss": 0.10318087, "memory(GiB)": 13.7, "step": 56010, "train_speed(iter/s)": 1.531568 }, { "acc": 0.98089733, "epoch": 26.254980079681275, "grad_norm": 2.972245454788208, "learning_rate": 4.996856976712501e-06, "loss": 0.04690366, "memory(GiB)": 13.7, "step": 56015, "train_speed(iter/s)": 1.531571 }, { "acc": 0.98295898, "epoch": 26.25732364659011, "grad_norm": 1.9267652034759521, "learning_rate": 4.996081865558813e-06, "loss": 0.07973994, "memory(GiB)": 13.7, "step": 56020, "train_speed(iter/s)": 1.531577 }, { "acc": 0.96416664, "epoch": 26.259667213498947, "grad_norm": 8.30100154876709, "learning_rate": 4.995306754511319e-06, "loss": 0.08943225, "memory(GiB)": 13.7, "step": 56025, "train_speed(iter/s)": 1.531587 }, { "acc": 0.96873512, "epoch": 26.26201078040778, "grad_norm": 1.7019007205963135, "learning_rate": 4.994531643588656e-06, "loss": 0.1065184, "memory(GiB)": 13.7, "step": 56030, "train_speed(iter/s)": 1.531595 }, { "acc": 0.98477678, "epoch": 26.264354347316615, "grad_norm": 0.1144934669137001, "learning_rate": 4.99375653280945e-06, "loss": 0.05660354, "memory(GiB)": 13.7, "step": 56035, "train_speed(iter/s)": 1.531596 }, { "acc": 0.99448862, "epoch": 26.26669791422545, "grad_norm": 2.634138822555542, "learning_rate": 4.992981422192336e-06, "loss": 0.02670628, "memory(GiB)": 13.7, "step": 56040, "train_speed(iter/s)": 1.531606 }, { "acc": 0.9822917, "epoch": 26.269041481134288, "grad_norm": 3.4059858322143555, "learning_rate": 4.992206311755945e-06, "loss": 0.03959184, "memory(GiB)": 13.7, "step": 56045, "train_speed(iter/s)": 1.531612 }, { "acc": 0.97833338, "epoch": 26.271385048043122, "grad_norm": 3.4594414234161377, "learning_rate": 4.991431201518905e-06, "loss": 0.1207562, "memory(GiB)": 13.7, "step": 56050, "train_speed(iter/s)": 1.531622 }, { "acc": 0.99125004, "epoch": 26.273728614951956, "grad_norm": 0.006213284097611904, "learning_rate": 4.990656091499851e-06, "loss": 0.06959159, "memory(GiB)": 13.7, "step": 56055, "train_speed(iter/s)": 1.531624 }, { "acc": 0.97861605, "epoch": 26.27607218186079, "grad_norm": 2.166013717651367, "learning_rate": 4.989880981717413e-06, "loss": 0.08381269, "memory(GiB)": 13.7, "step": 56060, "train_speed(iter/s)": 1.531625 }, { "acc": 0.9838542, "epoch": 26.27841574876963, "grad_norm": 6.745355606079102, "learning_rate": 4.9891058721902185e-06, "loss": 0.06412548, "memory(GiB)": 13.7, "step": 56065, "train_speed(iter/s)": 1.531628 }, { "acc": 0.990625, "epoch": 26.280759315678463, "grad_norm": 0.973971426486969, "learning_rate": 4.988330762936903e-06, "loss": 0.03515241, "memory(GiB)": 13.7, "step": 56070, "train_speed(iter/s)": 1.531633 }, { "acc": 0.97270832, "epoch": 26.283102882587297, "grad_norm": 9.4729585647583, "learning_rate": 4.9875556539760965e-06, "loss": 0.07082987, "memory(GiB)": 13.7, "step": 56075, "train_speed(iter/s)": 1.531639 }, { "acc": 0.98705368, "epoch": 26.285446449496135, "grad_norm": 1.1203539371490479, "learning_rate": 4.986780545326428e-06, "loss": 0.03848994, "memory(GiB)": 13.7, "step": 56080, "train_speed(iter/s)": 1.531637 }, { "acc": 0.99020824, "epoch": 26.28779001640497, "grad_norm": 3.657144069671631, "learning_rate": 4.986005437006531e-06, "loss": 0.06250391, "memory(GiB)": 13.7, "step": 56085, "train_speed(iter/s)": 1.53164 }, { "acc": 0.9795929, "epoch": 26.290133583313803, "grad_norm": 5.1240668296813965, "learning_rate": 4.985230329035035e-06, "loss": 0.08675285, "memory(GiB)": 13.7, "step": 56090, "train_speed(iter/s)": 1.531637 }, { "acc": 0.98529758, "epoch": 26.292477150222638, "grad_norm": 2.3318939208984375, "learning_rate": 4.984455221430572e-06, "loss": 0.03245583, "memory(GiB)": 13.7, "step": 56095, "train_speed(iter/s)": 1.531637 }, { "acc": 0.9864583, "epoch": 26.294820717131476, "grad_norm": 0.7325537204742432, "learning_rate": 4.983680114211775e-06, "loss": 0.03578202, "memory(GiB)": 13.7, "step": 56100, "train_speed(iter/s)": 1.531642 }, { "acc": 0.97811012, "epoch": 26.29716428404031, "grad_norm": 3.06404972076416, "learning_rate": 4.9829050073972706e-06, "loss": 0.08662393, "memory(GiB)": 13.7, "step": 56105, "train_speed(iter/s)": 1.531642 }, { "acc": 0.98729172, "epoch": 26.299507850949144, "grad_norm": 3.4947543144226074, "learning_rate": 4.982129901005692e-06, "loss": 0.04359354, "memory(GiB)": 13.7, "step": 56110, "train_speed(iter/s)": 1.531648 }, { "acc": 0.97126904, "epoch": 26.30185141785798, "grad_norm": 3.5557804107666016, "learning_rate": 4.98135479505567e-06, "loss": 0.04970208, "memory(GiB)": 13.7, "step": 56115, "train_speed(iter/s)": 1.531647 }, { "acc": 0.98258934, "epoch": 26.304194984766816, "grad_norm": 1.090538501739502, "learning_rate": 4.980579689565837e-06, "loss": 0.08750865, "memory(GiB)": 13.7, "step": 56120, "train_speed(iter/s)": 1.53165 }, { "acc": 0.9895834, "epoch": 26.30653855167565, "grad_norm": 4.508101940155029, "learning_rate": 4.979804584554822e-06, "loss": 0.02148986, "memory(GiB)": 13.7, "step": 56125, "train_speed(iter/s)": 1.531658 }, { "acc": 0.96048613, "epoch": 26.308882118584485, "grad_norm": 5.651832580566406, "learning_rate": 4.979029480041257e-06, "loss": 0.10259262, "memory(GiB)": 13.7, "step": 56130, "train_speed(iter/s)": 1.531661 }, { "acc": 0.99696426, "epoch": 26.31122568549332, "grad_norm": 0.0033265294041484594, "learning_rate": 4.978254376043772e-06, "loss": 0.02776901, "memory(GiB)": 13.7, "step": 56135, "train_speed(iter/s)": 1.531662 }, { "acc": 0.98175592, "epoch": 26.313569252402157, "grad_norm": 4.757069110870361, "learning_rate": 4.977479272581e-06, "loss": 0.08437697, "memory(GiB)": 13.7, "step": 56140, "train_speed(iter/s)": 1.531658 }, { "acc": 1.0, "epoch": 26.31591281931099, "grad_norm": 0.0027896922547370195, "learning_rate": 4.97670416967157e-06, "loss": 0.0236113, "memory(GiB)": 13.7, "step": 56145, "train_speed(iter/s)": 1.531653 }, { "acc": 0.98604164, "epoch": 26.318256386219826, "grad_norm": 6.547055244445801, "learning_rate": 4.9759290673341144e-06, "loss": 0.04190888, "memory(GiB)": 13.7, "step": 56150, "train_speed(iter/s)": 1.531652 }, { "acc": 0.98145828, "epoch": 26.32059995312866, "grad_norm": 2.1677701473236084, "learning_rate": 4.975153965587262e-06, "loss": 0.077665, "memory(GiB)": 13.7, "step": 56155, "train_speed(iter/s)": 1.53165 }, { "acc": 0.98073864, "epoch": 26.322943520037498, "grad_norm": 6.65265417098999, "learning_rate": 4.974378864449646e-06, "loss": 0.04273617, "memory(GiB)": 13.7, "step": 56160, "train_speed(iter/s)": 1.531651 }, { "acc": 0.97895832, "epoch": 26.325287086946332, "grad_norm": 5.736945629119873, "learning_rate": 4.973603763939896e-06, "loss": 0.08436979, "memory(GiB)": 13.7, "step": 56165, "train_speed(iter/s)": 1.531652 }, { "acc": 0.98520832, "epoch": 26.327630653855167, "grad_norm": 5.483757495880127, "learning_rate": 4.972828664076642e-06, "loss": 0.05732804, "memory(GiB)": 13.7, "step": 56170, "train_speed(iter/s)": 1.531652 }, { "acc": 0.98322926, "epoch": 26.329974220764004, "grad_norm": 1.8322784900665283, "learning_rate": 4.972053564878517e-06, "loss": 0.04421262, "memory(GiB)": 13.7, "step": 56175, "train_speed(iter/s)": 1.531652 }, { "acc": 0.97480659, "epoch": 26.33231778767284, "grad_norm": 6.3380818367004395, "learning_rate": 4.97127846636415e-06, "loss": 0.07344083, "memory(GiB)": 13.7, "step": 56180, "train_speed(iter/s)": 1.531654 }, { "acc": 0.97641096, "epoch": 26.334661354581673, "grad_norm": 2.921154022216797, "learning_rate": 4.970503368552176e-06, "loss": 0.0872044, "memory(GiB)": 13.7, "step": 56185, "train_speed(iter/s)": 1.531657 }, { "acc": 0.9885417, "epoch": 26.337004921490507, "grad_norm": 0.6573591828346252, "learning_rate": 4.96972827146122e-06, "loss": 0.03215327, "memory(GiB)": 13.7, "step": 56190, "train_speed(iter/s)": 1.531664 }, { "acc": 0.97983704, "epoch": 26.339348488399345, "grad_norm": 2.7303307056427, "learning_rate": 4.9689531751099155e-06, "loss": 0.09635195, "memory(GiB)": 13.7, "step": 56195, "train_speed(iter/s)": 1.531673 }, { "acc": 0.97967262, "epoch": 26.34169205530818, "grad_norm": 1.8178105354309082, "learning_rate": 4.9681780795168925e-06, "loss": 0.03775415, "memory(GiB)": 13.7, "step": 56200, "train_speed(iter/s)": 1.531674 }, { "acc": 0.97937498, "epoch": 26.344035622217014, "grad_norm": 2.8842039108276367, "learning_rate": 4.9674029847007824e-06, "loss": 0.04691124, "memory(GiB)": 13.7, "step": 56205, "train_speed(iter/s)": 1.531678 }, { "acc": 0.9905304, "epoch": 26.346379189125848, "grad_norm": 0.0015949782682582736, "learning_rate": 4.966627890680215e-06, "loss": 0.03211234, "memory(GiB)": 13.7, "step": 56210, "train_speed(iter/s)": 1.531682 }, { "acc": 0.98520832, "epoch": 26.348722756034686, "grad_norm": 5.962517261505127, "learning_rate": 4.965852797473823e-06, "loss": 0.05987359, "memory(GiB)": 13.7, "step": 56215, "train_speed(iter/s)": 1.531689 }, { "acc": 0.98981609, "epoch": 26.35106632294352, "grad_norm": 2.7204668521881104, "learning_rate": 4.965077705100236e-06, "loss": 0.0501592, "memory(GiB)": 13.7, "step": 56220, "train_speed(iter/s)": 1.531685 }, { "acc": 0.99437504, "epoch": 26.353409889852355, "grad_norm": 2.08573317527771, "learning_rate": 4.964302613578086e-06, "loss": 0.04830164, "memory(GiB)": 13.7, "step": 56225, "train_speed(iter/s)": 1.531681 }, { "acc": 0.99562502, "epoch": 26.35575345676119, "grad_norm": 1.5408709049224854, "learning_rate": 4.963527522925999e-06, "loss": 0.01592448, "memory(GiB)": 13.7, "step": 56230, "train_speed(iter/s)": 1.531686 }, { "acc": 0.98217258, "epoch": 26.358097023670027, "grad_norm": 12.315425872802734, "learning_rate": 4.9627524331626116e-06, "loss": 0.06865427, "memory(GiB)": 13.7, "step": 56235, "train_speed(iter/s)": 1.531681 }, { "acc": 0.99187498, "epoch": 26.36044059057886, "grad_norm": 1.7974635362625122, "learning_rate": 4.96197734430655e-06, "loss": 0.0381853, "memory(GiB)": 13.7, "step": 56240, "train_speed(iter/s)": 1.531678 }, { "acc": 0.98926029, "epoch": 26.362784157487695, "grad_norm": 0.007881690748035908, "learning_rate": 4.9612022563764474e-06, "loss": 0.05184925, "memory(GiB)": 13.7, "step": 56245, "train_speed(iter/s)": 1.53169 }, { "acc": 0.98215275, "epoch": 26.365127724396533, "grad_norm": 0.6424089074134827, "learning_rate": 4.960427169390933e-06, "loss": 0.06153126, "memory(GiB)": 13.7, "step": 56250, "train_speed(iter/s)": 1.531698 }, { "acc": 0.98049488, "epoch": 26.367471291305367, "grad_norm": 3.5206689834594727, "learning_rate": 4.959652083368637e-06, "loss": 0.0785043, "memory(GiB)": 13.7, "step": 56255, "train_speed(iter/s)": 1.531703 }, { "acc": 0.98395834, "epoch": 26.369814858214202, "grad_norm": 6.910523414611816, "learning_rate": 4.958876998328192e-06, "loss": 0.02739938, "memory(GiB)": 13.7, "step": 56260, "train_speed(iter/s)": 1.531705 }, { "acc": 0.99092264, "epoch": 26.372158425123036, "grad_norm": 5.071954727172852, "learning_rate": 4.9581019142882275e-06, "loss": 0.04455385, "memory(GiB)": 13.7, "step": 56265, "train_speed(iter/s)": 1.531707 }, { "acc": 0.98266945, "epoch": 26.374501992031874, "grad_norm": 1.886185646057129, "learning_rate": 4.957326831267375e-06, "loss": 0.07995739, "memory(GiB)": 13.7, "step": 56270, "train_speed(iter/s)": 1.53171 }, { "acc": 0.9854166, "epoch": 26.37684555894071, "grad_norm": 4.694113254547119, "learning_rate": 4.95655174928426e-06, "loss": 0.02303792, "memory(GiB)": 13.7, "step": 56275, "train_speed(iter/s)": 1.531716 }, { "acc": 0.9708334, "epoch": 26.379189125849543, "grad_norm": 6.504760265350342, "learning_rate": 4.955776668357519e-06, "loss": 0.08753976, "memory(GiB)": 13.7, "step": 56280, "train_speed(iter/s)": 1.531716 }, { "acc": 0.97520828, "epoch": 26.381532692758377, "grad_norm": 2.940476179122925, "learning_rate": 4.955001588505778e-06, "loss": 0.08835243, "memory(GiB)": 13.7, "step": 56285, "train_speed(iter/s)": 1.531721 }, { "acc": 0.99694633, "epoch": 26.383876259667215, "grad_norm": 1.0462416410446167, "learning_rate": 4.954226509747671e-06, "loss": 0.02525189, "memory(GiB)": 13.7, "step": 56290, "train_speed(iter/s)": 1.531726 }, { "acc": 0.99333334, "epoch": 26.38621982657605, "grad_norm": 0.7950911521911621, "learning_rate": 4.953451432101827e-06, "loss": 0.02904971, "memory(GiB)": 13.7, "step": 56295, "train_speed(iter/s)": 1.531729 }, { "acc": 0.97710228, "epoch": 26.388563393484883, "grad_norm": 2.100778341293335, "learning_rate": 4.952676355586874e-06, "loss": 0.10720711, "memory(GiB)": 13.7, "step": 56300, "train_speed(iter/s)": 1.531732 }, { "acc": 0.98309021, "epoch": 26.390906960393718, "grad_norm": 3.445641040802002, "learning_rate": 4.951901280221446e-06, "loss": 0.05381363, "memory(GiB)": 13.7, "step": 56305, "train_speed(iter/s)": 1.531729 }, { "acc": 0.98145828, "epoch": 26.393250527302555, "grad_norm": 1.0518745183944702, "learning_rate": 4.951126206024172e-06, "loss": 0.05296501, "memory(GiB)": 13.7, "step": 56310, "train_speed(iter/s)": 1.531746 }, { "acc": 0.96893425, "epoch": 26.39559409421139, "grad_norm": 4.177363872528076, "learning_rate": 4.95035113301368e-06, "loss": 0.08830265, "memory(GiB)": 13.7, "step": 56315, "train_speed(iter/s)": 1.531751 }, { "acc": 0.96920137, "epoch": 26.397937661120224, "grad_norm": 3.888232946395874, "learning_rate": 4.949576061208603e-06, "loss": 0.09301132, "memory(GiB)": 13.7, "step": 56320, "train_speed(iter/s)": 1.531752 }, { "acc": 0.98083324, "epoch": 26.400281228029062, "grad_norm": 0.018922051414847374, "learning_rate": 4.948800990627571e-06, "loss": 0.03796481, "memory(GiB)": 13.7, "step": 56325, "train_speed(iter/s)": 1.531757 }, { "acc": 0.98317709, "epoch": 26.402624794937896, "grad_norm": 6.844573974609375, "learning_rate": 4.948025921289212e-06, "loss": 0.07601469, "memory(GiB)": 13.7, "step": 56330, "train_speed(iter/s)": 1.531761 }, { "acc": 0.98139877, "epoch": 26.40496836184673, "grad_norm": 4.571558952331543, "learning_rate": 4.9472508532121584e-06, "loss": 0.07066305, "memory(GiB)": 13.7, "step": 56335, "train_speed(iter/s)": 1.531762 }, { "acc": 0.98760824, "epoch": 26.407311928755565, "grad_norm": 3.6312901973724365, "learning_rate": 4.9464757864150386e-06, "loss": 0.06600741, "memory(GiB)": 13.7, "step": 56340, "train_speed(iter/s)": 1.531764 }, { "acc": 0.98332787, "epoch": 26.409655495664403, "grad_norm": 3.5799174308776855, "learning_rate": 4.945700720916483e-06, "loss": 0.07556143, "memory(GiB)": 13.7, "step": 56345, "train_speed(iter/s)": 1.531762 }, { "acc": 0.97898808, "epoch": 26.411999062573237, "grad_norm": 2.4592292308807373, "learning_rate": 4.944925656735123e-06, "loss": 0.0934103, "memory(GiB)": 13.7, "step": 56350, "train_speed(iter/s)": 1.531764 }, { "acc": 0.99543648, "epoch": 26.41434262948207, "grad_norm": 1.4698652029037476, "learning_rate": 4.9441505938895886e-06, "loss": 0.01817922, "memory(GiB)": 13.7, "step": 56355, "train_speed(iter/s)": 1.531766 }, { "acc": 0.98142357, "epoch": 26.416686196390906, "grad_norm": 1.1931136846542358, "learning_rate": 4.9433755323985076e-06, "loss": 0.07734377, "memory(GiB)": 13.7, "step": 56360, "train_speed(iter/s)": 1.531767 }, { "acc": 0.98339014, "epoch": 26.419029763299744, "grad_norm": 2.961744785308838, "learning_rate": 4.942600472280511e-06, "loss": 0.0447896, "memory(GiB)": 13.7, "step": 56365, "train_speed(iter/s)": 1.531773 }, { "acc": 0.99333334, "epoch": 26.421373330208578, "grad_norm": 7.5975823402404785, "learning_rate": 4.9418254135542285e-06, "loss": 0.0783003, "memory(GiB)": 13.7, "step": 56370, "train_speed(iter/s)": 1.531776 }, { "acc": 0.9916667, "epoch": 26.423716897117412, "grad_norm": 4.761990070343018, "learning_rate": 4.94105035623829e-06, "loss": 0.04770171, "memory(GiB)": 13.7, "step": 56375, "train_speed(iter/s)": 1.531777 }, { "acc": 0.98842201, "epoch": 26.426060464026246, "grad_norm": 1.8893263339996338, "learning_rate": 4.940275300351327e-06, "loss": 0.04331414, "memory(GiB)": 13.7, "step": 56380, "train_speed(iter/s)": 1.531786 }, { "acc": 0.9751442, "epoch": 26.428404030935084, "grad_norm": 6.532957077026367, "learning_rate": 4.939500245911966e-06, "loss": 0.0573593, "memory(GiB)": 13.7, "step": 56385, "train_speed(iter/s)": 1.531789 }, { "acc": 0.99437504, "epoch": 26.43074759784392, "grad_norm": 0.36590754985809326, "learning_rate": 4.938725192938841e-06, "loss": 0.01377026, "memory(GiB)": 13.7, "step": 56390, "train_speed(iter/s)": 1.53179 }, { "acc": 0.9895834, "epoch": 26.433091164752753, "grad_norm": 1.5801372528076172, "learning_rate": 4.9379501414505795e-06, "loss": 0.02713077, "memory(GiB)": 13.7, "step": 56395, "train_speed(iter/s)": 1.531788 }, { "acc": 0.97802086, "epoch": 26.435434731661587, "grad_norm": 4.254696846008301, "learning_rate": 4.93717509146581e-06, "loss": 0.0675138, "memory(GiB)": 13.7, "step": 56400, "train_speed(iter/s)": 1.531794 }, { "acc": 0.98500004, "epoch": 26.437778298570425, "grad_norm": 2.6527068614959717, "learning_rate": 4.936400043003162e-06, "loss": 0.04343671, "memory(GiB)": 13.7, "step": 56405, "train_speed(iter/s)": 1.531797 }, { "acc": 0.98363094, "epoch": 26.44012186547926, "grad_norm": 2.4569787979125977, "learning_rate": 4.935624996081269e-06, "loss": 0.06336666, "memory(GiB)": 13.7, "step": 56410, "train_speed(iter/s)": 1.531799 }, { "acc": 0.98785095, "epoch": 26.442465432388094, "grad_norm": 0.5795303583145142, "learning_rate": 4.934849950718757e-06, "loss": 0.04941658, "memory(GiB)": 13.7, "step": 56415, "train_speed(iter/s)": 1.531803 }, { "acc": 0.984375, "epoch": 26.44480899929693, "grad_norm": 1.0971932411193848, "learning_rate": 4.934074906934255e-06, "loss": 0.04696636, "memory(GiB)": 13.7, "step": 56420, "train_speed(iter/s)": 1.531806 }, { "acc": 0.99057541, "epoch": 26.447152566205766, "grad_norm": 5.85703706741333, "learning_rate": 4.9332998647463955e-06, "loss": 0.05331156, "memory(GiB)": 13.7, "step": 56425, "train_speed(iter/s)": 1.531809 }, { "acc": 0.97833328, "epoch": 26.4494961331146, "grad_norm": 5.141806125640869, "learning_rate": 4.932524824173805e-06, "loss": 0.0414923, "memory(GiB)": 13.7, "step": 56430, "train_speed(iter/s)": 1.531812 }, { "acc": 0.996875, "epoch": 26.451839700023434, "grad_norm": 0.27531102299690247, "learning_rate": 4.931749785235116e-06, "loss": 0.01961325, "memory(GiB)": 13.7, "step": 56435, "train_speed(iter/s)": 1.531821 }, { "acc": 0.99383144, "epoch": 26.454183266932272, "grad_norm": 2.5813722610473633, "learning_rate": 4.930974747948957e-06, "loss": 0.01685817, "memory(GiB)": 13.7, "step": 56440, "train_speed(iter/s)": 1.531824 }, { "acc": 0.98138885, "epoch": 26.456526833841107, "grad_norm": 6.100900173187256, "learning_rate": 4.930199712333956e-06, "loss": 0.06523635, "memory(GiB)": 13.7, "step": 56445, "train_speed(iter/s)": 1.531828 }, { "acc": 0.97801542, "epoch": 26.45887040074994, "grad_norm": 2.033291816711426, "learning_rate": 4.929424678408742e-06, "loss": 0.11085051, "memory(GiB)": 13.7, "step": 56450, "train_speed(iter/s)": 1.531831 }, { "acc": 0.98931627, "epoch": 26.461213967658775, "grad_norm": 2.2030510902404785, "learning_rate": 4.928649646191946e-06, "loss": 0.05844084, "memory(GiB)": 13.7, "step": 56455, "train_speed(iter/s)": 1.531842 }, { "acc": 0.97677078, "epoch": 26.463557534567613, "grad_norm": 3.9564802646636963, "learning_rate": 4.927874615702194e-06, "loss": 0.06711369, "memory(GiB)": 13.7, "step": 56460, "train_speed(iter/s)": 1.531845 }, { "acc": 0.98604164, "epoch": 26.465901101476447, "grad_norm": 2.3997230529785156, "learning_rate": 4.92709958695812e-06, "loss": 0.05665668, "memory(GiB)": 13.7, "step": 56465, "train_speed(iter/s)": 1.531844 }, { "acc": 0.9773201, "epoch": 26.46824466838528, "grad_norm": 2.42087459564209, "learning_rate": 4.9263245599783514e-06, "loss": 0.0866343, "memory(GiB)": 13.7, "step": 56470, "train_speed(iter/s)": 1.531846 }, { "acc": 0.97510414, "epoch": 26.470588235294116, "grad_norm": 7.459598541259766, "learning_rate": 4.925549534781515e-06, "loss": 0.04867345, "memory(GiB)": 13.7, "step": 56475, "train_speed(iter/s)": 1.531851 }, { "acc": 0.99489584, "epoch": 26.472931802202954, "grad_norm": 2.271040678024292, "learning_rate": 4.924774511386245e-06, "loss": 0.03316847, "memory(GiB)": 13.7, "step": 56480, "train_speed(iter/s)": 1.531856 }, { "acc": 0.990625, "epoch": 26.475275369111788, "grad_norm": 2.0587494373321533, "learning_rate": 4.923999489811164e-06, "loss": 0.03892296, "memory(GiB)": 13.7, "step": 56485, "train_speed(iter/s)": 1.531862 }, { "acc": 0.97729168, "epoch": 26.477618936020622, "grad_norm": 1.9399222135543823, "learning_rate": 4.923224470074903e-06, "loss": 0.05184302, "memory(GiB)": 13.7, "step": 56490, "train_speed(iter/s)": 1.531866 }, { "acc": 0.98041134, "epoch": 26.47996250292946, "grad_norm": 4.807547569274902, "learning_rate": 4.922449452196094e-06, "loss": 0.08641208, "memory(GiB)": 13.7, "step": 56495, "train_speed(iter/s)": 1.531866 }, { "acc": 0.97758923, "epoch": 26.482306069838295, "grad_norm": 2.212136745452881, "learning_rate": 4.921674436193363e-06, "loss": 0.04751279, "memory(GiB)": 13.7, "step": 56500, "train_speed(iter/s)": 1.531868 }, { "acc": 0.97828522, "epoch": 26.48464963674713, "grad_norm": 1.721773386001587, "learning_rate": 4.920899422085338e-06, "loss": 0.06633658, "memory(GiB)": 13.7, "step": 56505, "train_speed(iter/s)": 1.531871 }, { "acc": 0.99083328, "epoch": 26.486993203655963, "grad_norm": 3.849950075149536, "learning_rate": 4.920124409890651e-06, "loss": 0.0149596, "memory(GiB)": 13.7, "step": 56510, "train_speed(iter/s)": 1.531875 }, { "acc": 0.98937492, "epoch": 26.4893367705648, "grad_norm": 0.0016855279682204127, "learning_rate": 4.91934939962793e-06, "loss": 0.01864032, "memory(GiB)": 13.7, "step": 56515, "train_speed(iter/s)": 1.531879 }, { "acc": 0.98770828, "epoch": 26.491680337473635, "grad_norm": 5.047981262207031, "learning_rate": 4.918574391315803e-06, "loss": 0.05320052, "memory(GiB)": 13.7, "step": 56520, "train_speed(iter/s)": 1.531882 }, { "acc": 0.984375, "epoch": 26.49402390438247, "grad_norm": 3.0061614513397217, "learning_rate": 4.917799384972897e-06, "loss": 0.02253615, "memory(GiB)": 13.7, "step": 56525, "train_speed(iter/s)": 1.531878 }, { "acc": 0.98145838, "epoch": 26.496367471291304, "grad_norm": 2.996875047683716, "learning_rate": 4.917024380617842e-06, "loss": 0.07662705, "memory(GiB)": 13.7, "step": 56530, "train_speed(iter/s)": 1.531877 }, { "acc": 0.98313704, "epoch": 26.498711038200142, "grad_norm": 5.157756328582764, "learning_rate": 4.9162493782692665e-06, "loss": 0.03976984, "memory(GiB)": 13.7, "step": 56535, "train_speed(iter/s)": 1.531882 }, { "acc": 0.98883934, "epoch": 26.501054605108976, "grad_norm": 0.00037132645957171917, "learning_rate": 4.915474377945801e-06, "loss": 0.03784747, "memory(GiB)": 13.7, "step": 56540, "train_speed(iter/s)": 1.53188 }, { "acc": 0.99092264, "epoch": 26.50339817201781, "grad_norm": 2.4681389331817627, "learning_rate": 4.91469937966607e-06, "loss": 0.04556145, "memory(GiB)": 13.7, "step": 56545, "train_speed(iter/s)": 1.531885 }, { "acc": 0.97481833, "epoch": 26.505741738926645, "grad_norm": 1.1893055438995361, "learning_rate": 4.913924383448704e-06, "loss": 0.09018321, "memory(GiB)": 13.7, "step": 56550, "train_speed(iter/s)": 1.531884 }, { "acc": 0.98604164, "epoch": 26.508085305835483, "grad_norm": 2.4186651706695557, "learning_rate": 4.913149389312333e-06, "loss": 0.08729142, "memory(GiB)": 13.7, "step": 56555, "train_speed(iter/s)": 1.53189 }, { "acc": 0.98562498, "epoch": 26.510428872744317, "grad_norm": 0.23233407735824585, "learning_rate": 4.912374397275583e-06, "loss": 0.02490966, "memory(GiB)": 13.7, "step": 56560, "train_speed(iter/s)": 1.531901 }, { "acc": 0.98125, "epoch": 26.51277243965315, "grad_norm": 5.582522392272949, "learning_rate": 4.9115994073570834e-06, "loss": 0.08822596, "memory(GiB)": 13.7, "step": 56565, "train_speed(iter/s)": 1.53191 }, { "acc": 0.98008928, "epoch": 26.51511600656199, "grad_norm": 5.879868030548096, "learning_rate": 4.9108244195754626e-06, "loss": 0.03239857, "memory(GiB)": 13.7, "step": 56570, "train_speed(iter/s)": 1.531915 }, { "acc": 0.99196272, "epoch": 26.517459573470823, "grad_norm": 2.4841864109039307, "learning_rate": 4.910049433949347e-06, "loss": 0.03706957, "memory(GiB)": 13.7, "step": 56575, "train_speed(iter/s)": 1.531922 }, { "acc": 0.98208332, "epoch": 26.519803140379658, "grad_norm": 1.1983287334442139, "learning_rate": 4.909274450497366e-06, "loss": 0.03630006, "memory(GiB)": 13.7, "step": 56580, "train_speed(iter/s)": 1.53192 }, { "acc": 0.96719694, "epoch": 26.522146707288492, "grad_norm": 0.9648714065551758, "learning_rate": 4.908499469238147e-06, "loss": 0.11404617, "memory(GiB)": 13.7, "step": 56585, "train_speed(iter/s)": 1.531926 }, { "acc": 0.99458332, "epoch": 26.52449027419733, "grad_norm": 2.207057237625122, "learning_rate": 4.907724490190319e-06, "loss": 0.04778911, "memory(GiB)": 13.7, "step": 56590, "train_speed(iter/s)": 1.531925 }, { "acc": 0.98167734, "epoch": 26.526833841106164, "grad_norm": 2.896592378616333, "learning_rate": 4.906949513372509e-06, "loss": 0.06522844, "memory(GiB)": 13.7, "step": 56595, "train_speed(iter/s)": 1.531929 }, { "acc": 0.98187504, "epoch": 26.529177408015, "grad_norm": 7.679872989654541, "learning_rate": 4.906174538803346e-06, "loss": 0.11777163, "memory(GiB)": 13.7, "step": 56600, "train_speed(iter/s)": 1.53193 }, { "acc": 0.98988094, "epoch": 26.531520974923833, "grad_norm": 0.5705757141113281, "learning_rate": 4.905399566501459e-06, "loss": 0.0290511, "memory(GiB)": 13.7, "step": 56605, "train_speed(iter/s)": 1.531935 }, { "acc": 0.98883934, "epoch": 26.53386454183267, "grad_norm": 2.766416549682617, "learning_rate": 4.904624596485471e-06, "loss": 0.02655339, "memory(GiB)": 13.7, "step": 56610, "train_speed(iter/s)": 1.531937 }, { "acc": 0.98178024, "epoch": 26.536208108741505, "grad_norm": 2.6780471801757812, "learning_rate": 4.903849628774014e-06, "loss": 0.04658279, "memory(GiB)": 13.7, "step": 56615, "train_speed(iter/s)": 1.53193 }, { "acc": 0.97704315, "epoch": 26.53855167565034, "grad_norm": 3.140934467315674, "learning_rate": 4.903074663385715e-06, "loss": 0.06235049, "memory(GiB)": 13.7, "step": 56620, "train_speed(iter/s)": 1.531945 }, { "acc": 0.98767853, "epoch": 26.540895242559174, "grad_norm": 3.891162157058716, "learning_rate": 4.902299700339199e-06, "loss": 0.04398628, "memory(GiB)": 13.7, "step": 56625, "train_speed(iter/s)": 1.531951 }, { "acc": 0.98869047, "epoch": 26.54323880946801, "grad_norm": 1.8629512786865234, "learning_rate": 4.901524739653098e-06, "loss": 0.06637493, "memory(GiB)": 13.7, "step": 56630, "train_speed(iter/s)": 1.531955 }, { "acc": 0.97791128, "epoch": 26.545582376376846, "grad_norm": 2.1282949447631836, "learning_rate": 4.9007497813460346e-06, "loss": 0.04860433, "memory(GiB)": 13.7, "step": 56635, "train_speed(iter/s)": 1.531954 }, { "acc": 0.97684975, "epoch": 26.54792594328568, "grad_norm": 5.484312534332275, "learning_rate": 4.89997482543664e-06, "loss": 0.07216785, "memory(GiB)": 13.7, "step": 56640, "train_speed(iter/s)": 1.531961 }, { "acc": 0.98363094, "epoch": 26.550269510194518, "grad_norm": 1.1232573986053467, "learning_rate": 4.899199871943542e-06, "loss": 0.04325615, "memory(GiB)": 13.7, "step": 56645, "train_speed(iter/s)": 1.53196 }, { "acc": 0.9833334, "epoch": 26.552613077103352, "grad_norm": 3.7748589515686035, "learning_rate": 4.898424920885365e-06, "loss": 0.03485349, "memory(GiB)": 13.7, "step": 56650, "train_speed(iter/s)": 1.531957 }, { "acc": 0.98330364, "epoch": 26.554956644012186, "grad_norm": 5.459705352783203, "learning_rate": 4.8976499722807354e-06, "loss": 0.07094281, "memory(GiB)": 13.7, "step": 56655, "train_speed(iter/s)": 1.531967 }, { "acc": 0.99375, "epoch": 26.55730021092102, "grad_norm": 0.7154836058616638, "learning_rate": 4.896875026148285e-06, "loss": 0.0148445, "memory(GiB)": 13.7, "step": 56660, "train_speed(iter/s)": 1.53197 }, { "acc": 0.99508934, "epoch": 26.55964377782986, "grad_norm": 1.4823259115219116, "learning_rate": 4.896100082506638e-06, "loss": 0.02490429, "memory(GiB)": 13.7, "step": 56665, "train_speed(iter/s)": 1.53197 }, { "acc": 0.9557291, "epoch": 26.561987344738693, "grad_norm": 6.9389567375183105, "learning_rate": 4.895325141374422e-06, "loss": 0.14061377, "memory(GiB)": 13.7, "step": 56670, "train_speed(iter/s)": 1.53197 }, { "acc": 0.979072, "epoch": 26.564330911647527, "grad_norm": 2.1834521293640137, "learning_rate": 4.894550202770264e-06, "loss": 0.06239704, "memory(GiB)": 13.7, "step": 56675, "train_speed(iter/s)": 1.531976 }, { "acc": 0.97478628, "epoch": 26.56667447855636, "grad_norm": 4.494131088256836, "learning_rate": 4.89377526671279e-06, "loss": 0.05206746, "memory(GiB)": 13.7, "step": 56680, "train_speed(iter/s)": 1.531981 }, { "acc": 0.97317352, "epoch": 26.5690180454652, "grad_norm": 8.246524810791016, "learning_rate": 4.89300033322063e-06, "loss": 0.0889002, "memory(GiB)": 13.7, "step": 56685, "train_speed(iter/s)": 1.531984 }, { "acc": 0.98571434, "epoch": 26.571361612374034, "grad_norm": 4.427855491638184, "learning_rate": 4.8922254023124094e-06, "loss": 0.04936107, "memory(GiB)": 13.7, "step": 56690, "train_speed(iter/s)": 1.531989 }, { "acc": 0.9825695, "epoch": 26.573705179282868, "grad_norm": 1.444319248199463, "learning_rate": 4.891450474006753e-06, "loss": 0.03448758, "memory(GiB)": 13.7, "step": 56695, "train_speed(iter/s)": 1.531993 }, { "acc": 0.97458344, "epoch": 26.576048746191702, "grad_norm": 6.696834564208984, "learning_rate": 4.890675548322288e-06, "loss": 0.07970101, "memory(GiB)": 13.7, "step": 56700, "train_speed(iter/s)": 1.531989 }, { "acc": 0.99140453, "epoch": 26.57839231310054, "grad_norm": 1.2065566778182983, "learning_rate": 4.8899006252776434e-06, "loss": 0.04516498, "memory(GiB)": 13.7, "step": 56705, "train_speed(iter/s)": 1.53199 }, { "acc": 0.98559532, "epoch": 26.580735880009374, "grad_norm": 1.2703462839126587, "learning_rate": 4.889125704891443e-06, "loss": 0.06357139, "memory(GiB)": 13.7, "step": 56710, "train_speed(iter/s)": 1.531988 }, { "acc": 0.98354845, "epoch": 26.58307944691821, "grad_norm": 0.6830381751060486, "learning_rate": 4.888350787182316e-06, "loss": 0.04531134, "memory(GiB)": 13.7, "step": 56715, "train_speed(iter/s)": 1.531991 }, { "acc": 0.98916664, "epoch": 26.585423013827043, "grad_norm": 2.918551445007324, "learning_rate": 4.8875758721688875e-06, "loss": 0.05863408, "memory(GiB)": 13.7, "step": 56720, "train_speed(iter/s)": 1.531987 }, { "acc": 0.99107141, "epoch": 26.58776658073588, "grad_norm": 1.5927332639694214, "learning_rate": 4.886800959869783e-06, "loss": 0.02489848, "memory(GiB)": 13.7, "step": 56725, "train_speed(iter/s)": 1.531985 }, { "acc": 0.98633928, "epoch": 26.590110147644715, "grad_norm": 3.945164918899536, "learning_rate": 4.886026050303631e-06, "loss": 0.02754314, "memory(GiB)": 13.7, "step": 56730, "train_speed(iter/s)": 1.531982 }, { "acc": 0.98968258, "epoch": 26.59245371455355, "grad_norm": 8.224115371704102, "learning_rate": 4.885251143489057e-06, "loss": 0.06196647, "memory(GiB)": 13.7, "step": 56735, "train_speed(iter/s)": 1.531983 }, { "acc": 0.98983173, "epoch": 26.594797281462387, "grad_norm": 2.1744496822357178, "learning_rate": 4.884476239444686e-06, "loss": 0.0417134, "memory(GiB)": 13.7, "step": 56740, "train_speed(iter/s)": 1.531978 }, { "acc": 0.99017859, "epoch": 26.59714084837122, "grad_norm": 1.4667867422103882, "learning_rate": 4.883701338189146e-06, "loss": 0.03341658, "memory(GiB)": 13.7, "step": 56745, "train_speed(iter/s)": 1.531972 }, { "acc": 0.98270836, "epoch": 26.599484415280056, "grad_norm": 6.057270526885986, "learning_rate": 4.882926439741062e-06, "loss": 0.05578614, "memory(GiB)": 13.7, "step": 56750, "train_speed(iter/s)": 1.531974 }, { "acc": 0.9776042, "epoch": 26.60182798218889, "grad_norm": 1.9845975637435913, "learning_rate": 4.882151544119059e-06, "loss": 0.05513298, "memory(GiB)": 13.7, "step": 56755, "train_speed(iter/s)": 1.531978 }, { "acc": 0.978125, "epoch": 26.604171549097728, "grad_norm": 5.827451229095459, "learning_rate": 4.881376651341764e-06, "loss": 0.08143442, "memory(GiB)": 13.7, "step": 56760, "train_speed(iter/s)": 1.531978 }, { "acc": 0.98449993, "epoch": 26.606515116006562, "grad_norm": 2.8636698722839355, "learning_rate": 4.880601761427804e-06, "loss": 0.10032893, "memory(GiB)": 13.7, "step": 56765, "train_speed(iter/s)": 1.531973 }, { "acc": 0.9807292, "epoch": 26.608858682915397, "grad_norm": 2.8828985691070557, "learning_rate": 4.879826874395804e-06, "loss": 0.0622362, "memory(GiB)": 13.7, "step": 56770, "train_speed(iter/s)": 1.531989 }, { "acc": 0.99086304, "epoch": 26.61120224982423, "grad_norm": 1.4629404544830322, "learning_rate": 4.879051990264389e-06, "loss": 0.04350154, "memory(GiB)": 13.7, "step": 56775, "train_speed(iter/s)": 1.531992 }, { "acc": 0.9927084, "epoch": 26.61354581673307, "grad_norm": 0.16149553656578064, "learning_rate": 4.878277109052185e-06, "loss": 0.018981, "memory(GiB)": 13.7, "step": 56780, "train_speed(iter/s)": 1.532002 }, { "acc": 0.98916664, "epoch": 26.615889383641903, "grad_norm": 3.765836000442505, "learning_rate": 4.877502230777818e-06, "loss": 0.05145286, "memory(GiB)": 13.7, "step": 56785, "train_speed(iter/s)": 1.532012 }, { "acc": 0.97639885, "epoch": 26.618232950550738, "grad_norm": 4.385066986083984, "learning_rate": 4.876727355459912e-06, "loss": 0.08078864, "memory(GiB)": 13.7, "step": 56790, "train_speed(iter/s)": 1.532016 }, { "acc": 0.9864584, "epoch": 26.620576517459572, "grad_norm": 1.4591214656829834, "learning_rate": 4.8759524831170965e-06, "loss": 0.04776109, "memory(GiB)": 13.7, "step": 56795, "train_speed(iter/s)": 1.532019 }, { "acc": 0.97652359, "epoch": 26.62292008436841, "grad_norm": 5.375182151794434, "learning_rate": 4.8751776137679916e-06, "loss": 0.05807676, "memory(GiB)": 13.7, "step": 56800, "train_speed(iter/s)": 1.532015 }, { "acc": 0.96791668, "epoch": 26.625263651277244, "grad_norm": 3.009697437286377, "learning_rate": 4.874402747431226e-06, "loss": 0.06232848, "memory(GiB)": 13.7, "step": 56805, "train_speed(iter/s)": 1.532007 }, { "acc": 0.9885417, "epoch": 26.62760721818608, "grad_norm": 4.128295421600342, "learning_rate": 4.873627884125423e-06, "loss": 0.02268009, "memory(GiB)": 13.7, "step": 56810, "train_speed(iter/s)": 1.532016 }, { "acc": 0.98395834, "epoch": 26.629950785094913, "grad_norm": 4.583768367767334, "learning_rate": 4.8728530238692116e-06, "loss": 0.05822899, "memory(GiB)": 13.7, "step": 56815, "train_speed(iter/s)": 1.532016 }, { "acc": 0.98078375, "epoch": 26.63229435200375, "grad_norm": 1.5686309337615967, "learning_rate": 4.872078166681213e-06, "loss": 0.06070412, "memory(GiB)": 13.7, "step": 56820, "train_speed(iter/s)": 1.532019 }, { "acc": 0.99300594, "epoch": 26.634637918912585, "grad_norm": 1.6302964687347412, "learning_rate": 4.871303312580053e-06, "loss": 0.02304095, "memory(GiB)": 13.7, "step": 56825, "train_speed(iter/s)": 1.53202 }, { "acc": 0.98779764, "epoch": 26.63698148582142, "grad_norm": 1.74380362033844, "learning_rate": 4.870528461584355e-06, "loss": 0.059114, "memory(GiB)": 13.7, "step": 56830, "train_speed(iter/s)": 1.532017 }, { "acc": 0.96875, "epoch": 26.639325052730257, "grad_norm": 3.8373289108276367, "learning_rate": 4.869753613712749e-06, "loss": 0.12963657, "memory(GiB)": 13.7, "step": 56835, "train_speed(iter/s)": 1.532023 }, { "acc": 0.97975407, "epoch": 26.64166861963909, "grad_norm": 11.622634887695312, "learning_rate": 4.868978768983855e-06, "loss": 0.05107003, "memory(GiB)": 13.7, "step": 56840, "train_speed(iter/s)": 1.532025 }, { "acc": 0.98776045, "epoch": 26.644012186547926, "grad_norm": 0.1246173083782196, "learning_rate": 4.868203927416299e-06, "loss": 0.050684, "memory(GiB)": 13.7, "step": 56845, "train_speed(iter/s)": 1.532034 }, { "acc": 0.97436962, "epoch": 26.64635575345676, "grad_norm": 4.2062273025512695, "learning_rate": 4.867429089028708e-06, "loss": 0.08253386, "memory(GiB)": 13.7, "step": 56850, "train_speed(iter/s)": 1.532039 }, { "acc": 0.9708333, "epoch": 26.648699320365598, "grad_norm": 3.3339576721191406, "learning_rate": 4.866654253839703e-06, "loss": 0.05100542, "memory(GiB)": 13.7, "step": 56855, "train_speed(iter/s)": 1.532039 }, { "acc": 0.98621025, "epoch": 26.651042887274432, "grad_norm": 2.8047568798065186, "learning_rate": 4.86587942186791e-06, "loss": 0.07510922, "memory(GiB)": 13.7, "step": 56860, "train_speed(iter/s)": 1.532043 }, { "acc": 0.97904758, "epoch": 26.653386454183266, "grad_norm": 2.5485622882843018, "learning_rate": 4.865104593131953e-06, "loss": 0.06928968, "memory(GiB)": 13.7, "step": 56865, "train_speed(iter/s)": 1.532046 }, { "acc": 0.990625, "epoch": 26.6557300210921, "grad_norm": 1.5782551765441895, "learning_rate": 4.864329767650458e-06, "loss": 0.02583502, "memory(GiB)": 13.7, "step": 56870, "train_speed(iter/s)": 1.532049 }, { "acc": 0.96800594, "epoch": 26.65807358800094, "grad_norm": 4.184170722961426, "learning_rate": 4.863554945442047e-06, "loss": 0.0574441, "memory(GiB)": 13.7, "step": 56875, "train_speed(iter/s)": 1.532058 }, { "acc": 0.96994667, "epoch": 26.660417154909773, "grad_norm": 2.3698904514312744, "learning_rate": 4.862780126525345e-06, "loss": 0.11298604, "memory(GiB)": 13.7, "step": 56880, "train_speed(iter/s)": 1.532069 }, { "acc": 0.985322, "epoch": 26.662760721818607, "grad_norm": 0.005467633716762066, "learning_rate": 4.862005310918977e-06, "loss": 0.03410704, "memory(GiB)": 13.7, "step": 56885, "train_speed(iter/s)": 1.53207 }, { "acc": 0.98113098, "epoch": 26.66510428872744, "grad_norm": 1.793898344039917, "learning_rate": 4.861230498641565e-06, "loss": 0.04616149, "memory(GiB)": 13.7, "step": 56890, "train_speed(iter/s)": 1.532068 }, { "acc": 0.98166676, "epoch": 26.66744785563628, "grad_norm": 4.811983585357666, "learning_rate": 4.860455689711736e-06, "loss": 0.06992328, "memory(GiB)": 13.7, "step": 56895, "train_speed(iter/s)": 1.532077 }, { "acc": 0.98041668, "epoch": 26.669791422545114, "grad_norm": 5.186705589294434, "learning_rate": 4.859680884148113e-06, "loss": 0.05168919, "memory(GiB)": 13.7, "step": 56900, "train_speed(iter/s)": 1.532076 }, { "acc": 0.98125, "epoch": 26.672134989453948, "grad_norm": 8.481932640075684, "learning_rate": 4.858906081969317e-06, "loss": 0.06000308, "memory(GiB)": 13.7, "step": 56905, "train_speed(iter/s)": 1.532077 }, { "acc": 0.98291664, "epoch": 26.674478556362786, "grad_norm": 3.060716152191162, "learning_rate": 4.858131283193975e-06, "loss": 0.0355846, "memory(GiB)": 13.7, "step": 56910, "train_speed(iter/s)": 1.532084 }, { "acc": 0.96323862, "epoch": 26.67682212327162, "grad_norm": 1.4501969814300537, "learning_rate": 4.857356487840707e-06, "loss": 0.14466242, "memory(GiB)": 13.7, "step": 56915, "train_speed(iter/s)": 1.532081 }, { "acc": 0.97666664, "epoch": 26.679165690180454, "grad_norm": 4.316895484924316, "learning_rate": 4.85658169592814e-06, "loss": 0.06145369, "memory(GiB)": 13.7, "step": 56920, "train_speed(iter/s)": 1.532088 }, { "acc": 0.99092264, "epoch": 26.68150925708929, "grad_norm": 0.6166378855705261, "learning_rate": 4.855806907474897e-06, "loss": 0.04868885, "memory(GiB)": 13.7, "step": 56925, "train_speed(iter/s)": 1.532088 }, { "acc": 0.98708324, "epoch": 26.683852823998127, "grad_norm": 7.408433437347412, "learning_rate": 4.855032122499599e-06, "loss": 0.03119368, "memory(GiB)": 13.7, "step": 56930, "train_speed(iter/s)": 1.532094 }, { "acc": 0.9916667, "epoch": 26.68619639090696, "grad_norm": 3.166844129562378, "learning_rate": 4.854257341020871e-06, "loss": 0.04489606, "memory(GiB)": 13.7, "step": 56935, "train_speed(iter/s)": 1.5321 }, { "acc": 0.96696424, "epoch": 26.688539957815795, "grad_norm": 5.938810348510742, "learning_rate": 4.853482563057339e-06, "loss": 0.09659489, "memory(GiB)": 13.7, "step": 56940, "train_speed(iter/s)": 1.532099 }, { "acc": 0.97250004, "epoch": 26.69088352472463, "grad_norm": 4.530093193054199, "learning_rate": 4.852707788627622e-06, "loss": 0.07780788, "memory(GiB)": 13.7, "step": 56945, "train_speed(iter/s)": 1.532103 }, { "acc": 0.98635273, "epoch": 26.693227091633467, "grad_norm": 1.5072084665298462, "learning_rate": 4.8519330177503415e-06, "loss": 0.05043594, "memory(GiB)": 13.7, "step": 56950, "train_speed(iter/s)": 1.532115 }, { "acc": 0.98710937, "epoch": 26.6955706585423, "grad_norm": 0.2574935257434845, "learning_rate": 4.851158250444126e-06, "loss": 0.03654746, "memory(GiB)": 13.7, "step": 56955, "train_speed(iter/s)": 1.532115 }, { "acc": 0.98149796, "epoch": 26.697914225451136, "grad_norm": 1.1602084636688232, "learning_rate": 4.8503834867275935e-06, "loss": 0.04563339, "memory(GiB)": 13.7, "step": 56960, "train_speed(iter/s)": 1.53211 }, { "acc": 0.98988094, "epoch": 26.70025779235997, "grad_norm": 2.2929816246032715, "learning_rate": 4.84960872661937e-06, "loss": 0.0302574, "memory(GiB)": 13.7, "step": 56965, "train_speed(iter/s)": 1.532112 }, { "acc": 0.97805061, "epoch": 26.702601359268808, "grad_norm": 4.625156879425049, "learning_rate": 4.848833970138077e-06, "loss": 0.10023415, "memory(GiB)": 13.7, "step": 56970, "train_speed(iter/s)": 1.532112 }, { "acc": 0.9801137, "epoch": 26.704944926177642, "grad_norm": 12.88782787322998, "learning_rate": 4.848059217302336e-06, "loss": 0.08442253, "memory(GiB)": 13.7, "step": 56975, "train_speed(iter/s)": 1.53212 }, { "acc": 0.9697916, "epoch": 26.707288493086477, "grad_norm": 2.523264169692993, "learning_rate": 4.847284468130773e-06, "loss": 0.08733142, "memory(GiB)": 13.7, "step": 56980, "train_speed(iter/s)": 1.532128 }, { "acc": 0.97569447, "epoch": 26.709632059995315, "grad_norm": 2.274306058883667, "learning_rate": 4.846509722642007e-06, "loss": 0.14212804, "memory(GiB)": 13.7, "step": 56985, "train_speed(iter/s)": 1.532131 }, { "acc": 0.99229164, "epoch": 26.71197562690415, "grad_norm": 0.9920704364776611, "learning_rate": 4.84573498085466e-06, "loss": 0.0179553, "memory(GiB)": 13.7, "step": 56990, "train_speed(iter/s)": 1.532143 }, { "acc": 0.98604164, "epoch": 26.714319193812983, "grad_norm": 0.49775809049606323, "learning_rate": 4.844960242787357e-06, "loss": 0.05236861, "memory(GiB)": 13.7, "step": 56995, "train_speed(iter/s)": 1.532152 }, { "acc": 0.985394, "epoch": 26.716662760721817, "grad_norm": 0.0016193082556128502, "learning_rate": 4.844185508458718e-06, "loss": 0.04743545, "memory(GiB)": 13.7, "step": 57000, "train_speed(iter/s)": 1.532162 }, { "acc": 0.98875008, "epoch": 26.719006327630655, "grad_norm": 0.34385251998901367, "learning_rate": 4.843410777887366e-06, "loss": 0.02898079, "memory(GiB)": 13.7, "step": 57005, "train_speed(iter/s)": 1.532169 }, { "acc": 0.98770828, "epoch": 26.72134989453949, "grad_norm": 4.288073539733887, "learning_rate": 4.842636051091923e-06, "loss": 0.04545796, "memory(GiB)": 13.7, "step": 57010, "train_speed(iter/s)": 1.532171 }, { "acc": 0.97208328, "epoch": 26.723693461448324, "grad_norm": 5.808953762054443, "learning_rate": 4.841861328091011e-06, "loss": 0.11913059, "memory(GiB)": 13.7, "step": 57015, "train_speed(iter/s)": 1.53217 }, { "acc": 0.98726196, "epoch": 26.726037028357158, "grad_norm": 2.5368731021881104, "learning_rate": 4.8410866089032495e-06, "loss": 0.04042847, "memory(GiB)": 13.7, "step": 57020, "train_speed(iter/s)": 1.532169 }, { "acc": 0.984375, "epoch": 26.728380595265996, "grad_norm": 4.198308944702148, "learning_rate": 4.840311893547266e-06, "loss": 0.02366374, "memory(GiB)": 13.7, "step": 57025, "train_speed(iter/s)": 1.53217 }, { "acc": 0.98354168, "epoch": 26.73072416217483, "grad_norm": 3.7925052642822266, "learning_rate": 4.839537182041676e-06, "loss": 0.0312316, "memory(GiB)": 13.7, "step": 57030, "train_speed(iter/s)": 1.532177 }, { "acc": 0.97622023, "epoch": 26.733067729083665, "grad_norm": 2.8981244564056396, "learning_rate": 4.838762474405103e-06, "loss": 0.10341005, "memory(GiB)": 13.7, "step": 57035, "train_speed(iter/s)": 1.532175 }, { "acc": 0.97246113, "epoch": 26.7354112959925, "grad_norm": 3.9464802742004395, "learning_rate": 4.83798777065617e-06, "loss": 0.09836481, "memory(GiB)": 13.7, "step": 57040, "train_speed(iter/s)": 1.532182 }, { "acc": 0.9671875, "epoch": 26.737754862901337, "grad_norm": 6.472065448760986, "learning_rate": 4.837213070813497e-06, "loss": 0.09371408, "memory(GiB)": 13.7, "step": 57045, "train_speed(iter/s)": 1.532192 }, { "acc": 0.98966351, "epoch": 26.74009842981017, "grad_norm": 2.0643463134765625, "learning_rate": 4.836438374895703e-06, "loss": 0.0516675, "memory(GiB)": 13.7, "step": 57050, "train_speed(iter/s)": 1.532192 }, { "acc": 0.98030643, "epoch": 26.742441996719005, "grad_norm": 4.770952224731445, "learning_rate": 4.835663682921414e-06, "loss": 0.04467031, "memory(GiB)": 13.7, "step": 57055, "train_speed(iter/s)": 1.532201 }, { "acc": 0.9880209, "epoch": 26.744785563627843, "grad_norm": 1.0913316011428833, "learning_rate": 4.834888994909247e-06, "loss": 0.03539695, "memory(GiB)": 13.7, "step": 57060, "train_speed(iter/s)": 1.532197 }, { "acc": 0.9833334, "epoch": 26.747129130536678, "grad_norm": 3.8040928840637207, "learning_rate": 4.8341143108778265e-06, "loss": 0.07449676, "memory(GiB)": 13.7, "step": 57065, "train_speed(iter/s)": 1.5322 }, { "acc": 0.9808712, "epoch": 26.749472697445512, "grad_norm": 5.8855156898498535, "learning_rate": 4.83333963084577e-06, "loss": 0.074946, "memory(GiB)": 13.7, "step": 57070, "train_speed(iter/s)": 1.532209 }, { "acc": 0.9755208, "epoch": 26.751816264354346, "grad_norm": 2.883274555206299, "learning_rate": 4.832564954831701e-06, "loss": 0.07162039, "memory(GiB)": 13.7, "step": 57075, "train_speed(iter/s)": 1.532209 }, { "acc": 0.9875, "epoch": 26.754159831263184, "grad_norm": 4.118189811706543, "learning_rate": 4.831790282854236e-06, "loss": 0.05963203, "memory(GiB)": 13.7, "step": 57080, "train_speed(iter/s)": 1.532218 }, { "acc": 0.971875, "epoch": 26.75650339817202, "grad_norm": 6.4859724044799805, "learning_rate": 4.831015614932001e-06, "loss": 0.090025, "memory(GiB)": 13.7, "step": 57085, "train_speed(iter/s)": 1.532217 }, { "acc": 0.9760417, "epoch": 26.758846965080853, "grad_norm": 2.1054470539093018, "learning_rate": 4.830240951083611e-06, "loss": 0.07205902, "memory(GiB)": 13.7, "step": 57090, "train_speed(iter/s)": 1.532212 }, { "acc": 0.98760738, "epoch": 26.761190531989687, "grad_norm": 3.9336726665496826, "learning_rate": 4.829466291327691e-06, "loss": 0.09852618, "memory(GiB)": 13.7, "step": 57095, "train_speed(iter/s)": 1.532211 }, { "acc": 0.98916664, "epoch": 26.763534098898525, "grad_norm": 1.6120257377624512, "learning_rate": 4.82869163568286e-06, "loss": 0.04491998, "memory(GiB)": 13.7, "step": 57100, "train_speed(iter/s)": 1.532222 }, { "acc": 0.9822917, "epoch": 26.76587766580736, "grad_norm": 2.5340399742126465, "learning_rate": 4.827916984167736e-06, "loss": 0.02850482, "memory(GiB)": 13.7, "step": 57105, "train_speed(iter/s)": 1.532219 }, { "acc": 0.98357143, "epoch": 26.768221232716193, "grad_norm": 1.7707185745239258, "learning_rate": 4.827142336800943e-06, "loss": 0.05667048, "memory(GiB)": 13.7, "step": 57110, "train_speed(iter/s)": 1.532224 }, { "acc": 0.97276516, "epoch": 26.770564799625028, "grad_norm": 3.1248600482940674, "learning_rate": 4.8263676936010974e-06, "loss": 0.06904492, "memory(GiB)": 13.7, "step": 57115, "train_speed(iter/s)": 1.53223 }, { "acc": 0.98708334, "epoch": 26.772908366533866, "grad_norm": 0.001030952320434153, "learning_rate": 4.825593054586821e-06, "loss": 0.05295979, "memory(GiB)": 13.7, "step": 57120, "train_speed(iter/s)": 1.532235 }, { "acc": 0.98640881, "epoch": 26.7752519334427, "grad_norm": 1.172771692276001, "learning_rate": 4.824818419776732e-06, "loss": 0.0241017, "memory(GiB)": 13.7, "step": 57125, "train_speed(iter/s)": 1.532241 }, { "acc": 0.96833334, "epoch": 26.777595500351534, "grad_norm": 3.664137601852417, "learning_rate": 4.824043789189452e-06, "loss": 0.10594964, "memory(GiB)": 13.7, "step": 57130, "train_speed(iter/s)": 1.532255 }, { "acc": 0.9864583, "epoch": 26.779939067260372, "grad_norm": 0.010497232899069786, "learning_rate": 4.823269162843597e-06, "loss": 0.02640551, "memory(GiB)": 13.7, "step": 57135, "train_speed(iter/s)": 1.53226 }, { "acc": 0.98860931, "epoch": 26.782282634169206, "grad_norm": 1.468721628189087, "learning_rate": 4.822494540757791e-06, "loss": 0.03917448, "memory(GiB)": 13.7, "step": 57140, "train_speed(iter/s)": 1.532258 }, { "acc": 0.98080359, "epoch": 26.78462620107804, "grad_norm": 2.2633001804351807, "learning_rate": 4.821719922950652e-06, "loss": 0.05815634, "memory(GiB)": 13.7, "step": 57145, "train_speed(iter/s)": 1.532251 }, { "acc": 0.9926136, "epoch": 26.786969767986875, "grad_norm": 4.124528408050537, "learning_rate": 4.8209453094408e-06, "loss": 0.03963387, "memory(GiB)": 13.7, "step": 57150, "train_speed(iter/s)": 1.532255 }, { "acc": 0.98641376, "epoch": 26.789313334895713, "grad_norm": 3.043107271194458, "learning_rate": 4.82017070024685e-06, "loss": 0.04324849, "memory(GiB)": 13.7, "step": 57155, "train_speed(iter/s)": 1.532258 }, { "acc": 0.96518307, "epoch": 26.791656901804547, "grad_norm": 2.5547797679901123, "learning_rate": 4.819396095387425e-06, "loss": 0.10847031, "memory(GiB)": 13.7, "step": 57160, "train_speed(iter/s)": 1.53226 }, { "acc": 0.97831411, "epoch": 26.79400046871338, "grad_norm": 4.463615417480469, "learning_rate": 4.818621494881142e-06, "loss": 0.05783455, "memory(GiB)": 13.7, "step": 57165, "train_speed(iter/s)": 1.53226 }, { "acc": 0.97826462, "epoch": 26.796344035622216, "grad_norm": 2.260154962539673, "learning_rate": 4.817846898746623e-06, "loss": 0.06351042, "memory(GiB)": 13.7, "step": 57170, "train_speed(iter/s)": 1.532263 }, { "acc": 0.98395834, "epoch": 26.798687602531054, "grad_norm": 1.0264203548431396, "learning_rate": 4.817072307002482e-06, "loss": 0.03350519, "memory(GiB)": 13.7, "step": 57175, "train_speed(iter/s)": 1.53227 }, { "acc": 0.97499456, "epoch": 26.801031169439888, "grad_norm": 6.628254413604736, "learning_rate": 4.816297719667341e-06, "loss": 0.06169899, "memory(GiB)": 13.7, "step": 57180, "train_speed(iter/s)": 1.532277 }, { "acc": 0.9835227, "epoch": 26.803374736348722, "grad_norm": 2.4374172687530518, "learning_rate": 4.815523136759819e-06, "loss": 0.06351169, "memory(GiB)": 13.7, "step": 57185, "train_speed(iter/s)": 1.53228 }, { "acc": 0.9875, "epoch": 26.805718303257557, "grad_norm": 6.598881721496582, "learning_rate": 4.8147485582985325e-06, "loss": 0.06009768, "memory(GiB)": 13.7, "step": 57190, "train_speed(iter/s)": 1.532285 }, { "acc": 0.9755209, "epoch": 26.808061870166394, "grad_norm": 4.063233375549316, "learning_rate": 4.813973984302099e-06, "loss": 0.07112404, "memory(GiB)": 13.7, "step": 57195, "train_speed(iter/s)": 1.532286 }, { "acc": 0.9864583, "epoch": 26.81040543707523, "grad_norm": 4.132699012756348, "learning_rate": 4.81319941478914e-06, "loss": 0.08672534, "memory(GiB)": 13.7, "step": 57200, "train_speed(iter/s)": 1.532299 }, { "acc": 0.98909721, "epoch": 26.812749003984063, "grad_norm": 0.038282688707113266, "learning_rate": 4.812424849778271e-06, "loss": 0.05172834, "memory(GiB)": 13.7, "step": 57205, "train_speed(iter/s)": 1.5323 }, { "acc": 0.98083334, "epoch": 26.815092570892897, "grad_norm": 2.711111307144165, "learning_rate": 4.811650289288109e-06, "loss": 0.07738571, "memory(GiB)": 13.7, "step": 57210, "train_speed(iter/s)": 1.5323 }, { "acc": 0.97250004, "epoch": 26.817436137801735, "grad_norm": 3.8337390422821045, "learning_rate": 4.810875733337276e-06, "loss": 0.09137212, "memory(GiB)": 13.7, "step": 57215, "train_speed(iter/s)": 1.532298 }, { "acc": 0.9822917, "epoch": 26.81977970471057, "grad_norm": 0.010506822727620602, "learning_rate": 4.8101011819443865e-06, "loss": 0.06059817, "memory(GiB)": 13.7, "step": 57220, "train_speed(iter/s)": 1.532298 }, { "acc": 0.9853838, "epoch": 26.822123271619404, "grad_norm": 0.002497331704944372, "learning_rate": 4.809326635128059e-06, "loss": 0.05761994, "memory(GiB)": 13.7, "step": 57225, "train_speed(iter/s)": 1.532305 }, { "acc": 0.98178825, "epoch": 26.82446683852824, "grad_norm": 2.80305552482605, "learning_rate": 4.808552092906912e-06, "loss": 0.06829536, "memory(GiB)": 13.7, "step": 57230, "train_speed(iter/s)": 1.532308 }, { "acc": 0.97731152, "epoch": 26.826810405437076, "grad_norm": 6.106186389923096, "learning_rate": 4.807777555299562e-06, "loss": 0.06403502, "memory(GiB)": 13.7, "step": 57235, "train_speed(iter/s)": 1.532303 }, { "acc": 0.98467264, "epoch": 26.82915397234591, "grad_norm": 2.985046863555908, "learning_rate": 4.8070030223246254e-06, "loss": 0.05825354, "memory(GiB)": 13.7, "step": 57240, "train_speed(iter/s)": 1.53231 }, { "acc": 0.99237175, "epoch": 26.831497539254745, "grad_norm": 5.2474284172058105, "learning_rate": 4.80622849400072e-06, "loss": 0.05839497, "memory(GiB)": 13.7, "step": 57245, "train_speed(iter/s)": 1.532308 }, { "acc": 0.97994051, "epoch": 26.833841106163582, "grad_norm": 6.293365478515625, "learning_rate": 4.805453970346465e-06, "loss": 0.0691327, "memory(GiB)": 13.7, "step": 57250, "train_speed(iter/s)": 1.532318 }, { "acc": 0.97934532, "epoch": 26.836184673072417, "grad_norm": 5.7952423095703125, "learning_rate": 4.8046794513804745e-06, "loss": 0.09757168, "memory(GiB)": 13.7, "step": 57255, "train_speed(iter/s)": 1.532317 }, { "acc": 0.97763252, "epoch": 26.83852823998125, "grad_norm": 23.41759490966797, "learning_rate": 4.803904937121367e-06, "loss": 0.12606006, "memory(GiB)": 13.7, "step": 57260, "train_speed(iter/s)": 1.532324 }, { "acc": 0.97624998, "epoch": 26.840871806890085, "grad_norm": 4.93068265914917, "learning_rate": 4.80313042758776e-06, "loss": 0.07372425, "memory(GiB)": 13.7, "step": 57265, "train_speed(iter/s)": 1.532326 }, { "acc": 0.98061962, "epoch": 26.843215373798923, "grad_norm": 6.602367401123047, "learning_rate": 4.802355922798267e-06, "loss": 0.06017019, "memory(GiB)": 13.7, "step": 57270, "train_speed(iter/s)": 1.532341 }, { "acc": 0.98052082, "epoch": 26.845558940707758, "grad_norm": 4.177713394165039, "learning_rate": 4.8015814227715094e-06, "loss": 0.08318582, "memory(GiB)": 13.7, "step": 57275, "train_speed(iter/s)": 1.532342 }, { "acc": 0.97644749, "epoch": 26.847902507616592, "grad_norm": 3.3157262802124023, "learning_rate": 4.8008069275261e-06, "loss": 0.07145334, "memory(GiB)": 13.7, "step": 57280, "train_speed(iter/s)": 1.532348 }, { "acc": 0.9802083, "epoch": 26.850246074525426, "grad_norm": 0.013276530429720879, "learning_rate": 4.800032437080654e-06, "loss": 0.05330862, "memory(GiB)": 13.7, "step": 57285, "train_speed(iter/s)": 1.532348 }, { "acc": 0.9690774, "epoch": 26.852589641434264, "grad_norm": 10.433252334594727, "learning_rate": 4.7992579514537915e-06, "loss": 0.115984, "memory(GiB)": 13.7, "step": 57290, "train_speed(iter/s)": 1.532348 }, { "acc": 0.98506947, "epoch": 26.8549332083431, "grad_norm": 0.1988832950592041, "learning_rate": 4.798483470664127e-06, "loss": 0.06470541, "memory(GiB)": 13.7, "step": 57295, "train_speed(iter/s)": 1.532353 }, { "acc": 0.98662033, "epoch": 26.857276775251933, "grad_norm": 5.224228858947754, "learning_rate": 4.797708994730275e-06, "loss": 0.06160617, "memory(GiB)": 13.7, "step": 57300, "train_speed(iter/s)": 1.532365 }, { "acc": 0.98604164, "epoch": 26.859620342160767, "grad_norm": 2.9211342334747314, "learning_rate": 4.796934523670853e-06, "loss": 0.06239231, "memory(GiB)": 13.7, "step": 57305, "train_speed(iter/s)": 1.532369 }, { "acc": 0.988447, "epoch": 26.861963909069605, "grad_norm": 0.00043017754796892405, "learning_rate": 4.796160057504475e-06, "loss": 0.04176025, "memory(GiB)": 13.7, "step": 57310, "train_speed(iter/s)": 1.532375 }, { "acc": 0.97885418, "epoch": 26.86430747597844, "grad_norm": 3.5876123905181885, "learning_rate": 4.795385596249761e-06, "loss": 0.07979443, "memory(GiB)": 13.7, "step": 57315, "train_speed(iter/s)": 1.532382 }, { "acc": 0.98312502, "epoch": 26.866651042887273, "grad_norm": 3.5145819187164307, "learning_rate": 4.794611139925321e-06, "loss": 0.06110853, "memory(GiB)": 13.7, "step": 57320, "train_speed(iter/s)": 1.532384 }, { "acc": 0.97562504, "epoch": 26.86899460979611, "grad_norm": 8.397829055786133, "learning_rate": 4.793836688549774e-06, "loss": 0.04600942, "memory(GiB)": 13.7, "step": 57325, "train_speed(iter/s)": 1.532385 }, { "acc": 0.9583334, "epoch": 26.871338176704946, "grad_norm": 7.707352161407471, "learning_rate": 4.793062242141733e-06, "loss": 0.10974224, "memory(GiB)": 13.7, "step": 57330, "train_speed(iter/s)": 1.53239 }, { "acc": 0.98611116, "epoch": 26.87368174361378, "grad_norm": 3.457797050476074, "learning_rate": 4.792287800719815e-06, "loss": 0.04686062, "memory(GiB)": 13.7, "step": 57335, "train_speed(iter/s)": 1.532396 }, { "acc": 0.98363094, "epoch": 26.876025310522614, "grad_norm": 4.771301746368408, "learning_rate": 4.7915133643026326e-06, "loss": 0.0353994, "memory(GiB)": 13.7, "step": 57340, "train_speed(iter/s)": 1.5324 }, { "acc": 0.9916666, "epoch": 26.878368877431452, "grad_norm": 1.7224438190460205, "learning_rate": 4.7907389329088044e-06, "loss": 0.02295205, "memory(GiB)": 13.7, "step": 57345, "train_speed(iter/s)": 1.532397 }, { "acc": 0.98005953, "epoch": 26.880712444340286, "grad_norm": 1.9609116315841675, "learning_rate": 4.7899645065569425e-06, "loss": 0.05531255, "memory(GiB)": 13.7, "step": 57350, "train_speed(iter/s)": 1.532402 }, { "acc": 0.98916664, "epoch": 26.88305601124912, "grad_norm": 0.06724675744771957, "learning_rate": 4.789190085265661e-06, "loss": 0.02224826, "memory(GiB)": 13.7, "step": 57355, "train_speed(iter/s)": 1.532398 }, { "acc": 0.9864583, "epoch": 26.885399578157955, "grad_norm": 1.7641059160232544, "learning_rate": 4.788415669053579e-06, "loss": 0.04672816, "memory(GiB)": 13.7, "step": 57360, "train_speed(iter/s)": 1.5324 }, { "acc": 0.95791664, "epoch": 26.887743145066793, "grad_norm": 4.285441875457764, "learning_rate": 4.787641257939306e-06, "loss": 0.09709342, "memory(GiB)": 13.7, "step": 57365, "train_speed(iter/s)": 1.532399 }, { "acc": 0.98527355, "epoch": 26.890086711975627, "grad_norm": 0.013323723338544369, "learning_rate": 4.786866851941457e-06, "loss": 0.04170529, "memory(GiB)": 13.7, "step": 57370, "train_speed(iter/s)": 1.532403 }, { "acc": 0.9875, "epoch": 26.89243027888446, "grad_norm": 3.3061158657073975, "learning_rate": 4.786092451078646e-06, "loss": 0.02627095, "memory(GiB)": 13.7, "step": 57375, "train_speed(iter/s)": 1.532404 }, { "acc": 0.98249998, "epoch": 26.894773845793296, "grad_norm": 1.663251280784607, "learning_rate": 4.78531805536949e-06, "loss": 0.03840171, "memory(GiB)": 13.7, "step": 57380, "train_speed(iter/s)": 1.532407 }, { "acc": 0.99246712, "epoch": 26.897117412702134, "grad_norm": 1.9017614126205444, "learning_rate": 4.784543664832599e-06, "loss": 0.02567133, "memory(GiB)": 13.7, "step": 57385, "train_speed(iter/s)": 1.532414 }, { "acc": 0.98193188, "epoch": 26.899460979610968, "grad_norm": 0.663898766040802, "learning_rate": 4.783769279486591e-06, "loss": 0.06176667, "memory(GiB)": 13.7, "step": 57390, "train_speed(iter/s)": 1.532414 }, { "acc": 0.98031254, "epoch": 26.901804546519802, "grad_norm": 5.276034355163574, "learning_rate": 4.782994899350077e-06, "loss": 0.06148907, "memory(GiB)": 13.7, "step": 57395, "train_speed(iter/s)": 1.532408 }, { "acc": 0.99125004, "epoch": 26.90414811342864, "grad_norm": 0.11874580383300781, "learning_rate": 4.782220524441672e-06, "loss": 0.04202115, "memory(GiB)": 13.7, "step": 57400, "train_speed(iter/s)": 1.532412 }, { "acc": 0.98028278, "epoch": 26.906491680337474, "grad_norm": 1.6663103103637695, "learning_rate": 4.781446154779986e-06, "loss": 0.0503792, "memory(GiB)": 13.7, "step": 57405, "train_speed(iter/s)": 1.532421 }, { "acc": 0.98458328, "epoch": 26.90883524724631, "grad_norm": 3.597287654876709, "learning_rate": 4.780671790383637e-06, "loss": 0.0341292, "memory(GiB)": 13.7, "step": 57410, "train_speed(iter/s)": 1.532424 }, { "acc": 0.9927083, "epoch": 26.911178814155143, "grad_norm": 3.551870346069336, "learning_rate": 4.779897431271234e-06, "loss": 0.0242137, "memory(GiB)": 13.7, "step": 57415, "train_speed(iter/s)": 1.532427 }, { "acc": 0.98718748, "epoch": 26.91352238106398, "grad_norm": 0.8067907094955444, "learning_rate": 4.779123077461393e-06, "loss": 0.03191899, "memory(GiB)": 13.7, "step": 57420, "train_speed(iter/s)": 1.532436 }, { "acc": 0.97842255, "epoch": 26.915865947972815, "grad_norm": 0.20934396982192993, "learning_rate": 4.778348728972726e-06, "loss": 0.04411647, "memory(GiB)": 13.7, "step": 57425, "train_speed(iter/s)": 1.53243 }, { "acc": 0.9822916, "epoch": 26.91820951488165, "grad_norm": 3.3192737102508545, "learning_rate": 4.777574385823846e-06, "loss": 0.04128201, "memory(GiB)": 13.7, "step": 57430, "train_speed(iter/s)": 1.53244 }, { "acc": 0.98395834, "epoch": 26.920553081790484, "grad_norm": 0.16533613204956055, "learning_rate": 4.7768000480333645e-06, "loss": 0.05904046, "memory(GiB)": 13.7, "step": 57435, "train_speed(iter/s)": 1.532446 }, { "acc": 0.9802084, "epoch": 26.92289664869932, "grad_norm": 4.532379627227783, "learning_rate": 4.776025715619896e-06, "loss": 0.04604048, "memory(GiB)": 13.7, "step": 57440, "train_speed(iter/s)": 1.532439 }, { "acc": 0.97121248, "epoch": 26.925240215608156, "grad_norm": 3.5936408042907715, "learning_rate": 4.775251388602052e-06, "loss": 0.06009548, "memory(GiB)": 13.7, "step": 57445, "train_speed(iter/s)": 1.532445 }, { "acc": 0.96359539, "epoch": 26.92758378251699, "grad_norm": 7.7357330322265625, "learning_rate": 4.774477066998444e-06, "loss": 0.09561089, "memory(GiB)": 13.7, "step": 57450, "train_speed(iter/s)": 1.53245 }, { "acc": 0.9864584, "epoch": 26.929927349425824, "grad_norm": 3.778841733932495, "learning_rate": 4.773702750827685e-06, "loss": 0.03080404, "memory(GiB)": 13.7, "step": 57455, "train_speed(iter/s)": 1.532454 }, { "acc": 0.98894348, "epoch": 26.932270916334662, "grad_norm": 11.533641815185547, "learning_rate": 4.772928440108385e-06, "loss": 0.04071161, "memory(GiB)": 13.7, "step": 57460, "train_speed(iter/s)": 1.53246 }, { "acc": 0.96208334, "epoch": 26.934614483243497, "grad_norm": 6.554990291595459, "learning_rate": 4.77215413485916e-06, "loss": 0.10256494, "memory(GiB)": 13.7, "step": 57465, "train_speed(iter/s)": 1.532465 }, { "acc": 0.9894886, "epoch": 26.93695805015233, "grad_norm": 3.080282211303711, "learning_rate": 4.771379835098619e-06, "loss": 0.02330763, "memory(GiB)": 13.7, "step": 57470, "train_speed(iter/s)": 1.532468 }, { "acc": 0.98988094, "epoch": 26.93930161706117, "grad_norm": 3.474595308303833, "learning_rate": 4.770605540845373e-06, "loss": 0.03796982, "memory(GiB)": 13.7, "step": 57475, "train_speed(iter/s)": 1.532468 }, { "acc": 0.97279768, "epoch": 26.941645183970003, "grad_norm": 5.389108657836914, "learning_rate": 4.769831252118035e-06, "loss": 0.06910168, "memory(GiB)": 13.7, "step": 57480, "train_speed(iter/s)": 1.532475 }, { "acc": 0.99278851, "epoch": 26.943988750878837, "grad_norm": 2.7786030769348145, "learning_rate": 4.769056968935217e-06, "loss": 0.02750815, "memory(GiB)": 13.7, "step": 57485, "train_speed(iter/s)": 1.532473 }, { "acc": 0.9791666, "epoch": 26.94633231778767, "grad_norm": 7.164087295532227, "learning_rate": 4.768282691315528e-06, "loss": 0.04846692, "memory(GiB)": 13.7, "step": 57490, "train_speed(iter/s)": 1.532472 }, { "acc": 0.97951756, "epoch": 26.94867588469651, "grad_norm": 4.346841335296631, "learning_rate": 4.767508419277581e-06, "loss": 0.05842774, "memory(GiB)": 13.7, "step": 57495, "train_speed(iter/s)": 1.532472 }, { "acc": 0.98571434, "epoch": 26.951019451605344, "grad_norm": 2.3271358013153076, "learning_rate": 4.7667341528399855e-06, "loss": 0.03558154, "memory(GiB)": 13.7, "step": 57500, "train_speed(iter/s)": 1.532469 }, { "acc": 0.98347473, "epoch": 26.953363018514178, "grad_norm": 3.5095903873443604, "learning_rate": 4.7659598920213516e-06, "loss": 0.03776571, "memory(GiB)": 13.7, "step": 57505, "train_speed(iter/s)": 1.532464 }, { "acc": 0.96727676, "epoch": 26.955706585423012, "grad_norm": 0.8329956531524658, "learning_rate": 4.765185636840294e-06, "loss": 0.08893574, "memory(GiB)": 13.7, "step": 57510, "train_speed(iter/s)": 1.532463 }, { "acc": 0.99613094, "epoch": 26.95805015233185, "grad_norm": 0.007620359305292368, "learning_rate": 4.764411387315419e-06, "loss": 0.03300469, "memory(GiB)": 13.7, "step": 57515, "train_speed(iter/s)": 1.532475 }, { "acc": 1.0, "epoch": 26.960393719240685, "grad_norm": 0.297010213136673, "learning_rate": 4.763637143465339e-06, "loss": 0.02265264, "memory(GiB)": 13.7, "step": 57520, "train_speed(iter/s)": 1.532483 }, { "acc": 0.9875, "epoch": 26.96273728614952, "grad_norm": 3.6358845233917236, "learning_rate": 4.762862905308665e-06, "loss": 0.02023981, "memory(GiB)": 13.7, "step": 57525, "train_speed(iter/s)": 1.532485 }, { "acc": 0.99187498, "epoch": 26.965080853058353, "grad_norm": 2.4132769107818604, "learning_rate": 4.762088672864005e-06, "loss": 0.05017142, "memory(GiB)": 13.7, "step": 57530, "train_speed(iter/s)": 1.532491 }, { "acc": 0.99508934, "epoch": 26.96742441996719, "grad_norm": 0.001262275385670364, "learning_rate": 4.7613144461499695e-06, "loss": 0.0383679, "memory(GiB)": 13.7, "step": 57535, "train_speed(iter/s)": 1.532499 }, { "acc": 0.98166676, "epoch": 26.969767986876025, "grad_norm": 8.959996223449707, "learning_rate": 4.76054022518517e-06, "loss": 0.05961599, "memory(GiB)": 13.7, "step": 57540, "train_speed(iter/s)": 1.532505 }, { "acc": 0.97145844, "epoch": 26.97211155378486, "grad_norm": 4.415018558502197, "learning_rate": 4.759766009988215e-06, "loss": 0.07680099, "memory(GiB)": 13.7, "step": 57545, "train_speed(iter/s)": 1.532516 }, { "acc": 0.990625, "epoch": 26.974455120693698, "grad_norm": 3.6883621215820312, "learning_rate": 4.758991800577713e-06, "loss": 0.0464417, "memory(GiB)": 13.7, "step": 57550, "train_speed(iter/s)": 1.532515 }, { "acc": 0.99008923, "epoch": 26.976798687602532, "grad_norm": 2.182480573654175, "learning_rate": 4.758217596972276e-06, "loss": 0.06973596, "memory(GiB)": 13.7, "step": 57555, "train_speed(iter/s)": 1.532516 }, { "acc": 0.9854167, "epoch": 26.979142254511366, "grad_norm": 2.370521306991577, "learning_rate": 4.757443399190511e-06, "loss": 0.04916421, "memory(GiB)": 13.7, "step": 57560, "train_speed(iter/s)": 1.532515 }, { "acc": 0.98214293, "epoch": 26.9814858214202, "grad_norm": 2.466165065765381, "learning_rate": 4.756669207251029e-06, "loss": 0.07190841, "memory(GiB)": 13.7, "step": 57565, "train_speed(iter/s)": 1.532524 }, { "acc": 0.98916664, "epoch": 26.98382938832904, "grad_norm": 0.9557203650474548, "learning_rate": 4.755895021172439e-06, "loss": 0.04895493, "memory(GiB)": 13.7, "step": 57570, "train_speed(iter/s)": 1.532532 }, { "acc": 0.96863098, "epoch": 26.986172955237873, "grad_norm": 5.840921878814697, "learning_rate": 4.755120840973348e-06, "loss": 0.07714317, "memory(GiB)": 13.7, "step": 57575, "train_speed(iter/s)": 1.532531 }, { "acc": 0.99571428, "epoch": 26.988516522146707, "grad_norm": 1.366942286491394, "learning_rate": 4.7543466666723655e-06, "loss": 0.05310624, "memory(GiB)": 13.7, "step": 57580, "train_speed(iter/s)": 1.532533 }, { "acc": 0.98833332, "epoch": 26.99086008905554, "grad_norm": 3.898500442504883, "learning_rate": 4.753572498288102e-06, "loss": 0.06795337, "memory(GiB)": 13.7, "step": 57585, "train_speed(iter/s)": 1.532537 }, { "acc": 0.97979164, "epoch": 26.99320365596438, "grad_norm": 4.19685173034668, "learning_rate": 4.752798335839162e-06, "loss": 0.04549747, "memory(GiB)": 13.7, "step": 57590, "train_speed(iter/s)": 1.532544 }, { "acc": 0.98986111, "epoch": 26.995547222873213, "grad_norm": 2.6232833862304688, "learning_rate": 4.752024179344158e-06, "loss": 0.04186917, "memory(GiB)": 13.7, "step": 57595, "train_speed(iter/s)": 1.532555 }, { "acc": 0.98946428, "epoch": 26.997890789782048, "grad_norm": 4.371729850769043, "learning_rate": 4.751250028821696e-06, "loss": 0.03423272, "memory(GiB)": 13.7, "step": 57600, "train_speed(iter/s)": 1.532558 }, { "acc": 0.97437496, "epoch": 27.000234356690882, "grad_norm": 5.3002495765686035, "learning_rate": 4.750475884290384e-06, "loss": 0.07777961, "memory(GiB)": 13.7, "step": 57605, "train_speed(iter/s)": 1.532538 }, { "acc": 0.98005209, "epoch": 27.00257792359972, "grad_norm": 4.824702262878418, "learning_rate": 4.749701745768833e-06, "loss": 0.05838993, "memory(GiB)": 13.7, "step": 57610, "train_speed(iter/s)": 1.532539 }, { "acc": 0.98812504, "epoch": 27.004921490508554, "grad_norm": 1.8858122825622559, "learning_rate": 4.748927613275645e-06, "loss": 0.05653796, "memory(GiB)": 13.7, "step": 57615, "train_speed(iter/s)": 1.532544 }, { "acc": 0.984375, "epoch": 27.00726505741739, "grad_norm": 0.000603732478339225, "learning_rate": 4.748153486829433e-06, "loss": 0.03312632, "memory(GiB)": 13.7, "step": 57620, "train_speed(iter/s)": 1.532545 }, { "acc": 0.97956352, "epoch": 27.009608624326223, "grad_norm": 2.1549036502838135, "learning_rate": 4.7473793664488e-06, "loss": 0.07366363, "memory(GiB)": 13.7, "step": 57625, "train_speed(iter/s)": 1.532544 }, { "acc": 0.98767853, "epoch": 27.01195219123506, "grad_norm": 2.7577970027923584, "learning_rate": 4.746605252152356e-06, "loss": 0.0363678, "memory(GiB)": 13.7, "step": 57630, "train_speed(iter/s)": 1.532547 }, { "acc": 0.98551588, "epoch": 27.014295758143895, "grad_norm": 5.379064559936523, "learning_rate": 4.745831143958707e-06, "loss": 0.03323569, "memory(GiB)": 13.7, "step": 57635, "train_speed(iter/s)": 1.532552 }, { "acc": 0.98455925, "epoch": 27.01663932505273, "grad_norm": 4.749619960784912, "learning_rate": 4.7450570418864615e-06, "loss": 0.05293822, "memory(GiB)": 13.7, "step": 57640, "train_speed(iter/s)": 1.532555 }, { "acc": 0.9625, "epoch": 27.018982891961567, "grad_norm": 4.440089225769043, "learning_rate": 4.7442829459542266e-06, "loss": 0.08481711, "memory(GiB)": 13.7, "step": 57645, "train_speed(iter/s)": 1.532561 }, { "acc": 0.98738098, "epoch": 27.0213264588704, "grad_norm": 1.8935964107513428, "learning_rate": 4.7435088561806056e-06, "loss": 0.04690775, "memory(GiB)": 13.7, "step": 57650, "train_speed(iter/s)": 1.532566 }, { "acc": 0.978125, "epoch": 27.023670025779236, "grad_norm": 1.9859373569488525, "learning_rate": 4.7427347725842095e-06, "loss": 0.0537772, "memory(GiB)": 13.7, "step": 57655, "train_speed(iter/s)": 1.532565 }, { "acc": 0.99131947, "epoch": 27.02601359268807, "grad_norm": 1.440128207206726, "learning_rate": 4.741960695183643e-06, "loss": 0.03420306, "memory(GiB)": 13.7, "step": 57660, "train_speed(iter/s)": 1.532566 }, { "acc": 0.98125, "epoch": 27.028357159596908, "grad_norm": 3.213949680328369, "learning_rate": 4.74118662399751e-06, "loss": 0.07242646, "memory(GiB)": 13.7, "step": 57665, "train_speed(iter/s)": 1.532569 }, { "acc": 0.98154774, "epoch": 27.030700726505742, "grad_norm": 4.544414520263672, "learning_rate": 4.740412559044419e-06, "loss": 0.05603486, "memory(GiB)": 13.7, "step": 57670, "train_speed(iter/s)": 1.532577 }, { "acc": 0.98028851, "epoch": 27.033044293414576, "grad_norm": 3.23901104927063, "learning_rate": 4.739638500342977e-06, "loss": 0.037769, "memory(GiB)": 13.7, "step": 57675, "train_speed(iter/s)": 1.532587 }, { "acc": 0.978125, "epoch": 27.03538786032341, "grad_norm": 1.2892789840698242, "learning_rate": 4.738864447911787e-06, "loss": 0.0528227, "memory(GiB)": 13.7, "step": 57680, "train_speed(iter/s)": 1.532585 }, { "acc": 0.98758011, "epoch": 27.03773142723225, "grad_norm": 3.8909966945648193, "learning_rate": 4.738090401769456e-06, "loss": 0.02696862, "memory(GiB)": 13.7, "step": 57685, "train_speed(iter/s)": 1.532588 }, { "acc": 0.97688484, "epoch": 27.040074994141083, "grad_norm": 3.0533640384674072, "learning_rate": 4.7373163619345915e-06, "loss": 0.11612751, "memory(GiB)": 13.7, "step": 57690, "train_speed(iter/s)": 1.532583 }, { "acc": 0.99008923, "epoch": 27.042418561049917, "grad_norm": 0.8092672228813171, "learning_rate": 4.736542328425796e-06, "loss": 0.01844853, "memory(GiB)": 13.7, "step": 57695, "train_speed(iter/s)": 1.532586 }, { "acc": 0.97822914, "epoch": 27.04476212795875, "grad_norm": 0.022207144647836685, "learning_rate": 4.735768301261677e-06, "loss": 0.04326242, "memory(GiB)": 13.7, "step": 57700, "train_speed(iter/s)": 1.532585 }, { "acc": 0.98113098, "epoch": 27.04710569486759, "grad_norm": 4.694229602813721, "learning_rate": 4.734994280460837e-06, "loss": 0.05207661, "memory(GiB)": 13.7, "step": 57705, "train_speed(iter/s)": 1.532589 }, { "acc": 0.98468208, "epoch": 27.049449261776424, "grad_norm": 2.054487466812134, "learning_rate": 4.7342202660418815e-06, "loss": 0.05877817, "memory(GiB)": 13.7, "step": 57710, "train_speed(iter/s)": 1.532589 }, { "acc": 0.9833334, "epoch": 27.051792828685258, "grad_norm": 6.137451171875, "learning_rate": 4.733446258023417e-06, "loss": 0.05397722, "memory(GiB)": 13.7, "step": 57715, "train_speed(iter/s)": 1.532583 }, { "acc": 0.97228622, "epoch": 27.054136395594096, "grad_norm": 2.7009153366088867, "learning_rate": 4.7326722564240465e-06, "loss": 0.08430288, "memory(GiB)": 13.7, "step": 57720, "train_speed(iter/s)": 1.532584 }, { "acc": 0.98705349, "epoch": 27.05647996250293, "grad_norm": 4.7829155921936035, "learning_rate": 4.731898261262375e-06, "loss": 0.07861182, "memory(GiB)": 13.7, "step": 57725, "train_speed(iter/s)": 1.532585 }, { "acc": 0.98125, "epoch": 27.058823529411764, "grad_norm": 7.62675666809082, "learning_rate": 4.7311242725570075e-06, "loss": 0.11877017, "memory(GiB)": 13.7, "step": 57730, "train_speed(iter/s)": 1.532594 }, { "acc": 0.98363972, "epoch": 27.0611670963206, "grad_norm": 2.5897529125213623, "learning_rate": 4.730350290326549e-06, "loss": 0.02209784, "memory(GiB)": 13.7, "step": 57735, "train_speed(iter/s)": 1.532597 }, { "acc": 0.99002972, "epoch": 27.063510663229437, "grad_norm": 1.4133243560791016, "learning_rate": 4.729576314589599e-06, "loss": 0.04993278, "memory(GiB)": 13.7, "step": 57740, "train_speed(iter/s)": 1.532607 }, { "acc": 0.97982883, "epoch": 27.06585423013827, "grad_norm": 2.9630496501922607, "learning_rate": 4.728802345364766e-06, "loss": 0.06361705, "memory(GiB)": 13.7, "step": 57745, "train_speed(iter/s)": 1.532611 }, { "acc": 0.98065481, "epoch": 27.068197797047105, "grad_norm": 5.437293529510498, "learning_rate": 4.728028382670653e-06, "loss": 0.07203299, "memory(GiB)": 13.7, "step": 57750, "train_speed(iter/s)": 1.532608 }, { "acc": 0.98666668, "epoch": 27.07054136395594, "grad_norm": 6.136569023132324, "learning_rate": 4.727254426525859e-06, "loss": 0.06786103, "memory(GiB)": 13.7, "step": 57755, "train_speed(iter/s)": 1.532609 }, { "acc": 0.99125004, "epoch": 27.072884930864777, "grad_norm": 4.908578395843506, "learning_rate": 4.726480476948995e-06, "loss": 0.03054294, "memory(GiB)": 13.7, "step": 57760, "train_speed(iter/s)": 1.532621 }, { "acc": 0.97215281, "epoch": 27.07522849777361, "grad_norm": 4.226669788360596, "learning_rate": 4.725706533958657e-06, "loss": 0.04897005, "memory(GiB)": 13.7, "step": 57765, "train_speed(iter/s)": 1.532623 }, { "acc": 0.98104172, "epoch": 27.077572064682446, "grad_norm": 6.168759346008301, "learning_rate": 4.724932597573453e-06, "loss": 0.04352672, "memory(GiB)": 13.7, "step": 57770, "train_speed(iter/s)": 1.532627 }, { "acc": 0.97833328, "epoch": 27.07991563159128, "grad_norm": 1.8281276226043701, "learning_rate": 4.724158667811984e-06, "loss": 0.07882979, "memory(GiB)": 13.7, "step": 57775, "train_speed(iter/s)": 1.53262 }, { "acc": 0.96623516, "epoch": 27.082259198500118, "grad_norm": 3.591371774673462, "learning_rate": 4.723384744692853e-06, "loss": 0.08884563, "memory(GiB)": 13.7, "step": 57780, "train_speed(iter/s)": 1.532628 }, { "acc": 0.97084284, "epoch": 27.084602765408953, "grad_norm": 3.358506441116333, "learning_rate": 4.7226108282346605e-06, "loss": 0.07721125, "memory(GiB)": 13.7, "step": 57785, "train_speed(iter/s)": 1.532638 }, { "acc": 0.97875004, "epoch": 27.086946332317787, "grad_norm": 5.0325446128845215, "learning_rate": 4.721836918456013e-06, "loss": 0.0544238, "memory(GiB)": 13.7, "step": 57790, "train_speed(iter/s)": 1.532645 }, { "acc": 0.98457108, "epoch": 27.089289899226625, "grad_norm": 0.227816641330719, "learning_rate": 4.721063015375509e-06, "loss": 0.0472958, "memory(GiB)": 13.7, "step": 57795, "train_speed(iter/s)": 1.53265 }, { "acc": 0.996875, "epoch": 27.09163346613546, "grad_norm": 3.563250780105591, "learning_rate": 4.720289119011751e-06, "loss": 0.02352576, "memory(GiB)": 13.7, "step": 57800, "train_speed(iter/s)": 1.532654 }, { "acc": 0.9578125, "epoch": 27.093977033044293, "grad_norm": 11.961053848266602, "learning_rate": 4.719515229383345e-06, "loss": 0.12899077, "memory(GiB)": 13.7, "step": 57805, "train_speed(iter/s)": 1.532667 }, { "acc": 0.9798317, "epoch": 27.096320599953128, "grad_norm": 4.266091823577881, "learning_rate": 4.7187413465088865e-06, "loss": 0.04609941, "memory(GiB)": 13.7, "step": 57810, "train_speed(iter/s)": 1.532669 }, { "acc": 0.99196434, "epoch": 27.098664166861965, "grad_norm": 3.01863956451416, "learning_rate": 4.717967470406983e-06, "loss": 0.03865803, "memory(GiB)": 13.7, "step": 57815, "train_speed(iter/s)": 1.532672 }, { "acc": 0.98467264, "epoch": 27.1010077337708, "grad_norm": 4.347279071807861, "learning_rate": 4.717193601096233e-06, "loss": 0.04930567, "memory(GiB)": 13.7, "step": 57820, "train_speed(iter/s)": 1.532675 }, { "acc": 0.98359375, "epoch": 27.103351300679634, "grad_norm": 2.7161643505096436, "learning_rate": 4.7164197385952385e-06, "loss": 0.05541955, "memory(GiB)": 13.7, "step": 57825, "train_speed(iter/s)": 1.532679 }, { "acc": 0.98805809, "epoch": 27.10569486758847, "grad_norm": 0.14374081790447235, "learning_rate": 4.715645882922599e-06, "loss": 0.06130886, "memory(GiB)": 13.7, "step": 57830, "train_speed(iter/s)": 1.532685 }, { "acc": 0.98244047, "epoch": 27.108038434497306, "grad_norm": 2.848220109939575, "learning_rate": 4.714872034096917e-06, "loss": 0.05046203, "memory(GiB)": 13.7, "step": 57835, "train_speed(iter/s)": 1.532695 }, { "acc": 0.98874998, "epoch": 27.11038200140614, "grad_norm": 3.5129895210266113, "learning_rate": 4.714098192136792e-06, "loss": 0.06060449, "memory(GiB)": 13.7, "step": 57840, "train_speed(iter/s)": 1.532703 }, { "acc": 0.97979164, "epoch": 27.112725568314975, "grad_norm": 4.344701290130615, "learning_rate": 4.713324357060826e-06, "loss": 0.06967989, "memory(GiB)": 13.7, "step": 57845, "train_speed(iter/s)": 1.532704 }, { "acc": 0.990625, "epoch": 27.11506913522381, "grad_norm": 0.3801271617412567, "learning_rate": 4.71255052888762e-06, "loss": 0.05078379, "memory(GiB)": 13.7, "step": 57850, "train_speed(iter/s)": 1.532704 }, { "acc": 0.9770834, "epoch": 27.117412702132647, "grad_norm": 5.298990249633789, "learning_rate": 4.711776707635772e-06, "loss": 0.08527579, "memory(GiB)": 13.7, "step": 57855, "train_speed(iter/s)": 1.532703 }, { "acc": 0.99071426, "epoch": 27.11975626904148, "grad_norm": 3.559171438217163, "learning_rate": 4.711002893323886e-06, "loss": 0.03716701, "memory(GiB)": 13.7, "step": 57860, "train_speed(iter/s)": 1.532709 }, { "acc": 0.98402786, "epoch": 27.122099835950316, "grad_norm": 5.4909796714782715, "learning_rate": 4.710229085970557e-06, "loss": 0.05815306, "memory(GiB)": 13.7, "step": 57865, "train_speed(iter/s)": 1.53271 }, { "acc": 0.98534718, "epoch": 27.12444340285915, "grad_norm": 5.3424072265625, "learning_rate": 4.709455285594387e-06, "loss": 0.07167934, "memory(GiB)": 13.7, "step": 57870, "train_speed(iter/s)": 1.532719 }, { "acc": 0.98279762, "epoch": 27.126786969767988, "grad_norm": 4.032310962677002, "learning_rate": 4.708681492213977e-06, "loss": 0.05732705, "memory(GiB)": 13.7, "step": 57875, "train_speed(iter/s)": 1.532721 }, { "acc": 0.98670635, "epoch": 27.129130536676822, "grad_norm": 2.602119207382202, "learning_rate": 4.707907705847925e-06, "loss": 0.08182737, "memory(GiB)": 13.7, "step": 57880, "train_speed(iter/s)": 1.532725 }, { "acc": 0.97805805, "epoch": 27.131474103585656, "grad_norm": 1.640007734298706, "learning_rate": 4.707133926514829e-06, "loss": 0.06815512, "memory(GiB)": 13.7, "step": 57885, "train_speed(iter/s)": 1.53273 }, { "acc": 0.9984375, "epoch": 27.133817670494494, "grad_norm": 1.271602749824524, "learning_rate": 4.706360154233291e-06, "loss": 0.02894513, "memory(GiB)": 13.7, "step": 57890, "train_speed(iter/s)": 1.532735 }, { "acc": 0.98354168, "epoch": 27.13616123740333, "grad_norm": 2.4563369750976562, "learning_rate": 4.705586389021909e-06, "loss": 0.0554465, "memory(GiB)": 13.7, "step": 57895, "train_speed(iter/s)": 1.532737 }, { "acc": 0.97752972, "epoch": 27.138504804312163, "grad_norm": 5.231841564178467, "learning_rate": 4.70481263089928e-06, "loss": 0.05535124, "memory(GiB)": 13.7, "step": 57900, "train_speed(iter/s)": 1.532736 }, { "acc": 0.99341345, "epoch": 27.140848371220997, "grad_norm": 0.01876680552959442, "learning_rate": 4.704038879884005e-06, "loss": 0.0292206, "memory(GiB)": 13.7, "step": 57905, "train_speed(iter/s)": 1.532734 }, { "acc": 0.98258924, "epoch": 27.143191938129835, "grad_norm": 5.854289531707764, "learning_rate": 4.703265135994682e-06, "loss": 0.0632051, "memory(GiB)": 13.7, "step": 57910, "train_speed(iter/s)": 1.532729 }, { "acc": 0.97696428, "epoch": 27.14553550503867, "grad_norm": 2.92313289642334, "learning_rate": 4.702491399249907e-06, "loss": 0.05692247, "memory(GiB)": 13.7, "step": 57915, "train_speed(iter/s)": 1.53273 }, { "acc": 0.98604164, "epoch": 27.147879071947504, "grad_norm": 0.3783787786960602, "learning_rate": 4.70171766966828e-06, "loss": 0.04374356, "memory(GiB)": 13.7, "step": 57920, "train_speed(iter/s)": 1.532725 }, { "acc": 0.9791667, "epoch": 27.150222638856338, "grad_norm": 3.509493112564087, "learning_rate": 4.7009439472684e-06, "loss": 0.05614542, "memory(GiB)": 13.7, "step": 57925, "train_speed(iter/s)": 1.532723 }, { "acc": 0.98553562, "epoch": 27.152566205765176, "grad_norm": 0.009887488558888435, "learning_rate": 4.700170232068861e-06, "loss": 0.05942761, "memory(GiB)": 13.7, "step": 57930, "train_speed(iter/s)": 1.532727 }, { "acc": 0.9875, "epoch": 27.15490977267401, "grad_norm": 1.8738781213760376, "learning_rate": 4.699396524088265e-06, "loss": 0.05879361, "memory(GiB)": 13.7, "step": 57935, "train_speed(iter/s)": 1.532728 }, { "acc": 0.97215281, "epoch": 27.157253339582844, "grad_norm": 7.143036842346191, "learning_rate": 4.698622823345207e-06, "loss": 0.06864741, "memory(GiB)": 13.7, "step": 57940, "train_speed(iter/s)": 1.532731 }, { "acc": 0.9916666, "epoch": 27.15959690649168, "grad_norm": 2.519423007965088, "learning_rate": 4.697849129858285e-06, "loss": 0.03974478, "memory(GiB)": 13.7, "step": 57945, "train_speed(iter/s)": 1.532735 }, { "acc": 0.98050594, "epoch": 27.161940473400517, "grad_norm": 1.1643551588058472, "learning_rate": 4.697075443646095e-06, "loss": 0.06559114, "memory(GiB)": 13.7, "step": 57950, "train_speed(iter/s)": 1.532738 }, { "acc": 0.97553024, "epoch": 27.16428404030935, "grad_norm": 5.889980792999268, "learning_rate": 4.696301764727235e-06, "loss": 0.0894578, "memory(GiB)": 13.7, "step": 57955, "train_speed(iter/s)": 1.532739 }, { "acc": 0.9875, "epoch": 27.166627607218185, "grad_norm": 0.13378725945949554, "learning_rate": 4.695528093120299e-06, "loss": 0.06022704, "memory(GiB)": 13.7, "step": 57960, "train_speed(iter/s)": 1.532741 }, { "acc": 0.99437504, "epoch": 27.168971174127023, "grad_norm": 0.009583218023180962, "learning_rate": 4.694754428843889e-06, "loss": 0.02078234, "memory(GiB)": 13.7, "step": 57965, "train_speed(iter/s)": 1.532745 }, { "acc": 0.97631941, "epoch": 27.171314741035857, "grad_norm": 6.042267799377441, "learning_rate": 4.6939807719165965e-06, "loss": 0.05480236, "memory(GiB)": 13.7, "step": 57970, "train_speed(iter/s)": 1.532744 }, { "acc": 0.97568455, "epoch": 27.17365830794469, "grad_norm": 3.7743899822235107, "learning_rate": 4.693207122357018e-06, "loss": 0.09873734, "memory(GiB)": 13.7, "step": 57975, "train_speed(iter/s)": 1.532739 }, { "acc": 0.98083334, "epoch": 27.176001874853526, "grad_norm": 2.8651936054229736, "learning_rate": 4.6924334801837524e-06, "loss": 0.03677211, "memory(GiB)": 13.7, "step": 57980, "train_speed(iter/s)": 1.53274 }, { "acc": 0.99020834, "epoch": 27.178345441762364, "grad_norm": 1.0390323400497437, "learning_rate": 4.691659845415393e-06, "loss": 0.02456602, "memory(GiB)": 13.7, "step": 57985, "train_speed(iter/s)": 1.532742 }, { "acc": 0.9828125, "epoch": 27.180689008671198, "grad_norm": 4.3379034996032715, "learning_rate": 4.6908862180705376e-06, "loss": 0.07140157, "memory(GiB)": 13.7, "step": 57990, "train_speed(iter/s)": 1.532747 }, { "acc": 0.98249998, "epoch": 27.183032575580032, "grad_norm": 3.0028326511383057, "learning_rate": 4.690112598167781e-06, "loss": 0.05450761, "memory(GiB)": 13.7, "step": 57995, "train_speed(iter/s)": 1.53275 }, { "acc": 0.98874998, "epoch": 27.185376142488867, "grad_norm": 2.472722291946411, "learning_rate": 4.689338985725716e-06, "loss": 0.01950337, "memory(GiB)": 13.7, "step": 58000, "train_speed(iter/s)": 1.532754 }, { "acc": 0.98270292, "epoch": 27.187719709397705, "grad_norm": 3.9815330505371094, "learning_rate": 4.688565380762939e-06, "loss": 0.06835672, "memory(GiB)": 13.7, "step": 58005, "train_speed(iter/s)": 1.532757 }, { "acc": 0.98395834, "epoch": 27.19006327630654, "grad_norm": 5.316542148590088, "learning_rate": 4.687791783298046e-06, "loss": 0.07040442, "memory(GiB)": 13.7, "step": 58010, "train_speed(iter/s)": 1.53276 }, { "acc": 0.98594704, "epoch": 27.192406843215373, "grad_norm": 4.474465370178223, "learning_rate": 4.687018193349631e-06, "loss": 0.04771194, "memory(GiB)": 13.7, "step": 58015, "train_speed(iter/s)": 1.532766 }, { "acc": 0.980723, "epoch": 27.194750410124207, "grad_norm": 5.8569254875183105, "learning_rate": 4.68624461093629e-06, "loss": 0.05164692, "memory(GiB)": 13.7, "step": 58020, "train_speed(iter/s)": 1.532771 }, { "acc": 0.99125004, "epoch": 27.197093977033045, "grad_norm": 1.1943966150283813, "learning_rate": 4.685471036076616e-06, "loss": 0.01713103, "memory(GiB)": 13.7, "step": 58025, "train_speed(iter/s)": 1.532772 }, { "acc": 0.99375, "epoch": 27.19943754394188, "grad_norm": 1.8933535814285278, "learning_rate": 4.684697468789204e-06, "loss": 0.01984737, "memory(GiB)": 13.7, "step": 58030, "train_speed(iter/s)": 1.532774 }, { "acc": 0.99187498, "epoch": 27.201781110850714, "grad_norm": 0.6679850816726685, "learning_rate": 4.683923909092645e-06, "loss": 0.03641166, "memory(GiB)": 13.7, "step": 58035, "train_speed(iter/s)": 1.532776 }, { "acc": 0.98604164, "epoch": 27.204124677759552, "grad_norm": 2.3150041103363037, "learning_rate": 4.683150357005537e-06, "loss": 0.03577879, "memory(GiB)": 13.7, "step": 58040, "train_speed(iter/s)": 1.532779 }, { "acc": 0.96000004, "epoch": 27.206468244668386, "grad_norm": 8.165478706359863, "learning_rate": 4.6823768125464705e-06, "loss": 0.09736776, "memory(GiB)": 13.7, "step": 58045, "train_speed(iter/s)": 1.532778 }, { "acc": 0.965625, "epoch": 27.20881181157722, "grad_norm": 18.742483139038086, "learning_rate": 4.681603275734041e-06, "loss": 0.14231174, "memory(GiB)": 13.7, "step": 58050, "train_speed(iter/s)": 1.53278 }, { "acc": 0.99229164, "epoch": 27.211155378486055, "grad_norm": 1.1487679481506348, "learning_rate": 4.680829746586841e-06, "loss": 0.03598451, "memory(GiB)": 13.7, "step": 58055, "train_speed(iter/s)": 1.532784 }, { "acc": 0.99499454, "epoch": 27.213498945394893, "grad_norm": 2.4231789112091064, "learning_rate": 4.680056225123463e-06, "loss": 0.04684671, "memory(GiB)": 13.7, "step": 58060, "train_speed(iter/s)": 1.532782 }, { "acc": 0.9864583, "epoch": 27.215842512303727, "grad_norm": 0.10943884402513504, "learning_rate": 4.679282711362502e-06, "loss": 0.04473356, "memory(GiB)": 13.7, "step": 58065, "train_speed(iter/s)": 1.532791 }, { "acc": 0.98845921, "epoch": 27.21818607921256, "grad_norm": 4.242766380310059, "learning_rate": 4.67850920532255e-06, "loss": 0.0460996, "memory(GiB)": 13.7, "step": 58070, "train_speed(iter/s)": 1.532793 }, { "acc": 0.98708324, "epoch": 27.220529646121395, "grad_norm": 2.9754109382629395, "learning_rate": 4.6777357070221975e-06, "loss": 0.02094086, "memory(GiB)": 13.7, "step": 58075, "train_speed(iter/s)": 1.532801 }, { "acc": 0.97696428, "epoch": 27.222873213030233, "grad_norm": 7.121384620666504, "learning_rate": 4.676962216480038e-06, "loss": 0.05775141, "memory(GiB)": 13.7, "step": 58080, "train_speed(iter/s)": 1.532801 }, { "acc": 0.97569447, "epoch": 27.225216779939068, "grad_norm": 4.708309173583984, "learning_rate": 4.676188733714664e-06, "loss": 0.09387464, "memory(GiB)": 13.7, "step": 58085, "train_speed(iter/s)": 1.532807 }, { "acc": 0.9822916, "epoch": 27.227560346847902, "grad_norm": 3.5981621742248535, "learning_rate": 4.6754152587446676e-06, "loss": 0.06046711, "memory(GiB)": 13.7, "step": 58090, "train_speed(iter/s)": 1.532812 }, { "acc": 0.991572, "epoch": 27.229903913756736, "grad_norm": 3.579432725906372, "learning_rate": 4.67464179158864e-06, "loss": 0.02301912, "memory(GiB)": 13.7, "step": 58095, "train_speed(iter/s)": 1.532809 }, { "acc": 0.99541664, "epoch": 27.232247480665574, "grad_norm": 1.651110291481018, "learning_rate": 4.673868332265174e-06, "loss": 0.03484203, "memory(GiB)": 13.7, "step": 58100, "train_speed(iter/s)": 1.532823 }, { "acc": 0.9833334, "epoch": 27.23459104757441, "grad_norm": 6.350496292114258, "learning_rate": 4.6730948807928595e-06, "loss": 0.076829, "memory(GiB)": 13.7, "step": 58105, "train_speed(iter/s)": 1.532829 }, { "acc": 0.96826839, "epoch": 27.236934614483243, "grad_norm": 3.4610514640808105, "learning_rate": 4.672321437190289e-06, "loss": 0.08202027, "memory(GiB)": 13.7, "step": 58110, "train_speed(iter/s)": 1.53283 }, { "acc": 0.98327379, "epoch": 27.239278181392077, "grad_norm": 0.4035472869873047, "learning_rate": 4.671548001476054e-06, "loss": 0.04772876, "memory(GiB)": 13.7, "step": 58115, "train_speed(iter/s)": 1.532832 }, { "acc": 0.9822916, "epoch": 27.241621748300915, "grad_norm": 5.053966522216797, "learning_rate": 4.670774573668743e-06, "loss": 0.05360509, "memory(GiB)": 13.7, "step": 58120, "train_speed(iter/s)": 1.532836 }, { "acc": 0.99204865, "epoch": 27.24396531520975, "grad_norm": 0.003833270166069269, "learning_rate": 4.670001153786947e-06, "loss": 0.01813719, "memory(GiB)": 13.7, "step": 58125, "train_speed(iter/s)": 1.532841 }, { "acc": 0.98590279, "epoch": 27.246308882118583, "grad_norm": 4.805279731750488, "learning_rate": 4.6692277418492605e-06, "loss": 0.0338941, "memory(GiB)": 13.7, "step": 58130, "train_speed(iter/s)": 1.532851 }, { "acc": 0.98397732, "epoch": 27.24865244902742, "grad_norm": 4.457964897155762, "learning_rate": 4.668454337874268e-06, "loss": 0.04162399, "memory(GiB)": 13.7, "step": 58135, "train_speed(iter/s)": 1.532842 }, { "acc": 0.97749996, "epoch": 27.250996015936256, "grad_norm": 2.005208969116211, "learning_rate": 4.667680941880563e-06, "loss": 0.07611201, "memory(GiB)": 13.7, "step": 58140, "train_speed(iter/s)": 1.532845 }, { "acc": 0.96708336, "epoch": 27.25333958284509, "grad_norm": 4.903356552124023, "learning_rate": 4.666907553886736e-06, "loss": 0.07388299, "memory(GiB)": 13.7, "step": 58145, "train_speed(iter/s)": 1.532852 }, { "acc": 0.98279762, "epoch": 27.255683149753924, "grad_norm": 2.5851247310638428, "learning_rate": 4.666134173911374e-06, "loss": 0.04685421, "memory(GiB)": 13.7, "step": 58150, "train_speed(iter/s)": 1.532854 }, { "acc": 0.98169641, "epoch": 27.258026716662762, "grad_norm": 9.70866584777832, "learning_rate": 4.66536080197307e-06, "loss": 0.11778091, "memory(GiB)": 13.7, "step": 58155, "train_speed(iter/s)": 1.532859 }, { "acc": 0.98303032, "epoch": 27.260370283571596, "grad_norm": 1.7671672105789185, "learning_rate": 4.66458743809041e-06, "loss": 0.0712222, "memory(GiB)": 13.7, "step": 58160, "train_speed(iter/s)": 1.532865 }, { "acc": 0.98086987, "epoch": 27.26271385048043, "grad_norm": 1.3098353147506714, "learning_rate": 4.663814082281983e-06, "loss": 0.06388327, "memory(GiB)": 13.7, "step": 58165, "train_speed(iter/s)": 1.532869 }, { "acc": 0.97994041, "epoch": 27.265057417389265, "grad_norm": 4.375906944274902, "learning_rate": 4.663040734566381e-06, "loss": 0.06942658, "memory(GiB)": 13.7, "step": 58170, "train_speed(iter/s)": 1.532871 }, { "acc": 0.990625, "epoch": 27.267400984298103, "grad_norm": 6.223606586456299, "learning_rate": 4.662267394962191e-06, "loss": 0.02264756, "memory(GiB)": 13.7, "step": 58175, "train_speed(iter/s)": 1.532872 }, { "acc": 0.99571428, "epoch": 27.269744551206937, "grad_norm": 3.09104061126709, "learning_rate": 4.661494063488001e-06, "loss": 0.02571354, "memory(GiB)": 13.7, "step": 58180, "train_speed(iter/s)": 1.532885 }, { "acc": 0.98154764, "epoch": 27.27208811811577, "grad_norm": 4.140177249908447, "learning_rate": 4.6607207401624e-06, "loss": 0.05228837, "memory(GiB)": 13.7, "step": 58185, "train_speed(iter/s)": 1.532886 }, { "acc": 0.99226189, "epoch": 27.274431685024606, "grad_norm": 1.1553409099578857, "learning_rate": 4.6599474250039764e-06, "loss": 0.04178025, "memory(GiB)": 13.7, "step": 58190, "train_speed(iter/s)": 1.532883 }, { "acc": 0.97833328, "epoch": 27.276775251933444, "grad_norm": 1.7994259595870972, "learning_rate": 4.659174118031318e-06, "loss": 0.04732325, "memory(GiB)": 13.7, "step": 58195, "train_speed(iter/s)": 1.532891 }, { "acc": 0.9817708, "epoch": 27.279118818842278, "grad_norm": 0.3926350772380829, "learning_rate": 4.658400819263014e-06, "loss": 0.07582567, "memory(GiB)": 13.7, "step": 58200, "train_speed(iter/s)": 1.532893 }, { "acc": 0.97479172, "epoch": 27.281462385751112, "grad_norm": 1.583134651184082, "learning_rate": 4.657627528717649e-06, "loss": 0.03888653, "memory(GiB)": 13.7, "step": 58205, "train_speed(iter/s)": 1.532889 }, { "acc": 0.97562504, "epoch": 27.28380595265995, "grad_norm": 0.7702942490577698, "learning_rate": 4.656854246413811e-06, "loss": 0.0489899, "memory(GiB)": 13.7, "step": 58210, "train_speed(iter/s)": 1.532894 }, { "acc": 0.98675594, "epoch": 27.286149519568784, "grad_norm": 3.2510437965393066, "learning_rate": 4.656080972370089e-06, "loss": 0.03261384, "memory(GiB)": 13.7, "step": 58215, "train_speed(iter/s)": 1.5329 }, { "acc": 0.98024559, "epoch": 27.28849308647762, "grad_norm": 2.3081254959106445, "learning_rate": 4.655307706605069e-06, "loss": 0.0870128, "memory(GiB)": 13.7, "step": 58220, "train_speed(iter/s)": 1.532903 }, { "acc": 0.98687496, "epoch": 27.290836653386453, "grad_norm": 0.011607421562075615, "learning_rate": 4.654534449137336e-06, "loss": 0.04320251, "memory(GiB)": 13.7, "step": 58225, "train_speed(iter/s)": 1.532904 }, { "acc": 0.98891945, "epoch": 27.29318022029529, "grad_norm": 4.0711774826049805, "learning_rate": 4.653761199985479e-06, "loss": 0.0388338, "memory(GiB)": 13.7, "step": 58230, "train_speed(iter/s)": 1.532907 }, { "acc": 0.98998508, "epoch": 27.295523787204125, "grad_norm": 2.0068483352661133, "learning_rate": 4.652987959168084e-06, "loss": 0.02320077, "memory(GiB)": 13.7, "step": 58235, "train_speed(iter/s)": 1.532917 }, { "acc": 0.97833328, "epoch": 27.29786735411296, "grad_norm": 0.8103562593460083, "learning_rate": 4.652214726703739e-06, "loss": 0.04487991, "memory(GiB)": 13.7, "step": 58240, "train_speed(iter/s)": 1.532921 }, { "acc": 0.98619041, "epoch": 27.300210921021794, "grad_norm": 6.085431098937988, "learning_rate": 4.651441502611026e-06, "loss": 0.04110172, "memory(GiB)": 13.7, "step": 58245, "train_speed(iter/s)": 1.532929 }, { "acc": 0.99375, "epoch": 27.30255448793063, "grad_norm": 1.0383729934692383, "learning_rate": 4.650668286908532e-06, "loss": 0.02481279, "memory(GiB)": 13.7, "step": 58250, "train_speed(iter/s)": 1.53293 }, { "acc": 0.9802084, "epoch": 27.304898054839466, "grad_norm": 7.710353374481201, "learning_rate": 4.649895079614842e-06, "loss": 0.08464187, "memory(GiB)": 13.7, "step": 58255, "train_speed(iter/s)": 1.532935 }, { "acc": 0.99321423, "epoch": 27.3072416217483, "grad_norm": 1.6376862525939941, "learning_rate": 4.649121880748545e-06, "loss": 0.03389531, "memory(GiB)": 13.7, "step": 58260, "train_speed(iter/s)": 1.532943 }, { "acc": 0.97866077, "epoch": 27.309585188657135, "grad_norm": 5.058478355407715, "learning_rate": 4.648348690328221e-06, "loss": 0.07123571, "memory(GiB)": 13.7, "step": 58265, "train_speed(iter/s)": 1.532944 }, { "acc": 0.98916664, "epoch": 27.311928755565972, "grad_norm": 4.746395587921143, "learning_rate": 4.64757550837246e-06, "loss": 0.03953222, "memory(GiB)": 13.7, "step": 58270, "train_speed(iter/s)": 1.532947 }, { "acc": 0.9875, "epoch": 27.314272322474807, "grad_norm": 3.6669185161590576, "learning_rate": 4.646802334899843e-06, "loss": 0.10468378, "memory(GiB)": 13.7, "step": 58275, "train_speed(iter/s)": 1.532952 }, { "acc": 0.9811554, "epoch": 27.31661588938364, "grad_norm": 2.8997342586517334, "learning_rate": 4.6460291699289574e-06, "loss": 0.05280471, "memory(GiB)": 13.7, "step": 58280, "train_speed(iter/s)": 1.532957 }, { "acc": 0.97659721, "epoch": 27.318959456292475, "grad_norm": 0.010967905633151531, "learning_rate": 4.6452560134783845e-06, "loss": 0.12691805, "memory(GiB)": 13.7, "step": 58285, "train_speed(iter/s)": 1.532959 }, { "acc": 0.97300606, "epoch": 27.321303023201313, "grad_norm": 0.014207328669726849, "learning_rate": 4.644482865566711e-06, "loss": 0.04775747, "memory(GiB)": 13.7, "step": 58290, "train_speed(iter/s)": 1.532966 }, { "acc": 0.97145834, "epoch": 27.323646590110148, "grad_norm": 2.5420360565185547, "learning_rate": 4.643709726212517e-06, "loss": 0.07383698, "memory(GiB)": 13.7, "step": 58295, "train_speed(iter/s)": 1.532965 }, { "acc": 0.97642365, "epoch": 27.325990157018982, "grad_norm": 2.730837821960449, "learning_rate": 4.642936595434392e-06, "loss": 0.09968456, "memory(GiB)": 13.7, "step": 58300, "train_speed(iter/s)": 1.532965 }, { "acc": 0.99437504, "epoch": 27.32833372392782, "grad_norm": 2.098609685897827, "learning_rate": 4.642163473250916e-06, "loss": 0.03680806, "memory(GiB)": 13.7, "step": 58305, "train_speed(iter/s)": 1.532961 }, { "acc": 0.9850893, "epoch": 27.330677290836654, "grad_norm": 5.576367378234863, "learning_rate": 4.641390359680672e-06, "loss": 0.04279977, "memory(GiB)": 13.7, "step": 58310, "train_speed(iter/s)": 1.532968 }, { "acc": 0.97979164, "epoch": 27.33302085774549, "grad_norm": 0.797684371471405, "learning_rate": 4.640617254742244e-06, "loss": 0.07829905, "memory(GiB)": 13.7, "step": 58315, "train_speed(iter/s)": 1.532977 }, { "acc": 0.98633928, "epoch": 27.335364424654323, "grad_norm": 2.9116835594177246, "learning_rate": 4.639844158454217e-06, "loss": 0.07349997, "memory(GiB)": 13.7, "step": 58320, "train_speed(iter/s)": 1.532985 }, { "acc": 0.99375, "epoch": 27.33770799156316, "grad_norm": 2.1244213581085205, "learning_rate": 4.639071070835171e-06, "loss": 0.02422252, "memory(GiB)": 13.7, "step": 58325, "train_speed(iter/s)": 1.532988 }, { "acc": 0.97332802, "epoch": 27.340051558471995, "grad_norm": 5.598813533782959, "learning_rate": 4.638297991903688e-06, "loss": 0.10870142, "memory(GiB)": 13.7, "step": 58330, "train_speed(iter/s)": 1.532994 }, { "acc": 1.0, "epoch": 27.34239512538083, "grad_norm": 1.420918345451355, "learning_rate": 4.6375249216783516e-06, "loss": 0.01804982, "memory(GiB)": 13.7, "step": 58335, "train_speed(iter/s)": 1.532997 }, { "acc": 0.9895834, "epoch": 27.344738692289663, "grad_norm": 1.9087347984313965, "learning_rate": 4.636751860177742e-06, "loss": 0.02016481, "memory(GiB)": 13.7, "step": 58340, "train_speed(iter/s)": 1.532997 }, { "acc": 0.9864584, "epoch": 27.3470822591985, "grad_norm": 2.1767122745513916, "learning_rate": 4.635978807420445e-06, "loss": 0.03841266, "memory(GiB)": 13.7, "step": 58345, "train_speed(iter/s)": 1.532995 }, { "acc": 0.96437492, "epoch": 27.349425826107336, "grad_norm": 8.86945915222168, "learning_rate": 4.635205763425039e-06, "loss": 0.09119146, "memory(GiB)": 13.7, "step": 58350, "train_speed(iter/s)": 1.532999 }, { "acc": 0.9856945, "epoch": 27.35176939301617, "grad_norm": 2.6616437435150146, "learning_rate": 4.634432728210105e-06, "loss": 0.06031606, "memory(GiB)": 13.7, "step": 58355, "train_speed(iter/s)": 1.532995 }, { "acc": 0.98234577, "epoch": 27.354112959925004, "grad_norm": 5.403968334197998, "learning_rate": 4.633659701794228e-06, "loss": 0.10254428, "memory(GiB)": 13.7, "step": 58360, "train_speed(iter/s)": 1.532997 }, { "acc": 0.97041664, "epoch": 27.356456526833842, "grad_norm": 5.628401279449463, "learning_rate": 4.632886684195987e-06, "loss": 0.07387208, "memory(GiB)": 13.7, "step": 58365, "train_speed(iter/s)": 1.532997 }, { "acc": 0.98083334, "epoch": 27.358800093742676, "grad_norm": 4.162131309509277, "learning_rate": 4.63211367543396e-06, "loss": 0.06461719, "memory(GiB)": 13.7, "step": 58370, "train_speed(iter/s)": 1.532999 }, { "acc": 0.97857151, "epoch": 27.36114366065151, "grad_norm": 3.4096179008483887, "learning_rate": 4.631340675526731e-06, "loss": 0.0643017, "memory(GiB)": 13.7, "step": 58375, "train_speed(iter/s)": 1.533002 }, { "acc": 0.98291664, "epoch": 27.36348722756035, "grad_norm": 4.666624546051025, "learning_rate": 4.63056768449288e-06, "loss": 0.03955592, "memory(GiB)": 13.7, "step": 58380, "train_speed(iter/s)": 1.53301 }, { "acc": 0.98808413, "epoch": 27.365830794469183, "grad_norm": 1.1847892999649048, "learning_rate": 4.629794702350986e-06, "loss": 0.04883853, "memory(GiB)": 13.7, "step": 58385, "train_speed(iter/s)": 1.533009 }, { "acc": 0.99406252, "epoch": 27.368174361378017, "grad_norm": 4.42960786819458, "learning_rate": 4.629021729119629e-06, "loss": 0.02397718, "memory(GiB)": 13.7, "step": 58390, "train_speed(iter/s)": 1.533013 }, { "acc": 0.99083328, "epoch": 27.37051792828685, "grad_norm": 3.4187405109405518, "learning_rate": 4.62824876481739e-06, "loss": 0.04366289, "memory(GiB)": 13.7, "step": 58395, "train_speed(iter/s)": 1.533017 }, { "acc": 0.98291664, "epoch": 27.37286149519569, "grad_norm": 1.8866980075836182, "learning_rate": 4.627475809462847e-06, "loss": 0.04285571, "memory(GiB)": 13.7, "step": 58400, "train_speed(iter/s)": 1.533019 }, { "acc": 0.96511364, "epoch": 27.375205062104524, "grad_norm": 2.539811611175537, "learning_rate": 4.626702863074581e-06, "loss": 0.13563869, "memory(GiB)": 13.7, "step": 58405, "train_speed(iter/s)": 1.533024 }, { "acc": 0.97937498, "epoch": 27.377548629013358, "grad_norm": 2.553629159927368, "learning_rate": 4.62592992567117e-06, "loss": 0.06538105, "memory(GiB)": 13.7, "step": 58410, "train_speed(iter/s)": 1.533025 }, { "acc": 0.97516031, "epoch": 27.379892195922192, "grad_norm": 2.5085160732269287, "learning_rate": 4.625156997271193e-06, "loss": 0.09647162, "memory(GiB)": 13.7, "step": 58415, "train_speed(iter/s)": 1.533035 }, { "acc": 0.98187504, "epoch": 27.38223576283103, "grad_norm": 6.769698143005371, "learning_rate": 4.624384077893228e-06, "loss": 0.04889119, "memory(GiB)": 13.7, "step": 58420, "train_speed(iter/s)": 1.533041 }, { "acc": 0.98576384, "epoch": 27.384579329739864, "grad_norm": 1.0455032587051392, "learning_rate": 4.623611167555855e-06, "loss": 0.03566549, "memory(GiB)": 13.7, "step": 58425, "train_speed(iter/s)": 1.53304 }, { "acc": 0.98023815, "epoch": 27.3869228966487, "grad_norm": 0.05285279080271721, "learning_rate": 4.6228382662776495e-06, "loss": 0.05474601, "memory(GiB)": 13.7, "step": 58430, "train_speed(iter/s)": 1.533045 }, { "acc": 0.99069939, "epoch": 27.389266463557533, "grad_norm": 2.0672237873077393, "learning_rate": 4.622065374077194e-06, "loss": 0.04800569, "memory(GiB)": 13.7, "step": 58435, "train_speed(iter/s)": 1.533047 }, { "acc": 0.9770834, "epoch": 27.39161003046637, "grad_norm": 0.000919781974516809, "learning_rate": 4.62129249097306e-06, "loss": 0.06097865, "memory(GiB)": 13.7, "step": 58440, "train_speed(iter/s)": 1.533052 }, { "acc": 0.99949999, "epoch": 27.393953597375205, "grad_norm": 1.870806336402893, "learning_rate": 4.620519616983832e-06, "loss": 0.0296896, "memory(GiB)": 13.7, "step": 58445, "train_speed(iter/s)": 1.533061 }, { "acc": 0.99323864, "epoch": 27.39629716428404, "grad_norm": 2.152770757675171, "learning_rate": 4.619746752128085e-06, "loss": 0.04368573, "memory(GiB)": 13.7, "step": 58450, "train_speed(iter/s)": 1.533059 }, { "acc": 0.9764904, "epoch": 27.398640731192877, "grad_norm": 4.078812599182129, "learning_rate": 4.618973896424393e-06, "loss": 0.07074464, "memory(GiB)": 13.7, "step": 58455, "train_speed(iter/s)": 1.533066 }, { "acc": 0.99618053, "epoch": 27.40098429810171, "grad_norm": 0.05918949097394943, "learning_rate": 4.618201049891335e-06, "loss": 0.02627233, "memory(GiB)": 13.7, "step": 58460, "train_speed(iter/s)": 1.533065 }, { "acc": 0.99125004, "epoch": 27.403327865010546, "grad_norm": 0.03632724657654762, "learning_rate": 4.6174282125474885e-06, "loss": 0.06712534, "memory(GiB)": 13.7, "step": 58465, "train_speed(iter/s)": 1.533068 }, { "acc": 0.99050598, "epoch": 27.40567143191938, "grad_norm": 0.0808875635266304, "learning_rate": 4.6166553844114295e-06, "loss": 0.02948163, "memory(GiB)": 13.7, "step": 58470, "train_speed(iter/s)": 1.533075 }, { "acc": 0.98180799, "epoch": 27.408014998828218, "grad_norm": 2.76243257522583, "learning_rate": 4.615882565501733e-06, "loss": 0.05761812, "memory(GiB)": 13.7, "step": 58475, "train_speed(iter/s)": 1.533075 }, { "acc": 0.98347473, "epoch": 27.410358565737052, "grad_norm": 3.1101434230804443, "learning_rate": 4.615109755836977e-06, "loss": 0.07196266, "memory(GiB)": 13.7, "step": 58480, "train_speed(iter/s)": 1.533077 }, { "acc": 0.98291664, "epoch": 27.412702132645887, "grad_norm": 7.215599060058594, "learning_rate": 4.6143369554357344e-06, "loss": 0.03812808, "memory(GiB)": 13.7, "step": 58485, "train_speed(iter/s)": 1.533076 }, { "acc": 0.97875004, "epoch": 27.41504569955472, "grad_norm": 3.6993629932403564, "learning_rate": 4.613564164316586e-06, "loss": 0.05271863, "memory(GiB)": 13.7, "step": 58490, "train_speed(iter/s)": 1.533082 }, { "acc": 1.0, "epoch": 27.41738926646356, "grad_norm": 2.8183393478393555, "learning_rate": 4.612791382498103e-06, "loss": 0.02118238, "memory(GiB)": 13.7, "step": 58495, "train_speed(iter/s)": 1.533079 }, { "acc": 0.97416668, "epoch": 27.419732833372393, "grad_norm": 5.481387615203857, "learning_rate": 4.61201860999886e-06, "loss": 0.09699598, "memory(GiB)": 13.7, "step": 58500, "train_speed(iter/s)": 1.53308 }, { "acc": 0.98500004, "epoch": 27.422076400281227, "grad_norm": 0.7861188054084778, "learning_rate": 4.611245846837434e-06, "loss": 0.03160114, "memory(GiB)": 13.7, "step": 58505, "train_speed(iter/s)": 1.533076 }, { "acc": 0.98403034, "epoch": 27.42441996719006, "grad_norm": 9.475790023803711, "learning_rate": 4.6104730930324e-06, "loss": 0.06932631, "memory(GiB)": 13.7, "step": 58510, "train_speed(iter/s)": 1.533086 }, { "acc": 0.9833334, "epoch": 27.4267635340989, "grad_norm": 10.51118278503418, "learning_rate": 4.6097003486023295e-06, "loss": 0.05590214, "memory(GiB)": 13.7, "step": 58515, "train_speed(iter/s)": 1.533089 }, { "acc": 0.99060135, "epoch": 27.429107101007734, "grad_norm": 0.0040580290369689465, "learning_rate": 4.6089276135658e-06, "loss": 0.04286305, "memory(GiB)": 13.7, "step": 58520, "train_speed(iter/s)": 1.53309 }, { "acc": 0.98309526, "epoch": 27.431450667916568, "grad_norm": 4.610063076019287, "learning_rate": 4.6081548879413845e-06, "loss": 0.06546148, "memory(GiB)": 13.7, "step": 58525, "train_speed(iter/s)": 1.533086 }, { "acc": 0.9869791, "epoch": 27.433794234825406, "grad_norm": 4.491144180297852, "learning_rate": 4.607382171747656e-06, "loss": 0.05629873, "memory(GiB)": 13.7, "step": 58530, "train_speed(iter/s)": 1.53309 }, { "acc": 0.96986265, "epoch": 27.43613780173424, "grad_norm": 2.696423292160034, "learning_rate": 4.60660946500319e-06, "loss": 0.08610605, "memory(GiB)": 13.7, "step": 58535, "train_speed(iter/s)": 1.533088 }, { "acc": 0.99208336, "epoch": 27.438481368643075, "grad_norm": 2.1415791511535645, "learning_rate": 4.605836767726557e-06, "loss": 0.04110177, "memory(GiB)": 13.7, "step": 58540, "train_speed(iter/s)": 1.533081 }, { "acc": 0.9788826, "epoch": 27.44082493555191, "grad_norm": 3.1746959686279297, "learning_rate": 4.605064079936331e-06, "loss": 0.07472798, "memory(GiB)": 13.7, "step": 58545, "train_speed(iter/s)": 1.533079 }, { "acc": 0.9895834, "epoch": 27.443168502460747, "grad_norm": 0.05951916053891182, "learning_rate": 4.604291401651087e-06, "loss": 0.04479033, "memory(GiB)": 13.7, "step": 58550, "train_speed(iter/s)": 1.533076 }, { "acc": 0.97959318, "epoch": 27.44551206936958, "grad_norm": 1.191590666770935, "learning_rate": 4.603518732889395e-06, "loss": 0.09607238, "memory(GiB)": 13.7, "step": 58555, "train_speed(iter/s)": 1.533077 }, { "acc": 0.99216728, "epoch": 27.447855636278415, "grad_norm": 2.57847261428833, "learning_rate": 4.602746073669828e-06, "loss": 0.02448266, "memory(GiB)": 13.7, "step": 58560, "train_speed(iter/s)": 1.53308 }, { "acc": 0.98356152, "epoch": 27.45019920318725, "grad_norm": 4.905375957489014, "learning_rate": 4.601973424010959e-06, "loss": 0.08355286, "memory(GiB)": 13.7, "step": 58565, "train_speed(iter/s)": 1.533082 }, { "acc": 0.9794445, "epoch": 27.452542770096088, "grad_norm": 3.262449026107788, "learning_rate": 4.60120078393136e-06, "loss": 0.06048841, "memory(GiB)": 13.7, "step": 58570, "train_speed(iter/s)": 1.533085 }, { "acc": 0.96928024, "epoch": 27.454886337004922, "grad_norm": 6.278398513793945, "learning_rate": 4.600428153449604e-06, "loss": 0.09903086, "memory(GiB)": 13.7, "step": 58575, "train_speed(iter/s)": 1.533088 }, { "acc": 0.97624998, "epoch": 27.457229903913756, "grad_norm": 3.031270742416382, "learning_rate": 4.599655532584259e-06, "loss": 0.04823485, "memory(GiB)": 13.7, "step": 58580, "train_speed(iter/s)": 1.533091 }, { "acc": 0.9802083, "epoch": 27.45957347082259, "grad_norm": 3.834664821624756, "learning_rate": 4.598882921353899e-06, "loss": 0.12875385, "memory(GiB)": 13.7, "step": 58585, "train_speed(iter/s)": 1.533089 }, { "acc": 0.98381405, "epoch": 27.46191703773143, "grad_norm": 0.9582579731941223, "learning_rate": 4.5981103197770925e-06, "loss": 0.07173999, "memory(GiB)": 13.7, "step": 58590, "train_speed(iter/s)": 1.53309 }, { "acc": 0.9895833, "epoch": 27.464260604640263, "grad_norm": 3.583065986633301, "learning_rate": 4.597337727872413e-06, "loss": 0.02350981, "memory(GiB)": 13.7, "step": 58595, "train_speed(iter/s)": 1.533096 }, { "acc": 0.99375, "epoch": 27.466604171549097, "grad_norm": 4.487730503082275, "learning_rate": 4.59656514565843e-06, "loss": 0.03449814, "memory(GiB)": 13.7, "step": 58600, "train_speed(iter/s)": 1.533095 }, { "acc": 0.98673706, "epoch": 27.46894773845793, "grad_norm": 1.5911927223205566, "learning_rate": 4.595792573153714e-06, "loss": 0.06629646, "memory(GiB)": 13.7, "step": 58605, "train_speed(iter/s)": 1.533091 }, { "acc": 0.99020834, "epoch": 27.47129130536677, "grad_norm": 1.5039421319961548, "learning_rate": 4.595020010376834e-06, "loss": 0.02309044, "memory(GiB)": 13.7, "step": 58610, "train_speed(iter/s)": 1.533086 }, { "acc": 0.9874054, "epoch": 27.473634872275603, "grad_norm": 7.327849864959717, "learning_rate": 4.594247457346363e-06, "loss": 0.04080803, "memory(GiB)": 13.7, "step": 58615, "train_speed(iter/s)": 1.533095 }, { "acc": 0.99129467, "epoch": 27.475978439184438, "grad_norm": 0.6411941647529602, "learning_rate": 4.593474914080867e-06, "loss": 0.02664742, "memory(GiB)": 13.7, "step": 58620, "train_speed(iter/s)": 1.533092 }, { "acc": 0.97354164, "epoch": 27.478322006093276, "grad_norm": 3.169003486633301, "learning_rate": 4.592702380598917e-06, "loss": 0.10850446, "memory(GiB)": 13.7, "step": 58625, "train_speed(iter/s)": 1.533097 }, { "acc": 0.99437504, "epoch": 27.48066557300211, "grad_norm": 3.684323310852051, "learning_rate": 4.591929856919082e-06, "loss": 0.01501015, "memory(GiB)": 13.7, "step": 58630, "train_speed(iter/s)": 1.533105 }, { "acc": 0.996875, "epoch": 27.483009139910944, "grad_norm": 2.622173547744751, "learning_rate": 4.591157343059931e-06, "loss": 0.05299774, "memory(GiB)": 13.7, "step": 58635, "train_speed(iter/s)": 1.533104 }, { "acc": 0.9833334, "epoch": 27.48535270681978, "grad_norm": 4.128764629364014, "learning_rate": 4.590384839040033e-06, "loss": 0.12164255, "memory(GiB)": 13.7, "step": 58640, "train_speed(iter/s)": 1.533111 }, { "acc": 0.990625, "epoch": 27.487696273728616, "grad_norm": 0.6839669346809387, "learning_rate": 4.5896123448779555e-06, "loss": 0.018595, "memory(GiB)": 13.7, "step": 58645, "train_speed(iter/s)": 1.533112 }, { "acc": 0.97550592, "epoch": 27.49003984063745, "grad_norm": 6.300889492034912, "learning_rate": 4.588839860592268e-06, "loss": 0.09220549, "memory(GiB)": 13.7, "step": 58650, "train_speed(iter/s)": 1.533119 }, { "acc": 0.99040184, "epoch": 27.492383407546285, "grad_norm": 0.05115024372935295, "learning_rate": 4.5880673862015365e-06, "loss": 0.03053376, "memory(GiB)": 13.7, "step": 58655, "train_speed(iter/s)": 1.533126 }, { "acc": 0.97333336, "epoch": 27.49472697445512, "grad_norm": 5.368299961090088, "learning_rate": 4.587294921724333e-06, "loss": 0.10106291, "memory(GiB)": 13.7, "step": 58660, "train_speed(iter/s)": 1.533138 }, { "acc": 0.97984772, "epoch": 27.497070541363957, "grad_norm": 5.895561218261719, "learning_rate": 4.58652246717922e-06, "loss": 0.08288602, "memory(GiB)": 13.7, "step": 58665, "train_speed(iter/s)": 1.533143 }, { "acc": 0.98154755, "epoch": 27.49941410827279, "grad_norm": 1.0053967237472534, "learning_rate": 4.5857500225847665e-06, "loss": 0.05430664, "memory(GiB)": 13.7, "step": 58670, "train_speed(iter/s)": 1.533145 }, { "acc": 0.99020834, "epoch": 27.501757675181626, "grad_norm": 5.998598098754883, "learning_rate": 4.584977587959541e-06, "loss": 0.05388628, "memory(GiB)": 13.7, "step": 58675, "train_speed(iter/s)": 1.533151 }, { "acc": 0.98624992, "epoch": 27.50410124209046, "grad_norm": 3.659071445465088, "learning_rate": 4.584205163322108e-06, "loss": 0.03474665, "memory(GiB)": 13.7, "step": 58680, "train_speed(iter/s)": 1.533156 }, { "acc": 0.9916667, "epoch": 27.506444808999298, "grad_norm": 4.420560359954834, "learning_rate": 4.583432748691036e-06, "loss": 0.05531401, "memory(GiB)": 13.7, "step": 58685, "train_speed(iter/s)": 1.533164 }, { "acc": 0.98633928, "epoch": 27.508788375908132, "grad_norm": 0.08565673977136612, "learning_rate": 4.58266034408489e-06, "loss": 0.03269032, "memory(GiB)": 13.7, "step": 58690, "train_speed(iter/s)": 1.53317 }, { "acc": 0.98151789, "epoch": 27.511131942816967, "grad_norm": 1.2016347646713257, "learning_rate": 4.581887949522238e-06, "loss": 0.06350839, "memory(GiB)": 13.7, "step": 58695, "train_speed(iter/s)": 1.533175 }, { "acc": 0.99404764, "epoch": 27.513475509725804, "grad_norm": 0.11050690710544586, "learning_rate": 4.581115565021645e-06, "loss": 0.01752647, "memory(GiB)": 13.7, "step": 58700, "train_speed(iter/s)": 1.533177 }, { "acc": 0.97776518, "epoch": 27.51581907663464, "grad_norm": 3.760230302810669, "learning_rate": 4.580343190601674e-06, "loss": 0.09774102, "memory(GiB)": 13.7, "step": 58705, "train_speed(iter/s)": 1.533184 }, { "acc": 0.96583338, "epoch": 27.518162643543473, "grad_norm": 8.813370704650879, "learning_rate": 4.579570826280892e-06, "loss": 0.0869776, "memory(GiB)": 13.7, "step": 58710, "train_speed(iter/s)": 1.533183 }, { "acc": 0.98258934, "epoch": 27.520506210452307, "grad_norm": 0.336623877286911, "learning_rate": 4.5787984720778665e-06, "loss": 0.09144417, "memory(GiB)": 13.7, "step": 58715, "train_speed(iter/s)": 1.53319 }, { "acc": 0.9833333, "epoch": 27.522849777361145, "grad_norm": 6.513248920440674, "learning_rate": 4.57802612801116e-06, "loss": 0.05285952, "memory(GiB)": 13.7, "step": 58720, "train_speed(iter/s)": 1.533198 }, { "acc": 0.9869463, "epoch": 27.52519334426998, "grad_norm": 2.5615463256835938, "learning_rate": 4.577253794099336e-06, "loss": 0.04708216, "memory(GiB)": 13.7, "step": 58725, "train_speed(iter/s)": 1.533205 }, { "acc": 0.98986111, "epoch": 27.527536911178814, "grad_norm": 0.7665647268295288, "learning_rate": 4.576481470360961e-06, "loss": 0.03165369, "memory(GiB)": 13.7, "step": 58730, "train_speed(iter/s)": 1.533211 }, { "acc": 0.97520838, "epoch": 27.529880478087648, "grad_norm": 2.094672918319702, "learning_rate": 4.575709156814597e-06, "loss": 0.04785706, "memory(GiB)": 13.7, "step": 58735, "train_speed(iter/s)": 1.533218 }, { "acc": 0.97210941, "epoch": 27.532224044996486, "grad_norm": 3.9428632259368896, "learning_rate": 4.574936853478811e-06, "loss": 0.06596597, "memory(GiB)": 13.7, "step": 58740, "train_speed(iter/s)": 1.53322 }, { "acc": 0.97541676, "epoch": 27.53456761190532, "grad_norm": 4.43266487121582, "learning_rate": 4.574164560372166e-06, "loss": 0.08459609, "memory(GiB)": 13.7, "step": 58745, "train_speed(iter/s)": 1.533225 }, { "acc": 0.97786713, "epoch": 27.536911178814155, "grad_norm": 2.798222541809082, "learning_rate": 4.573392277513224e-06, "loss": 0.06037552, "memory(GiB)": 13.7, "step": 58750, "train_speed(iter/s)": 1.533234 }, { "acc": 0.97488098, "epoch": 27.53925474572299, "grad_norm": 7.542962074279785, "learning_rate": 4.572620004920547e-06, "loss": 0.07118089, "memory(GiB)": 13.7, "step": 58755, "train_speed(iter/s)": 1.533234 }, { "acc": 0.99437504, "epoch": 27.541598312631827, "grad_norm": 5.229119300842285, "learning_rate": 4.571847742612699e-06, "loss": 0.03595126, "memory(GiB)": 13.7, "step": 58760, "train_speed(iter/s)": 1.53324 }, { "acc": 0.98188448, "epoch": 27.54394187954066, "grad_norm": 2.348154306411743, "learning_rate": 4.571075490608244e-06, "loss": 0.06139793, "memory(GiB)": 13.7, "step": 58765, "train_speed(iter/s)": 1.533248 }, { "acc": 0.98668652, "epoch": 27.546285446449495, "grad_norm": 6.280740261077881, "learning_rate": 4.5703032489257435e-06, "loss": 0.04210663, "memory(GiB)": 13.7, "step": 58770, "train_speed(iter/s)": 1.533246 }, { "acc": 0.9882143, "epoch": 27.54862901335833, "grad_norm": 1.9560424089431763, "learning_rate": 4.56953101758376e-06, "loss": 0.03609798, "memory(GiB)": 13.7, "step": 58775, "train_speed(iter/s)": 1.533253 }, { "acc": 0.98666668, "epoch": 27.550972580267167, "grad_norm": 4.070876598358154, "learning_rate": 4.568758796600854e-06, "loss": 0.07836231, "memory(GiB)": 13.7, "step": 58780, "train_speed(iter/s)": 1.533253 }, { "acc": 0.99375, "epoch": 27.553316147176, "grad_norm": 2.9196133613586426, "learning_rate": 4.56798658599559e-06, "loss": 0.03118501, "memory(GiB)": 13.7, "step": 58785, "train_speed(iter/s)": 1.533258 }, { "acc": 0.990625, "epoch": 27.555659714084836, "grad_norm": 1.810974359512329, "learning_rate": 4.567214385786526e-06, "loss": 0.04204056, "memory(GiB)": 13.7, "step": 58790, "train_speed(iter/s)": 1.533261 }, { "acc": 0.97145834, "epoch": 27.558003280993674, "grad_norm": 5.090307235717773, "learning_rate": 4.566442195992225e-06, "loss": 0.10332818, "memory(GiB)": 13.7, "step": 58795, "train_speed(iter/s)": 1.533266 }, { "acc": 0.99191284, "epoch": 27.560346847902508, "grad_norm": 2.862649440765381, "learning_rate": 4.565670016631248e-06, "loss": 0.03023344, "memory(GiB)": 13.7, "step": 58800, "train_speed(iter/s)": 1.533268 }, { "acc": 0.97765875, "epoch": 27.562690414811343, "grad_norm": 1.3619333505630493, "learning_rate": 4.564897847722155e-06, "loss": 0.06877456, "memory(GiB)": 13.7, "step": 58805, "train_speed(iter/s)": 1.533271 }, { "acc": 0.9901042, "epoch": 27.565033981720177, "grad_norm": 0.6705213189125061, "learning_rate": 4.564125689283505e-06, "loss": 0.03007944, "memory(GiB)": 13.7, "step": 58810, "train_speed(iter/s)": 1.533276 }, { "acc": 0.9927083, "epoch": 27.567377548629015, "grad_norm": 3.769118309020996, "learning_rate": 4.563353541333862e-06, "loss": 0.05179735, "memory(GiB)": 13.7, "step": 58815, "train_speed(iter/s)": 1.533272 }, { "acc": 0.98099213, "epoch": 27.56972111553785, "grad_norm": 3.4310548305511475, "learning_rate": 4.562581403891784e-06, "loss": 0.06153566, "memory(GiB)": 13.7, "step": 58820, "train_speed(iter/s)": 1.533275 }, { "acc": 0.97895832, "epoch": 27.572064682446683, "grad_norm": 3.5045204162597656, "learning_rate": 4.5618092769758305e-06, "loss": 0.06936539, "memory(GiB)": 13.7, "step": 58825, "train_speed(iter/s)": 1.533281 }, { "acc": 0.98207798, "epoch": 27.574408249355518, "grad_norm": 4.7311692237854, "learning_rate": 4.561037160604559e-06, "loss": 0.09945865, "memory(GiB)": 13.7, "step": 58830, "train_speed(iter/s)": 1.533286 }, { "acc": 0.98592262, "epoch": 27.576751816264355, "grad_norm": 0.9264413714408875, "learning_rate": 4.560265054796532e-06, "loss": 0.03612168, "memory(GiB)": 13.7, "step": 58835, "train_speed(iter/s)": 1.533277 }, { "acc": 0.97494049, "epoch": 27.57909538317319, "grad_norm": 3.1494851112365723, "learning_rate": 4.5594929595703044e-06, "loss": 0.058934, "memory(GiB)": 13.7, "step": 58840, "train_speed(iter/s)": 1.533281 }, { "acc": 0.98402786, "epoch": 27.581438950082024, "grad_norm": 3.298124313354492, "learning_rate": 4.5587208749444385e-06, "loss": 0.04630315, "memory(GiB)": 13.7, "step": 58845, "train_speed(iter/s)": 1.533281 }, { "acc": 0.98604164, "epoch": 27.58378251699086, "grad_norm": 3.3809022903442383, "learning_rate": 4.557948800937493e-06, "loss": 0.02730333, "memory(GiB)": 13.7, "step": 58850, "train_speed(iter/s)": 1.53328 }, { "acc": 0.99187498, "epoch": 27.586126083899696, "grad_norm": 0.0018849180778488517, "learning_rate": 4.557176737568022e-06, "loss": 0.02035255, "memory(GiB)": 13.7, "step": 58855, "train_speed(iter/s)": 1.533284 }, { "acc": 0.9757143, "epoch": 27.58846965080853, "grad_norm": 5.20713996887207, "learning_rate": 4.5564046848545865e-06, "loss": 0.0847769, "memory(GiB)": 13.7, "step": 58860, "train_speed(iter/s)": 1.533285 }, { "acc": 0.98458328, "epoch": 27.590813217717365, "grad_norm": 4.027787208557129, "learning_rate": 4.555632642815744e-06, "loss": 0.02695426, "memory(GiB)": 13.7, "step": 58865, "train_speed(iter/s)": 1.533287 }, { "acc": 0.9829546, "epoch": 27.593156784626203, "grad_norm": 0.07597050815820694, "learning_rate": 4.554860611470051e-06, "loss": 0.07223778, "memory(GiB)": 13.7, "step": 58870, "train_speed(iter/s)": 1.533295 }, { "acc": 0.9888195, "epoch": 27.595500351535037, "grad_norm": 0.4642236530780792, "learning_rate": 4.554088590836067e-06, "loss": 0.02719443, "memory(GiB)": 13.7, "step": 58875, "train_speed(iter/s)": 1.533295 }, { "acc": 0.98911705, "epoch": 27.59784391844387, "grad_norm": 2.1498911380767822, "learning_rate": 4.553316580932344e-06, "loss": 0.03143805, "memory(GiB)": 13.7, "step": 58880, "train_speed(iter/s)": 1.533301 }, { "acc": 0.97341347, "epoch": 27.600187485352706, "grad_norm": 4.718146800994873, "learning_rate": 4.5525445817774414e-06, "loss": 0.04415435, "memory(GiB)": 13.7, "step": 58885, "train_speed(iter/s)": 1.533303 }, { "acc": 0.97837801, "epoch": 27.602531052261543, "grad_norm": 7.086987018585205, "learning_rate": 4.5517725933899166e-06, "loss": 0.05820331, "memory(GiB)": 13.7, "step": 58890, "train_speed(iter/s)": 1.53331 }, { "acc": 0.996875, "epoch": 27.604874619170378, "grad_norm": 4.039907932281494, "learning_rate": 4.551000615788324e-06, "loss": 0.0128977, "memory(GiB)": 13.7, "step": 58895, "train_speed(iter/s)": 1.533312 }, { "acc": 0.98850269, "epoch": 27.607218186079212, "grad_norm": 3.2443950176239014, "learning_rate": 4.550228648991219e-06, "loss": 0.09381425, "memory(GiB)": 13.7, "step": 58900, "train_speed(iter/s)": 1.533311 }, { "acc": 0.9859375, "epoch": 27.609561752988046, "grad_norm": 1.0158993005752563, "learning_rate": 4.549456693017159e-06, "loss": 0.03926319, "memory(GiB)": 13.7, "step": 58905, "train_speed(iter/s)": 1.533314 }, { "acc": 0.97701387, "epoch": 27.611905319896884, "grad_norm": 1.9322566986083984, "learning_rate": 4.5486847478847e-06, "loss": 0.06814107, "memory(GiB)": 13.7, "step": 58910, "train_speed(iter/s)": 1.533315 }, { "acc": 0.98199406, "epoch": 27.61424888680572, "grad_norm": 2.0495665073394775, "learning_rate": 4.547912813612392e-06, "loss": 0.03842909, "memory(GiB)": 13.7, "step": 58915, "train_speed(iter/s)": 1.533323 }, { "acc": 0.9894886, "epoch": 27.616592453714553, "grad_norm": 3.3098649978637695, "learning_rate": 4.5471408902187945e-06, "loss": 0.04194155, "memory(GiB)": 13.7, "step": 58920, "train_speed(iter/s)": 1.533323 }, { "acc": 0.97950897, "epoch": 27.618936020623387, "grad_norm": 3.2403135299682617, "learning_rate": 4.546368977722461e-06, "loss": 0.05190022, "memory(GiB)": 13.7, "step": 58925, "train_speed(iter/s)": 1.533329 }, { "acc": 0.98708334, "epoch": 27.621279587532225, "grad_norm": 2.3394412994384766, "learning_rate": 4.545597076141943e-06, "loss": 0.04608898, "memory(GiB)": 13.7, "step": 58930, "train_speed(iter/s)": 1.533329 }, { "acc": 0.98374996, "epoch": 27.62362315444106, "grad_norm": 6.193129539489746, "learning_rate": 4.5448251854958e-06, "loss": 0.05446448, "memory(GiB)": 13.7, "step": 58935, "train_speed(iter/s)": 1.533331 }, { "acc": 0.9921875, "epoch": 27.625966721349894, "grad_norm": 3.165161609649658, "learning_rate": 4.54405330580258e-06, "loss": 0.03100535, "memory(GiB)": 13.7, "step": 58940, "train_speed(iter/s)": 1.533332 }, { "acc": 0.9869791, "epoch": 27.62831028825873, "grad_norm": 4.62983512878418, "learning_rate": 4.5432814370808405e-06, "loss": 0.05851548, "memory(GiB)": 13.7, "step": 58945, "train_speed(iter/s)": 1.53334 }, { "acc": 0.98898811, "epoch": 27.630653855167566, "grad_norm": 1.2342976331710815, "learning_rate": 4.5425095793491345e-06, "loss": 0.03833844, "memory(GiB)": 13.7, "step": 58950, "train_speed(iter/s)": 1.533345 }, { "acc": 0.97455349, "epoch": 27.6329974220764, "grad_norm": 0.9108947515487671, "learning_rate": 4.5417377326260126e-06, "loss": 0.06057968, "memory(GiB)": 13.7, "step": 58955, "train_speed(iter/s)": 1.533339 }, { "acc": 0.98059216, "epoch": 27.635340988985234, "grad_norm": 5.686983108520508, "learning_rate": 4.540965896930027e-06, "loss": 0.06502922, "memory(GiB)": 13.7, "step": 58960, "train_speed(iter/s)": 1.533344 }, { "acc": 0.98083334, "epoch": 27.637684555894072, "grad_norm": 4.712086200714111, "learning_rate": 4.5401940722797325e-06, "loss": 0.04722837, "memory(GiB)": 13.7, "step": 58965, "train_speed(iter/s)": 1.533345 }, { "acc": 0.97979164, "epoch": 27.640028122802907, "grad_norm": 2.772179126739502, "learning_rate": 4.5394222586936795e-06, "loss": 0.06043468, "memory(GiB)": 13.7, "step": 58970, "train_speed(iter/s)": 1.533352 }, { "acc": 0.99196434, "epoch": 27.64237168971174, "grad_norm": 2.7939751148223877, "learning_rate": 4.538650456190422e-06, "loss": 0.03170438, "memory(GiB)": 13.7, "step": 58975, "train_speed(iter/s)": 1.533358 }, { "acc": 0.98208332, "epoch": 27.644715256620575, "grad_norm": 4.438361644744873, "learning_rate": 4.53787866478851e-06, "loss": 0.04581762, "memory(GiB)": 13.7, "step": 58980, "train_speed(iter/s)": 1.533353 }, { "acc": 0.99875002, "epoch": 27.647058823529413, "grad_norm": 0.02138855680823326, "learning_rate": 4.537106884506494e-06, "loss": 0.00834778, "memory(GiB)": 13.7, "step": 58985, "train_speed(iter/s)": 1.533358 }, { "acc": 0.98770828, "epoch": 27.649402390438247, "grad_norm": 2.781322956085205, "learning_rate": 4.536335115362926e-06, "loss": 0.03534309, "memory(GiB)": 13.7, "step": 58990, "train_speed(iter/s)": 1.533362 }, { "acc": 0.97904758, "epoch": 27.65174595734708, "grad_norm": 0.35427147150039673, "learning_rate": 4.535563357376359e-06, "loss": 0.06077524, "memory(GiB)": 13.7, "step": 58995, "train_speed(iter/s)": 1.533368 }, { "acc": 0.98758011, "epoch": 27.654089524255916, "grad_norm": 5.0550384521484375, "learning_rate": 4.53479161056534e-06, "loss": 0.04260367, "memory(GiB)": 13.7, "step": 59000, "train_speed(iter/s)": 1.533371 }, { "acc": 0.97312508, "epoch": 27.656433091164754, "grad_norm": 3.677239418029785, "learning_rate": 4.534019874948421e-06, "loss": 0.08269974, "memory(GiB)": 13.7, "step": 59005, "train_speed(iter/s)": 1.533376 }, { "acc": 0.9831399, "epoch": 27.658776658073588, "grad_norm": 1.4035823345184326, "learning_rate": 4.533248150544152e-06, "loss": 0.07842765, "memory(GiB)": 13.7, "step": 59010, "train_speed(iter/s)": 1.533378 }, { "acc": 0.99308605, "epoch": 27.661120224982422, "grad_norm": 2.193145751953125, "learning_rate": 4.53247643737108e-06, "loss": 0.06045569, "memory(GiB)": 13.7, "step": 59015, "train_speed(iter/s)": 1.533382 }, { "acc": 0.98976631, "epoch": 27.66346379189126, "grad_norm": 0.00966494157910347, "learning_rate": 4.53170473544776e-06, "loss": 0.04764689, "memory(GiB)": 13.7, "step": 59020, "train_speed(iter/s)": 1.533385 }, { "acc": 0.98187504, "epoch": 27.665807358800095, "grad_norm": 6.16832971572876, "learning_rate": 4.530933044792737e-06, "loss": 0.08054055, "memory(GiB)": 13.7, "step": 59025, "train_speed(iter/s)": 1.533386 }, { "acc": 0.98395834, "epoch": 27.66815092570893, "grad_norm": 0.6947839856147766, "learning_rate": 4.53016136542456e-06, "loss": 0.07849985, "memory(GiB)": 13.7, "step": 59030, "train_speed(iter/s)": 1.533389 }, { "acc": 0.98395834, "epoch": 27.670494492617763, "grad_norm": 4.544286727905273, "learning_rate": 4.529389697361781e-06, "loss": 0.07652012, "memory(GiB)": 13.7, "step": 59035, "train_speed(iter/s)": 1.533385 }, { "acc": 0.98206844, "epoch": 27.6728380595266, "grad_norm": 1.5053976774215698, "learning_rate": 4.528618040622945e-06, "loss": 0.06323323, "memory(GiB)": 13.7, "step": 59040, "train_speed(iter/s)": 1.533391 }, { "acc": 0.99072914, "epoch": 27.675181626435435, "grad_norm": 0.6954430341720581, "learning_rate": 4.527846395226599e-06, "loss": 0.02443507, "memory(GiB)": 13.7, "step": 59045, "train_speed(iter/s)": 1.533398 }, { "acc": 0.97645836, "epoch": 27.67752519334427, "grad_norm": 3.6346776485443115, "learning_rate": 4.527074761191295e-06, "loss": 0.10409883, "memory(GiB)": 13.7, "step": 59050, "train_speed(iter/s)": 1.533401 }, { "acc": 0.98374462, "epoch": 27.679868760253104, "grad_norm": 1.4778153896331787, "learning_rate": 4.5263031385355784e-06, "loss": 0.03682505, "memory(GiB)": 13.7, "step": 59055, "train_speed(iter/s)": 1.533401 }, { "acc": 0.98514881, "epoch": 27.682212327161942, "grad_norm": 0.11485720425844193, "learning_rate": 4.525531527277995e-06, "loss": 0.05031192, "memory(GiB)": 13.7, "step": 59060, "train_speed(iter/s)": 1.533392 }, { "acc": 0.97270298, "epoch": 27.684555894070776, "grad_norm": 5.843075275421143, "learning_rate": 4.524759927437094e-06, "loss": 0.13593088, "memory(GiB)": 13.7, "step": 59065, "train_speed(iter/s)": 1.533401 }, { "acc": 0.9828125, "epoch": 27.68689946097961, "grad_norm": 0.6048439741134644, "learning_rate": 4.523988339031422e-06, "loss": 0.04882432, "memory(GiB)": 13.7, "step": 59070, "train_speed(iter/s)": 1.533405 }, { "acc": 0.98500004, "epoch": 27.689243027888445, "grad_norm": 6.233067035675049, "learning_rate": 4.5232167620795235e-06, "loss": 0.0630336, "memory(GiB)": 13.7, "step": 59075, "train_speed(iter/s)": 1.533406 }, { "acc": 0.984375, "epoch": 27.691586594797283, "grad_norm": 4.028061866760254, "learning_rate": 4.5224451965999485e-06, "loss": 0.05110009, "memory(GiB)": 13.7, "step": 59080, "train_speed(iter/s)": 1.53341 }, { "acc": 0.98135414, "epoch": 27.693930161706117, "grad_norm": 5.697128772735596, "learning_rate": 4.5216736426112395e-06, "loss": 0.10930525, "memory(GiB)": 13.7, "step": 59085, "train_speed(iter/s)": 1.533408 }, { "acc": 0.996875, "epoch": 27.69627372861495, "grad_norm": 0.005888941697776318, "learning_rate": 4.520902100131941e-06, "loss": 0.02095789, "memory(GiB)": 13.7, "step": 59090, "train_speed(iter/s)": 1.533411 }, { "acc": 0.99236107, "epoch": 27.698617295523785, "grad_norm": 5.126645565032959, "learning_rate": 4.520130569180602e-06, "loss": 0.03065811, "memory(GiB)": 13.7, "step": 59095, "train_speed(iter/s)": 1.533414 }, { "acc": 0.97592258, "epoch": 27.700960862432623, "grad_norm": 6.229861736297607, "learning_rate": 4.5193590497757664e-06, "loss": 0.09670802, "memory(GiB)": 13.7, "step": 59100, "train_speed(iter/s)": 1.533415 }, { "acc": 0.9767992, "epoch": 27.703304429341458, "grad_norm": 10.328436851501465, "learning_rate": 4.518587541935978e-06, "loss": 0.0807067, "memory(GiB)": 13.7, "step": 59105, "train_speed(iter/s)": 1.533417 }, { "acc": 0.97986107, "epoch": 27.705647996250292, "grad_norm": 0.7494180202484131, "learning_rate": 4.517816045679782e-06, "loss": 0.05192775, "memory(GiB)": 13.7, "step": 59110, "train_speed(iter/s)": 1.53342 }, { "acc": 0.98937502, "epoch": 27.70799156315913, "grad_norm": 0.28713342547416687, "learning_rate": 4.517044561025721e-06, "loss": 0.03552327, "memory(GiB)": 13.7, "step": 59115, "train_speed(iter/s)": 1.533423 }, { "acc": 0.97645836, "epoch": 27.710335130067964, "grad_norm": 0.5556022524833679, "learning_rate": 4.516273087992344e-06, "loss": 0.08226002, "memory(GiB)": 13.7, "step": 59120, "train_speed(iter/s)": 1.533424 }, { "acc": 0.990625, "epoch": 27.7126786969768, "grad_norm": 3.1924068927764893, "learning_rate": 4.515501626598189e-06, "loss": 0.02799856, "memory(GiB)": 13.7, "step": 59125, "train_speed(iter/s)": 1.533426 }, { "acc": 0.97868309, "epoch": 27.715022263885633, "grad_norm": 3.555154800415039, "learning_rate": 4.514730176861802e-06, "loss": 0.0536449, "memory(GiB)": 13.7, "step": 59130, "train_speed(iter/s)": 1.533431 }, { "acc": 0.97270832, "epoch": 27.71736583079447, "grad_norm": 0.9028953313827515, "learning_rate": 4.513958738801724e-06, "loss": 0.09878949, "memory(GiB)": 13.7, "step": 59135, "train_speed(iter/s)": 1.533436 }, { "acc": 0.98055553, "epoch": 27.719709397703305, "grad_norm": 4.645018100738525, "learning_rate": 4.5131873124365015e-06, "loss": 0.05350865, "memory(GiB)": 13.7, "step": 59140, "train_speed(iter/s)": 1.533443 }, { "acc": 0.98467264, "epoch": 27.72205296461214, "grad_norm": 2.018911123275757, "learning_rate": 4.512415897784674e-06, "loss": 0.04472774, "memory(GiB)": 13.7, "step": 59145, "train_speed(iter/s)": 1.533444 }, { "acc": 0.97847223, "epoch": 27.724396531520973, "grad_norm": 4.579296112060547, "learning_rate": 4.511644494864785e-06, "loss": 0.09131252, "memory(GiB)": 13.7, "step": 59150, "train_speed(iter/s)": 1.533445 }, { "acc": 0.9958334, "epoch": 27.72674009842981, "grad_norm": 2.4917521476745605, "learning_rate": 4.510873103695377e-06, "loss": 0.0353462, "memory(GiB)": 13.7, "step": 59155, "train_speed(iter/s)": 1.533439 }, { "acc": 0.97937498, "epoch": 27.729083665338646, "grad_norm": 6.24687385559082, "learning_rate": 4.510101724294992e-06, "loss": 0.05291387, "memory(GiB)": 13.7, "step": 59160, "train_speed(iter/s)": 1.53344 }, { "acc": 0.98774672, "epoch": 27.73142723224748, "grad_norm": 2.519057035446167, "learning_rate": 4.509330356682168e-06, "loss": 0.04940731, "memory(GiB)": 13.7, "step": 59165, "train_speed(iter/s)": 1.533447 }, { "acc": 0.98312502, "epoch": 27.733770799156314, "grad_norm": 2.226945638656616, "learning_rate": 4.50855900087545e-06, "loss": 0.04850361, "memory(GiB)": 13.7, "step": 59170, "train_speed(iter/s)": 1.53345 }, { "acc": 0.98830357, "epoch": 27.736114366065152, "grad_norm": 1.1065305471420288, "learning_rate": 4.507787656893378e-06, "loss": 0.02280914, "memory(GiB)": 13.7, "step": 59175, "train_speed(iter/s)": 1.533455 }, { "acc": 0.97354164, "epoch": 27.738457932973986, "grad_norm": 5.444204807281494, "learning_rate": 4.50701632475449e-06, "loss": 0.06101016, "memory(GiB)": 13.7, "step": 59180, "train_speed(iter/s)": 1.533467 }, { "acc": 0.97167988, "epoch": 27.74080149988282, "grad_norm": 5.334790229797363, "learning_rate": 4.50624500447733e-06, "loss": 0.07636209, "memory(GiB)": 13.7, "step": 59185, "train_speed(iter/s)": 1.533471 }, { "acc": 0.98675594, "epoch": 27.74314506679166, "grad_norm": 3.1052772998809814, "learning_rate": 4.505473696080435e-06, "loss": 0.07391583, "memory(GiB)": 13.7, "step": 59190, "train_speed(iter/s)": 1.533472 }, { "acc": 0.97304382, "epoch": 27.745488633700493, "grad_norm": 2.4442389011383057, "learning_rate": 4.504702399582346e-06, "loss": 0.07270486, "memory(GiB)": 13.7, "step": 59195, "train_speed(iter/s)": 1.533473 }, { "acc": 0.97748508, "epoch": 27.747832200609327, "grad_norm": 3.587937116622925, "learning_rate": 4.503931115001604e-06, "loss": 0.07807977, "memory(GiB)": 13.7, "step": 59200, "train_speed(iter/s)": 1.533475 }, { "acc": 0.98145828, "epoch": 27.75017576751816, "grad_norm": 4.32978630065918, "learning_rate": 4.5031598423567476e-06, "loss": 0.05679254, "memory(GiB)": 13.7, "step": 59205, "train_speed(iter/s)": 1.533476 }, { "acc": 0.99611111, "epoch": 27.752519334427, "grad_norm": 2.5127899646759033, "learning_rate": 4.502388581666313e-06, "loss": 0.03840696, "memory(GiB)": 13.7, "step": 59210, "train_speed(iter/s)": 1.53348 }, { "acc": 0.98135414, "epoch": 27.754862901335834, "grad_norm": 6.565942287445068, "learning_rate": 4.50161733294884e-06, "loss": 0.05476724, "memory(GiB)": 13.7, "step": 59215, "train_speed(iter/s)": 1.533479 }, { "acc": 0.98076382, "epoch": 27.757206468244668, "grad_norm": 2.907951593399048, "learning_rate": 4.500846096222868e-06, "loss": 0.03640278, "memory(GiB)": 13.7, "step": 59220, "train_speed(iter/s)": 1.533485 }, { "acc": 0.98562498, "epoch": 27.759550035153502, "grad_norm": 0.00044182955753058195, "learning_rate": 4.500074871506934e-06, "loss": 0.02904823, "memory(GiB)": 13.7, "step": 59225, "train_speed(iter/s)": 1.533486 }, { "acc": 0.98053036, "epoch": 27.76189360206234, "grad_norm": 8.495162010192871, "learning_rate": 4.499303658819577e-06, "loss": 0.06248523, "memory(GiB)": 13.7, "step": 59230, "train_speed(iter/s)": 1.533487 }, { "acc": 0.97113094, "epoch": 27.764237168971174, "grad_norm": 7.435680866241455, "learning_rate": 4.498532458179332e-06, "loss": 0.09355435, "memory(GiB)": 13.7, "step": 59235, "train_speed(iter/s)": 1.533494 }, { "acc": 0.990625, "epoch": 27.76658073588001, "grad_norm": 2.5174355506896973, "learning_rate": 4.497761269604739e-06, "loss": 0.0280785, "memory(GiB)": 13.7, "step": 59240, "train_speed(iter/s)": 1.533501 }, { "acc": 0.96826305, "epoch": 27.768924302788843, "grad_norm": 9.667747497558594, "learning_rate": 4.496990093114334e-06, "loss": 0.13462064, "memory(GiB)": 13.7, "step": 59245, "train_speed(iter/s)": 1.533507 }, { "acc": 1.0, "epoch": 27.77126786969768, "grad_norm": 2.2994115352630615, "learning_rate": 4.496218928726652e-06, "loss": 0.01445013, "memory(GiB)": 13.7, "step": 59250, "train_speed(iter/s)": 1.533513 }, { "acc": 0.98842258, "epoch": 27.773611436606515, "grad_norm": 4.2183332443237305, "learning_rate": 4.495447776460229e-06, "loss": 0.03270999, "memory(GiB)": 13.7, "step": 59255, "train_speed(iter/s)": 1.533513 }, { "acc": 0.98043728, "epoch": 27.77595500351535, "grad_norm": 1.8844735622406006, "learning_rate": 4.494676636333604e-06, "loss": 0.07875713, "memory(GiB)": 13.7, "step": 59260, "train_speed(iter/s)": 1.533517 }, { "acc": 0.97937508, "epoch": 27.778298570424184, "grad_norm": 1.0851200819015503, "learning_rate": 4.49390550836531e-06, "loss": 0.05631753, "memory(GiB)": 13.7, "step": 59265, "train_speed(iter/s)": 1.533513 }, { "acc": 0.97292614, "epoch": 27.78064213733302, "grad_norm": 0.7700355052947998, "learning_rate": 4.493134392573884e-06, "loss": 0.09363769, "memory(GiB)": 13.7, "step": 59270, "train_speed(iter/s)": 1.533519 }, { "acc": 0.99050598, "epoch": 27.782985704241856, "grad_norm": 2.7732062339782715, "learning_rate": 4.49236328897786e-06, "loss": 0.0391681, "memory(GiB)": 13.7, "step": 59275, "train_speed(iter/s)": 1.533518 }, { "acc": 0.9958334, "epoch": 27.78532927115069, "grad_norm": 0.21377216279506683, "learning_rate": 4.491592197595772e-06, "loss": 0.02902963, "memory(GiB)": 13.7, "step": 59280, "train_speed(iter/s)": 1.533524 }, { "acc": 0.97465277, "epoch": 27.787672838059528, "grad_norm": 1.1008554697036743, "learning_rate": 4.490821118446158e-06, "loss": 0.05250472, "memory(GiB)": 13.7, "step": 59285, "train_speed(iter/s)": 1.533525 }, { "acc": 0.97989578, "epoch": 27.790016404968362, "grad_norm": 3.075873851776123, "learning_rate": 4.490050051547549e-06, "loss": 0.04496567, "memory(GiB)": 13.7, "step": 59290, "train_speed(iter/s)": 1.533526 }, { "acc": 0.99125004, "epoch": 27.792359971877197, "grad_norm": 1.035305380821228, "learning_rate": 4.489278996918479e-06, "loss": 0.04208837, "memory(GiB)": 13.7, "step": 59295, "train_speed(iter/s)": 1.533529 }, { "acc": 0.98189487, "epoch": 27.79470353878603, "grad_norm": 2.4504218101501465, "learning_rate": 4.4885079545774835e-06, "loss": 0.06368043, "memory(GiB)": 13.7, "step": 59300, "train_speed(iter/s)": 1.533531 }, { "acc": 0.97791672, "epoch": 27.79704710569487, "grad_norm": 2.9323489665985107, "learning_rate": 4.487736924543094e-06, "loss": 0.05862705, "memory(GiB)": 13.7, "step": 59305, "train_speed(iter/s)": 1.533535 }, { "acc": 0.97319937, "epoch": 27.799390672603703, "grad_norm": 1.9695298671722412, "learning_rate": 4.486965906833844e-06, "loss": 0.09282629, "memory(GiB)": 13.7, "step": 59310, "train_speed(iter/s)": 1.533538 }, { "acc": 0.98666124, "epoch": 27.801734239512538, "grad_norm": 7.369495868682861, "learning_rate": 4.486194901468267e-06, "loss": 0.04210554, "memory(GiB)": 13.7, "step": 59315, "train_speed(iter/s)": 1.533537 }, { "acc": 0.97101192, "epoch": 27.804077806421372, "grad_norm": 4.117796421051025, "learning_rate": 4.485423908464895e-06, "loss": 0.12108151, "memory(GiB)": 13.7, "step": 59320, "train_speed(iter/s)": 1.533547 }, { "acc": 0.98738098, "epoch": 27.80642137333021, "grad_norm": 1.562599539756775, "learning_rate": 4.4846529278422595e-06, "loss": 0.04931083, "memory(GiB)": 13.7, "step": 59325, "train_speed(iter/s)": 1.533554 }, { "acc": 0.97166672, "epoch": 27.808764940239044, "grad_norm": 5.080496788024902, "learning_rate": 4.4838819596188955e-06, "loss": 0.06853732, "memory(GiB)": 13.7, "step": 59330, "train_speed(iter/s)": 1.533558 }, { "acc": 0.99208336, "epoch": 27.81110850714788, "grad_norm": 1.4647905826568604, "learning_rate": 4.48311100381333e-06, "loss": 0.03528081, "memory(GiB)": 13.7, "step": 59335, "train_speed(iter/s)": 1.533568 }, { "acc": 0.9697917, "epoch": 27.813452074056713, "grad_norm": 6.325583457946777, "learning_rate": 4.4823400604440955e-06, "loss": 0.08667867, "memory(GiB)": 13.7, "step": 59340, "train_speed(iter/s)": 1.533578 }, { "acc": 0.99437504, "epoch": 27.81579564096555, "grad_norm": 0.03802911192178726, "learning_rate": 4.481569129529724e-06, "loss": 0.01140064, "memory(GiB)": 13.7, "step": 59345, "train_speed(iter/s)": 1.533579 }, { "acc": 0.97660255, "epoch": 27.818139207874385, "grad_norm": 0.30847370624542236, "learning_rate": 4.480798211088747e-06, "loss": 0.04238376, "memory(GiB)": 13.7, "step": 59350, "train_speed(iter/s)": 1.533584 }, { "acc": 0.97937498, "epoch": 27.82048277478322, "grad_norm": 2.4621291160583496, "learning_rate": 4.4800273051396915e-06, "loss": 0.05220612, "memory(GiB)": 13.7, "step": 59355, "train_speed(iter/s)": 1.533591 }, { "acc": 0.98175602, "epoch": 27.822826341692057, "grad_norm": 1.1695964336395264, "learning_rate": 4.479256411701091e-06, "loss": 0.05216663, "memory(GiB)": 13.7, "step": 59360, "train_speed(iter/s)": 1.533596 }, { "acc": 0.96947374, "epoch": 27.82516990860089, "grad_norm": 7.971511363983154, "learning_rate": 4.478485530791474e-06, "loss": 0.13388004, "memory(GiB)": 13.7, "step": 59365, "train_speed(iter/s)": 1.533603 }, { "acc": 0.97245045, "epoch": 27.827513475509726, "grad_norm": 1.7273753881454468, "learning_rate": 4.477714662429371e-06, "loss": 0.08473868, "memory(GiB)": 13.7, "step": 59370, "train_speed(iter/s)": 1.533606 }, { "acc": 0.98145828, "epoch": 27.82985704241856, "grad_norm": 6.270238399505615, "learning_rate": 4.476943806633309e-06, "loss": 0.04749662, "memory(GiB)": 13.7, "step": 59375, "train_speed(iter/s)": 1.533608 }, { "acc": 0.98487186, "epoch": 27.832200609327398, "grad_norm": 4.1897053718566895, "learning_rate": 4.476172963421817e-06, "loss": 0.06222236, "memory(GiB)": 13.7, "step": 59380, "train_speed(iter/s)": 1.533616 }, { "acc": 0.97566967, "epoch": 27.834544176236232, "grad_norm": 7.359073162078857, "learning_rate": 4.475402132813425e-06, "loss": 0.07707448, "memory(GiB)": 13.7, "step": 59385, "train_speed(iter/s)": 1.533621 }, { "acc": 0.98224201, "epoch": 27.836887743145066, "grad_norm": 2.02929425239563, "learning_rate": 4.47463131482666e-06, "loss": 0.09835426, "memory(GiB)": 13.7, "step": 59390, "train_speed(iter/s)": 1.533621 }, { "acc": 0.9895833, "epoch": 27.8392313100539, "grad_norm": 2.4449684619903564, "learning_rate": 4.473860509480053e-06, "loss": 0.04225692, "memory(GiB)": 13.7, "step": 59395, "train_speed(iter/s)": 1.533627 }, { "acc": 0.98145828, "epoch": 27.84157487696274, "grad_norm": 2.121058940887451, "learning_rate": 4.473089716792126e-06, "loss": 0.04525871, "memory(GiB)": 13.7, "step": 59400, "train_speed(iter/s)": 1.533628 }, { "acc": 0.98708324, "epoch": 27.843918443871573, "grad_norm": 0.8044453859329224, "learning_rate": 4.472318936781412e-06, "loss": 0.04143335, "memory(GiB)": 13.7, "step": 59405, "train_speed(iter/s)": 1.53363 }, { "acc": 0.98708334, "epoch": 27.846262010780407, "grad_norm": 2.889882802963257, "learning_rate": 4.471548169466434e-06, "loss": 0.03074338, "memory(GiB)": 13.7, "step": 59410, "train_speed(iter/s)": 1.533633 }, { "acc": 0.98703365, "epoch": 27.84860557768924, "grad_norm": 1.5873548984527588, "learning_rate": 4.470777414865722e-06, "loss": 0.05183386, "memory(GiB)": 13.7, "step": 59415, "train_speed(iter/s)": 1.533628 }, { "acc": 0.98492565, "epoch": 27.85094914459808, "grad_norm": 1.9883421659469604, "learning_rate": 4.470006672997799e-06, "loss": 0.04740688, "memory(GiB)": 13.7, "step": 59420, "train_speed(iter/s)": 1.533636 }, { "acc": 0.98498545, "epoch": 27.853292711506914, "grad_norm": 0.9718759655952454, "learning_rate": 4.469235943881194e-06, "loss": 0.04356004, "memory(GiB)": 13.7, "step": 59425, "train_speed(iter/s)": 1.533642 }, { "acc": 0.96930561, "epoch": 27.855636278415748, "grad_norm": 1.4293973445892334, "learning_rate": 4.46846522753443e-06, "loss": 0.04475828, "memory(GiB)": 13.7, "step": 59430, "train_speed(iter/s)": 1.533647 }, { "acc": 0.97213545, "epoch": 27.857979845324586, "grad_norm": 2.1703884601593018, "learning_rate": 4.467694523976035e-06, "loss": 0.06685821, "memory(GiB)": 13.7, "step": 59435, "train_speed(iter/s)": 1.533651 }, { "acc": 0.97630177, "epoch": 27.86032341223342, "grad_norm": 0.0301241222769022, "learning_rate": 4.466923833224533e-06, "loss": 0.07000872, "memory(GiB)": 13.7, "step": 59440, "train_speed(iter/s)": 1.533652 }, { "acc": 0.98110571, "epoch": 27.862666979142254, "grad_norm": 1.9157909154891968, "learning_rate": 4.46615315529845e-06, "loss": 0.09034195, "memory(GiB)": 13.7, "step": 59445, "train_speed(iter/s)": 1.533655 }, { "acc": 1.0, "epoch": 27.86501054605109, "grad_norm": 0.26416006684303284, "learning_rate": 4.465382490216309e-06, "loss": 0.02995999, "memory(GiB)": 13.7, "step": 59450, "train_speed(iter/s)": 1.533657 }, { "acc": 0.98800592, "epoch": 27.867354112959926, "grad_norm": 3.344947099685669, "learning_rate": 4.464611837996636e-06, "loss": 0.03189915, "memory(GiB)": 13.7, "step": 59455, "train_speed(iter/s)": 1.533663 }, { "acc": 0.97904758, "epoch": 27.86969767986876, "grad_norm": 4.423295974731445, "learning_rate": 4.4638411986579516e-06, "loss": 0.06914074, "memory(GiB)": 13.7, "step": 59460, "train_speed(iter/s)": 1.533675 }, { "acc": 0.96511364, "epoch": 27.872041246777595, "grad_norm": 6.084482192993164, "learning_rate": 4.463070572218784e-06, "loss": 0.0932312, "memory(GiB)": 13.7, "step": 59465, "train_speed(iter/s)": 1.533682 }, { "acc": 0.99146175, "epoch": 27.87438481368643, "grad_norm": 5.109735488891602, "learning_rate": 4.462299958697652e-06, "loss": 0.03647756, "memory(GiB)": 13.7, "step": 59470, "train_speed(iter/s)": 1.533691 }, { "acc": 0.98194942, "epoch": 27.876728380595267, "grad_norm": 1.8318426609039307, "learning_rate": 4.461529358113083e-06, "loss": 0.06876634, "memory(GiB)": 13.7, "step": 59475, "train_speed(iter/s)": 1.533696 }, { "acc": 0.97770834, "epoch": 27.8790719475041, "grad_norm": 0.03465458005666733, "learning_rate": 4.460758770483596e-06, "loss": 0.0409718, "memory(GiB)": 13.7, "step": 59480, "train_speed(iter/s)": 1.533699 }, { "acc": 0.97027779, "epoch": 27.881415514412936, "grad_norm": 3.988490581512451, "learning_rate": 4.459988195827715e-06, "loss": 0.07187301, "memory(GiB)": 13.7, "step": 59485, "train_speed(iter/s)": 1.533705 }, { "acc": 0.97041664, "epoch": 27.88375908132177, "grad_norm": 4.137472152709961, "learning_rate": 4.459217634163963e-06, "loss": 0.08456245, "memory(GiB)": 13.7, "step": 59490, "train_speed(iter/s)": 1.533712 }, { "acc": 0.98571434, "epoch": 27.886102648230608, "grad_norm": 5.5694990158081055, "learning_rate": 4.458447085510862e-06, "loss": 0.09808471, "memory(GiB)": 13.7, "step": 59495, "train_speed(iter/s)": 1.533714 }, { "acc": 0.99437504, "epoch": 27.888446215139442, "grad_norm": 1.0926307439804077, "learning_rate": 4.45767654988693e-06, "loss": 0.01075128, "memory(GiB)": 13.7, "step": 59500, "train_speed(iter/s)": 1.533717 }, { "acc": 0.98836803, "epoch": 27.890789782048277, "grad_norm": 4.4377288818359375, "learning_rate": 4.456906027310691e-06, "loss": 0.03921355, "memory(GiB)": 13.7, "step": 59505, "train_speed(iter/s)": 1.533714 }, { "acc": 0.97979164, "epoch": 27.893133348957114, "grad_norm": 5.550501823425293, "learning_rate": 4.456135517800666e-06, "loss": 0.05738055, "memory(GiB)": 13.7, "step": 59510, "train_speed(iter/s)": 1.533712 }, { "acc": 0.98133926, "epoch": 27.89547691586595, "grad_norm": 1.6518642902374268, "learning_rate": 4.4553650213753735e-06, "loss": 0.0599758, "memory(GiB)": 13.7, "step": 59515, "train_speed(iter/s)": 1.533712 }, { "acc": 0.97312498, "epoch": 27.897820482774783, "grad_norm": 0.010151972994208336, "learning_rate": 4.454594538053336e-06, "loss": 0.07258851, "memory(GiB)": 13.7, "step": 59520, "train_speed(iter/s)": 1.53372 }, { "acc": 0.98202381, "epoch": 27.900164049683617, "grad_norm": 4.823441028594971, "learning_rate": 4.453824067853072e-06, "loss": 0.05054934, "memory(GiB)": 13.7, "step": 59525, "train_speed(iter/s)": 1.533718 }, { "acc": 0.98571434, "epoch": 27.902507616592455, "grad_norm": 1.9915904998779297, "learning_rate": 4.4530536107931005e-06, "loss": 0.06317701, "memory(GiB)": 13.7, "step": 59530, "train_speed(iter/s)": 1.533712 }, { "acc": 0.97562504, "epoch": 27.90485118350129, "grad_norm": 5.8394951820373535, "learning_rate": 4.452283166891943e-06, "loss": 0.03147314, "memory(GiB)": 13.7, "step": 59535, "train_speed(iter/s)": 1.533717 }, { "acc": 0.9854167, "epoch": 27.907194750410124, "grad_norm": 0.013028346933424473, "learning_rate": 4.451512736168118e-06, "loss": 0.02574972, "memory(GiB)": 13.7, "step": 59540, "train_speed(iter/s)": 1.533726 }, { "acc": 0.97958336, "epoch": 27.909538317318958, "grad_norm": 4.962377548217773, "learning_rate": 4.450742318640142e-06, "loss": 0.05761094, "memory(GiB)": 13.7, "step": 59545, "train_speed(iter/s)": 1.533731 }, { "acc": 0.9927084, "epoch": 27.911881884227796, "grad_norm": 0.02324802614748478, "learning_rate": 4.449971914326535e-06, "loss": 0.00842486, "memory(GiB)": 13.7, "step": 59550, "train_speed(iter/s)": 1.533737 }, { "acc": 0.9895833, "epoch": 27.91422545113663, "grad_norm": 4.287469387054443, "learning_rate": 4.449201523245814e-06, "loss": 0.08290852, "memory(GiB)": 13.7, "step": 59555, "train_speed(iter/s)": 1.533748 }, { "acc": 0.9864583, "epoch": 27.916569018045465, "grad_norm": 2.7451257705688477, "learning_rate": 4.4484311454164965e-06, "loss": 0.02843629, "memory(GiB)": 13.7, "step": 59560, "train_speed(iter/s)": 1.533748 }, { "acc": 0.97308598, "epoch": 27.9189125849543, "grad_norm": 3.799720048904419, "learning_rate": 4.447660780857102e-06, "loss": 0.05289456, "memory(GiB)": 13.7, "step": 59565, "train_speed(iter/s)": 1.533761 }, { "acc": 0.97787695, "epoch": 27.921256151863137, "grad_norm": 5.215110778808594, "learning_rate": 4.4468904295861464e-06, "loss": 0.08304904, "memory(GiB)": 13.7, "step": 59570, "train_speed(iter/s)": 1.533769 }, { "acc": 0.98708344, "epoch": 27.92359971877197, "grad_norm": 1.3499449491500854, "learning_rate": 4.446120091622144e-06, "loss": 0.04733801, "memory(GiB)": 13.7, "step": 59575, "train_speed(iter/s)": 1.533771 }, { "acc": 0.98910255, "epoch": 27.925943285680805, "grad_norm": 1.6905133724212646, "learning_rate": 4.445349766983617e-06, "loss": 0.03283856, "memory(GiB)": 13.7, "step": 59580, "train_speed(iter/s)": 1.533775 }, { "acc": 0.97270832, "epoch": 27.92828685258964, "grad_norm": 4.584286212921143, "learning_rate": 4.444579455689075e-06, "loss": 0.08862864, "memory(GiB)": 13.7, "step": 59585, "train_speed(iter/s)": 1.533778 }, { "acc": 0.98798609, "epoch": 27.930630419498478, "grad_norm": 3.412778615951538, "learning_rate": 4.443809157757036e-06, "loss": 0.0699446, "memory(GiB)": 13.7, "step": 59590, "train_speed(iter/s)": 1.533782 }, { "acc": 0.98592262, "epoch": 27.932973986407312, "grad_norm": 2.0710859298706055, "learning_rate": 4.443038873206017e-06, "loss": 0.04438744, "memory(GiB)": 13.7, "step": 59595, "train_speed(iter/s)": 1.533792 }, { "acc": 0.98565359, "epoch": 27.935317553316146, "grad_norm": 2.4048829078674316, "learning_rate": 4.442268602054534e-06, "loss": 0.0805917, "memory(GiB)": 13.7, "step": 59600, "train_speed(iter/s)": 1.533792 }, { "acc": 0.96895838, "epoch": 27.937661120224984, "grad_norm": 9.094268798828125, "learning_rate": 4.4414983443210955e-06, "loss": 0.07466977, "memory(GiB)": 13.7, "step": 59605, "train_speed(iter/s)": 1.533796 }, { "acc": 0.97763882, "epoch": 27.94000468713382, "grad_norm": 5.564738750457764, "learning_rate": 4.440728100024224e-06, "loss": 0.04776488, "memory(GiB)": 13.7, "step": 59610, "train_speed(iter/s)": 1.533802 }, { "acc": 0.99155636, "epoch": 27.942348254042653, "grad_norm": 0.2607141137123108, "learning_rate": 4.439957869182427e-06, "loss": 0.03434104, "memory(GiB)": 13.7, "step": 59615, "train_speed(iter/s)": 1.533803 }, { "acc": 0.9958334, "epoch": 27.944691820951487, "grad_norm": 1.6998190879821777, "learning_rate": 4.439187651814224e-06, "loss": 0.04345852, "memory(GiB)": 13.7, "step": 59620, "train_speed(iter/s)": 1.533806 }, { "acc": 0.9864583, "epoch": 27.947035387860325, "grad_norm": 4.561212539672852, "learning_rate": 4.438417447938126e-06, "loss": 0.03676789, "memory(GiB)": 13.7, "step": 59625, "train_speed(iter/s)": 1.533813 }, { "acc": 0.97588539, "epoch": 27.94937895476916, "grad_norm": 3.0381429195404053, "learning_rate": 4.437647257572645e-06, "loss": 0.06789594, "memory(GiB)": 13.7, "step": 59630, "train_speed(iter/s)": 1.533817 }, { "acc": 0.98811474, "epoch": 27.951722521677993, "grad_norm": 3.6240134239196777, "learning_rate": 4.436877080736294e-06, "loss": 0.06520392, "memory(GiB)": 13.7, "step": 59635, "train_speed(iter/s)": 1.533819 }, { "acc": 0.98249998, "epoch": 27.954066088586828, "grad_norm": 4.884079933166504, "learning_rate": 4.436106917447588e-06, "loss": 0.03193442, "memory(GiB)": 13.7, "step": 59640, "train_speed(iter/s)": 1.533818 }, { "acc": 0.97354164, "epoch": 27.956409655495666, "grad_norm": 4.143205642700195, "learning_rate": 4.435336767725035e-06, "loss": 0.07134518, "memory(GiB)": 13.7, "step": 59645, "train_speed(iter/s)": 1.533819 }, { "acc": 0.9885417, "epoch": 27.9587532224045, "grad_norm": 0.0029575915541499853, "learning_rate": 4.434566631587152e-06, "loss": 0.04022635, "memory(GiB)": 13.7, "step": 59650, "train_speed(iter/s)": 1.533821 }, { "acc": 0.9770834, "epoch": 27.961096789313334, "grad_norm": 3.971221923828125, "learning_rate": 4.433796509052446e-06, "loss": 0.04474654, "memory(GiB)": 13.7, "step": 59655, "train_speed(iter/s)": 1.533826 }, { "acc": 0.98416672, "epoch": 27.96344035622217, "grad_norm": 0.7495741248130798, "learning_rate": 4.433026400139431e-06, "loss": 0.02726793, "memory(GiB)": 13.7, "step": 59660, "train_speed(iter/s)": 1.53383 }, { "acc": 0.97279758, "epoch": 27.965783923131006, "grad_norm": 7.222599506378174, "learning_rate": 4.432256304866618e-06, "loss": 0.057722, "memory(GiB)": 13.7, "step": 59665, "train_speed(iter/s)": 1.53384 }, { "acc": 0.990625, "epoch": 27.96812749003984, "grad_norm": 2.5322041511535645, "learning_rate": 4.431486223252516e-06, "loss": 0.05673561, "memory(GiB)": 13.7, "step": 59670, "train_speed(iter/s)": 1.533843 }, { "acc": 0.98612175, "epoch": 27.970471056948675, "grad_norm": 5.767786502838135, "learning_rate": 4.430716155315636e-06, "loss": 0.04189183, "memory(GiB)": 13.7, "step": 59675, "train_speed(iter/s)": 1.533843 }, { "acc": 0.98917999, "epoch": 27.972814623857513, "grad_norm": 1.7908718585968018, "learning_rate": 4.429946101074486e-06, "loss": 0.04265119, "memory(GiB)": 13.7, "step": 59680, "train_speed(iter/s)": 1.533852 }, { "acc": 0.9869792, "epoch": 27.975158190766347, "grad_norm": 3.0283377170562744, "learning_rate": 4.4291760605475796e-06, "loss": 0.05087226, "memory(GiB)": 13.7, "step": 59685, "train_speed(iter/s)": 1.533862 }, { "acc": 0.98419647, "epoch": 27.97750175767518, "grad_norm": 5.467323303222656, "learning_rate": 4.428406033753421e-06, "loss": 0.0619756, "memory(GiB)": 13.7, "step": 59690, "train_speed(iter/s)": 1.533862 }, { "acc": 0.98729172, "epoch": 27.979845324584016, "grad_norm": 4.706544399261475, "learning_rate": 4.427636020710525e-06, "loss": 0.02635933, "memory(GiB)": 13.7, "step": 59695, "train_speed(iter/s)": 1.533873 }, { "acc": 0.975, "epoch": 27.982188891492854, "grad_norm": 0.9079429507255554, "learning_rate": 4.426866021437395e-06, "loss": 0.03955371, "memory(GiB)": 13.7, "step": 59700, "train_speed(iter/s)": 1.533877 }, { "acc": 0.98286781, "epoch": 27.984532458401688, "grad_norm": 1.9690178632736206, "learning_rate": 4.426096035952544e-06, "loss": 0.09183304, "memory(GiB)": 13.7, "step": 59705, "train_speed(iter/s)": 1.533877 }, { "acc": 0.97981157, "epoch": 27.986876025310522, "grad_norm": 0.9156001806259155, "learning_rate": 4.425326064274474e-06, "loss": 0.04161493, "memory(GiB)": 13.7, "step": 59710, "train_speed(iter/s)": 1.533875 }, { "acc": 0.98194447, "epoch": 27.989219592219357, "grad_norm": 5.3062968254089355, "learning_rate": 4.424556106421698e-06, "loss": 0.04557978, "memory(GiB)": 13.7, "step": 59715, "train_speed(iter/s)": 1.533879 }, { "acc": 0.97062502, "epoch": 27.991563159128194, "grad_norm": 9.86440372467041, "learning_rate": 4.423786162412719e-06, "loss": 0.07659578, "memory(GiB)": 13.7, "step": 59720, "train_speed(iter/s)": 1.533887 }, { "acc": 0.97979164, "epoch": 27.99390672603703, "grad_norm": 2.583822250366211, "learning_rate": 4.4230162322660465e-06, "loss": 0.0261794, "memory(GiB)": 13.7, "step": 59725, "train_speed(iter/s)": 1.533892 }, { "acc": 0.96696434, "epoch": 27.996250292945863, "grad_norm": 7.858311176300049, "learning_rate": 4.4222463160001885e-06, "loss": 0.07520283, "memory(GiB)": 13.7, "step": 59730, "train_speed(iter/s)": 1.5339 }, { "acc": 0.98416672, "epoch": 27.998593859854697, "grad_norm": 1.094624638557434, "learning_rate": 4.4214764136336465e-06, "loss": 0.07965518, "memory(GiB)": 13.7, "step": 59735, "train_speed(iter/s)": 1.533899 }, { "acc": 0.98436966, "epoch": 28.000937426763535, "grad_norm": 2.1212291717529297, "learning_rate": 4.420706525184932e-06, "loss": 0.07459126, "memory(GiB)": 13.7, "step": 59740, "train_speed(iter/s)": 1.533879 }, { "acc": 0.9791667, "epoch": 28.00328099367237, "grad_norm": 3.9950718879699707, "learning_rate": 4.419936650672546e-06, "loss": 0.04429211, "memory(GiB)": 13.7, "step": 59745, "train_speed(iter/s)": 1.53388 }, { "acc": 0.98812504, "epoch": 28.005624560581204, "grad_norm": 4.285733222961426, "learning_rate": 4.419166790114997e-06, "loss": 0.02835274, "memory(GiB)": 13.7, "step": 59750, "train_speed(iter/s)": 1.533883 }, { "acc": 0.99020824, "epoch": 28.00796812749004, "grad_norm": 4.385765552520752, "learning_rate": 4.418396943530788e-06, "loss": 0.10583664, "memory(GiB)": 13.7, "step": 59755, "train_speed(iter/s)": 1.533887 }, { "acc": 0.97056046, "epoch": 28.010311694398876, "grad_norm": 5.455070495605469, "learning_rate": 4.4176271109384246e-06, "loss": 0.10756814, "memory(GiB)": 13.7, "step": 59760, "train_speed(iter/s)": 1.533888 }, { "acc": 0.984375, "epoch": 28.01265526130771, "grad_norm": 4.149819374084473, "learning_rate": 4.416857292356408e-06, "loss": 0.03928838, "memory(GiB)": 13.7, "step": 59765, "train_speed(iter/s)": 1.533886 }, { "acc": 0.98988094, "epoch": 28.014998828216545, "grad_norm": 3.780855178833008, "learning_rate": 4.416087487803247e-06, "loss": 0.04879366, "memory(GiB)": 13.7, "step": 59770, "train_speed(iter/s)": 1.533897 }, { "acc": 0.99599905, "epoch": 28.017342395125382, "grad_norm": 3.62223482131958, "learning_rate": 4.415317697297443e-06, "loss": 0.0235467, "memory(GiB)": 13.7, "step": 59775, "train_speed(iter/s)": 1.533904 }, { "acc": 0.97895832, "epoch": 28.019685962034217, "grad_norm": 3.6328163146972656, "learning_rate": 4.414547920857498e-06, "loss": 0.06664207, "memory(GiB)": 13.7, "step": 59780, "train_speed(iter/s)": 1.533898 }, { "acc": 0.9875, "epoch": 28.02202952894305, "grad_norm": 1.1467911005020142, "learning_rate": 4.413778158501917e-06, "loss": 0.03102342, "memory(GiB)": 13.7, "step": 59785, "train_speed(iter/s)": 1.533894 }, { "acc": 0.9895834, "epoch": 28.024373095851885, "grad_norm": 5.006167411804199, "learning_rate": 4.413008410249202e-06, "loss": 0.02783774, "memory(GiB)": 13.7, "step": 59790, "train_speed(iter/s)": 1.533906 }, { "acc": 0.9729166, "epoch": 28.026716662760723, "grad_norm": 1.3083707094192505, "learning_rate": 4.412238676117853e-06, "loss": 0.08703667, "memory(GiB)": 13.7, "step": 59795, "train_speed(iter/s)": 1.533906 }, { "acc": 0.98604164, "epoch": 28.029060229669557, "grad_norm": 5.300158977508545, "learning_rate": 4.411468956126375e-06, "loss": 0.04511863, "memory(GiB)": 13.7, "step": 59800, "train_speed(iter/s)": 1.533914 }, { "acc": 0.98500004, "epoch": 28.03140379657839, "grad_norm": 4.132549285888672, "learning_rate": 4.410699250293268e-06, "loss": 0.03507612, "memory(GiB)": 13.7, "step": 59805, "train_speed(iter/s)": 1.533917 }, { "acc": 0.990625, "epoch": 28.033747363487226, "grad_norm": 3.7266738414764404, "learning_rate": 4.409929558637032e-06, "loss": 0.02253437, "memory(GiB)": 13.7, "step": 59810, "train_speed(iter/s)": 1.533921 }, { "acc": 0.98083334, "epoch": 28.036090930396064, "grad_norm": 5.0279083251953125, "learning_rate": 4.409159881176171e-06, "loss": 0.05245706, "memory(GiB)": 13.7, "step": 59815, "train_speed(iter/s)": 1.533926 }, { "acc": 0.97481232, "epoch": 28.0384344973049, "grad_norm": 4.459962844848633, "learning_rate": 4.408390217929183e-06, "loss": 0.0904392, "memory(GiB)": 13.7, "step": 59820, "train_speed(iter/s)": 1.533929 }, { "acc": 0.98544645, "epoch": 28.040778064213733, "grad_norm": 2.368539333343506, "learning_rate": 4.407620568914569e-06, "loss": 0.04066106, "memory(GiB)": 13.7, "step": 59825, "train_speed(iter/s)": 1.533933 }, { "acc": 0.9979167, "epoch": 28.043121631122567, "grad_norm": 0.002962526399642229, "learning_rate": 4.406850934150828e-06, "loss": 0.00621119, "memory(GiB)": 13.7, "step": 59830, "train_speed(iter/s)": 1.533937 }, { "acc": 0.9916667, "epoch": 28.045465198031405, "grad_norm": 4.119890213012695, "learning_rate": 4.406081313656462e-06, "loss": 0.03354061, "memory(GiB)": 13.7, "step": 59835, "train_speed(iter/s)": 1.533937 }, { "acc": 0.97416668, "epoch": 28.04780876494024, "grad_norm": 0.005884604062885046, "learning_rate": 4.405311707449967e-06, "loss": 0.03754998, "memory(GiB)": 13.7, "step": 59840, "train_speed(iter/s)": 1.53394 }, { "acc": 0.95404768, "epoch": 28.050152331849073, "grad_norm": 5.92896032333374, "learning_rate": 4.404542115549843e-06, "loss": 0.14142842, "memory(GiB)": 13.7, "step": 59845, "train_speed(iter/s)": 1.53394 }, { "acc": 0.97999992, "epoch": 28.05249589875791, "grad_norm": 5.815431118011475, "learning_rate": 4.40377253797459e-06, "loss": 0.04621827, "memory(GiB)": 13.7, "step": 59850, "train_speed(iter/s)": 1.533943 }, { "acc": 0.97517853, "epoch": 28.054839465666745, "grad_norm": 4.635831356048584, "learning_rate": 4.403002974742703e-06, "loss": 0.04646028, "memory(GiB)": 13.7, "step": 59855, "train_speed(iter/s)": 1.533944 }, { "acc": 0.98716345, "epoch": 28.05718303257558, "grad_norm": 1.8038253784179688, "learning_rate": 4.402233425872683e-06, "loss": 0.04772983, "memory(GiB)": 13.7, "step": 59860, "train_speed(iter/s)": 1.533945 }, { "acc": 0.98154755, "epoch": 28.059526599484414, "grad_norm": 5.932756423950195, "learning_rate": 4.401463891383025e-06, "loss": 0.15254004, "memory(GiB)": 13.7, "step": 59865, "train_speed(iter/s)": 1.533955 }, { "acc": 0.97666664, "epoch": 28.061870166393252, "grad_norm": 5.865447521209717, "learning_rate": 4.400694371292228e-06, "loss": 0.04740239, "memory(GiB)": 13.7, "step": 59870, "train_speed(iter/s)": 1.533964 }, { "acc": 0.98458328, "epoch": 28.064213733302086, "grad_norm": 2.87191104888916, "learning_rate": 4.399924865618789e-06, "loss": 0.04928169, "memory(GiB)": 13.7, "step": 59875, "train_speed(iter/s)": 1.533964 }, { "acc": 0.97947311, "epoch": 28.06655730021092, "grad_norm": 3.9224183559417725, "learning_rate": 4.3991553743812015e-06, "loss": 0.06340362, "memory(GiB)": 13.7, "step": 59880, "train_speed(iter/s)": 1.533974 }, { "acc": 0.97852678, "epoch": 28.068900867119755, "grad_norm": 2.8694841861724854, "learning_rate": 4.398385897597963e-06, "loss": 0.05614387, "memory(GiB)": 13.7, "step": 59885, "train_speed(iter/s)": 1.533978 }, { "acc": 0.9838541, "epoch": 28.071244434028593, "grad_norm": 8.297454833984375, "learning_rate": 4.397616435287569e-06, "loss": 0.06426577, "memory(GiB)": 13.7, "step": 59890, "train_speed(iter/s)": 1.533976 }, { "acc": 0.97800598, "epoch": 28.073588000937427, "grad_norm": 0.03154642507433891, "learning_rate": 4.396846987468516e-06, "loss": 0.05357221, "memory(GiB)": 13.7, "step": 59895, "train_speed(iter/s)": 1.533973 }, { "acc": 0.98050594, "epoch": 28.07593156784626, "grad_norm": 4.7886834144592285, "learning_rate": 4.396077554159298e-06, "loss": 0.0594664, "memory(GiB)": 13.7, "step": 59900, "train_speed(iter/s)": 1.533977 }, { "acc": 0.98168564, "epoch": 28.078275134755096, "grad_norm": 2.6693828105926514, "learning_rate": 4.395308135378411e-06, "loss": 0.06756239, "memory(GiB)": 13.7, "step": 59905, "train_speed(iter/s)": 1.533982 }, { "acc": 0.97817879, "epoch": 28.080618701663933, "grad_norm": 5.943799018859863, "learning_rate": 4.394538731144346e-06, "loss": 0.06640327, "memory(GiB)": 13.7, "step": 59910, "train_speed(iter/s)": 1.533987 }, { "acc": 0.98490133, "epoch": 28.082962268572768, "grad_norm": 4.092806816101074, "learning_rate": 4.393769341475603e-06, "loss": 0.04895019, "memory(GiB)": 13.7, "step": 59915, "train_speed(iter/s)": 1.533992 }, { "acc": 0.96520834, "epoch": 28.085305835481602, "grad_norm": 6.831028461456299, "learning_rate": 4.39299996639067e-06, "loss": 0.07474012, "memory(GiB)": 13.7, "step": 59920, "train_speed(iter/s)": 1.533999 }, { "acc": 0.96446428, "epoch": 28.08764940239044, "grad_norm": 4.170519828796387, "learning_rate": 4.392230605908041e-06, "loss": 0.06430618, "memory(GiB)": 13.7, "step": 59925, "train_speed(iter/s)": 1.533997 }, { "acc": 0.98634796, "epoch": 28.089992969299274, "grad_norm": 0.0059379516169428825, "learning_rate": 4.391461260046209e-06, "loss": 0.04756458, "memory(GiB)": 13.7, "step": 59930, "train_speed(iter/s)": 1.53399 }, { "acc": 0.97943115, "epoch": 28.09233653620811, "grad_norm": 4.395282745361328, "learning_rate": 4.39069192882367e-06, "loss": 0.05467055, "memory(GiB)": 13.7, "step": 59935, "train_speed(iter/s)": 1.533989 }, { "acc": 0.98363094, "epoch": 28.094680103116943, "grad_norm": 4.442441940307617, "learning_rate": 4.389922612258911e-06, "loss": 0.03202431, "memory(GiB)": 13.7, "step": 59940, "train_speed(iter/s)": 1.53399 }, { "acc": 0.984375, "epoch": 28.09702367002578, "grad_norm": 0.9434884786605835, "learning_rate": 4.389153310370428e-06, "loss": 0.0428631, "memory(GiB)": 13.7, "step": 59945, "train_speed(iter/s)": 1.533993 }, { "acc": 0.98633928, "epoch": 28.099367236934615, "grad_norm": 3.6993603706359863, "learning_rate": 4.388384023176711e-06, "loss": 0.05950183, "memory(GiB)": 13.7, "step": 59950, "train_speed(iter/s)": 1.534002 }, { "acc": 0.9802084, "epoch": 28.10171080384345, "grad_norm": 3.351903200149536, "learning_rate": 4.3876147506962506e-06, "loss": 0.05275837, "memory(GiB)": 13.7, "step": 59955, "train_speed(iter/s)": 1.534012 }, { "acc": 0.98785391, "epoch": 28.104054370752284, "grad_norm": 2.3369808197021484, "learning_rate": 4.3868454929475405e-06, "loss": 0.0614747, "memory(GiB)": 13.7, "step": 59960, "train_speed(iter/s)": 1.534018 }, { "acc": 0.97425594, "epoch": 28.10639793766112, "grad_norm": 2.6434166431427, "learning_rate": 4.3860762499490665e-06, "loss": 0.0475789, "memory(GiB)": 13.7, "step": 59965, "train_speed(iter/s)": 1.534021 }, { "acc": 0.9875, "epoch": 28.108741504569956, "grad_norm": 2.3512206077575684, "learning_rate": 4.385307021719321e-06, "loss": 0.09391297, "memory(GiB)": 13.7, "step": 59970, "train_speed(iter/s)": 1.534024 }, { "acc": 0.9833334, "epoch": 28.11108507147879, "grad_norm": 8.045843124389648, "learning_rate": 4.3845378082767935e-06, "loss": 0.06518464, "memory(GiB)": 13.7, "step": 59975, "train_speed(iter/s)": 1.53402 }, { "acc": 0.97509804, "epoch": 28.113428638387624, "grad_norm": 6.497120380401611, "learning_rate": 4.383768609639974e-06, "loss": 0.08382028, "memory(GiB)": 13.7, "step": 59980, "train_speed(iter/s)": 1.534023 }, { "acc": 0.98779764, "epoch": 28.115772205296462, "grad_norm": 7.484834671020508, "learning_rate": 4.38299942582735e-06, "loss": 0.03317273, "memory(GiB)": 13.7, "step": 59985, "train_speed(iter/s)": 1.534033 }, { "acc": 0.9947917, "epoch": 28.118115772205297, "grad_norm": 2.786726713180542, "learning_rate": 4.382230256857412e-06, "loss": 0.05008883, "memory(GiB)": 13.7, "step": 59990, "train_speed(iter/s)": 1.534033 }, { "acc": 0.98506947, "epoch": 28.12045933911413, "grad_norm": 3.281363010406494, "learning_rate": 4.381461102748648e-06, "loss": 0.06043965, "memory(GiB)": 13.7, "step": 59995, "train_speed(iter/s)": 1.534039 }, { "acc": 0.97548618, "epoch": 28.12280290602297, "grad_norm": 5.8595356941223145, "learning_rate": 4.380691963519545e-06, "loss": 0.07651612, "memory(GiB)": 13.7, "step": 60000, "train_speed(iter/s)": 1.534039 }, { "epoch": 28.12280290602297, "eval_acc": 0.7752073919201456, "eval_loss": 1.206713318824768, "eval_runtime": 144.1283, "eval_samples_per_second": 55.978, "eval_steps_per_second": 7.001, "step": 60000 }, { "acc": 0.98395834, "epoch": 28.125146472931803, "grad_norm": 1.691522479057312, "learning_rate": 4.379922839188591e-06, "loss": 0.06028517, "memory(GiB)": 13.7, "step": 60005, "train_speed(iter/s)": 1.527229 }, { "acc": 0.99280643, "epoch": 28.127490039840637, "grad_norm": 3.306178331375122, "learning_rate": 4.379153729774273e-06, "loss": 0.03907855, "memory(GiB)": 13.7, "step": 60010, "train_speed(iter/s)": 1.527233 }, { "acc": 0.98172131, "epoch": 28.12983360674947, "grad_norm": 1.5822515487670898, "learning_rate": 4.378384635295077e-06, "loss": 0.08278723, "memory(GiB)": 13.7, "step": 60015, "train_speed(iter/s)": 1.52724 }, { "acc": 0.984375, "epoch": 28.13217717365831, "grad_norm": 2.2134547233581543, "learning_rate": 4.377615555769493e-06, "loss": 0.0290712, "memory(GiB)": 13.7, "step": 60020, "train_speed(iter/s)": 1.527246 }, { "acc": 0.99081354, "epoch": 28.134520740567144, "grad_norm": 9.41348648071289, "learning_rate": 4.376846491216003e-06, "loss": 0.02803418, "memory(GiB)": 13.7, "step": 60025, "train_speed(iter/s)": 1.527244 }, { "acc": 0.98035316, "epoch": 28.136864307475978, "grad_norm": 2.2106454372406006, "learning_rate": 4.376077441653094e-06, "loss": 0.04199594, "memory(GiB)": 13.7, "step": 60030, "train_speed(iter/s)": 1.527249 }, { "acc": 0.99154758, "epoch": 28.139207874384812, "grad_norm": 0.038658805191516876, "learning_rate": 4.375308407099253e-06, "loss": 0.02854091, "memory(GiB)": 13.7, "step": 60035, "train_speed(iter/s)": 1.527253 }, { "acc": 0.99104176, "epoch": 28.14155144129365, "grad_norm": 1.9526137113571167, "learning_rate": 4.374539387572965e-06, "loss": 0.06258692, "memory(GiB)": 13.7, "step": 60040, "train_speed(iter/s)": 1.527251 }, { "acc": 0.99058037, "epoch": 28.143895008202485, "grad_norm": 4.527859687805176, "learning_rate": 4.373770383092712e-06, "loss": 0.05047402, "memory(GiB)": 13.7, "step": 60045, "train_speed(iter/s)": 1.527255 }, { "acc": 0.97427082, "epoch": 28.14623857511132, "grad_norm": 3.7431256771087646, "learning_rate": 4.373001393676981e-06, "loss": 0.05881191, "memory(GiB)": 13.7, "step": 60050, "train_speed(iter/s)": 1.527255 }, { "acc": 0.97529764, "epoch": 28.148582142020153, "grad_norm": 1.0832643508911133, "learning_rate": 4.372232419344255e-06, "loss": 0.06474327, "memory(GiB)": 13.7, "step": 60055, "train_speed(iter/s)": 1.527264 }, { "acc": 0.97666664, "epoch": 28.15092570892899, "grad_norm": 5.211841106414795, "learning_rate": 4.371463460113016e-06, "loss": 0.07760125, "memory(GiB)": 13.7, "step": 60060, "train_speed(iter/s)": 1.527264 }, { "acc": 0.96830359, "epoch": 28.153269275837825, "grad_norm": 4.079926490783691, "learning_rate": 4.3706945160017495e-06, "loss": 0.05340394, "memory(GiB)": 13.7, "step": 60065, "train_speed(iter/s)": 1.527268 }, { "acc": 0.9963315, "epoch": 28.15561284274666, "grad_norm": 2.0290544033050537, "learning_rate": 4.369925587028937e-06, "loss": 0.02627996, "memory(GiB)": 13.7, "step": 60070, "train_speed(iter/s)": 1.527271 }, { "acc": 0.97000408, "epoch": 28.157956409655494, "grad_norm": 3.621408462524414, "learning_rate": 4.369156673213063e-06, "loss": 0.10454028, "memory(GiB)": 13.7, "step": 60075, "train_speed(iter/s)": 1.527271 }, { "acc": 0.99306011, "epoch": 28.160299976564332, "grad_norm": 0.0027003062423318624, "learning_rate": 4.368387774572609e-06, "loss": 0.03002807, "memory(GiB)": 13.7, "step": 60080, "train_speed(iter/s)": 1.527274 }, { "acc": 0.97488098, "epoch": 28.162643543473166, "grad_norm": 2.0370419025421143, "learning_rate": 4.367618891126057e-06, "loss": 0.04495874, "memory(GiB)": 13.7, "step": 60085, "train_speed(iter/s)": 1.527284 }, { "acc": 0.98395834, "epoch": 28.164987110382, "grad_norm": 2.842747688293457, "learning_rate": 4.366850022891886e-06, "loss": 0.04593864, "memory(GiB)": 13.7, "step": 60090, "train_speed(iter/s)": 1.527288 }, { "acc": 0.99253845, "epoch": 28.16733067729084, "grad_norm": 3.5450785160064697, "learning_rate": 4.366081169888579e-06, "loss": 0.06639494, "memory(GiB)": 13.7, "step": 60095, "train_speed(iter/s)": 1.52729 }, { "acc": 0.98106689, "epoch": 28.169674244199673, "grad_norm": 1.2554028034210205, "learning_rate": 4.365312332134616e-06, "loss": 0.05829211, "memory(GiB)": 13.7, "step": 60100, "train_speed(iter/s)": 1.527292 }, { "acc": 0.97562494, "epoch": 28.172017811108507, "grad_norm": 4.28925085067749, "learning_rate": 4.364543509648478e-06, "loss": 0.04154239, "memory(GiB)": 13.7, "step": 60105, "train_speed(iter/s)": 1.527297 }, { "acc": 0.96583328, "epoch": 28.17436137801734, "grad_norm": 4.723423480987549, "learning_rate": 4.363774702448646e-06, "loss": 0.06571155, "memory(GiB)": 13.7, "step": 60110, "train_speed(iter/s)": 1.527298 }, { "acc": 0.99155712, "epoch": 28.17670494492618, "grad_norm": 2.994079351425171, "learning_rate": 4.363005910553596e-06, "loss": 0.05743776, "memory(GiB)": 13.7, "step": 60115, "train_speed(iter/s)": 1.527299 }, { "acc": 0.99333324, "epoch": 28.179048511835013, "grad_norm": 1.2660149335861206, "learning_rate": 4.362237133981811e-06, "loss": 0.04042723, "memory(GiB)": 13.7, "step": 60120, "train_speed(iter/s)": 1.527301 }, { "acc": 0.988447, "epoch": 28.181392078743848, "grad_norm": 2.202392339706421, "learning_rate": 4.36146837275177e-06, "loss": 0.04706124, "memory(GiB)": 13.7, "step": 60125, "train_speed(iter/s)": 1.5273 }, { "acc": 0.971875, "epoch": 28.183735645652682, "grad_norm": 4.49874210357666, "learning_rate": 4.360699626881949e-06, "loss": 0.05231292, "memory(GiB)": 13.7, "step": 60130, "train_speed(iter/s)": 1.527306 }, { "acc": 0.9828125, "epoch": 28.18607921256152, "grad_norm": 3.7829947471618652, "learning_rate": 4.359930896390825e-06, "loss": 0.03642979, "memory(GiB)": 13.7, "step": 60135, "train_speed(iter/s)": 1.527311 }, { "acc": 0.99092264, "epoch": 28.188422779470354, "grad_norm": 0.009036971256136894, "learning_rate": 4.35916218129688e-06, "loss": 0.02223908, "memory(GiB)": 13.7, "step": 60140, "train_speed(iter/s)": 1.527312 }, { "acc": 0.9916667, "epoch": 28.19076634637919, "grad_norm": 3.8529438972473145, "learning_rate": 4.358393481618587e-06, "loss": 0.03551118, "memory(GiB)": 13.7, "step": 60145, "train_speed(iter/s)": 1.527314 }, { "acc": 0.98550587, "epoch": 28.193109913288023, "grad_norm": 4.199671268463135, "learning_rate": 4.357624797374428e-06, "loss": 0.06655737, "memory(GiB)": 13.7, "step": 60150, "train_speed(iter/s)": 1.527312 }, { "acc": 0.98932695, "epoch": 28.19545348019686, "grad_norm": 1.3623363971710205, "learning_rate": 4.356856128582875e-06, "loss": 0.04869103, "memory(GiB)": 13.7, "step": 60155, "train_speed(iter/s)": 1.527317 }, { "acc": 0.9860836, "epoch": 28.197797047105695, "grad_norm": 7.239130020141602, "learning_rate": 4.356087475262406e-06, "loss": 0.06214405, "memory(GiB)": 13.7, "step": 60160, "train_speed(iter/s)": 1.527323 }, { "acc": 0.98416672, "epoch": 28.20014061401453, "grad_norm": 0.022799793630838394, "learning_rate": 4.355318837431498e-06, "loss": 0.04439923, "memory(GiB)": 13.7, "step": 60165, "train_speed(iter/s)": 1.527326 }, { "acc": 0.99125004, "epoch": 28.202484180923367, "grad_norm": 2.499903678894043, "learning_rate": 4.354550215108627e-06, "loss": 0.03478109, "memory(GiB)": 13.7, "step": 60170, "train_speed(iter/s)": 1.527336 }, { "acc": 0.98520298, "epoch": 28.2048277478322, "grad_norm": 3.6820480823516846, "learning_rate": 4.3537816083122645e-06, "loss": 0.058658, "memory(GiB)": 13.7, "step": 60175, "train_speed(iter/s)": 1.527345 }, { "acc": 0.97520828, "epoch": 28.207171314741036, "grad_norm": 3.392744779586792, "learning_rate": 4.353013017060888e-06, "loss": 0.07009101, "memory(GiB)": 13.7, "step": 60180, "train_speed(iter/s)": 1.527351 }, { "acc": 0.98604164, "epoch": 28.20951488164987, "grad_norm": 1.6704301834106445, "learning_rate": 4.352244441372972e-06, "loss": 0.03629996, "memory(GiB)": 13.7, "step": 60185, "train_speed(iter/s)": 1.527345 }, { "acc": 0.9875, "epoch": 28.211858448558708, "grad_norm": 2.481921434402466, "learning_rate": 4.351475881266988e-06, "loss": 0.02764936, "memory(GiB)": 13.7, "step": 60190, "train_speed(iter/s)": 1.527351 }, { "acc": 0.98869047, "epoch": 28.214202015467542, "grad_norm": 4.256706237792969, "learning_rate": 4.350707336761413e-06, "loss": 0.04102286, "memory(GiB)": 13.7, "step": 60195, "train_speed(iter/s)": 1.52736 }, { "acc": 0.99020834, "epoch": 28.216545582376376, "grad_norm": 2.109476327896118, "learning_rate": 4.349938807874719e-06, "loss": 0.01676974, "memory(GiB)": 13.7, "step": 60200, "train_speed(iter/s)": 1.527361 }, { "acc": 0.9697917, "epoch": 28.21888914928521, "grad_norm": 5.76984977722168, "learning_rate": 4.349170294625378e-06, "loss": 0.08649375, "memory(GiB)": 13.7, "step": 60205, "train_speed(iter/s)": 1.527359 }, { "acc": 0.99020824, "epoch": 28.22123271619405, "grad_norm": 1.1871289014816284, "learning_rate": 4.348401797031866e-06, "loss": 0.03962261, "memory(GiB)": 13.7, "step": 60210, "train_speed(iter/s)": 1.527359 }, { "acc": 0.9583334, "epoch": 28.223576283102883, "grad_norm": 5.815615177154541, "learning_rate": 4.34763331511265e-06, "loss": 0.08156886, "memory(GiB)": 13.7, "step": 60215, "train_speed(iter/s)": 1.527363 }, { "acc": 0.97724171, "epoch": 28.225919850011717, "grad_norm": 7.480358123779297, "learning_rate": 4.346864848886203e-06, "loss": 0.09488463, "memory(GiB)": 13.7, "step": 60220, "train_speed(iter/s)": 1.527358 }, { "acc": 0.98549681, "epoch": 28.22826341692055, "grad_norm": 6.71846342086792, "learning_rate": 4.346096398371e-06, "loss": 0.04082816, "memory(GiB)": 13.7, "step": 60225, "train_speed(iter/s)": 1.527361 }, { "acc": 0.97969704, "epoch": 28.23060698382939, "grad_norm": 4.618531703948975, "learning_rate": 4.34532796358551e-06, "loss": 0.04888461, "memory(GiB)": 13.7, "step": 60230, "train_speed(iter/s)": 1.527366 }, { "acc": 0.98458328, "epoch": 28.232950550738224, "grad_norm": 4.523139953613281, "learning_rate": 4.344559544548202e-06, "loss": 0.05160687, "memory(GiB)": 13.7, "step": 60235, "train_speed(iter/s)": 1.527367 }, { "acc": 0.98714008, "epoch": 28.235294117647058, "grad_norm": 5.454616546630859, "learning_rate": 4.343791141277548e-06, "loss": 0.06409621, "memory(GiB)": 13.7, "step": 60240, "train_speed(iter/s)": 1.527368 }, { "acc": 0.99386368, "epoch": 28.237637684555892, "grad_norm": 1.534870982170105, "learning_rate": 4.343022753792019e-06, "loss": 0.03625732, "memory(GiB)": 13.7, "step": 60245, "train_speed(iter/s)": 1.527369 }, { "acc": 0.990625, "epoch": 28.23998125146473, "grad_norm": 0.12344487756490707, "learning_rate": 4.342254382110081e-06, "loss": 0.03175627, "memory(GiB)": 13.7, "step": 60250, "train_speed(iter/s)": 1.527377 }, { "acc": 0.97961311, "epoch": 28.242324818373564, "grad_norm": 4.5444231033325195, "learning_rate": 4.341486026250206e-06, "loss": 0.06770064, "memory(GiB)": 13.7, "step": 60255, "train_speed(iter/s)": 1.527376 }, { "acc": 0.9828125, "epoch": 28.2446683852824, "grad_norm": 0.08076875656843185, "learning_rate": 4.340717686230862e-06, "loss": 0.0374068, "memory(GiB)": 13.7, "step": 60260, "train_speed(iter/s)": 1.527384 }, { "acc": 0.97592258, "epoch": 28.247011952191237, "grad_norm": 3.9870502948760986, "learning_rate": 4.339949362070515e-06, "loss": 0.08267568, "memory(GiB)": 13.7, "step": 60265, "train_speed(iter/s)": 1.527383 }, { "acc": 0.9927083, "epoch": 28.24935551910007, "grad_norm": 0.7343704700469971, "learning_rate": 4.339181053787639e-06, "loss": 0.04420141, "memory(GiB)": 13.7, "step": 60270, "train_speed(iter/s)": 1.527384 }, { "acc": 0.98058605, "epoch": 28.251699086008905, "grad_norm": 1.6162384748458862, "learning_rate": 4.3384127614006956e-06, "loss": 0.05089869, "memory(GiB)": 13.7, "step": 60275, "train_speed(iter/s)": 1.52739 }, { "acc": 0.98874998, "epoch": 28.25404265291774, "grad_norm": 3.0420451164245605, "learning_rate": 4.337644484928154e-06, "loss": 0.05361918, "memory(GiB)": 13.7, "step": 60280, "train_speed(iter/s)": 1.527397 }, { "acc": 0.97872028, "epoch": 28.256386219826577, "grad_norm": 2.4622676372528076, "learning_rate": 4.3368762243884824e-06, "loss": 0.07864336, "memory(GiB)": 13.7, "step": 60285, "train_speed(iter/s)": 1.527398 }, { "acc": 0.98770828, "epoch": 28.25872978673541, "grad_norm": 2.2179126739501953, "learning_rate": 4.336107979800145e-06, "loss": 0.03742826, "memory(GiB)": 13.7, "step": 60290, "train_speed(iter/s)": 1.527401 }, { "acc": 0.99333334, "epoch": 28.261073353644246, "grad_norm": 4.1944580078125, "learning_rate": 4.335339751181611e-06, "loss": 0.04083467, "memory(GiB)": 13.7, "step": 60295, "train_speed(iter/s)": 1.527402 }, { "acc": 0.98442917, "epoch": 28.26341692055308, "grad_norm": 3.8910677433013916, "learning_rate": 4.334571538551344e-06, "loss": 0.04136143, "memory(GiB)": 13.7, "step": 60300, "train_speed(iter/s)": 1.527407 }, { "acc": 0.98968754, "epoch": 28.265760487461918, "grad_norm": 5.059136867523193, "learning_rate": 4.333803341927809e-06, "loss": 0.02353191, "memory(GiB)": 13.7, "step": 60305, "train_speed(iter/s)": 1.527411 }, { "acc": 0.98425598, "epoch": 28.268104054370752, "grad_norm": 0.0012426545144990087, "learning_rate": 4.333035161329469e-06, "loss": 0.06173036, "memory(GiB)": 13.7, "step": 60310, "train_speed(iter/s)": 1.527416 }, { "acc": 0.97062502, "epoch": 28.270447621279587, "grad_norm": 4.267955303192139, "learning_rate": 4.332266996774793e-06, "loss": 0.06644037, "memory(GiB)": 13.7, "step": 60315, "train_speed(iter/s)": 1.527424 }, { "acc": 0.97409182, "epoch": 28.27279118818842, "grad_norm": 4.55548620223999, "learning_rate": 4.331498848282243e-06, "loss": 0.09591192, "memory(GiB)": 13.7, "step": 60320, "train_speed(iter/s)": 1.527423 }, { "acc": 0.97194939, "epoch": 28.27513475509726, "grad_norm": 2.4415833950042725, "learning_rate": 4.330730715870283e-06, "loss": 0.06480792, "memory(GiB)": 13.7, "step": 60325, "train_speed(iter/s)": 1.527423 }, { "acc": 0.98395824, "epoch": 28.277478322006093, "grad_norm": 4.653163433074951, "learning_rate": 4.329962599557376e-06, "loss": 0.06289074, "memory(GiB)": 13.7, "step": 60330, "train_speed(iter/s)": 1.52743 }, { "acc": 0.98023472, "epoch": 28.279821888914928, "grad_norm": 3.3425683975219727, "learning_rate": 4.329194499361987e-06, "loss": 0.0571811, "memory(GiB)": 13.7, "step": 60335, "train_speed(iter/s)": 1.527436 }, { "acc": 0.99196434, "epoch": 28.282165455823765, "grad_norm": 1.072550892829895, "learning_rate": 4.328426415302574e-06, "loss": 0.0371043, "memory(GiB)": 13.7, "step": 60340, "train_speed(iter/s)": 1.527445 }, { "acc": 0.98888893, "epoch": 28.2845090227326, "grad_norm": 6.598301410675049, "learning_rate": 4.327658347397603e-06, "loss": 0.02276979, "memory(GiB)": 13.7, "step": 60345, "train_speed(iter/s)": 1.52745 }, { "acc": 0.99053202, "epoch": 28.286852589641434, "grad_norm": 0.6796748638153076, "learning_rate": 4.326890295665536e-06, "loss": 0.05033404, "memory(GiB)": 13.7, "step": 60350, "train_speed(iter/s)": 1.527456 }, { "acc": 0.98779755, "epoch": 28.28919615655027, "grad_norm": 5.068202972412109, "learning_rate": 4.326122260124831e-06, "loss": 0.06652443, "memory(GiB)": 13.7, "step": 60355, "train_speed(iter/s)": 1.527467 }, { "acc": 0.98728561, "epoch": 28.291539723459106, "grad_norm": 0.04932399094104767, "learning_rate": 4.325354240793953e-06, "loss": 0.03695899, "memory(GiB)": 13.7, "step": 60360, "train_speed(iter/s)": 1.527468 }, { "acc": 1.0, "epoch": 28.29388329036794, "grad_norm": 0.00102695741225034, "learning_rate": 4.324586237691359e-06, "loss": 0.02048454, "memory(GiB)": 13.7, "step": 60365, "train_speed(iter/s)": 1.527471 }, { "acc": 0.97458334, "epoch": 28.296226857276775, "grad_norm": 5.372826099395752, "learning_rate": 4.323818250835514e-06, "loss": 0.08865545, "memory(GiB)": 13.7, "step": 60370, "train_speed(iter/s)": 1.527485 }, { "acc": 0.98180809, "epoch": 28.29857042418561, "grad_norm": 1.5736324787139893, "learning_rate": 4.323050280244875e-06, "loss": 0.04393817, "memory(GiB)": 13.7, "step": 60375, "train_speed(iter/s)": 1.527491 }, { "acc": 0.97000008, "epoch": 28.300913991094447, "grad_norm": 4.154497146606445, "learning_rate": 4.322282325937902e-06, "loss": 0.08621459, "memory(GiB)": 13.7, "step": 60380, "train_speed(iter/s)": 1.527497 }, { "acc": 0.98708334, "epoch": 28.30325755800328, "grad_norm": 3.3553683757781982, "learning_rate": 4.3215143879330515e-06, "loss": 0.03995562, "memory(GiB)": 13.7, "step": 60385, "train_speed(iter/s)": 1.527504 }, { "acc": 0.97979164, "epoch": 28.305601124912116, "grad_norm": 3.3400330543518066, "learning_rate": 4.320746466248786e-06, "loss": 0.05943193, "memory(GiB)": 13.7, "step": 60390, "train_speed(iter/s)": 1.52751 }, { "acc": 0.98624992, "epoch": 28.30794469182095, "grad_norm": 3.635572671890259, "learning_rate": 4.319978560903561e-06, "loss": 0.07134173, "memory(GiB)": 13.7, "step": 60395, "train_speed(iter/s)": 1.527507 }, { "acc": 0.98594704, "epoch": 28.310288258729788, "grad_norm": 3.083709716796875, "learning_rate": 4.319210671915837e-06, "loss": 0.04409676, "memory(GiB)": 13.7, "step": 60400, "train_speed(iter/s)": 1.527508 }, { "acc": 0.98875008, "epoch": 28.312631825638622, "grad_norm": 3.382715940475464, "learning_rate": 4.31844279930407e-06, "loss": 0.03535184, "memory(GiB)": 13.7, "step": 60405, "train_speed(iter/s)": 1.527503 }, { "acc": 0.98562498, "epoch": 28.314975392547456, "grad_norm": 3.141998052597046, "learning_rate": 4.317674943086716e-06, "loss": 0.02890338, "memory(GiB)": 13.7, "step": 60410, "train_speed(iter/s)": 1.527506 }, { "acc": 0.96555061, "epoch": 28.317318959456294, "grad_norm": 2.0135416984558105, "learning_rate": 4.316907103282235e-06, "loss": 0.07418483, "memory(GiB)": 13.7, "step": 60415, "train_speed(iter/s)": 1.527514 }, { "acc": 0.97937498, "epoch": 28.31966252636513, "grad_norm": 3.794525384902954, "learning_rate": 4.3161392799090814e-06, "loss": 0.03347093, "memory(GiB)": 13.7, "step": 60420, "train_speed(iter/s)": 1.527511 }, { "acc": 0.98968754, "epoch": 28.322006093273963, "grad_norm": 4.686939239501953, "learning_rate": 4.31537147298571e-06, "loss": 0.04798882, "memory(GiB)": 13.7, "step": 60425, "train_speed(iter/s)": 1.527518 }, { "acc": 0.97633934, "epoch": 28.324349660182797, "grad_norm": 3.095874309539795, "learning_rate": 4.314603682530579e-06, "loss": 0.09180236, "memory(GiB)": 13.7, "step": 60430, "train_speed(iter/s)": 1.52752 }, { "acc": 0.98030643, "epoch": 28.326693227091635, "grad_norm": 3.3522748947143555, "learning_rate": 4.313835908562141e-06, "loss": 0.05343158, "memory(GiB)": 13.7, "step": 60435, "train_speed(iter/s)": 1.527522 }, { "acc": 0.96520824, "epoch": 28.32903679400047, "grad_norm": 3.154423713684082, "learning_rate": 4.313068151098852e-06, "loss": 0.10037528, "memory(GiB)": 13.7, "step": 60440, "train_speed(iter/s)": 1.527521 }, { "acc": 0.9788393, "epoch": 28.331380360909304, "grad_norm": 1.8081200122833252, "learning_rate": 4.312300410159167e-06, "loss": 0.0492437, "memory(GiB)": 13.7, "step": 60445, "train_speed(iter/s)": 1.527523 }, { "acc": 0.97665176, "epoch": 28.333723927818138, "grad_norm": 2.118105888366699, "learning_rate": 4.311532685761538e-06, "loss": 0.07045076, "memory(GiB)": 13.7, "step": 60450, "train_speed(iter/s)": 1.527524 }, { "acc": 0.97217264, "epoch": 28.336067494726976, "grad_norm": 5.898458003997803, "learning_rate": 4.31076497792442e-06, "loss": 0.068016, "memory(GiB)": 13.7, "step": 60455, "train_speed(iter/s)": 1.52753 }, { "acc": 0.9882143, "epoch": 28.33841106163581, "grad_norm": 3.4168388843536377, "learning_rate": 4.3099972866662676e-06, "loss": 0.03828892, "memory(GiB)": 13.7, "step": 60460, "train_speed(iter/s)": 1.527528 }, { "acc": 0.98812504, "epoch": 28.340754628544644, "grad_norm": 2.4759933948516846, "learning_rate": 4.309229612005531e-06, "loss": 0.0306245, "memory(GiB)": 13.7, "step": 60465, "train_speed(iter/s)": 1.527531 }, { "acc": 0.98604164, "epoch": 28.34309819545348, "grad_norm": 0.004565975163131952, "learning_rate": 4.308461953960662e-06, "loss": 0.0482565, "memory(GiB)": 13.7, "step": 60470, "train_speed(iter/s)": 1.527531 }, { "acc": 0.97904758, "epoch": 28.345441762362316, "grad_norm": 1.372864007949829, "learning_rate": 4.307694312550116e-06, "loss": 0.05949246, "memory(GiB)": 13.7, "step": 60475, "train_speed(iter/s)": 1.527541 }, { "acc": 0.9833334, "epoch": 28.34778532927115, "grad_norm": 0.012851580046117306, "learning_rate": 4.306926687792343e-06, "loss": 0.04066827, "memory(GiB)": 13.7, "step": 60480, "train_speed(iter/s)": 1.527542 }, { "acc": 0.98195515, "epoch": 28.350128896179985, "grad_norm": 1.2325754165649414, "learning_rate": 4.306159079705792e-06, "loss": 0.07458768, "memory(GiB)": 13.7, "step": 60485, "train_speed(iter/s)": 1.527554 }, { "acc": 0.97392855, "epoch": 28.352472463088823, "grad_norm": 4.881041049957275, "learning_rate": 4.305391488308916e-06, "loss": 0.1258862, "memory(GiB)": 13.7, "step": 60490, "train_speed(iter/s)": 1.527557 }, { "acc": 0.98823862, "epoch": 28.354816029997657, "grad_norm": 3.981509208679199, "learning_rate": 4.304623913620166e-06, "loss": 0.03430483, "memory(GiB)": 13.7, "step": 60495, "train_speed(iter/s)": 1.527569 }, { "acc": 0.98556547, "epoch": 28.35715959690649, "grad_norm": 2.9299087524414062, "learning_rate": 4.30385635565799e-06, "loss": 0.03232748, "memory(GiB)": 13.7, "step": 60500, "train_speed(iter/s)": 1.527577 }, { "acc": 0.96821423, "epoch": 28.359503163815326, "grad_norm": 2.668083906173706, "learning_rate": 4.3030888144408415e-06, "loss": 0.06939554, "memory(GiB)": 13.7, "step": 60505, "train_speed(iter/s)": 1.527575 }, { "acc": 0.9895833, "epoch": 28.361846730724164, "grad_norm": 0.0035125466529279947, "learning_rate": 4.3023212899871634e-06, "loss": 0.0297079, "memory(GiB)": 13.7, "step": 60510, "train_speed(iter/s)": 1.527582 }, { "acc": 0.96725693, "epoch": 28.364190297632998, "grad_norm": 4.555363178253174, "learning_rate": 4.301553782315409e-06, "loss": 0.07421272, "memory(GiB)": 13.7, "step": 60515, "train_speed(iter/s)": 1.527591 }, { "acc": 0.97919645, "epoch": 28.366533864541832, "grad_norm": 4.569122791290283, "learning_rate": 4.300786291444025e-06, "loss": 0.04056087, "memory(GiB)": 13.7, "step": 60520, "train_speed(iter/s)": 1.527599 }, { "acc": 0.97583332, "epoch": 28.368877431450667, "grad_norm": 5.0360188484191895, "learning_rate": 4.30001881739146e-06, "loss": 0.05592961, "memory(GiB)": 13.7, "step": 60525, "train_speed(iter/s)": 1.527604 }, { "acc": 0.98185406, "epoch": 28.371220998359505, "grad_norm": 3.5180039405822754, "learning_rate": 4.299251360176161e-06, "loss": 0.03135539, "memory(GiB)": 13.7, "step": 60530, "train_speed(iter/s)": 1.527609 }, { "acc": 0.97562504, "epoch": 28.37356456526834, "grad_norm": 3.773167610168457, "learning_rate": 4.298483919816576e-06, "loss": 0.0614049, "memory(GiB)": 13.7, "step": 60535, "train_speed(iter/s)": 1.527611 }, { "acc": 0.97937508, "epoch": 28.375908132177173, "grad_norm": 3.6604111194610596, "learning_rate": 4.29771649633115e-06, "loss": 0.05075345, "memory(GiB)": 13.7, "step": 60540, "train_speed(iter/s)": 1.527617 }, { "acc": 0.99169016, "epoch": 28.378251699086007, "grad_norm": 1.1624372005462646, "learning_rate": 4.296949089738334e-06, "loss": 0.02730617, "memory(GiB)": 13.7, "step": 60545, "train_speed(iter/s)": 1.527618 }, { "acc": 0.98354502, "epoch": 28.380595265994845, "grad_norm": 2.5045716762542725, "learning_rate": 4.296181700056568e-06, "loss": 0.03894625, "memory(GiB)": 13.7, "step": 60550, "train_speed(iter/s)": 1.527616 }, { "acc": 0.9947916, "epoch": 28.38293883290368, "grad_norm": 3.298560619354248, "learning_rate": 4.295414327304301e-06, "loss": 0.01693648, "memory(GiB)": 13.7, "step": 60555, "train_speed(iter/s)": 1.527618 }, { "acc": 0.97979164, "epoch": 28.385282399812514, "grad_norm": 8.067317962646484, "learning_rate": 4.294646971499975e-06, "loss": 0.07945907, "memory(GiB)": 13.7, "step": 60560, "train_speed(iter/s)": 1.527623 }, { "acc": 0.97550602, "epoch": 28.387625966721348, "grad_norm": 8.327802658081055, "learning_rate": 4.293879632662039e-06, "loss": 0.08663806, "memory(GiB)": 13.7, "step": 60565, "train_speed(iter/s)": 1.52763 }, { "acc": 0.9889679, "epoch": 28.389969533630186, "grad_norm": 1.2192178964614868, "learning_rate": 4.293112310808935e-06, "loss": 0.02466237, "memory(GiB)": 13.7, "step": 60570, "train_speed(iter/s)": 1.527633 }, { "acc": 0.98571434, "epoch": 28.39231310053902, "grad_norm": 8.657090187072754, "learning_rate": 4.292345005959106e-06, "loss": 0.06388072, "memory(GiB)": 13.7, "step": 60575, "train_speed(iter/s)": 1.527634 }, { "acc": 0.9822916, "epoch": 28.394656667447855, "grad_norm": 4.172425746917725, "learning_rate": 4.291577718130998e-06, "loss": 0.0900623, "memory(GiB)": 13.7, "step": 60580, "train_speed(iter/s)": 1.527639 }, { "acc": 0.99020834, "epoch": 28.397000234356693, "grad_norm": 0.017510870471596718, "learning_rate": 4.290810447343053e-06, "loss": 0.02482012, "memory(GiB)": 13.7, "step": 60585, "train_speed(iter/s)": 1.527643 }, { "acc": 0.98868303, "epoch": 28.399343801265527, "grad_norm": 1.4374934434890747, "learning_rate": 4.2900431936137125e-06, "loss": 0.05193113, "memory(GiB)": 13.7, "step": 60590, "train_speed(iter/s)": 1.527644 }, { "acc": 0.97902775, "epoch": 28.40168736817436, "grad_norm": 5.055357933044434, "learning_rate": 4.289275956961419e-06, "loss": 0.05002878, "memory(GiB)": 13.7, "step": 60595, "train_speed(iter/s)": 1.527645 }, { "acc": 0.98916664, "epoch": 28.404030935083195, "grad_norm": 1.5104752779006958, "learning_rate": 4.288508737404615e-06, "loss": 0.03413448, "memory(GiB)": 13.7, "step": 60600, "train_speed(iter/s)": 1.527646 }, { "acc": 0.99229164, "epoch": 28.406374501992033, "grad_norm": 6.576184272766113, "learning_rate": 4.287741534961742e-06, "loss": 0.03848222, "memory(GiB)": 13.7, "step": 60605, "train_speed(iter/s)": 1.527652 }, { "acc": 0.97979527, "epoch": 28.408718068900868, "grad_norm": 2.4433252811431885, "learning_rate": 4.286974349651242e-06, "loss": 0.05288776, "memory(GiB)": 13.7, "step": 60610, "train_speed(iter/s)": 1.527657 }, { "acc": 0.98798609, "epoch": 28.411061635809702, "grad_norm": 2.8432772159576416, "learning_rate": 4.286207181491553e-06, "loss": 0.04624709, "memory(GiB)": 13.7, "step": 60615, "train_speed(iter/s)": 1.52766 }, { "acc": 0.97809525, "epoch": 28.413405202718536, "grad_norm": 4.014163017272949, "learning_rate": 4.285440030501118e-06, "loss": 0.05816555, "memory(GiB)": 13.7, "step": 60620, "train_speed(iter/s)": 1.527659 }, { "acc": 0.99375, "epoch": 28.415748769627374, "grad_norm": 1.7037330865859985, "learning_rate": 4.284672896698375e-06, "loss": 0.02814043, "memory(GiB)": 13.7, "step": 60625, "train_speed(iter/s)": 1.527658 }, { "acc": 0.98208332, "epoch": 28.41809233653621, "grad_norm": 0.026202665641903877, "learning_rate": 4.283905780101765e-06, "loss": 0.05636179, "memory(GiB)": 13.7, "step": 60630, "train_speed(iter/s)": 1.527663 }, { "acc": 0.99695644, "epoch": 28.420435903445043, "grad_norm": 1.8712835311889648, "learning_rate": 4.283138680729723e-06, "loss": 0.01398735, "memory(GiB)": 13.7, "step": 60635, "train_speed(iter/s)": 1.527664 }, { "acc": 0.98552084, "epoch": 28.422779470353877, "grad_norm": 1.7437889575958252, "learning_rate": 4.282371598600693e-06, "loss": 0.06735772, "memory(GiB)": 13.7, "step": 60640, "train_speed(iter/s)": 1.527673 }, { "acc": 0.96840277, "epoch": 28.425123037262715, "grad_norm": 1.4381130933761597, "learning_rate": 4.281604533733109e-06, "loss": 0.06087514, "memory(GiB)": 13.7, "step": 60645, "train_speed(iter/s)": 1.527678 }, { "acc": 0.98466339, "epoch": 28.42746660417155, "grad_norm": 2.0422208309173584, "learning_rate": 4.280837486145411e-06, "loss": 0.02778868, "memory(GiB)": 13.7, "step": 60650, "train_speed(iter/s)": 1.527684 }, { "acc": 0.98738098, "epoch": 28.429810171080383, "grad_norm": 2.146583080291748, "learning_rate": 4.280070455856036e-06, "loss": 0.0415411, "memory(GiB)": 13.7, "step": 60655, "train_speed(iter/s)": 1.527686 }, { "acc": 0.96803846, "epoch": 28.43215373798922, "grad_norm": 6.35309362411499, "learning_rate": 4.279303442883419e-06, "loss": 0.08476405, "memory(GiB)": 13.7, "step": 60660, "train_speed(iter/s)": 1.527692 }, { "acc": 0.9770834, "epoch": 28.434497304898056, "grad_norm": 0.001978076994419098, "learning_rate": 4.278536447245999e-06, "loss": 0.04391888, "memory(GiB)": 13.7, "step": 60665, "train_speed(iter/s)": 1.527696 }, { "acc": 0.98170385, "epoch": 28.43684087180689, "grad_norm": 0.054467640817165375, "learning_rate": 4.277769468962212e-06, "loss": 0.05142573, "memory(GiB)": 13.7, "step": 60670, "train_speed(iter/s)": 1.527699 }, { "acc": 0.99591351, "epoch": 28.439184438715724, "grad_norm": 2.2862844467163086, "learning_rate": 4.277002508050491e-06, "loss": 0.01598845, "memory(GiB)": 13.7, "step": 60675, "train_speed(iter/s)": 1.527705 }, { "acc": 0.9770833, "epoch": 28.441528005624562, "grad_norm": 2.4139230251312256, "learning_rate": 4.276235564529273e-06, "loss": 0.11661407, "memory(GiB)": 13.7, "step": 60680, "train_speed(iter/s)": 1.527707 }, { "acc": 0.98828373, "epoch": 28.443871572533396, "grad_norm": 3.8340437412261963, "learning_rate": 4.275468638416993e-06, "loss": 0.05752635, "memory(GiB)": 13.7, "step": 60685, "train_speed(iter/s)": 1.527713 }, { "acc": 0.99437504, "epoch": 28.44621513944223, "grad_norm": 0.8561998009681702, "learning_rate": 4.274701729732084e-06, "loss": 0.0228985, "memory(GiB)": 13.7, "step": 60690, "train_speed(iter/s)": 1.527715 }, { "acc": 0.98201637, "epoch": 28.448558706351065, "grad_norm": 9.217840194702148, "learning_rate": 4.273934838492983e-06, "loss": 0.04411571, "memory(GiB)": 13.7, "step": 60695, "train_speed(iter/s)": 1.527726 }, { "acc": 0.990625, "epoch": 28.450902273259903, "grad_norm": 3.606009006500244, "learning_rate": 4.27316796471812e-06, "loss": 0.02393645, "memory(GiB)": 13.7, "step": 60700, "train_speed(iter/s)": 1.527723 }, { "acc": 0.985322, "epoch": 28.453245840168737, "grad_norm": 1.9979350566864014, "learning_rate": 4.27240110842593e-06, "loss": 0.04432264, "memory(GiB)": 13.7, "step": 60705, "train_speed(iter/s)": 1.527721 }, { "acc": 0.990625, "epoch": 28.45558940707757, "grad_norm": 0.023642780259251595, "learning_rate": 4.271634269634846e-06, "loss": 0.03976145, "memory(GiB)": 13.7, "step": 60710, "train_speed(iter/s)": 1.527728 }, { "acc": 0.990625, "epoch": 28.457932973986406, "grad_norm": 8.534662246704102, "learning_rate": 4.2708674483633e-06, "loss": 0.03451948, "memory(GiB)": 13.7, "step": 60715, "train_speed(iter/s)": 1.527734 }, { "acc": 0.9822917, "epoch": 28.460276540895244, "grad_norm": 0.978191614151001, "learning_rate": 4.270100644629721e-06, "loss": 0.03981086, "memory(GiB)": 13.7, "step": 60720, "train_speed(iter/s)": 1.527736 }, { "acc": 0.975, "epoch": 28.462620107804078, "grad_norm": 5.013210773468018, "learning_rate": 4.269333858452545e-06, "loss": 0.07572421, "memory(GiB)": 13.7, "step": 60725, "train_speed(iter/s)": 1.527746 }, { "acc": 0.9895834, "epoch": 28.464963674712912, "grad_norm": 0.00816910807043314, "learning_rate": 4.2685670898502005e-06, "loss": 0.02143755, "memory(GiB)": 13.7, "step": 60730, "train_speed(iter/s)": 1.527747 }, { "acc": 0.99027777, "epoch": 28.467307241621747, "grad_norm": 1.5324851274490356, "learning_rate": 4.267800338841119e-06, "loss": 0.02481149, "memory(GiB)": 13.7, "step": 60735, "train_speed(iter/s)": 1.527752 }, { "acc": 0.97330046, "epoch": 28.469650808530584, "grad_norm": 0.005411928053945303, "learning_rate": 4.26703360544373e-06, "loss": 0.04587067, "memory(GiB)": 13.7, "step": 60740, "train_speed(iter/s)": 1.527755 }, { "acc": 0.98125, "epoch": 28.47199437543942, "grad_norm": 3.0141642093658447, "learning_rate": 4.266266889676462e-06, "loss": 0.05444025, "memory(GiB)": 13.7, "step": 60745, "train_speed(iter/s)": 1.527759 }, { "acc": 0.9958333, "epoch": 28.474337942348253, "grad_norm": 0.0697416141629219, "learning_rate": 4.265500191557747e-06, "loss": 0.01162678, "memory(GiB)": 13.7, "step": 60750, "train_speed(iter/s)": 1.527767 }, { "acc": 0.98386364, "epoch": 28.47668150925709, "grad_norm": 1.0833165645599365, "learning_rate": 4.264733511106015e-06, "loss": 0.02614209, "memory(GiB)": 13.7, "step": 60755, "train_speed(iter/s)": 1.527772 }, { "acc": 0.98166666, "epoch": 28.479025076165925, "grad_norm": 3.1562843322753906, "learning_rate": 4.26396684833969e-06, "loss": 0.0428123, "memory(GiB)": 13.7, "step": 60760, "train_speed(iter/s)": 1.527776 }, { "acc": 0.98173609, "epoch": 28.48136864307476, "grad_norm": 3.2806618213653564, "learning_rate": 4.263200203277201e-06, "loss": 0.0499703, "memory(GiB)": 13.7, "step": 60765, "train_speed(iter/s)": 1.527775 }, { "acc": 0.96279755, "epoch": 28.483712209983594, "grad_norm": 6.326317310333252, "learning_rate": 4.26243357593698e-06, "loss": 0.10011559, "memory(GiB)": 13.7, "step": 60770, "train_speed(iter/s)": 1.527773 }, { "acc": 0.99154758, "epoch": 28.48605577689243, "grad_norm": 6.248162269592285, "learning_rate": 4.261666966337449e-06, "loss": 0.04558766, "memory(GiB)": 13.7, "step": 60775, "train_speed(iter/s)": 1.527777 }, { "acc": 0.97529764, "epoch": 28.488399343801266, "grad_norm": 7.258309841156006, "learning_rate": 4.260900374497036e-06, "loss": 0.10073167, "memory(GiB)": 13.7, "step": 60780, "train_speed(iter/s)": 1.527784 }, { "acc": 0.98760414, "epoch": 28.4907429107101, "grad_norm": 1.6321091651916504, "learning_rate": 4.26013380043417e-06, "loss": 0.08765298, "memory(GiB)": 13.7, "step": 60785, "train_speed(iter/s)": 1.527786 }, { "acc": 0.98395834, "epoch": 28.493086477618935, "grad_norm": 1.8543617725372314, "learning_rate": 4.259367244167274e-06, "loss": 0.04613683, "memory(GiB)": 13.7, "step": 60790, "train_speed(iter/s)": 1.527797 }, { "acc": 0.98418102, "epoch": 28.495430044527772, "grad_norm": 5.316013336181641, "learning_rate": 4.258600705714777e-06, "loss": 0.04234857, "memory(GiB)": 13.7, "step": 60795, "train_speed(iter/s)": 1.527808 }, { "acc": 0.98145838, "epoch": 28.497773611436607, "grad_norm": 0.8797255158424377, "learning_rate": 4.2578341850951e-06, "loss": 0.06050975, "memory(GiB)": 13.7, "step": 60800, "train_speed(iter/s)": 1.527813 }, { "acc": 0.98543892, "epoch": 28.50011717834544, "grad_norm": 0.013994102366268635, "learning_rate": 4.257067682326669e-06, "loss": 0.06111345, "memory(GiB)": 13.7, "step": 60805, "train_speed(iter/s)": 1.527811 }, { "acc": 0.99296875, "epoch": 28.502460745254275, "grad_norm": 2.057518720626831, "learning_rate": 4.2563011974279086e-06, "loss": 0.03080408, "memory(GiB)": 13.7, "step": 60810, "train_speed(iter/s)": 1.527813 }, { "acc": 0.96427078, "epoch": 28.504804312163113, "grad_norm": 4.786558628082275, "learning_rate": 4.255534730417243e-06, "loss": 0.10163952, "memory(GiB)": 13.7, "step": 60815, "train_speed(iter/s)": 1.527809 }, { "acc": 0.98239584, "epoch": 28.507147879071947, "grad_norm": 6.2903947830200195, "learning_rate": 4.254768281313094e-06, "loss": 0.08707572, "memory(GiB)": 13.7, "step": 60820, "train_speed(iter/s)": 1.527816 }, { "acc": 0.9822917, "epoch": 28.509491445980782, "grad_norm": 4.758963584899902, "learning_rate": 4.254001850133886e-06, "loss": 0.03549478, "memory(GiB)": 13.7, "step": 60825, "train_speed(iter/s)": 1.527827 }, { "acc": 0.97765865, "epoch": 28.51183501288962, "grad_norm": 4.340928077697754, "learning_rate": 4.2532354368980425e-06, "loss": 0.07108803, "memory(GiB)": 13.7, "step": 60830, "train_speed(iter/s)": 1.527831 }, { "acc": 0.98505211, "epoch": 28.514178579798454, "grad_norm": 0.005561357829719782, "learning_rate": 4.252469041623982e-06, "loss": 0.04544645, "memory(GiB)": 13.7, "step": 60835, "train_speed(iter/s)": 1.52784 }, { "acc": 0.98071432, "epoch": 28.51652214670729, "grad_norm": 2.8440046310424805, "learning_rate": 4.251702664330131e-06, "loss": 0.06666004, "memory(GiB)": 13.7, "step": 60840, "train_speed(iter/s)": 1.527852 }, { "acc": 0.97946434, "epoch": 28.518865713616123, "grad_norm": 5.085383415222168, "learning_rate": 4.250936305034906e-06, "loss": 0.04096499, "memory(GiB)": 13.7, "step": 60845, "train_speed(iter/s)": 1.527856 }, { "acc": 0.98083334, "epoch": 28.52120928052496, "grad_norm": 3.828684091567993, "learning_rate": 4.25016996375673e-06, "loss": 0.07359434, "memory(GiB)": 13.7, "step": 60850, "train_speed(iter/s)": 1.527861 }, { "acc": 0.98217258, "epoch": 28.523552847433795, "grad_norm": 2.386619806289673, "learning_rate": 4.249403640514023e-06, "loss": 0.05847098, "memory(GiB)": 13.7, "step": 60855, "train_speed(iter/s)": 1.527868 }, { "acc": 0.98312492, "epoch": 28.52589641434263, "grad_norm": 3.935276746749878, "learning_rate": 4.2486373353252055e-06, "loss": 0.06793871, "memory(GiB)": 13.7, "step": 60860, "train_speed(iter/s)": 1.527875 }, { "acc": 0.98779774, "epoch": 28.528239981251463, "grad_norm": 2.7280516624450684, "learning_rate": 4.247871048208696e-06, "loss": 0.05403713, "memory(GiB)": 13.7, "step": 60865, "train_speed(iter/s)": 1.527881 }, { "acc": 0.98041668, "epoch": 28.5305835481603, "grad_norm": 4.222269058227539, "learning_rate": 4.247104779182913e-06, "loss": 0.07464982, "memory(GiB)": 13.7, "step": 60870, "train_speed(iter/s)": 1.52789 }, { "acc": 0.98571434, "epoch": 28.532927115069135, "grad_norm": 2.442638635635376, "learning_rate": 4.246338528266278e-06, "loss": 0.06644824, "memory(GiB)": 13.7, "step": 60875, "train_speed(iter/s)": 1.527897 }, { "acc": 0.97875004, "epoch": 28.53527068197797, "grad_norm": 1.4559932947158813, "learning_rate": 4.245572295477208e-06, "loss": 0.04697854, "memory(GiB)": 13.7, "step": 60880, "train_speed(iter/s)": 1.527901 }, { "acc": 0.98452387, "epoch": 28.537614248886804, "grad_norm": 6.015143871307373, "learning_rate": 4.244806080834117e-06, "loss": 0.0723832, "memory(GiB)": 13.7, "step": 60885, "train_speed(iter/s)": 1.527905 }, { "acc": 0.98708334, "epoch": 28.539957815795642, "grad_norm": 2.058314085006714, "learning_rate": 4.244039884355426e-06, "loss": 0.06108164, "memory(GiB)": 13.7, "step": 60890, "train_speed(iter/s)": 1.527905 }, { "acc": 0.971875, "epoch": 28.542301382704476, "grad_norm": 1.6478750705718994, "learning_rate": 4.2432737060595504e-06, "loss": 0.08243471, "memory(GiB)": 13.7, "step": 60895, "train_speed(iter/s)": 1.527909 }, { "acc": 0.97911701, "epoch": 28.54464494961331, "grad_norm": 1.5848840475082397, "learning_rate": 4.242507545964909e-06, "loss": 0.04340816, "memory(GiB)": 13.7, "step": 60900, "train_speed(iter/s)": 1.527909 }, { "acc": 0.95528851, "epoch": 28.54698851652215, "grad_norm": 10.057445526123047, "learning_rate": 4.2417414040899145e-06, "loss": 0.12935926, "memory(GiB)": 13.7, "step": 60905, "train_speed(iter/s)": 1.527908 }, { "acc": 0.98291664, "epoch": 28.549332083430983, "grad_norm": 1.5204331874847412, "learning_rate": 4.240975280452984e-06, "loss": 0.03396932, "memory(GiB)": 13.7, "step": 60910, "train_speed(iter/s)": 1.52791 }, { "acc": 0.97553024, "epoch": 28.551675650339817, "grad_norm": 0.7941609025001526, "learning_rate": 4.2402091750725336e-06, "loss": 0.05347077, "memory(GiB)": 13.7, "step": 60915, "train_speed(iter/s)": 1.527915 }, { "acc": 0.98842258, "epoch": 28.55401921724865, "grad_norm": 1.9571775197982788, "learning_rate": 4.239443087966976e-06, "loss": 0.04229421, "memory(GiB)": 13.7, "step": 60920, "train_speed(iter/s)": 1.527919 }, { "acc": 0.98320885, "epoch": 28.55636278415749, "grad_norm": 3.102071762084961, "learning_rate": 4.238677019154727e-06, "loss": 0.07449448, "memory(GiB)": 13.7, "step": 60925, "train_speed(iter/s)": 1.527931 }, { "acc": 0.9905303, "epoch": 28.558706351066323, "grad_norm": 1.669776201248169, "learning_rate": 4.2379109686542e-06, "loss": 0.03887664, "memory(GiB)": 13.7, "step": 60930, "train_speed(iter/s)": 1.527937 }, { "acc": 0.99125004, "epoch": 28.561049917975158, "grad_norm": 2.8058409690856934, "learning_rate": 4.237144936483807e-06, "loss": 0.03683998, "memory(GiB)": 13.7, "step": 60935, "train_speed(iter/s)": 1.527945 }, { "acc": 0.98907204, "epoch": 28.563393484883992, "grad_norm": 2.270273208618164, "learning_rate": 4.2363789226619615e-06, "loss": 0.05516254, "memory(GiB)": 13.7, "step": 60940, "train_speed(iter/s)": 1.52794 }, { "acc": 0.97562504, "epoch": 28.56573705179283, "grad_norm": 3.7661755084991455, "learning_rate": 4.235612927207077e-06, "loss": 0.08135021, "memory(GiB)": 13.7, "step": 60945, "train_speed(iter/s)": 1.527944 }, { "acc": 0.98994045, "epoch": 28.568080618701664, "grad_norm": 3.7848432064056396, "learning_rate": 4.234846950137566e-06, "loss": 0.02868289, "memory(GiB)": 13.7, "step": 60950, "train_speed(iter/s)": 1.52795 }, { "acc": 0.99333324, "epoch": 28.5704241856105, "grad_norm": 1.3772751092910767, "learning_rate": 4.234080991471836e-06, "loss": 0.04159532, "memory(GiB)": 13.7, "step": 60955, "train_speed(iter/s)": 1.527948 }, { "acc": 0.98425598, "epoch": 28.572767752519333, "grad_norm": 3.067702531814575, "learning_rate": 4.233315051228304e-06, "loss": 0.03955844, "memory(GiB)": 13.7, "step": 60960, "train_speed(iter/s)": 1.527952 }, { "acc": 0.98208332, "epoch": 28.57511131942817, "grad_norm": 8.65683364868164, "learning_rate": 4.232549129425378e-06, "loss": 0.06139548, "memory(GiB)": 13.7, "step": 60965, "train_speed(iter/s)": 1.527956 }, { "acc": 0.98051472, "epoch": 28.577454886337005, "grad_norm": 0.216273695230484, "learning_rate": 4.231783226081466e-06, "loss": 0.09043885, "memory(GiB)": 13.7, "step": 60970, "train_speed(iter/s)": 1.527958 }, { "acc": 0.98531246, "epoch": 28.57979845324584, "grad_norm": 2.8841707706451416, "learning_rate": 4.23101734121498e-06, "loss": 0.06981465, "memory(GiB)": 13.7, "step": 60975, "train_speed(iter/s)": 1.527957 }, { "acc": 0.98916664, "epoch": 28.582142020154677, "grad_norm": 0.8873396515846252, "learning_rate": 4.230251474844331e-06, "loss": 0.0215703, "memory(GiB)": 13.7, "step": 60980, "train_speed(iter/s)": 1.527954 }, { "acc": 0.97842255, "epoch": 28.58448558706351, "grad_norm": 0.8889298439025879, "learning_rate": 4.229485626987923e-06, "loss": 0.06668071, "memory(GiB)": 13.7, "step": 60985, "train_speed(iter/s)": 1.527949 }, { "acc": 0.98722725, "epoch": 28.586829153972346, "grad_norm": 1.2466742992401123, "learning_rate": 4.22871979766417e-06, "loss": 0.07231257, "memory(GiB)": 13.7, "step": 60990, "train_speed(iter/s)": 1.527952 }, { "acc": 0.97666664, "epoch": 28.58917272088118, "grad_norm": 0.004316363483667374, "learning_rate": 4.227953986891476e-06, "loss": 0.10531528, "memory(GiB)": 13.7, "step": 60995, "train_speed(iter/s)": 1.527953 }, { "acc": 0.97547626, "epoch": 28.591516287790018, "grad_norm": 3.4338009357452393, "learning_rate": 4.227188194688252e-06, "loss": 0.05376072, "memory(GiB)": 13.7, "step": 61000, "train_speed(iter/s)": 1.52795 }, { "acc": 0.97844696, "epoch": 28.593859854698852, "grad_norm": 2.352419376373291, "learning_rate": 4.226422421072903e-06, "loss": 0.09147392, "memory(GiB)": 13.7, "step": 61005, "train_speed(iter/s)": 1.527953 }, { "acc": 0.98978624, "epoch": 28.596203421607687, "grad_norm": 10.978004455566406, "learning_rate": 4.225656666063836e-06, "loss": 0.04286082, "memory(GiB)": 13.7, "step": 61010, "train_speed(iter/s)": 1.52796 }, { "acc": 0.99333324, "epoch": 28.59854698851652, "grad_norm": 0.2556511461734772, "learning_rate": 4.2248909296794555e-06, "loss": 0.02198419, "memory(GiB)": 13.7, "step": 61015, "train_speed(iter/s)": 1.527961 }, { "acc": 0.96416664, "epoch": 28.60089055542536, "grad_norm": 6.514791011810303, "learning_rate": 4.22412521193817e-06, "loss": 0.08577232, "memory(GiB)": 13.7, "step": 61020, "train_speed(iter/s)": 1.527969 }, { "acc": 0.99017859, "epoch": 28.603234122334193, "grad_norm": 2.3842384815216064, "learning_rate": 4.223359512858385e-06, "loss": 0.05545011, "memory(GiB)": 13.7, "step": 61025, "train_speed(iter/s)": 1.527973 }, { "acc": 0.98586311, "epoch": 28.605577689243027, "grad_norm": 4.24769401550293, "learning_rate": 4.222593832458501e-06, "loss": 0.03821703, "memory(GiB)": 13.7, "step": 61030, "train_speed(iter/s)": 1.527979 }, { "acc": 0.9927084, "epoch": 28.60792125615186, "grad_norm": 2.433391809463501, "learning_rate": 4.221828170756928e-06, "loss": 0.03683991, "memory(GiB)": 13.7, "step": 61035, "train_speed(iter/s)": 1.527985 }, { "acc": 0.99020834, "epoch": 28.6102648230607, "grad_norm": 2.0203094482421875, "learning_rate": 4.2210625277720665e-06, "loss": 0.06522475, "memory(GiB)": 13.7, "step": 61040, "train_speed(iter/s)": 1.527992 }, { "acc": 0.98647728, "epoch": 28.612608389969534, "grad_norm": 2.1633405685424805, "learning_rate": 4.220296903522322e-06, "loss": 0.06720333, "memory(GiB)": 13.7, "step": 61045, "train_speed(iter/s)": 1.527997 }, { "acc": 0.98083344, "epoch": 28.614951956878368, "grad_norm": 8.061161041259766, "learning_rate": 4.219531298026099e-06, "loss": 0.0450894, "memory(GiB)": 13.7, "step": 61050, "train_speed(iter/s)": 1.528007 }, { "acc": 0.98562498, "epoch": 28.617295523787202, "grad_norm": 4.322394371032715, "learning_rate": 4.218765711301795e-06, "loss": 0.04128088, "memory(GiB)": 13.7, "step": 61055, "train_speed(iter/s)": 1.528014 }, { "acc": 0.9825695, "epoch": 28.61963909069604, "grad_norm": 3.0656285285949707, "learning_rate": 4.218000143367815e-06, "loss": 0.05809847, "memory(GiB)": 13.7, "step": 61060, "train_speed(iter/s)": 1.528015 }, { "acc": 0.98500004, "epoch": 28.621982657604875, "grad_norm": 1.7436118125915527, "learning_rate": 4.217234594242562e-06, "loss": 0.03202448, "memory(GiB)": 13.7, "step": 61065, "train_speed(iter/s)": 1.528017 }, { "acc": 0.98291664, "epoch": 28.62432622451371, "grad_norm": 3.3059310913085938, "learning_rate": 4.216469063944435e-06, "loss": 0.05309364, "memory(GiB)": 13.7, "step": 61070, "train_speed(iter/s)": 1.528017 }, { "acc": 0.9902462, "epoch": 28.626669791422547, "grad_norm": 0.7962923049926758, "learning_rate": 4.215703552491837e-06, "loss": 0.05964435, "memory(GiB)": 13.7, "step": 61075, "train_speed(iter/s)": 1.528024 }, { "acc": 0.97986116, "epoch": 28.62901335833138, "grad_norm": 2.7289352416992188, "learning_rate": 4.214938059903168e-06, "loss": 0.05962136, "memory(GiB)": 13.7, "step": 61080, "train_speed(iter/s)": 1.528039 }, { "acc": 0.9869792, "epoch": 28.631356925240215, "grad_norm": 3.593108892440796, "learning_rate": 4.214172586196826e-06, "loss": 0.04441869, "memory(GiB)": 13.7, "step": 61085, "train_speed(iter/s)": 1.528042 }, { "acc": 0.97833328, "epoch": 28.63370049214905, "grad_norm": 3.7958290576934814, "learning_rate": 4.213407131391213e-06, "loss": 0.0389468, "memory(GiB)": 13.7, "step": 61090, "train_speed(iter/s)": 1.528042 }, { "acc": 0.98485222, "epoch": 28.636044059057888, "grad_norm": 6.091813564300537, "learning_rate": 4.212641695504725e-06, "loss": 0.05523661, "memory(GiB)": 13.7, "step": 61095, "train_speed(iter/s)": 1.528048 }, { "acc": 0.98666668, "epoch": 28.638387625966722, "grad_norm": 0.21470284461975098, "learning_rate": 4.211876278555763e-06, "loss": 0.04329676, "memory(GiB)": 13.7, "step": 61100, "train_speed(iter/s)": 1.528048 }, { "acc": 0.99562502, "epoch": 28.640731192875556, "grad_norm": 0.004132192116230726, "learning_rate": 4.2111108805627245e-06, "loss": 0.0357312, "memory(GiB)": 13.7, "step": 61105, "train_speed(iter/s)": 1.528051 }, { "acc": 0.98245544, "epoch": 28.64307475978439, "grad_norm": 3.208470106124878, "learning_rate": 4.210345501544008e-06, "loss": 0.0916995, "memory(GiB)": 13.7, "step": 61110, "train_speed(iter/s)": 1.528052 }, { "acc": 0.98979168, "epoch": 28.64541832669323, "grad_norm": 3.961595296859741, "learning_rate": 4.209580141518007e-06, "loss": 0.03832012, "memory(GiB)": 13.7, "step": 61115, "train_speed(iter/s)": 1.528046 }, { "acc": 0.99910717, "epoch": 28.647761893602063, "grad_norm": 1.1122947931289673, "learning_rate": 4.208814800503124e-06, "loss": 0.02498085, "memory(GiB)": 13.7, "step": 61120, "train_speed(iter/s)": 1.528041 }, { "acc": 0.98291664, "epoch": 28.650105460510897, "grad_norm": 9.352858543395996, "learning_rate": 4.208049478517751e-06, "loss": 0.04227175, "memory(GiB)": 13.7, "step": 61125, "train_speed(iter/s)": 1.528043 }, { "acc": 0.99375, "epoch": 28.65244902741973, "grad_norm": 0.7605379223823547, "learning_rate": 4.207284175580286e-06, "loss": 0.02618033, "memory(GiB)": 13.7, "step": 61130, "train_speed(iter/s)": 1.528046 }, { "acc": 0.99020834, "epoch": 28.65479259432857, "grad_norm": 3.18536114692688, "learning_rate": 4.206518891709123e-06, "loss": 0.01692821, "memory(GiB)": 13.7, "step": 61135, "train_speed(iter/s)": 1.52805 }, { "acc": 0.97354164, "epoch": 28.657136161237403, "grad_norm": 4.1269989013671875, "learning_rate": 4.2057536269226575e-06, "loss": 0.05454601, "memory(GiB)": 13.7, "step": 61140, "train_speed(iter/s)": 1.52805 }, { "acc": 0.98624992, "epoch": 28.659479728146238, "grad_norm": 0.5813647508621216, "learning_rate": 4.204988381239283e-06, "loss": 0.06819167, "memory(GiB)": 13.7, "step": 61145, "train_speed(iter/s)": 1.528054 }, { "acc": 0.97967262, "epoch": 28.661823295055076, "grad_norm": 7.02099609375, "learning_rate": 4.2042231546773945e-06, "loss": 0.05380579, "memory(GiB)": 13.7, "step": 61150, "train_speed(iter/s)": 1.528054 }, { "acc": 0.98572302, "epoch": 28.66416686196391, "grad_norm": 0.004975094925612211, "learning_rate": 4.203457947255387e-06, "loss": 0.02652147, "memory(GiB)": 13.7, "step": 61155, "train_speed(iter/s)": 1.528061 }, { "acc": 0.9708334, "epoch": 28.666510428872744, "grad_norm": 0.0077917203307151794, "learning_rate": 4.20269275899165e-06, "loss": 0.07171068, "memory(GiB)": 13.7, "step": 61160, "train_speed(iter/s)": 1.528059 }, { "acc": 0.9770833, "epoch": 28.66885399578158, "grad_norm": 5.834362030029297, "learning_rate": 4.201927589904579e-06, "loss": 0.07847685, "memory(GiB)": 13.7, "step": 61165, "train_speed(iter/s)": 1.528062 }, { "acc": 0.97442703, "epoch": 28.671197562690416, "grad_norm": 8.03335952758789, "learning_rate": 4.201162440012565e-06, "loss": 0.08225696, "memory(GiB)": 13.7, "step": 61170, "train_speed(iter/s)": 1.528066 }, { "acc": 0.97854166, "epoch": 28.67354112959925, "grad_norm": 2.475167989730835, "learning_rate": 4.200397309334001e-06, "loss": 0.04498241, "memory(GiB)": 13.7, "step": 61175, "train_speed(iter/s)": 1.528068 }, { "acc": 0.98666668, "epoch": 28.675884696508085, "grad_norm": 5.746376037597656, "learning_rate": 4.1996321978872765e-06, "loss": 0.05490848, "memory(GiB)": 13.7, "step": 61180, "train_speed(iter/s)": 1.528079 }, { "acc": 0.98210526, "epoch": 28.67822826341692, "grad_norm": 4.920472621917725, "learning_rate": 4.1988671056907835e-06, "loss": 0.06729014, "memory(GiB)": 13.7, "step": 61185, "train_speed(iter/s)": 1.52808 }, { "acc": 0.99508934, "epoch": 28.680571830325757, "grad_norm": 5.67137336730957, "learning_rate": 4.198102032762909e-06, "loss": 0.0321668, "memory(GiB)": 13.7, "step": 61190, "train_speed(iter/s)": 1.528086 }, { "acc": 0.97798615, "epoch": 28.68291539723459, "grad_norm": 0.9343663454055786, "learning_rate": 4.197336979122048e-06, "loss": 0.04376088, "memory(GiB)": 13.7, "step": 61195, "train_speed(iter/s)": 1.528096 }, { "acc": 0.98187504, "epoch": 28.685258964143426, "grad_norm": 4.084310054779053, "learning_rate": 4.1965719447865875e-06, "loss": 0.06237566, "memory(GiB)": 13.7, "step": 61200, "train_speed(iter/s)": 1.528097 }, { "acc": 0.9802084, "epoch": 28.68760253105226, "grad_norm": 4.456459045410156, "learning_rate": 4.195806929774915e-06, "loss": 0.09720259, "memory(GiB)": 13.7, "step": 61205, "train_speed(iter/s)": 1.528102 }, { "acc": 0.9885416, "epoch": 28.689946097961098, "grad_norm": 3.7084157466888428, "learning_rate": 4.195041934105421e-06, "loss": 0.03122545, "memory(GiB)": 13.7, "step": 61210, "train_speed(iter/s)": 1.528105 }, { "acc": 0.97930059, "epoch": 28.692289664869932, "grad_norm": 3.812962532043457, "learning_rate": 4.194276957796494e-06, "loss": 0.06552576, "memory(GiB)": 13.7, "step": 61215, "train_speed(iter/s)": 1.528111 }, { "acc": 0.98700848, "epoch": 28.694633231778766, "grad_norm": 3.4125816822052, "learning_rate": 4.193512000866518e-06, "loss": 0.05078988, "memory(GiB)": 13.7, "step": 61220, "train_speed(iter/s)": 1.528118 }, { "acc": 0.984375, "epoch": 28.6969767986876, "grad_norm": 1.822081208229065, "learning_rate": 4.192747063333884e-06, "loss": 0.05464041, "memory(GiB)": 13.7, "step": 61225, "train_speed(iter/s)": 1.528123 }, { "acc": 0.97621527, "epoch": 28.69932036559644, "grad_norm": 3.333178997039795, "learning_rate": 4.1919821452169766e-06, "loss": 0.07244196, "memory(GiB)": 13.7, "step": 61230, "train_speed(iter/s)": 1.528125 }, { "acc": 0.98250008, "epoch": 28.701663932505273, "grad_norm": 3.0551605224609375, "learning_rate": 4.19121724653418e-06, "loss": 0.03893837, "memory(GiB)": 13.7, "step": 61235, "train_speed(iter/s)": 1.528132 }, { "acc": 0.9905304, "epoch": 28.704007499414107, "grad_norm": 4.608488082885742, "learning_rate": 4.1904523673038846e-06, "loss": 0.03405313, "memory(GiB)": 13.7, "step": 61240, "train_speed(iter/s)": 1.528128 }, { "acc": 0.97729168, "epoch": 28.706351066322945, "grad_norm": 7.122734069824219, "learning_rate": 4.189687507544472e-06, "loss": 0.09908245, "memory(GiB)": 13.7, "step": 61245, "train_speed(iter/s)": 1.528125 }, { "acc": 0.9885417, "epoch": 28.70869463323178, "grad_norm": 3.4587409496307373, "learning_rate": 4.1889226672743286e-06, "loss": 0.03068529, "memory(GiB)": 13.7, "step": 61250, "train_speed(iter/s)": 1.528131 }, { "acc": 0.9791667, "epoch": 28.711038200140614, "grad_norm": 1.2317583560943604, "learning_rate": 4.188157846511838e-06, "loss": 0.0414308, "memory(GiB)": 13.7, "step": 61255, "train_speed(iter/s)": 1.528133 }, { "acc": 0.9831502, "epoch": 28.713381767049448, "grad_norm": 2.5881993770599365, "learning_rate": 4.187393045275386e-06, "loss": 0.0584583, "memory(GiB)": 13.7, "step": 61260, "train_speed(iter/s)": 1.528138 }, { "acc": 0.97624998, "epoch": 28.715725333958286, "grad_norm": 7.374365329742432, "learning_rate": 4.18662826358335e-06, "loss": 0.05325283, "memory(GiB)": 13.7, "step": 61265, "train_speed(iter/s)": 1.528139 }, { "acc": 0.97256947, "epoch": 28.71806890086712, "grad_norm": 2.9055862426757812, "learning_rate": 4.1858635014541194e-06, "loss": 0.06689835, "memory(GiB)": 13.7, "step": 61270, "train_speed(iter/s)": 1.528149 }, { "acc": 0.97974205, "epoch": 28.720412467775954, "grad_norm": 5.643508434295654, "learning_rate": 4.185098758906073e-06, "loss": 0.06654071, "memory(GiB)": 13.7, "step": 61275, "train_speed(iter/s)": 1.528154 }, { "acc": 0.99281254, "epoch": 28.72275603468479, "grad_norm": 0.7885032296180725, "learning_rate": 4.184334035957594e-06, "loss": 0.02633361, "memory(GiB)": 13.7, "step": 61280, "train_speed(iter/s)": 1.528162 }, { "acc": 0.98343754, "epoch": 28.725099601593627, "grad_norm": 2.544234275817871, "learning_rate": 4.183569332627063e-06, "loss": 0.05136719, "memory(GiB)": 13.7, "step": 61285, "train_speed(iter/s)": 1.528166 }, { "acc": 0.97104168, "epoch": 28.72744316850246, "grad_norm": 3.2531871795654297, "learning_rate": 4.182804648932861e-06, "loss": 0.05376984, "memory(GiB)": 13.7, "step": 61290, "train_speed(iter/s)": 1.528168 }, { "acc": 0.99385414, "epoch": 28.729786735411295, "grad_norm": 2.4866206645965576, "learning_rate": 4.182039984893369e-06, "loss": 0.04696214, "memory(GiB)": 13.7, "step": 61295, "train_speed(iter/s)": 1.528178 }, { "acc": 0.98719702, "epoch": 28.73213030232013, "grad_norm": 2.275178909301758, "learning_rate": 4.18127534052697e-06, "loss": 0.04718102, "memory(GiB)": 13.7, "step": 61300, "train_speed(iter/s)": 1.52818 }, { "acc": 0.98767853, "epoch": 28.734473869228967, "grad_norm": 4.734692096710205, "learning_rate": 4.180510715852037e-06, "loss": 0.06465194, "memory(GiB)": 13.7, "step": 61305, "train_speed(iter/s)": 1.528181 }, { "acc": 0.9895833, "epoch": 28.7368174361378, "grad_norm": 2.504897117614746, "learning_rate": 4.179746110886953e-06, "loss": 0.03437451, "memory(GiB)": 13.7, "step": 61310, "train_speed(iter/s)": 1.528182 }, { "acc": 0.98833332, "epoch": 28.739161003046636, "grad_norm": 7.9664530754089355, "learning_rate": 4.178981525650098e-06, "loss": 0.06950611, "memory(GiB)": 13.7, "step": 61315, "train_speed(iter/s)": 1.52818 }, { "acc": 0.9677083, "epoch": 28.741504569955474, "grad_norm": 3.7001254558563232, "learning_rate": 4.178216960159845e-06, "loss": 0.08697597, "memory(GiB)": 13.7, "step": 61320, "train_speed(iter/s)": 1.52818 }, { "acc": 0.98145828, "epoch": 28.743848136864308, "grad_norm": 3.9001922607421875, "learning_rate": 4.177452414434577e-06, "loss": 0.04962154, "memory(GiB)": 13.7, "step": 61325, "train_speed(iter/s)": 1.528178 }, { "acc": 0.9875, "epoch": 28.746191703773142, "grad_norm": 1.0502090454101562, "learning_rate": 4.176687888492669e-06, "loss": 0.03274479, "memory(GiB)": 13.7, "step": 61330, "train_speed(iter/s)": 1.52818 }, { "acc": 0.97929516, "epoch": 28.748535270681977, "grad_norm": 1.1957505941390991, "learning_rate": 4.175923382352497e-06, "loss": 0.09068199, "memory(GiB)": 13.7, "step": 61335, "train_speed(iter/s)": 1.528176 }, { "acc": 0.96853399, "epoch": 28.750878837590815, "grad_norm": 1.781700849533081, "learning_rate": 4.17515889603244e-06, "loss": 0.08203019, "memory(GiB)": 13.7, "step": 61340, "train_speed(iter/s)": 1.528176 }, { "acc": 0.98609848, "epoch": 28.75322240449965, "grad_norm": 0.7921129465103149, "learning_rate": 4.17439442955087e-06, "loss": 0.04782758, "memory(GiB)": 13.7, "step": 61345, "train_speed(iter/s)": 1.52818 }, { "acc": 0.99177084, "epoch": 28.755565971408483, "grad_norm": 0.91061931848526, "learning_rate": 4.1736299829261635e-06, "loss": 0.03298528, "memory(GiB)": 13.7, "step": 61350, "train_speed(iter/s)": 1.528185 }, { "acc": 0.97514877, "epoch": 28.757909538317318, "grad_norm": 3.249316692352295, "learning_rate": 4.1728655561766965e-06, "loss": 0.10542462, "memory(GiB)": 13.7, "step": 61355, "train_speed(iter/s)": 1.528188 }, { "acc": 0.99330359, "epoch": 28.760253105226155, "grad_norm": 1.928628921508789, "learning_rate": 4.172101149320843e-06, "loss": 0.03266722, "memory(GiB)": 13.7, "step": 61360, "train_speed(iter/s)": 1.528192 }, { "acc": 0.97592258, "epoch": 28.76259667213499, "grad_norm": 2.1026604175567627, "learning_rate": 4.171336762376974e-06, "loss": 0.09382112, "memory(GiB)": 13.7, "step": 61365, "train_speed(iter/s)": 1.528192 }, { "acc": 0.96733131, "epoch": 28.764940239043824, "grad_norm": 4.84781551361084, "learning_rate": 4.170572395363467e-06, "loss": 0.0693465, "memory(GiB)": 13.7, "step": 61370, "train_speed(iter/s)": 1.528195 }, { "acc": 0.99861107, "epoch": 28.76728380595266, "grad_norm": 0.010857660323381424, "learning_rate": 4.169808048298693e-06, "loss": 0.01941561, "memory(GiB)": 13.7, "step": 61375, "train_speed(iter/s)": 1.528196 }, { "acc": 0.98175602, "epoch": 28.769627372861496, "grad_norm": 3.1668081283569336, "learning_rate": 4.1690437212010245e-06, "loss": 0.03442557, "memory(GiB)": 13.7, "step": 61380, "train_speed(iter/s)": 1.528197 }, { "acc": 0.97842274, "epoch": 28.77197093977033, "grad_norm": 5.812234401702881, "learning_rate": 4.168279414088835e-06, "loss": 0.08750274, "memory(GiB)": 13.7, "step": 61385, "train_speed(iter/s)": 1.528197 }, { "acc": 0.98779764, "epoch": 28.774314506679165, "grad_norm": 2.449312448501587, "learning_rate": 4.167515126980493e-06, "loss": 0.07047125, "memory(GiB)": 13.7, "step": 61390, "train_speed(iter/s)": 1.528204 }, { "acc": 0.9833334, "epoch": 28.776658073588003, "grad_norm": 8.678976058959961, "learning_rate": 4.166750859894369e-06, "loss": 0.05046382, "memory(GiB)": 13.7, "step": 61395, "train_speed(iter/s)": 1.528202 }, { "acc": 0.9802084, "epoch": 28.779001640496837, "grad_norm": 0.008039749227464199, "learning_rate": 4.165986612848837e-06, "loss": 0.05743335, "memory(GiB)": 13.7, "step": 61400, "train_speed(iter/s)": 1.528203 }, { "acc": 0.98592262, "epoch": 28.78134520740567, "grad_norm": 4.506558418273926, "learning_rate": 4.165222385862265e-06, "loss": 0.04201285, "memory(GiB)": 13.7, "step": 61405, "train_speed(iter/s)": 1.528207 }, { "acc": 0.98819017, "epoch": 28.783688774314506, "grad_norm": 1.1254116296768188, "learning_rate": 4.1644581789530216e-06, "loss": 0.04562525, "memory(GiB)": 13.7, "step": 61410, "train_speed(iter/s)": 1.528209 }, { "acc": 0.98336306, "epoch": 28.786032341223343, "grad_norm": 1.8127057552337646, "learning_rate": 4.163693992139479e-06, "loss": 0.05234245, "memory(GiB)": 13.7, "step": 61415, "train_speed(iter/s)": 1.528207 }, { "acc": 0.97840061, "epoch": 28.788375908132178, "grad_norm": 1.4923527240753174, "learning_rate": 4.162929825440002e-06, "loss": 0.0641749, "memory(GiB)": 13.7, "step": 61420, "train_speed(iter/s)": 1.528213 }, { "acc": 0.9729166, "epoch": 28.790719475041012, "grad_norm": 5.619545936584473, "learning_rate": 4.162165678872961e-06, "loss": 0.05590523, "memory(GiB)": 13.7, "step": 61425, "train_speed(iter/s)": 1.528218 }, { "acc": 0.95722218, "epoch": 28.793063041949846, "grad_norm": 5.044849395751953, "learning_rate": 4.161401552456724e-06, "loss": 0.14446005, "memory(GiB)": 13.7, "step": 61430, "train_speed(iter/s)": 1.528218 }, { "acc": 0.99145832, "epoch": 28.795406608858684, "grad_norm": 1.8228226900100708, "learning_rate": 4.160637446209657e-06, "loss": 0.04618239, "memory(GiB)": 13.7, "step": 61435, "train_speed(iter/s)": 1.528227 }, { "acc": 0.9916667, "epoch": 28.79775017576752, "grad_norm": 0.0029175959061831236, "learning_rate": 4.159873360150124e-06, "loss": 0.01428027, "memory(GiB)": 13.7, "step": 61440, "train_speed(iter/s)": 1.528228 }, { "acc": 0.9885416, "epoch": 28.800093742676353, "grad_norm": 5.149816513061523, "learning_rate": 4.159109294296496e-06, "loss": 0.02850492, "memory(GiB)": 13.7, "step": 61445, "train_speed(iter/s)": 1.52823 }, { "acc": 0.9770834, "epoch": 28.802437309585187, "grad_norm": 4.545719623565674, "learning_rate": 4.158345248667136e-06, "loss": 0.05189308, "memory(GiB)": 13.7, "step": 61450, "train_speed(iter/s)": 1.528234 }, { "acc": 0.9926136, "epoch": 28.804780876494025, "grad_norm": 2.152177333831787, "learning_rate": 4.1575812232804085e-06, "loss": 0.03248691, "memory(GiB)": 13.7, "step": 61455, "train_speed(iter/s)": 1.528234 }, { "acc": 0.9765625, "epoch": 28.80712444340286, "grad_norm": 4.419712543487549, "learning_rate": 4.15681721815468e-06, "loss": 0.04306828, "memory(GiB)": 13.7, "step": 61460, "train_speed(iter/s)": 1.52824 }, { "acc": 0.99344692, "epoch": 28.809468010311694, "grad_norm": 1.5548017024993896, "learning_rate": 4.156053233308314e-06, "loss": 0.02142797, "memory(GiB)": 13.7, "step": 61465, "train_speed(iter/s)": 1.528242 }, { "acc": 0.96812496, "epoch": 28.81181157722053, "grad_norm": 4.520391941070557, "learning_rate": 4.155289268759675e-06, "loss": 0.09303795, "memory(GiB)": 13.7, "step": 61470, "train_speed(iter/s)": 1.528242 }, { "acc": 0.9885417, "epoch": 28.814155144129366, "grad_norm": 5.173193454742432, "learning_rate": 4.1545253245271244e-06, "loss": 0.03984652, "memory(GiB)": 13.7, "step": 61475, "train_speed(iter/s)": 1.52825 }, { "acc": 0.9833333, "epoch": 28.8164987110382, "grad_norm": 0.2559974789619446, "learning_rate": 4.153761400629026e-06, "loss": 0.02799692, "memory(GiB)": 13.7, "step": 61480, "train_speed(iter/s)": 1.528252 }, { "acc": 0.98394346, "epoch": 28.818842277947034, "grad_norm": 2.501437187194824, "learning_rate": 4.152997497083741e-06, "loss": 0.05002722, "memory(GiB)": 13.7, "step": 61485, "train_speed(iter/s)": 1.528251 }, { "acc": 0.990625, "epoch": 28.821185844855872, "grad_norm": 3.3543033599853516, "learning_rate": 4.152233613909632e-06, "loss": 0.02261926, "memory(GiB)": 13.7, "step": 61490, "train_speed(iter/s)": 1.528252 }, { "acc": 0.98363094, "epoch": 28.823529411764707, "grad_norm": 5.862417697906494, "learning_rate": 4.151469751125059e-06, "loss": 0.03761526, "memory(GiB)": 13.7, "step": 61495, "train_speed(iter/s)": 1.528261 }, { "acc": 0.990625, "epoch": 28.82587297867354, "grad_norm": 5.210756301879883, "learning_rate": 4.1507059087483855e-06, "loss": 0.02905482, "memory(GiB)": 13.7, "step": 61500, "train_speed(iter/s)": 1.528266 }, { "acc": 0.97205353, "epoch": 28.828216545582375, "grad_norm": 7.129364013671875, "learning_rate": 4.149942086797971e-06, "loss": 0.0779088, "memory(GiB)": 13.7, "step": 61505, "train_speed(iter/s)": 1.528269 }, { "acc": 0.97857141, "epoch": 28.830560112491213, "grad_norm": 2.527998447418213, "learning_rate": 4.1491782852921755e-06, "loss": 0.06370671, "memory(GiB)": 13.7, "step": 61510, "train_speed(iter/s)": 1.528272 }, { "acc": 0.978125, "epoch": 28.832903679400047, "grad_norm": 4.428157806396484, "learning_rate": 4.148414504249354e-06, "loss": 0.06150897, "memory(GiB)": 13.7, "step": 61515, "train_speed(iter/s)": 1.528278 }, { "acc": 0.97250004, "epoch": 28.83524724630888, "grad_norm": 5.764659881591797, "learning_rate": 4.147650743687869e-06, "loss": 0.11615663, "memory(GiB)": 13.7, "step": 61520, "train_speed(iter/s)": 1.528283 }, { "acc": 0.9854166, "epoch": 28.837590813217716, "grad_norm": 1.8039212226867676, "learning_rate": 4.146887003626079e-06, "loss": 0.02760243, "memory(GiB)": 13.7, "step": 61525, "train_speed(iter/s)": 1.528289 }, { "acc": 0.9927083, "epoch": 28.839934380126554, "grad_norm": 4.651853084564209, "learning_rate": 4.146123284082341e-06, "loss": 0.02871162, "memory(GiB)": 13.7, "step": 61530, "train_speed(iter/s)": 1.528287 }, { "acc": 0.9832387, "epoch": 28.842277947035388, "grad_norm": 2.5200304985046387, "learning_rate": 4.145359585075013e-06, "loss": 0.0502916, "memory(GiB)": 13.7, "step": 61535, "train_speed(iter/s)": 1.528281 }, { "acc": 0.98604164, "epoch": 28.844621513944222, "grad_norm": 1.4317513704299927, "learning_rate": 4.144595906622449e-06, "loss": 0.0462094, "memory(GiB)": 13.7, "step": 61540, "train_speed(iter/s)": 1.528279 }, { "acc": 0.97520828, "epoch": 28.846965080853057, "grad_norm": 8.73658275604248, "learning_rate": 4.14383224874301e-06, "loss": 0.08583086, "memory(GiB)": 13.7, "step": 61545, "train_speed(iter/s)": 1.528275 }, { "acc": 0.98465271, "epoch": 28.849308647761895, "grad_norm": 3.810575485229492, "learning_rate": 4.14306861145505e-06, "loss": 0.04075806, "memory(GiB)": 13.7, "step": 61550, "train_speed(iter/s)": 1.528279 }, { "acc": 0.98402786, "epoch": 28.85165221467073, "grad_norm": 6.769891738891602, "learning_rate": 4.142304994776923e-06, "loss": 0.03993125, "memory(GiB)": 13.7, "step": 61555, "train_speed(iter/s)": 1.528285 }, { "acc": 0.99187508, "epoch": 28.853995781579563, "grad_norm": 6.068891525268555, "learning_rate": 4.141541398726983e-06, "loss": 0.03861385, "memory(GiB)": 13.7, "step": 61560, "train_speed(iter/s)": 1.528294 }, { "acc": 0.98916664, "epoch": 28.8563393484884, "grad_norm": 3.0939736366271973, "learning_rate": 4.140777823323586e-06, "loss": 0.03541124, "memory(GiB)": 13.7, "step": 61565, "train_speed(iter/s)": 1.528297 }, { "acc": 0.98154755, "epoch": 28.858682915397235, "grad_norm": 1.5660078525543213, "learning_rate": 4.140014268585086e-06, "loss": 0.10183399, "memory(GiB)": 13.7, "step": 61570, "train_speed(iter/s)": 1.528305 }, { "acc": 0.98467264, "epoch": 28.86102648230607, "grad_norm": 1.7240346670150757, "learning_rate": 4.139250734529837e-06, "loss": 0.04379169, "memory(GiB)": 13.7, "step": 61575, "train_speed(iter/s)": 1.528312 }, { "acc": 0.97729168, "epoch": 28.863370049214904, "grad_norm": 0.004042159765958786, "learning_rate": 4.13848722117619e-06, "loss": 0.06002953, "memory(GiB)": 13.7, "step": 61580, "train_speed(iter/s)": 1.528312 }, { "acc": 0.96904755, "epoch": 28.86571361612374, "grad_norm": 4.775878429412842, "learning_rate": 4.137723728542498e-06, "loss": 0.0842859, "memory(GiB)": 13.7, "step": 61585, "train_speed(iter/s)": 1.528316 }, { "acc": 0.99437504, "epoch": 28.868057183032576, "grad_norm": 0.45137321949005127, "learning_rate": 4.136960256647114e-06, "loss": 0.02254966, "memory(GiB)": 13.7, "step": 61590, "train_speed(iter/s)": 1.528326 }, { "acc": 0.96657791, "epoch": 28.87040074994141, "grad_norm": 7.070040702819824, "learning_rate": 4.13619680550839e-06, "loss": 0.15189505, "memory(GiB)": 13.7, "step": 61595, "train_speed(iter/s)": 1.528328 }, { "acc": 0.98187494, "epoch": 28.872744316850245, "grad_norm": 2.1354799270629883, "learning_rate": 4.135433375144672e-06, "loss": 0.053765, "memory(GiB)": 13.7, "step": 61600, "train_speed(iter/s)": 1.528333 }, { "acc": 0.97989578, "epoch": 28.875087883759083, "grad_norm": 7.709225654602051, "learning_rate": 4.134669965574315e-06, "loss": 0.06624465, "memory(GiB)": 13.7, "step": 61605, "train_speed(iter/s)": 1.528333 }, { "acc": 0.98651514, "epoch": 28.877431450667917, "grad_norm": 0.022180132567882538, "learning_rate": 4.1339065768156695e-06, "loss": 0.03228479, "memory(GiB)": 13.7, "step": 61610, "train_speed(iter/s)": 1.528333 }, { "acc": 0.98466349, "epoch": 28.87977501757675, "grad_norm": 0.7956433892250061, "learning_rate": 4.13314320888708e-06, "loss": 0.0488497, "memory(GiB)": 13.7, "step": 61615, "train_speed(iter/s)": 1.528332 }, { "acc": 0.96520844, "epoch": 28.882118584485585, "grad_norm": 0.14025700092315674, "learning_rate": 4.1323798618069e-06, "loss": 0.05574952, "memory(GiB)": 13.7, "step": 61620, "train_speed(iter/s)": 1.528334 }, { "acc": 0.98363094, "epoch": 28.884462151394423, "grad_norm": 5.159083843231201, "learning_rate": 4.131616535593476e-06, "loss": 0.06015273, "memory(GiB)": 13.7, "step": 61625, "train_speed(iter/s)": 1.528338 }, { "acc": 0.97041664, "epoch": 28.886805718303258, "grad_norm": 7.538349628448486, "learning_rate": 4.130853230265154e-06, "loss": 0.08732548, "memory(GiB)": 13.7, "step": 61630, "train_speed(iter/s)": 1.528333 }, { "acc": 0.9958334, "epoch": 28.889149285212092, "grad_norm": 6.537330150604248, "learning_rate": 4.130089945840287e-06, "loss": 0.05518722, "memory(GiB)": 13.7, "step": 61635, "train_speed(iter/s)": 1.528342 }, { "acc": 0.98425598, "epoch": 28.89149285212093, "grad_norm": 2.583963394165039, "learning_rate": 4.129326682337216e-06, "loss": 0.05228008, "memory(GiB)": 13.7, "step": 61640, "train_speed(iter/s)": 1.528343 }, { "acc": 0.98182869, "epoch": 28.893836419029764, "grad_norm": 5.82536506652832, "learning_rate": 4.128563439774289e-06, "loss": 0.07032931, "memory(GiB)": 13.7, "step": 61645, "train_speed(iter/s)": 1.528347 }, { "acc": 0.9600893, "epoch": 28.8961799859386, "grad_norm": 4.827484607696533, "learning_rate": 4.127800218169853e-06, "loss": 0.10691559, "memory(GiB)": 13.7, "step": 61650, "train_speed(iter/s)": 1.528352 }, { "acc": 0.98500004, "epoch": 28.898523552847433, "grad_norm": 3.271904230117798, "learning_rate": 4.127037017542254e-06, "loss": 0.05465908, "memory(GiB)": 13.7, "step": 61655, "train_speed(iter/s)": 1.528352 }, { "acc": 0.97663689, "epoch": 28.90086711975627, "grad_norm": 0.07103651016950607, "learning_rate": 4.126273837909833e-06, "loss": 0.07859269, "memory(GiB)": 13.7, "step": 61660, "train_speed(iter/s)": 1.528353 }, { "acc": 0.97800598, "epoch": 28.903210686665105, "grad_norm": 1.2312443256378174, "learning_rate": 4.125510679290939e-06, "loss": 0.0348035, "memory(GiB)": 13.7, "step": 61665, "train_speed(iter/s)": 1.528355 }, { "acc": 0.9822917, "epoch": 28.90555425357394, "grad_norm": 1.0816373825073242, "learning_rate": 4.1247475417039115e-06, "loss": 0.03637656, "memory(GiB)": 13.7, "step": 61670, "train_speed(iter/s)": 1.52836 }, { "acc": 0.98447914, "epoch": 28.907897820482773, "grad_norm": 3.5102312564849854, "learning_rate": 4.123984425167098e-06, "loss": 0.05736828, "memory(GiB)": 13.7, "step": 61675, "train_speed(iter/s)": 1.528367 }, { "acc": 0.97833328, "epoch": 28.91024138739161, "grad_norm": 4.132007122039795, "learning_rate": 4.12322132969884e-06, "loss": 0.05103086, "memory(GiB)": 13.7, "step": 61680, "train_speed(iter/s)": 1.52837 }, { "acc": 0.97252979, "epoch": 28.912584954300446, "grad_norm": 3.937816858291626, "learning_rate": 4.122458255317478e-06, "loss": 0.10657709, "memory(GiB)": 13.7, "step": 61685, "train_speed(iter/s)": 1.528371 }, { "acc": 0.98458328, "epoch": 28.91492852120928, "grad_norm": 4.32977819442749, "learning_rate": 4.121695202041354e-06, "loss": 0.04114532, "memory(GiB)": 13.7, "step": 61690, "train_speed(iter/s)": 1.528374 }, { "acc": 0.98195515, "epoch": 28.917272088118114, "grad_norm": 5.779311180114746, "learning_rate": 4.120932169888811e-06, "loss": 0.06745574, "memory(GiB)": 13.7, "step": 61695, "train_speed(iter/s)": 1.528377 }, { "acc": 0.98268852, "epoch": 28.919615655026952, "grad_norm": 2.718313217163086, "learning_rate": 4.12016915887819e-06, "loss": 0.08316384, "memory(GiB)": 13.7, "step": 61700, "train_speed(iter/s)": 1.528377 }, { "acc": 0.98500004, "epoch": 28.921959221935786, "grad_norm": 3.1597907543182373, "learning_rate": 4.119406169027829e-06, "loss": 0.02993155, "memory(GiB)": 13.7, "step": 61705, "train_speed(iter/s)": 1.528387 }, { "acc": 0.9875, "epoch": 28.92430278884462, "grad_norm": 3.2726728916168213, "learning_rate": 4.11864320035607e-06, "loss": 0.01897784, "memory(GiB)": 13.7, "step": 61710, "train_speed(iter/s)": 1.528384 }, { "acc": 0.99296875, "epoch": 28.926646355753455, "grad_norm": 3.1303577423095703, "learning_rate": 4.11788025288125e-06, "loss": 0.04006368, "memory(GiB)": 13.7, "step": 61715, "train_speed(iter/s)": 1.528391 }, { "acc": 0.9895834, "epoch": 28.928989922662293, "grad_norm": 0.6808547973632812, "learning_rate": 4.117117326621711e-06, "loss": 0.02076156, "memory(GiB)": 13.7, "step": 61720, "train_speed(iter/s)": 1.528402 }, { "acc": 0.975, "epoch": 28.931333489571127, "grad_norm": 2.2817234992980957, "learning_rate": 4.116354421595788e-06, "loss": 0.04871403, "memory(GiB)": 13.7, "step": 61725, "train_speed(iter/s)": 1.528406 }, { "acc": 0.98230114, "epoch": 28.93367705647996, "grad_norm": 4.997848033905029, "learning_rate": 4.115591537821821e-06, "loss": 0.07615857, "memory(GiB)": 13.7, "step": 61730, "train_speed(iter/s)": 1.528411 }, { "acc": 0.97194443, "epoch": 28.9360206233888, "grad_norm": 6.384974002838135, "learning_rate": 4.114828675318145e-06, "loss": 0.09435633, "memory(GiB)": 13.7, "step": 61735, "train_speed(iter/s)": 1.528413 }, { "acc": 0.99548607, "epoch": 28.938364190297634, "grad_norm": 4.7903151512146, "learning_rate": 4.114065834103099e-06, "loss": 0.04116786, "memory(GiB)": 13.7, "step": 61740, "train_speed(iter/s)": 1.528424 }, { "acc": 0.9755209, "epoch": 28.940707757206468, "grad_norm": 1.995823860168457, "learning_rate": 4.113303014195017e-06, "loss": 0.05695888, "memory(GiB)": 13.7, "step": 61745, "train_speed(iter/s)": 1.528432 }, { "acc": 0.9805357, "epoch": 28.943051324115302, "grad_norm": 3.7982871532440186, "learning_rate": 4.112540215612237e-06, "loss": 0.04110478, "memory(GiB)": 13.7, "step": 61750, "train_speed(iter/s)": 1.528436 }, { "acc": 0.9822916, "epoch": 28.94539489102414, "grad_norm": 1.789639949798584, "learning_rate": 4.111777438373093e-06, "loss": 0.06431932, "memory(GiB)": 13.7, "step": 61755, "train_speed(iter/s)": 1.528443 }, { "acc": 0.9890625, "epoch": 28.947738457932974, "grad_norm": 0.00407444266602397, "learning_rate": 4.111014682495921e-06, "loss": 0.05370496, "memory(GiB)": 13.7, "step": 61760, "train_speed(iter/s)": 1.528444 }, { "acc": 0.97321434, "epoch": 28.95008202484181, "grad_norm": 2.3570942878723145, "learning_rate": 4.110251947999051e-06, "loss": 0.06099572, "memory(GiB)": 13.7, "step": 61765, "train_speed(iter/s)": 1.528447 }, { "acc": 0.96750002, "epoch": 28.952425591750643, "grad_norm": 3.2304184436798096, "learning_rate": 4.109489234900821e-06, "loss": 0.10093718, "memory(GiB)": 13.7, "step": 61770, "train_speed(iter/s)": 1.528453 }, { "acc": 0.9875, "epoch": 28.95476915865948, "grad_norm": 2.6191329956054688, "learning_rate": 4.1087265432195615e-06, "loss": 0.02565615, "memory(GiB)": 13.7, "step": 61775, "train_speed(iter/s)": 1.52846 }, { "acc": 0.971875, "epoch": 28.957112725568315, "grad_norm": 1.5784578323364258, "learning_rate": 4.107963872973607e-06, "loss": 0.10770516, "memory(GiB)": 13.7, "step": 61780, "train_speed(iter/s)": 1.528463 }, { "acc": 0.97874994, "epoch": 28.95945629247715, "grad_norm": 0.006982570979744196, "learning_rate": 4.10720122418129e-06, "loss": 0.04626696, "memory(GiB)": 13.7, "step": 61785, "train_speed(iter/s)": 1.528468 }, { "acc": 0.97763891, "epoch": 28.961799859385984, "grad_norm": 2.6433677673339844, "learning_rate": 4.106438596860938e-06, "loss": 0.06250473, "memory(GiB)": 13.7, "step": 61790, "train_speed(iter/s)": 1.52847 }, { "acc": 0.98343754, "epoch": 28.96414342629482, "grad_norm": 3.8200931549072266, "learning_rate": 4.105675991030887e-06, "loss": 0.03309399, "memory(GiB)": 13.7, "step": 61795, "train_speed(iter/s)": 1.528468 }, { "acc": 0.9731945, "epoch": 28.966486993203656, "grad_norm": 10.130743026733398, "learning_rate": 4.1049134067094655e-06, "loss": 0.05888505, "memory(GiB)": 13.7, "step": 61800, "train_speed(iter/s)": 1.52848 }, { "acc": 0.9669445, "epoch": 28.96883056011249, "grad_norm": 7.171637058258057, "learning_rate": 4.104150843915003e-06, "loss": 0.10211427, "memory(GiB)": 13.7, "step": 61805, "train_speed(iter/s)": 1.528482 }, { "acc": 0.98832798, "epoch": 28.971174127021328, "grad_norm": 1.4900883436203003, "learning_rate": 4.10338830266583e-06, "loss": 0.03101173, "memory(GiB)": 13.7, "step": 61810, "train_speed(iter/s)": 1.528485 }, { "acc": 0.98187504, "epoch": 28.973517693930162, "grad_norm": 7.925605297088623, "learning_rate": 4.102625782980275e-06, "loss": 0.08083073, "memory(GiB)": 13.7, "step": 61815, "train_speed(iter/s)": 1.52849 }, { "acc": 0.98696423, "epoch": 28.975861260838997, "grad_norm": 1.4001846313476562, "learning_rate": 4.101863284876665e-06, "loss": 0.05492101, "memory(GiB)": 13.7, "step": 61820, "train_speed(iter/s)": 1.528493 }, { "acc": 0.97767859, "epoch": 28.97820482774783, "grad_norm": 0.012933784164488316, "learning_rate": 4.1011008083733314e-06, "loss": 0.0731711, "memory(GiB)": 13.7, "step": 61825, "train_speed(iter/s)": 1.528496 }, { "acc": 0.95739584, "epoch": 28.98054839465667, "grad_norm": 3.598106622695923, "learning_rate": 4.1003383534885975e-06, "loss": 0.23072097, "memory(GiB)": 13.7, "step": 61830, "train_speed(iter/s)": 1.528501 }, { "acc": 0.98000002, "epoch": 28.982891961565503, "grad_norm": 3.97953200340271, "learning_rate": 4.099575920240793e-06, "loss": 0.07863228, "memory(GiB)": 13.7, "step": 61835, "train_speed(iter/s)": 1.528507 }, { "acc": 0.98291664, "epoch": 28.985235528474337, "grad_norm": 2.5574254989624023, "learning_rate": 4.098813508648244e-06, "loss": 0.02830554, "memory(GiB)": 13.7, "step": 61840, "train_speed(iter/s)": 1.528516 }, { "acc": 0.9885417, "epoch": 28.987579095383172, "grad_norm": 1.1771494150161743, "learning_rate": 4.098051118729277e-06, "loss": 0.03454694, "memory(GiB)": 13.7, "step": 61845, "train_speed(iter/s)": 1.528519 }, { "acc": 0.98946428, "epoch": 28.98992266229201, "grad_norm": 0.008802530355751514, "learning_rate": 4.097288750502215e-06, "loss": 0.03132175, "memory(GiB)": 13.7, "step": 61850, "train_speed(iter/s)": 1.52852 }, { "acc": 0.98208332, "epoch": 28.992266229200844, "grad_norm": 5.053147315979004, "learning_rate": 4.096526403985385e-06, "loss": 0.07374483, "memory(GiB)": 13.7, "step": 61855, "train_speed(iter/s)": 1.528526 }, { "acc": 0.98383923, "epoch": 28.99460979610968, "grad_norm": 1.7536760568618774, "learning_rate": 4.095764079197109e-06, "loss": 0.03889076, "memory(GiB)": 13.7, "step": 61860, "train_speed(iter/s)": 1.528534 }, { "acc": 0.97861605, "epoch": 28.996953363018513, "grad_norm": 2.0428266525268555, "learning_rate": 4.095001776155713e-06, "loss": 0.03394389, "memory(GiB)": 13.7, "step": 61865, "train_speed(iter/s)": 1.528543 }, { "acc": 0.97350874, "epoch": 28.99929692992735, "grad_norm": 155.5042724609375, "learning_rate": 4.09423949487952e-06, "loss": 0.14792433, "memory(GiB)": 13.7, "step": 61870, "train_speed(iter/s)": 1.528543 }, { "acc": 0.97518425, "epoch": 29.001640496836185, "grad_norm": 4.199252128601074, "learning_rate": 4.093477235386852e-06, "loss": 0.06656826, "memory(GiB)": 13.7, "step": 61875, "train_speed(iter/s)": 1.528514 }, { "acc": 0.9921875, "epoch": 29.00398406374502, "grad_norm": 2.8545565605163574, "learning_rate": 4.0927149976960306e-06, "loss": 0.0240884, "memory(GiB)": 13.7, "step": 61880, "train_speed(iter/s)": 1.528509 }, { "acc": 0.9875, "epoch": 29.006327630653857, "grad_norm": 3.654439687728882, "learning_rate": 4.09195278182538e-06, "loss": 0.04543467, "memory(GiB)": 13.7, "step": 61885, "train_speed(iter/s)": 1.528511 }, { "acc": 0.98560095, "epoch": 29.00867119756269, "grad_norm": 0.34724506735801697, "learning_rate": 4.09119058779322e-06, "loss": 0.04020482, "memory(GiB)": 13.7, "step": 61890, "train_speed(iter/s)": 1.528511 }, { "acc": 0.97993059, "epoch": 29.011014764471525, "grad_norm": 3.916308879852295, "learning_rate": 4.090428415617868e-06, "loss": 0.06234952, "memory(GiB)": 13.7, "step": 61895, "train_speed(iter/s)": 1.528513 }, { "acc": 0.9802084, "epoch": 29.01335833138036, "grad_norm": 6.926052570343018, "learning_rate": 4.089666265317649e-06, "loss": 0.04688549, "memory(GiB)": 13.7, "step": 61900, "train_speed(iter/s)": 1.528513 }, { "acc": 0.97937498, "epoch": 29.015701898289198, "grad_norm": 1.5856537818908691, "learning_rate": 4.088904136910881e-06, "loss": 0.05210909, "memory(GiB)": 13.7, "step": 61905, "train_speed(iter/s)": 1.528516 }, { "acc": 0.98458328, "epoch": 29.018045465198032, "grad_norm": 3.9748148918151855, "learning_rate": 4.088142030415882e-06, "loss": 0.03528934, "memory(GiB)": 13.7, "step": 61910, "train_speed(iter/s)": 1.528519 }, { "acc": 0.97505951, "epoch": 29.020389032106866, "grad_norm": 4.975277900695801, "learning_rate": 4.087379945850971e-06, "loss": 0.04975104, "memory(GiB)": 13.7, "step": 61915, "train_speed(iter/s)": 1.528527 }, { "acc": 0.97562494, "epoch": 29.0227325990157, "grad_norm": 3.4713642597198486, "learning_rate": 4.086617883234466e-06, "loss": 0.09349715, "memory(GiB)": 13.7, "step": 61920, "train_speed(iter/s)": 1.528526 }, { "acc": 0.97520828, "epoch": 29.02507616592454, "grad_norm": 1.659411907196045, "learning_rate": 4.085855842584686e-06, "loss": 0.06243123, "memory(GiB)": 13.7, "step": 61925, "train_speed(iter/s)": 1.528525 }, { "acc": 0.98217258, "epoch": 29.027419732833373, "grad_norm": 3.450949192047119, "learning_rate": 4.0850938239199475e-06, "loss": 0.04738778, "memory(GiB)": 13.7, "step": 61930, "train_speed(iter/s)": 1.528532 }, { "acc": 0.97696428, "epoch": 29.029763299742207, "grad_norm": 8.122170448303223, "learning_rate": 4.084331827258566e-06, "loss": 0.07514908, "memory(GiB)": 13.7, "step": 61935, "train_speed(iter/s)": 1.528539 }, { "acc": 0.99375, "epoch": 29.03210686665104, "grad_norm": 5.314596176147461, "learning_rate": 4.083569852618855e-06, "loss": 0.02650918, "memory(GiB)": 13.7, "step": 61940, "train_speed(iter/s)": 1.528542 }, { "acc": 0.98573856, "epoch": 29.03445043355988, "grad_norm": 4.618661403656006, "learning_rate": 4.082807900019134e-06, "loss": 0.05172325, "memory(GiB)": 13.7, "step": 61945, "train_speed(iter/s)": 1.528546 }, { "acc": 0.98341484, "epoch": 29.036794000468714, "grad_norm": 1.7070332765579224, "learning_rate": 4.082045969477715e-06, "loss": 0.05393671, "memory(GiB)": 13.7, "step": 61950, "train_speed(iter/s)": 1.528554 }, { "acc": 0.9822917, "epoch": 29.039137567377548, "grad_norm": 1.7167270183563232, "learning_rate": 4.081284061012915e-06, "loss": 0.03129579, "memory(GiB)": 13.7, "step": 61955, "train_speed(iter/s)": 1.528553 }, { "acc": 0.97362175, "epoch": 29.041481134286382, "grad_norm": 2.5747737884521484, "learning_rate": 4.080522174643046e-06, "loss": 0.07398659, "memory(GiB)": 13.7, "step": 61960, "train_speed(iter/s)": 1.528554 }, { "acc": 0.98812504, "epoch": 29.04382470119522, "grad_norm": 0.05249800533056259, "learning_rate": 4.07976031038642e-06, "loss": 0.0404635, "memory(GiB)": 13.7, "step": 61965, "train_speed(iter/s)": 1.528554 }, { "acc": 0.978125, "epoch": 29.046168268104054, "grad_norm": 4.129116058349609, "learning_rate": 4.078998468261354e-06, "loss": 0.04773643, "memory(GiB)": 13.7, "step": 61970, "train_speed(iter/s)": 1.528547 }, { "acc": 0.9927084, "epoch": 29.04851183501289, "grad_norm": 1.69423246383667, "learning_rate": 4.078236648286157e-06, "loss": 0.03307926, "memory(GiB)": 13.7, "step": 61975, "train_speed(iter/s)": 1.528553 }, { "acc": 0.9814683, "epoch": 29.050855401921726, "grad_norm": 4.307504177093506, "learning_rate": 4.07747485047914e-06, "loss": 0.05945753, "memory(GiB)": 13.7, "step": 61980, "train_speed(iter/s)": 1.528559 }, { "acc": 0.9729167, "epoch": 29.05319896883056, "grad_norm": 7.217517852783203, "learning_rate": 4.076713074858615e-06, "loss": 0.06481972, "memory(GiB)": 13.7, "step": 61985, "train_speed(iter/s)": 1.528561 }, { "acc": 0.98571424, "epoch": 29.055542535739395, "grad_norm": 2.4341845512390137, "learning_rate": 4.075951321442893e-06, "loss": 0.03837757, "memory(GiB)": 13.7, "step": 61990, "train_speed(iter/s)": 1.528565 }, { "acc": 0.96417618, "epoch": 29.05788610264823, "grad_norm": 6.810607433319092, "learning_rate": 4.075189590250284e-06, "loss": 0.06333706, "memory(GiB)": 13.7, "step": 61995, "train_speed(iter/s)": 1.52857 }, { "acc": 0.98171873, "epoch": 29.060229669557067, "grad_norm": 0.8719565272331238, "learning_rate": 4.074427881299098e-06, "loss": 0.05366527, "memory(GiB)": 13.7, "step": 62000, "train_speed(iter/s)": 1.528575 }, { "acc": 0.9854167, "epoch": 29.0625732364659, "grad_norm": 2.0242996215820312, "learning_rate": 4.073666194607642e-06, "loss": 0.03911253, "memory(GiB)": 13.7, "step": 62005, "train_speed(iter/s)": 1.528575 }, { "acc": 0.97875004, "epoch": 29.064916803374736, "grad_norm": 5.36610221862793, "learning_rate": 4.072904530194226e-06, "loss": 0.03620064, "memory(GiB)": 13.7, "step": 62010, "train_speed(iter/s)": 1.52858 }, { "acc": 0.991572, "epoch": 29.06726037028357, "grad_norm": 2.3363864421844482, "learning_rate": 4.07214288807716e-06, "loss": 0.03358667, "memory(GiB)": 13.7, "step": 62015, "train_speed(iter/s)": 1.52858 }, { "acc": 0.98928032, "epoch": 29.069603937192408, "grad_norm": 0.9753380417823792, "learning_rate": 4.071381268274748e-06, "loss": 0.03596662, "memory(GiB)": 13.7, "step": 62020, "train_speed(iter/s)": 1.528584 }, { "acc": 0.9791666, "epoch": 29.071947504101242, "grad_norm": 2.6426076889038086, "learning_rate": 4.070619670805296e-06, "loss": 0.03559788, "memory(GiB)": 13.7, "step": 62025, "train_speed(iter/s)": 1.528585 }, { "acc": 0.98715277, "epoch": 29.074291071010077, "grad_norm": 3.5610547065734863, "learning_rate": 4.069858095687114e-06, "loss": 0.05024736, "memory(GiB)": 13.7, "step": 62030, "train_speed(iter/s)": 1.528589 }, { "acc": 0.98472462, "epoch": 29.07663463791891, "grad_norm": 2.7602005004882812, "learning_rate": 4.069096542938506e-06, "loss": 0.04500811, "memory(GiB)": 13.7, "step": 62035, "train_speed(iter/s)": 1.528593 }, { "acc": 0.99642859, "epoch": 29.07897820482775, "grad_norm": 0.28524911403656006, "learning_rate": 4.068335012577776e-06, "loss": 0.01816952, "memory(GiB)": 13.7, "step": 62040, "train_speed(iter/s)": 1.528593 }, { "acc": 0.98495989, "epoch": 29.081321771736583, "grad_norm": 0.004143006168305874, "learning_rate": 4.067573504623231e-06, "loss": 0.06116576, "memory(GiB)": 13.7, "step": 62045, "train_speed(iter/s)": 1.528594 }, { "acc": 0.98640709, "epoch": 29.083665338645417, "grad_norm": 3.3302557468414307, "learning_rate": 4.066812019093174e-06, "loss": 0.05560007, "memory(GiB)": 13.7, "step": 62050, "train_speed(iter/s)": 1.528597 }, { "acc": 0.97240534, "epoch": 29.086008905554255, "grad_norm": 0.048228390514850616, "learning_rate": 4.066050556005909e-06, "loss": 0.14407723, "memory(GiB)": 13.7, "step": 62055, "train_speed(iter/s)": 1.528597 }, { "acc": 0.9775794, "epoch": 29.08835247246309, "grad_norm": 4.719757080078125, "learning_rate": 4.065289115379738e-06, "loss": 0.04481629, "memory(GiB)": 13.7, "step": 62060, "train_speed(iter/s)": 1.528602 }, { "acc": 0.98916664, "epoch": 29.090696039371924, "grad_norm": 3.554407835006714, "learning_rate": 4.064527697232964e-06, "loss": 0.02892127, "memory(GiB)": 13.7, "step": 62065, "train_speed(iter/s)": 1.528606 }, { "acc": 0.97041664, "epoch": 29.093039606280758, "grad_norm": 3.4461894035339355, "learning_rate": 4.063766301583889e-06, "loss": 0.04883618, "memory(GiB)": 13.7, "step": 62070, "train_speed(iter/s)": 1.528612 }, { "acc": 0.98041668, "epoch": 29.095383173189596, "grad_norm": 3.281848907470703, "learning_rate": 4.0630049284508155e-06, "loss": 0.03511052, "memory(GiB)": 13.7, "step": 62075, "train_speed(iter/s)": 1.528617 }, { "acc": 0.96655636, "epoch": 29.09772674009843, "grad_norm": 3.232537031173706, "learning_rate": 4.062243577852044e-06, "loss": 0.10852149, "memory(GiB)": 13.7, "step": 62080, "train_speed(iter/s)": 1.528627 }, { "acc": 0.98666668, "epoch": 29.100070307007265, "grad_norm": 13.176995277404785, "learning_rate": 4.0614822498058725e-06, "loss": 0.04224159, "memory(GiB)": 13.7, "step": 62085, "train_speed(iter/s)": 1.528629 }, { "acc": 0.9963542, "epoch": 29.1024138739161, "grad_norm": 0.02175663411617279, "learning_rate": 4.060720944330604e-06, "loss": 0.01301399, "memory(GiB)": 13.7, "step": 62090, "train_speed(iter/s)": 1.528635 }, { "acc": 0.98008928, "epoch": 29.104757440824937, "grad_norm": 3.0656535625457764, "learning_rate": 4.059959661444539e-06, "loss": 0.03390163, "memory(GiB)": 13.7, "step": 62095, "train_speed(iter/s)": 1.52864 }, { "acc": 0.97791672, "epoch": 29.10710100773377, "grad_norm": 4.896914482116699, "learning_rate": 4.059198401165971e-06, "loss": 0.05209436, "memory(GiB)": 13.7, "step": 62100, "train_speed(iter/s)": 1.52865 }, { "acc": 0.98592262, "epoch": 29.109444574642605, "grad_norm": 3.2721242904663086, "learning_rate": 4.058437163513201e-06, "loss": 0.02818061, "memory(GiB)": 13.7, "step": 62105, "train_speed(iter/s)": 1.52865 }, { "acc": 0.98625002, "epoch": 29.11178814155144, "grad_norm": 3.811800479888916, "learning_rate": 4.057675948504528e-06, "loss": 0.03999719, "memory(GiB)": 13.7, "step": 62110, "train_speed(iter/s)": 1.528647 }, { "acc": 0.99035797, "epoch": 29.114131708460278, "grad_norm": 3.9721462726593018, "learning_rate": 4.056914756158246e-06, "loss": 0.03250446, "memory(GiB)": 13.7, "step": 62115, "train_speed(iter/s)": 1.528647 }, { "acc": 0.97645836, "epoch": 29.116475275369112, "grad_norm": 3.0725648403167725, "learning_rate": 4.056153586492655e-06, "loss": 0.07132326, "memory(GiB)": 13.7, "step": 62120, "train_speed(iter/s)": 1.52865 }, { "acc": 0.98184528, "epoch": 29.118818842277946, "grad_norm": 5.2762017250061035, "learning_rate": 4.055392439526049e-06, "loss": 0.04735016, "memory(GiB)": 13.7, "step": 62125, "train_speed(iter/s)": 1.528663 }, { "acc": 0.98631945, "epoch": 29.121162409186784, "grad_norm": 5.287549018859863, "learning_rate": 4.054631315276723e-06, "loss": 0.08476692, "memory(GiB)": 13.7, "step": 62130, "train_speed(iter/s)": 1.52867 }, { "acc": 0.98999996, "epoch": 29.12350597609562, "grad_norm": 2.234729051589966, "learning_rate": 4.053870213762974e-06, "loss": 0.05280157, "memory(GiB)": 13.7, "step": 62135, "train_speed(iter/s)": 1.528673 }, { "acc": 0.98968754, "epoch": 29.125849543004453, "grad_norm": 3.0808374881744385, "learning_rate": 4.053109135003095e-06, "loss": 0.02854851, "memory(GiB)": 13.7, "step": 62140, "train_speed(iter/s)": 1.528671 }, { "acc": 0.98529758, "epoch": 29.128193109913287, "grad_norm": 4.139535903930664, "learning_rate": 4.052348079015379e-06, "loss": 0.06797344, "memory(GiB)": 13.7, "step": 62145, "train_speed(iter/s)": 1.528677 }, { "acc": 0.98244047, "epoch": 29.130536676822125, "grad_norm": 0.005188394337892532, "learning_rate": 4.051587045818121e-06, "loss": 0.08946666, "memory(GiB)": 13.7, "step": 62150, "train_speed(iter/s)": 1.52868 }, { "acc": 0.96875, "epoch": 29.13288024373096, "grad_norm": 2.07535719871521, "learning_rate": 4.050826035429613e-06, "loss": 0.08010596, "memory(GiB)": 13.7, "step": 62155, "train_speed(iter/s)": 1.528683 }, { "acc": 0.978125, "epoch": 29.135223810639793, "grad_norm": 4.8176116943359375, "learning_rate": 4.0500650478681464e-06, "loss": 0.04688571, "memory(GiB)": 13.7, "step": 62160, "train_speed(iter/s)": 1.528693 }, { "acc": 0.99613094, "epoch": 29.137567377548628, "grad_norm": 2.0009851455688477, "learning_rate": 4.0493040831520145e-06, "loss": 0.01754437, "memory(GiB)": 13.7, "step": 62165, "train_speed(iter/s)": 1.528696 }, { "acc": 0.9822916, "epoch": 29.139910944457466, "grad_norm": 2.215339422225952, "learning_rate": 4.048543141299506e-06, "loss": 0.04806231, "memory(GiB)": 13.7, "step": 62170, "train_speed(iter/s)": 1.528706 }, { "acc": 0.98424129, "epoch": 29.1422545113663, "grad_norm": 2.8368754386901855, "learning_rate": 4.0477822223289144e-06, "loss": 0.04910687, "memory(GiB)": 13.7, "step": 62175, "train_speed(iter/s)": 1.52872 }, { "acc": 0.98285255, "epoch": 29.144598078275134, "grad_norm": 0.004125003702938557, "learning_rate": 4.047021326258529e-06, "loss": 0.04479101, "memory(GiB)": 13.7, "step": 62180, "train_speed(iter/s)": 1.528725 }, { "acc": 0.9864584, "epoch": 29.14694164518397, "grad_norm": 5.112029552459717, "learning_rate": 4.046260453106638e-06, "loss": 0.05093067, "memory(GiB)": 13.7, "step": 62185, "train_speed(iter/s)": 1.528721 }, { "acc": 0.97193651, "epoch": 29.149285212092806, "grad_norm": 4.2767181396484375, "learning_rate": 4.04549960289153e-06, "loss": 0.08651553, "memory(GiB)": 13.7, "step": 62190, "train_speed(iter/s)": 1.528721 }, { "acc": 1.0, "epoch": 29.15162877900164, "grad_norm": 4.0532002449035645, "learning_rate": 4.044738775631494e-06, "loss": 0.01820889, "memory(GiB)": 13.7, "step": 62195, "train_speed(iter/s)": 1.528732 }, { "acc": 0.9802083, "epoch": 29.153972345910475, "grad_norm": 3.4533348083496094, "learning_rate": 4.043977971344817e-06, "loss": 0.05143828, "memory(GiB)": 13.7, "step": 62200, "train_speed(iter/s)": 1.528735 }, { "acc": 0.98187504, "epoch": 29.15631591281931, "grad_norm": 1.1272096633911133, "learning_rate": 4.04321719004979e-06, "loss": 0.06230417, "memory(GiB)": 13.7, "step": 62205, "train_speed(iter/s)": 1.528739 }, { "acc": 0.98277779, "epoch": 29.158659479728147, "grad_norm": 4.759952545166016, "learning_rate": 4.0424564317646944e-06, "loss": 0.07204303, "memory(GiB)": 13.7, "step": 62210, "train_speed(iter/s)": 1.528742 }, { "acc": 0.984375, "epoch": 29.16100304663698, "grad_norm": 4.01411771774292, "learning_rate": 4.041695696507819e-06, "loss": 0.04805138, "memory(GiB)": 13.7, "step": 62215, "train_speed(iter/s)": 1.528748 }, { "acc": 0.98187504, "epoch": 29.163346613545816, "grad_norm": 4.071815013885498, "learning_rate": 4.040934984297451e-06, "loss": 0.06549032, "memory(GiB)": 13.7, "step": 62220, "train_speed(iter/s)": 1.52874 }, { "acc": 0.97777777, "epoch": 29.165690180454654, "grad_norm": 3.4971518516540527, "learning_rate": 4.040174295151873e-06, "loss": 0.04669396, "memory(GiB)": 13.7, "step": 62225, "train_speed(iter/s)": 1.52874 }, { "acc": 0.9864584, "epoch": 29.168033747363488, "grad_norm": 0.0029190664645284414, "learning_rate": 4.03941362908937e-06, "loss": 0.03179207, "memory(GiB)": 13.7, "step": 62230, "train_speed(iter/s)": 1.528739 }, { "acc": 0.98653851, "epoch": 29.170377314272322, "grad_norm": 0.014410470612347126, "learning_rate": 4.038652986128225e-06, "loss": 0.02933144, "memory(GiB)": 13.7, "step": 62235, "train_speed(iter/s)": 1.528735 }, { "acc": 0.97383938, "epoch": 29.172720881181156, "grad_norm": 2.6469345092773438, "learning_rate": 4.037892366286723e-06, "loss": 0.07385792, "memory(GiB)": 13.7, "step": 62240, "train_speed(iter/s)": 1.528733 }, { "acc": 0.98175602, "epoch": 29.175064448089994, "grad_norm": 1.0016270875930786, "learning_rate": 4.037131769583144e-06, "loss": 0.06795453, "memory(GiB)": 13.7, "step": 62245, "train_speed(iter/s)": 1.52873 }, { "acc": 0.98212118, "epoch": 29.17740801499883, "grad_norm": 3.727372884750366, "learning_rate": 4.0363711960357746e-06, "loss": 0.04624384, "memory(GiB)": 13.7, "step": 62250, "train_speed(iter/s)": 1.52873 }, { "acc": 0.97559528, "epoch": 29.179751581907663, "grad_norm": 4.506847381591797, "learning_rate": 4.035610645662894e-06, "loss": 0.07999394, "memory(GiB)": 13.7, "step": 62255, "train_speed(iter/s)": 1.528735 }, { "acc": 0.97645292, "epoch": 29.182095148816497, "grad_norm": 2.811094284057617, "learning_rate": 4.034850118482782e-06, "loss": 0.06111823, "memory(GiB)": 13.7, "step": 62260, "train_speed(iter/s)": 1.528738 }, { "acc": 0.99020834, "epoch": 29.184438715725335, "grad_norm": 2.7747678756713867, "learning_rate": 4.034089614513724e-06, "loss": 0.02144014, "memory(GiB)": 13.7, "step": 62265, "train_speed(iter/s)": 1.528744 }, { "acc": 0.98425598, "epoch": 29.18678228263417, "grad_norm": 4.450559616088867, "learning_rate": 4.033329133773995e-06, "loss": 0.04875786, "memory(GiB)": 13.7, "step": 62270, "train_speed(iter/s)": 1.528744 }, { "acc": 0.98312502, "epoch": 29.189125849543004, "grad_norm": 3.4707982540130615, "learning_rate": 4.032568676281875e-06, "loss": 0.05636576, "memory(GiB)": 13.7, "step": 62275, "train_speed(iter/s)": 1.528748 }, { "acc": 0.9822917, "epoch": 29.191469416451838, "grad_norm": 6.459903240203857, "learning_rate": 4.031808242055645e-06, "loss": 0.03084131, "memory(GiB)": 13.7, "step": 62280, "train_speed(iter/s)": 1.528743 }, { "acc": 0.98812504, "epoch": 29.193812983360676, "grad_norm": 0.008842267096042633, "learning_rate": 4.031047831113583e-06, "loss": 0.04373284, "memory(GiB)": 13.7, "step": 62285, "train_speed(iter/s)": 1.528744 }, { "acc": 0.98395834, "epoch": 29.19615655026951, "grad_norm": 7.0474724769592285, "learning_rate": 4.030287443473965e-06, "loss": 0.04350665, "memory(GiB)": 13.7, "step": 62290, "train_speed(iter/s)": 1.528744 }, { "acc": 0.9869791, "epoch": 29.198500117178344, "grad_norm": 3.060476303100586, "learning_rate": 4.029527079155071e-06, "loss": 0.02942095, "memory(GiB)": 13.7, "step": 62295, "train_speed(iter/s)": 1.528743 }, { "acc": 0.99611111, "epoch": 29.200843684087182, "grad_norm": 0.41366466879844666, "learning_rate": 4.0287667381751754e-06, "loss": 0.0137951, "memory(GiB)": 13.7, "step": 62300, "train_speed(iter/s)": 1.528747 }, { "acc": 0.99624996, "epoch": 29.203187250996017, "grad_norm": 2.8451106548309326, "learning_rate": 4.028006420552556e-06, "loss": 0.03420904, "memory(GiB)": 13.7, "step": 62305, "train_speed(iter/s)": 1.528752 }, { "acc": 0.98073864, "epoch": 29.20553081790485, "grad_norm": 1.2641817331314087, "learning_rate": 4.027246126305487e-06, "loss": 0.03820505, "memory(GiB)": 13.7, "step": 62310, "train_speed(iter/s)": 1.528756 }, { "acc": 0.990625, "epoch": 29.207874384813685, "grad_norm": 0.6310035586357117, "learning_rate": 4.026485855452243e-06, "loss": 0.03552909, "memory(GiB)": 13.7, "step": 62315, "train_speed(iter/s)": 1.528753 }, { "acc": 0.99196434, "epoch": 29.210217951722523, "grad_norm": 3.3596537113189697, "learning_rate": 4.025725608011099e-06, "loss": 0.02732444, "memory(GiB)": 13.7, "step": 62320, "train_speed(iter/s)": 1.528758 }, { "acc": 0.996875, "epoch": 29.212561518631357, "grad_norm": 2.1010072231292725, "learning_rate": 4.024965384000329e-06, "loss": 0.01888077, "memory(GiB)": 13.7, "step": 62325, "train_speed(iter/s)": 1.528769 }, { "acc": 0.9875, "epoch": 29.21490508554019, "grad_norm": 4.963110446929932, "learning_rate": 4.024205183438207e-06, "loss": 0.03545063, "memory(GiB)": 13.7, "step": 62330, "train_speed(iter/s)": 1.528776 }, { "acc": 0.9802083, "epoch": 29.217248652449026, "grad_norm": 4.209277629852295, "learning_rate": 4.023445006343003e-06, "loss": 0.09511613, "memory(GiB)": 13.7, "step": 62335, "train_speed(iter/s)": 1.528774 }, { "acc": 0.9791667, "epoch": 29.219592219357864, "grad_norm": 3.915576696395874, "learning_rate": 4.022684852732993e-06, "loss": 0.04037749, "memory(GiB)": 13.7, "step": 62340, "train_speed(iter/s)": 1.528781 }, { "acc": 0.98285255, "epoch": 29.221935786266698, "grad_norm": 5.5556511878967285, "learning_rate": 4.021924722626446e-06, "loss": 0.05962, "memory(GiB)": 13.7, "step": 62345, "train_speed(iter/s)": 1.528782 }, { "acc": 0.97619057, "epoch": 29.224279353175532, "grad_norm": 0.4307084083557129, "learning_rate": 4.021164616041636e-06, "loss": 0.04423613, "memory(GiB)": 13.7, "step": 62350, "train_speed(iter/s)": 1.528787 }, { "acc": 0.98249998, "epoch": 29.226622920084367, "grad_norm": 2.8145389556884766, "learning_rate": 4.0204045329968295e-06, "loss": 0.0360948, "memory(GiB)": 13.7, "step": 62355, "train_speed(iter/s)": 1.528793 }, { "acc": 0.97666664, "epoch": 29.228966486993205, "grad_norm": 4.361587047576904, "learning_rate": 4.019644473510299e-06, "loss": 0.07466089, "memory(GiB)": 13.7, "step": 62360, "train_speed(iter/s)": 1.528793 }, { "acc": 0.98208332, "epoch": 29.23131005390204, "grad_norm": 5.119802951812744, "learning_rate": 4.018884437600311e-06, "loss": 0.05673516, "memory(GiB)": 13.7, "step": 62365, "train_speed(iter/s)": 1.528801 }, { "acc": 0.99821434, "epoch": 29.233653620810873, "grad_norm": 3.1155927181243896, "learning_rate": 4.018124425285138e-06, "loss": 0.01600014, "memory(GiB)": 13.7, "step": 62370, "train_speed(iter/s)": 1.528809 }, { "acc": 0.97488098, "epoch": 29.23599718771971, "grad_norm": 0.0020097780507057905, "learning_rate": 4.017364436583045e-06, "loss": 0.06282662, "memory(GiB)": 13.7, "step": 62375, "train_speed(iter/s)": 1.528816 }, { "acc": 0.98611107, "epoch": 29.238340754628545, "grad_norm": 3.75022554397583, "learning_rate": 4.016604471512303e-06, "loss": 0.06453838, "memory(GiB)": 13.7, "step": 62380, "train_speed(iter/s)": 1.528829 }, { "acc": 0.99008923, "epoch": 29.24068432153738, "grad_norm": 2.3992209434509277, "learning_rate": 4.0158445300911764e-06, "loss": 0.02597504, "memory(GiB)": 13.7, "step": 62385, "train_speed(iter/s)": 1.528831 }, { "acc": 0.9635417, "epoch": 29.243027888446214, "grad_norm": 3.63615345954895, "learning_rate": 4.0150846123379335e-06, "loss": 0.08963194, "memory(GiB)": 13.7, "step": 62390, "train_speed(iter/s)": 1.52884 }, { "acc": 0.990625, "epoch": 29.245371455355052, "grad_norm": 5.687416076660156, "learning_rate": 4.014324718270837e-06, "loss": 0.03215846, "memory(GiB)": 13.7, "step": 62395, "train_speed(iter/s)": 1.528843 }, { "acc": 0.98416672, "epoch": 29.247715022263886, "grad_norm": 4.014645576477051, "learning_rate": 4.013564847908156e-06, "loss": 0.03157979, "memory(GiB)": 13.7, "step": 62400, "train_speed(iter/s)": 1.528847 }, { "acc": 0.98728628, "epoch": 29.25005858917272, "grad_norm": 2.343766689300537, "learning_rate": 4.0128050012681534e-06, "loss": 0.06308507, "memory(GiB)": 13.7, "step": 62405, "train_speed(iter/s)": 1.52885 }, { "acc": 0.98361111, "epoch": 29.252402156081555, "grad_norm": 2.367292881011963, "learning_rate": 4.012045178369093e-06, "loss": 0.07362466, "memory(GiB)": 13.7, "step": 62410, "train_speed(iter/s)": 1.528855 }, { "acc": 0.97217264, "epoch": 29.254745722990393, "grad_norm": 1.9826319217681885, "learning_rate": 4.01128537922924e-06, "loss": 0.05201442, "memory(GiB)": 13.7, "step": 62415, "train_speed(iter/s)": 1.528851 }, { "acc": 0.99302082, "epoch": 29.257089289899227, "grad_norm": 0.008297180756926537, "learning_rate": 4.010525603866855e-06, "loss": 0.0393516, "memory(GiB)": 13.7, "step": 62420, "train_speed(iter/s)": 1.528845 }, { "acc": 0.97124996, "epoch": 29.25943285680806, "grad_norm": 0.9171876907348633, "learning_rate": 4.009765852300204e-06, "loss": 0.09438201, "memory(GiB)": 13.7, "step": 62425, "train_speed(iter/s)": 1.528846 }, { "acc": 0.982197, "epoch": 29.261776423716896, "grad_norm": 0.0025808257050812244, "learning_rate": 4.009006124547547e-06, "loss": 0.07503932, "memory(GiB)": 13.7, "step": 62430, "train_speed(iter/s)": 1.528841 }, { "acc": 0.98604164, "epoch": 29.264119990625733, "grad_norm": 2.1455750465393066, "learning_rate": 4.008246420627146e-06, "loss": 0.02545105, "memory(GiB)": 13.7, "step": 62435, "train_speed(iter/s)": 1.528841 }, { "acc": 0.98916664, "epoch": 29.266463557534568, "grad_norm": 2.3570716381073, "learning_rate": 4.0074867405572595e-06, "loss": 0.04295962, "memory(GiB)": 13.7, "step": 62440, "train_speed(iter/s)": 1.528842 }, { "acc": 0.98527775, "epoch": 29.268807124443402, "grad_norm": 2.826791524887085, "learning_rate": 4.006727084356151e-06, "loss": 0.0629726, "memory(GiB)": 13.7, "step": 62445, "train_speed(iter/s)": 1.528844 }, { "acc": 0.98252983, "epoch": 29.27115069135224, "grad_norm": 4.76821756362915, "learning_rate": 4.005967452042077e-06, "loss": 0.06547356, "memory(GiB)": 13.7, "step": 62450, "train_speed(iter/s)": 1.528844 }, { "acc": 0.99187498, "epoch": 29.273494258261074, "grad_norm": 0.9738948345184326, "learning_rate": 4.0052078436332985e-06, "loss": 0.01539105, "memory(GiB)": 13.7, "step": 62455, "train_speed(iter/s)": 1.528847 }, { "acc": 0.99187498, "epoch": 29.27583782516991, "grad_norm": 0.005800147075206041, "learning_rate": 4.004448259148074e-06, "loss": 0.03005944, "memory(GiB)": 13.7, "step": 62460, "train_speed(iter/s)": 1.528842 }, { "acc": 0.97952461, "epoch": 29.278181392078743, "grad_norm": 3.2117409706115723, "learning_rate": 4.003688698604661e-06, "loss": 0.04686731, "memory(GiB)": 13.7, "step": 62465, "train_speed(iter/s)": 1.528846 }, { "acc": 0.9802083, "epoch": 29.28052495898758, "grad_norm": 4.169013977050781, "learning_rate": 4.002929162021317e-06, "loss": 0.03835503, "memory(GiB)": 13.7, "step": 62470, "train_speed(iter/s)": 1.528848 }, { "acc": 0.9854167, "epoch": 29.282868525896415, "grad_norm": 4.663494110107422, "learning_rate": 4.002169649416301e-06, "loss": 0.03361316, "memory(GiB)": 13.7, "step": 62475, "train_speed(iter/s)": 1.528854 }, { "acc": 0.98145828, "epoch": 29.28521209280525, "grad_norm": 2.824169874191284, "learning_rate": 4.0014101608078635e-06, "loss": 0.06447772, "memory(GiB)": 13.7, "step": 62480, "train_speed(iter/s)": 1.528858 }, { "acc": 0.98729162, "epoch": 29.287555659714084, "grad_norm": 3.0195350646972656, "learning_rate": 4.000650696214265e-06, "loss": 0.07543162, "memory(GiB)": 13.7, "step": 62485, "train_speed(iter/s)": 1.528859 }, { "acc": 0.98627205, "epoch": 29.28989922662292, "grad_norm": 3.672144651412964, "learning_rate": 3.999891255653759e-06, "loss": 0.05073924, "memory(GiB)": 13.7, "step": 62490, "train_speed(iter/s)": 1.528873 }, { "acc": 0.97310095, "epoch": 29.292242793531756, "grad_norm": 6.589116096496582, "learning_rate": 3.999131839144599e-06, "loss": 0.0688483, "memory(GiB)": 13.7, "step": 62495, "train_speed(iter/s)": 1.52888 }, { "acc": 0.97458334, "epoch": 29.29458636044059, "grad_norm": 3.507779836654663, "learning_rate": 3.99837244670504e-06, "loss": 0.16835291, "memory(GiB)": 13.7, "step": 62500, "train_speed(iter/s)": 1.528878 }, { "acc": 0.99196434, "epoch": 29.296929927349424, "grad_norm": 8.034393310546875, "learning_rate": 3.997613078353335e-06, "loss": 0.02575816, "memory(GiB)": 13.7, "step": 62505, "train_speed(iter/s)": 1.528874 }, { "acc": 0.99041662, "epoch": 29.299273494258262, "grad_norm": 1.3377262353897095, "learning_rate": 3.996853734107737e-06, "loss": 0.02442746, "memory(GiB)": 13.7, "step": 62510, "train_speed(iter/s)": 1.528875 }, { "acc": 0.98604164, "epoch": 29.301617061167097, "grad_norm": 0.022546345368027687, "learning_rate": 3.996094413986501e-06, "loss": 0.03594981, "memory(GiB)": 13.7, "step": 62515, "train_speed(iter/s)": 1.528876 }, { "acc": 0.98222904, "epoch": 29.30396062807593, "grad_norm": 2.7416155338287354, "learning_rate": 3.9953351180078725e-06, "loss": 0.04219719, "memory(GiB)": 13.7, "step": 62520, "train_speed(iter/s)": 1.528884 }, { "acc": 0.9885417, "epoch": 29.306304194984765, "grad_norm": 1.630167007446289, "learning_rate": 3.994575846190105e-06, "loss": 0.05235553, "memory(GiB)": 13.7, "step": 62525, "train_speed(iter/s)": 1.528887 }, { "acc": 0.98606148, "epoch": 29.308647761893603, "grad_norm": 2.1787970066070557, "learning_rate": 3.993816598551451e-06, "loss": 0.03553026, "memory(GiB)": 13.7, "step": 62530, "train_speed(iter/s)": 1.528895 }, { "acc": 0.98184986, "epoch": 29.310991328802437, "grad_norm": 3.7772750854492188, "learning_rate": 3.993057375110158e-06, "loss": 0.0436303, "memory(GiB)": 13.7, "step": 62535, "train_speed(iter/s)": 1.528902 }, { "acc": 0.99245358, "epoch": 29.31333489571127, "grad_norm": 0.8990413546562195, "learning_rate": 3.992298175884476e-06, "loss": 0.04768511, "memory(GiB)": 13.7, "step": 62540, "train_speed(iter/s)": 1.528912 }, { "acc": 0.95799103, "epoch": 29.31567846262011, "grad_norm": 2.548128366470337, "learning_rate": 3.991539000892654e-06, "loss": 0.0775673, "memory(GiB)": 13.7, "step": 62545, "train_speed(iter/s)": 1.528916 }, { "acc": 0.98208332, "epoch": 29.318022029528944, "grad_norm": 0.9141151309013367, "learning_rate": 3.99077985015294e-06, "loss": 0.05254377, "memory(GiB)": 13.7, "step": 62550, "train_speed(iter/s)": 1.528919 }, { "acc": 0.99208336, "epoch": 29.320365596437778, "grad_norm": 7.422595977783203, "learning_rate": 3.99002072368358e-06, "loss": 0.03496161, "memory(GiB)": 13.7, "step": 62555, "train_speed(iter/s)": 1.528922 }, { "acc": 0.9807292, "epoch": 29.322709163346612, "grad_norm": 1.3953877687454224, "learning_rate": 3.989261621502824e-06, "loss": 0.06455798, "memory(GiB)": 13.7, "step": 62560, "train_speed(iter/s)": 1.528923 }, { "acc": 0.98508015, "epoch": 29.32505273025545, "grad_norm": 2.941016435623169, "learning_rate": 3.988502543628916e-06, "loss": 0.06565109, "memory(GiB)": 13.7, "step": 62565, "train_speed(iter/s)": 1.52893 }, { "acc": 0.97842264, "epoch": 29.327396297164285, "grad_norm": 6.928832054138184, "learning_rate": 3.9877434900801005e-06, "loss": 0.0796494, "memory(GiB)": 13.7, "step": 62570, "train_speed(iter/s)": 1.528927 }, { "acc": 0.97768307, "epoch": 29.32973986407312, "grad_norm": 1.9033827781677246, "learning_rate": 3.986984460874626e-06, "loss": 0.07072021, "memory(GiB)": 13.7, "step": 62575, "train_speed(iter/s)": 1.528941 }, { "acc": 0.97321434, "epoch": 29.332083430981953, "grad_norm": 3.7223896980285645, "learning_rate": 3.986225456030735e-06, "loss": 0.06424738, "memory(GiB)": 13.7, "step": 62580, "train_speed(iter/s)": 1.528941 }, { "acc": 0.9770834, "epoch": 29.33442699789079, "grad_norm": 6.203938007354736, "learning_rate": 3.985466475566671e-06, "loss": 0.04754624, "memory(GiB)": 13.7, "step": 62585, "train_speed(iter/s)": 1.528943 }, { "acc": 0.98163376, "epoch": 29.336770564799625, "grad_norm": 5.165523529052734, "learning_rate": 3.984707519500679e-06, "loss": 0.0463687, "memory(GiB)": 13.7, "step": 62590, "train_speed(iter/s)": 1.528945 }, { "acc": 0.97425594, "epoch": 29.33911413170846, "grad_norm": 7.518386363983154, "learning_rate": 3.983948587851e-06, "loss": 0.12335887, "memory(GiB)": 13.7, "step": 62595, "train_speed(iter/s)": 1.528945 }, { "acc": 0.97228088, "epoch": 29.341457698617294, "grad_norm": 4.502285480499268, "learning_rate": 3.98318968063588e-06, "loss": 0.07776684, "memory(GiB)": 13.7, "step": 62600, "train_speed(iter/s)": 1.528944 }, { "acc": 0.97802086, "epoch": 29.34380126552613, "grad_norm": 5.764113426208496, "learning_rate": 3.982430797873557e-06, "loss": 0.08075833, "memory(GiB)": 13.7, "step": 62605, "train_speed(iter/s)": 1.528945 }, { "acc": 0.98187504, "epoch": 29.346144832434966, "grad_norm": 0.7258714437484741, "learning_rate": 3.981671939582272e-06, "loss": 0.04502414, "memory(GiB)": 13.7, "step": 62610, "train_speed(iter/s)": 1.528944 }, { "acc": 0.990625, "epoch": 29.3484883993438, "grad_norm": 4.878168106079102, "learning_rate": 3.9809131057802654e-06, "loss": 0.02175131, "memory(GiB)": 13.7, "step": 62615, "train_speed(iter/s)": 1.52895 }, { "acc": 0.9820569, "epoch": 29.35083196625264, "grad_norm": 2.5896525382995605, "learning_rate": 3.98015429648578e-06, "loss": 0.08833318, "memory(GiB)": 13.7, "step": 62620, "train_speed(iter/s)": 1.528951 }, { "acc": 0.984375, "epoch": 29.353175533161473, "grad_norm": 3.142760992050171, "learning_rate": 3.979395511717052e-06, "loss": 0.03310967, "memory(GiB)": 13.7, "step": 62625, "train_speed(iter/s)": 1.528952 }, { "acc": 0.99333334, "epoch": 29.355519100070307, "grad_norm": 0.7834714651107788, "learning_rate": 3.978636751492322e-06, "loss": 0.02686306, "memory(GiB)": 13.7, "step": 62630, "train_speed(iter/s)": 1.528955 }, { "acc": 0.97307549, "epoch": 29.35786266697914, "grad_norm": 8.41140079498291, "learning_rate": 3.977878015829828e-06, "loss": 0.05046792, "memory(GiB)": 13.7, "step": 62635, "train_speed(iter/s)": 1.528961 }, { "acc": 0.98445511, "epoch": 29.36020623388798, "grad_norm": 5.199882507324219, "learning_rate": 3.977119304747808e-06, "loss": 0.06731598, "memory(GiB)": 13.7, "step": 62640, "train_speed(iter/s)": 1.528972 }, { "acc": 0.99385967, "epoch": 29.362549800796813, "grad_norm": 2.0280861854553223, "learning_rate": 3.976360618264495e-06, "loss": 0.03083677, "memory(GiB)": 13.7, "step": 62645, "train_speed(iter/s)": 1.528975 }, { "acc": 0.99083328, "epoch": 29.364893367705648, "grad_norm": 3.3589487075805664, "learning_rate": 3.9756019563981305e-06, "loss": 0.03863192, "memory(GiB)": 13.7, "step": 62650, "train_speed(iter/s)": 1.52898 }, { "acc": 0.99375, "epoch": 29.367236934614482, "grad_norm": 5.659160137176514, "learning_rate": 3.9748433191669465e-06, "loss": 0.03368625, "memory(GiB)": 13.7, "step": 62655, "train_speed(iter/s)": 1.528979 }, { "acc": 0.98770828, "epoch": 29.36958050152332, "grad_norm": 2.5610461235046387, "learning_rate": 3.974084706589179e-06, "loss": 0.03530711, "memory(GiB)": 13.7, "step": 62660, "train_speed(iter/s)": 1.528989 }, { "acc": 0.9942709, "epoch": 29.371924068432154, "grad_norm": 0.042088694870471954, "learning_rate": 3.973326118683065e-06, "loss": 0.04143966, "memory(GiB)": 13.7, "step": 62665, "train_speed(iter/s)": 1.528991 }, { "acc": 0.98083334, "epoch": 29.37426763534099, "grad_norm": 0.6513235569000244, "learning_rate": 3.972567555466834e-06, "loss": 0.04089213, "memory(GiB)": 13.7, "step": 62670, "train_speed(iter/s)": 1.528993 }, { "acc": 0.9927083, "epoch": 29.376611202249823, "grad_norm": 0.01068614237010479, "learning_rate": 3.971809016958724e-06, "loss": 0.06510383, "memory(GiB)": 13.7, "step": 62675, "train_speed(iter/s)": 1.528998 }, { "acc": 0.98208332, "epoch": 29.37895476915866, "grad_norm": 3.471864700317383, "learning_rate": 3.971050503176966e-06, "loss": 0.04728663, "memory(GiB)": 13.7, "step": 62680, "train_speed(iter/s)": 1.529008 }, { "acc": 0.98008013, "epoch": 29.381298336067495, "grad_norm": 3.8124353885650635, "learning_rate": 3.970292014139792e-06, "loss": 0.04942085, "memory(GiB)": 13.7, "step": 62685, "train_speed(iter/s)": 1.529012 }, { "acc": 0.99005203, "epoch": 29.38364190297633, "grad_norm": 0.9301874041557312, "learning_rate": 3.969533549865432e-06, "loss": 0.03792643, "memory(GiB)": 13.7, "step": 62690, "train_speed(iter/s)": 1.529014 }, { "acc": 0.98823872, "epoch": 29.385985469885163, "grad_norm": 1.561803936958313, "learning_rate": 3.968775110372119e-06, "loss": 0.03560798, "memory(GiB)": 13.7, "step": 62695, "train_speed(iter/s)": 1.529011 }, { "acc": 0.98979168, "epoch": 29.388329036794, "grad_norm": 2.095745325088501, "learning_rate": 3.968016695678082e-06, "loss": 0.03410358, "memory(GiB)": 13.7, "step": 62700, "train_speed(iter/s)": 1.529018 }, { "acc": 0.98468752, "epoch": 29.390672603702836, "grad_norm": 4.529170989990234, "learning_rate": 3.967258305801553e-06, "loss": 0.0553741, "memory(GiB)": 13.7, "step": 62705, "train_speed(iter/s)": 1.529026 }, { "acc": 0.97967262, "epoch": 29.39301617061167, "grad_norm": 1.6564899682998657, "learning_rate": 3.96649994076076e-06, "loss": 0.08661473, "memory(GiB)": 13.7, "step": 62710, "train_speed(iter/s)": 1.52903 }, { "acc": 0.99125004, "epoch": 29.395359737520508, "grad_norm": 3.4714157581329346, "learning_rate": 3.96574160057393e-06, "loss": 0.02019051, "memory(GiB)": 13.7, "step": 62715, "train_speed(iter/s)": 1.529033 }, { "acc": 0.9770834, "epoch": 29.397703304429342, "grad_norm": 3.7543435096740723, "learning_rate": 3.964983285259294e-06, "loss": 0.07144442, "memory(GiB)": 13.7, "step": 62720, "train_speed(iter/s)": 1.529042 }, { "acc": 0.99375, "epoch": 29.400046871338176, "grad_norm": 0.057822536677122116, "learning_rate": 3.964224994835079e-06, "loss": 0.02593061, "memory(GiB)": 13.7, "step": 62725, "train_speed(iter/s)": 1.529045 }, { "acc": 0.971875, "epoch": 29.40239043824701, "grad_norm": 5.785679340362549, "learning_rate": 3.963466729319508e-06, "loss": 0.10932305, "memory(GiB)": 13.7, "step": 62730, "train_speed(iter/s)": 1.529047 }, { "acc": 0.9864584, "epoch": 29.40473400515585, "grad_norm": 0.21489198505878448, "learning_rate": 3.962708488730811e-06, "loss": 0.09072597, "memory(GiB)": 13.7, "step": 62735, "train_speed(iter/s)": 1.529048 }, { "acc": 0.97104168, "epoch": 29.407077572064683, "grad_norm": 1.3037967681884766, "learning_rate": 3.961950273087213e-06, "loss": 0.09077876, "memory(GiB)": 13.7, "step": 62740, "train_speed(iter/s)": 1.529048 }, { "acc": 0.97071428, "epoch": 29.409421138973517, "grad_norm": 7.34965181350708, "learning_rate": 3.961192082406937e-06, "loss": 0.0639266, "memory(GiB)": 13.7, "step": 62745, "train_speed(iter/s)": 1.529054 }, { "acc": 0.98169193, "epoch": 29.41176470588235, "grad_norm": 5.905750274658203, "learning_rate": 3.960433916708211e-06, "loss": 0.04228164, "memory(GiB)": 13.7, "step": 62750, "train_speed(iter/s)": 1.52906 }, { "acc": 0.97711391, "epoch": 29.41410827279119, "grad_norm": 3.355665445327759, "learning_rate": 3.959675776009255e-06, "loss": 0.07089125, "memory(GiB)": 13.7, "step": 62755, "train_speed(iter/s)": 1.529065 }, { "acc": 0.9953125, "epoch": 29.416451839700024, "grad_norm": 0.0027809846214950085, "learning_rate": 3.958917660328294e-06, "loss": 0.01776467, "memory(GiB)": 13.7, "step": 62760, "train_speed(iter/s)": 1.529053 }, { "acc": 0.97437172, "epoch": 29.418795406608858, "grad_norm": 2.103239059448242, "learning_rate": 3.958159569683552e-06, "loss": 0.10009351, "memory(GiB)": 13.7, "step": 62765, "train_speed(iter/s)": 1.529057 }, { "acc": 0.98165169, "epoch": 29.421138973517692, "grad_norm": 0.9414239525794983, "learning_rate": 3.95740150409325e-06, "loss": 0.04186061, "memory(GiB)": 13.7, "step": 62770, "train_speed(iter/s)": 1.529061 }, { "acc": 0.99404764, "epoch": 29.42348254042653, "grad_norm": 0.0018873533699661493, "learning_rate": 3.9566434635756065e-06, "loss": 0.03769341, "memory(GiB)": 13.7, "step": 62775, "train_speed(iter/s)": 1.529064 }, { "acc": 0.97256947, "epoch": 29.425826107335364, "grad_norm": 2.227745771408081, "learning_rate": 3.955885448148847e-06, "loss": 0.05492038, "memory(GiB)": 13.7, "step": 62780, "train_speed(iter/s)": 1.52907 }, { "acc": 0.97433014, "epoch": 29.4281696742442, "grad_norm": 2.770561933517456, "learning_rate": 3.955127457831189e-06, "loss": 0.0900174, "memory(GiB)": 13.7, "step": 62785, "train_speed(iter/s)": 1.529076 }, { "acc": 0.98562498, "epoch": 29.430513241153037, "grad_norm": 0.03793024644255638, "learning_rate": 3.954369492640851e-06, "loss": 0.02050083, "memory(GiB)": 13.7, "step": 62790, "train_speed(iter/s)": 1.529076 }, { "acc": 0.99195652, "epoch": 29.43285680806187, "grad_norm": 3.277538776397705, "learning_rate": 3.9536115525960555e-06, "loss": 0.04459815, "memory(GiB)": 13.7, "step": 62795, "train_speed(iter/s)": 1.529078 }, { "acc": 0.99073868, "epoch": 29.435200374970705, "grad_norm": 2.5819716453552246, "learning_rate": 3.952853637715019e-06, "loss": 0.03550355, "memory(GiB)": 13.7, "step": 62800, "train_speed(iter/s)": 1.529084 }, { "acc": 0.98675594, "epoch": 29.43754394187954, "grad_norm": 4.879322052001953, "learning_rate": 3.952095748015958e-06, "loss": 0.02312866, "memory(GiB)": 13.7, "step": 62805, "train_speed(iter/s)": 1.529086 }, { "acc": 0.96684532, "epoch": 29.439887508788377, "grad_norm": 7.171762943267822, "learning_rate": 3.951337883517092e-06, "loss": 0.08750745, "memory(GiB)": 13.7, "step": 62810, "train_speed(iter/s)": 1.529091 }, { "acc": 0.9788393, "epoch": 29.44223107569721, "grad_norm": 2.3613264560699463, "learning_rate": 3.950580044236636e-06, "loss": 0.05892323, "memory(GiB)": 13.7, "step": 62815, "train_speed(iter/s)": 1.529091 }, { "acc": 0.99652777, "epoch": 29.444574642606046, "grad_norm": 2.0728111267089844, "learning_rate": 3.949822230192806e-06, "loss": 0.04609107, "memory(GiB)": 13.7, "step": 62820, "train_speed(iter/s)": 1.529096 }, { "acc": 0.97620535, "epoch": 29.44691820951488, "grad_norm": 0.6371732950210571, "learning_rate": 3.949064441403819e-06, "loss": 0.09947053, "memory(GiB)": 13.7, "step": 62825, "train_speed(iter/s)": 1.529095 }, { "acc": 0.9916667, "epoch": 29.449261776423718, "grad_norm": 5.050276756286621, "learning_rate": 3.948306677887888e-06, "loss": 0.03111589, "memory(GiB)": 13.7, "step": 62830, "train_speed(iter/s)": 1.529105 }, { "acc": 0.97979164, "epoch": 29.451605343332552, "grad_norm": 3.721844434738159, "learning_rate": 3.947548939663227e-06, "loss": 0.05743868, "memory(GiB)": 13.7, "step": 62835, "train_speed(iter/s)": 1.529111 }, { "acc": 0.98633938, "epoch": 29.453948910241387, "grad_norm": 5.718887805938721, "learning_rate": 3.946791226748051e-06, "loss": 0.03787445, "memory(GiB)": 13.7, "step": 62840, "train_speed(iter/s)": 1.529113 }, { "acc": 0.99092264, "epoch": 29.45629247715022, "grad_norm": 5.290117263793945, "learning_rate": 3.946033539160572e-06, "loss": 0.05108274, "memory(GiB)": 13.7, "step": 62845, "train_speed(iter/s)": 1.52912 }, { "acc": 0.98057003, "epoch": 29.45863604405906, "grad_norm": 6.0191216468811035, "learning_rate": 3.945275876919004e-06, "loss": 0.07645832, "memory(GiB)": 13.7, "step": 62850, "train_speed(iter/s)": 1.529122 }, { "acc": 0.98862181, "epoch": 29.460979610967893, "grad_norm": 0.815766453742981, "learning_rate": 3.944518240041556e-06, "loss": 0.02197614, "memory(GiB)": 13.7, "step": 62855, "train_speed(iter/s)": 1.529121 }, { "acc": 0.971875, "epoch": 29.463323177876727, "grad_norm": 3.500619411468506, "learning_rate": 3.943760628546441e-06, "loss": 0.05295053, "memory(GiB)": 13.7, "step": 62860, "train_speed(iter/s)": 1.529119 }, { "acc": 0.97361116, "epoch": 29.465666744785565, "grad_norm": 2.94915509223938, "learning_rate": 3.943003042451867e-06, "loss": 0.05427963, "memory(GiB)": 13.7, "step": 62865, "train_speed(iter/s)": 1.529123 }, { "acc": 0.98467255, "epoch": 29.4680103116944, "grad_norm": 2.6270201206207275, "learning_rate": 3.942245481776048e-06, "loss": 0.06382775, "memory(GiB)": 13.7, "step": 62870, "train_speed(iter/s)": 1.529126 }, { "acc": 0.98372898, "epoch": 29.470353878603234, "grad_norm": 0.7751999497413635, "learning_rate": 3.941487946537188e-06, "loss": 0.05073035, "memory(GiB)": 13.7, "step": 62875, "train_speed(iter/s)": 1.529123 }, { "acc": 0.98104172, "epoch": 29.47269744551207, "grad_norm": 0.14970411360263824, "learning_rate": 3.9407304367535e-06, "loss": 0.04797506, "memory(GiB)": 13.7, "step": 62880, "train_speed(iter/s)": 1.529123 }, { "acc": 0.98156252, "epoch": 29.475041012420906, "grad_norm": 3.838701009750366, "learning_rate": 3.939972952443193e-06, "loss": 0.06193044, "memory(GiB)": 13.7, "step": 62885, "train_speed(iter/s)": 1.529122 }, { "acc": 0.97677498, "epoch": 29.47738457932974, "grad_norm": 4.0003557205200195, "learning_rate": 3.939215493624469e-06, "loss": 0.06673759, "memory(GiB)": 13.7, "step": 62890, "train_speed(iter/s)": 1.529124 }, { "acc": 0.99330359, "epoch": 29.479728146238575, "grad_norm": 1.4451881647109985, "learning_rate": 3.9384580603155405e-06, "loss": 0.02855323, "memory(GiB)": 13.7, "step": 62895, "train_speed(iter/s)": 1.529126 }, { "acc": 0.990625, "epoch": 29.48207171314741, "grad_norm": 4.320560455322266, "learning_rate": 3.93770065253461e-06, "loss": 0.05662336, "memory(GiB)": 13.7, "step": 62900, "train_speed(iter/s)": 1.529132 }, { "acc": 0.99255209, "epoch": 29.484415280056247, "grad_norm": 1.4243438243865967, "learning_rate": 3.936943270299883e-06, "loss": 0.03887407, "memory(GiB)": 13.7, "step": 62905, "train_speed(iter/s)": 1.529134 }, { "acc": 0.984375, "epoch": 29.48675884696508, "grad_norm": 3.6989333629608154, "learning_rate": 3.936185913629567e-06, "loss": 0.03790883, "memory(GiB)": 13.7, "step": 62910, "train_speed(iter/s)": 1.529133 }, { "acc": 0.97654762, "epoch": 29.489102413873916, "grad_norm": 0.6677038669586182, "learning_rate": 3.935428582541865e-06, "loss": 0.05513319, "memory(GiB)": 13.7, "step": 62915, "train_speed(iter/s)": 1.529137 }, { "acc": 0.9905303, "epoch": 29.49144598078275, "grad_norm": 9.461061477661133, "learning_rate": 3.93467127705498e-06, "loss": 0.06564229, "memory(GiB)": 13.7, "step": 62920, "train_speed(iter/s)": 1.529139 }, { "acc": 0.99562502, "epoch": 29.493789547691588, "grad_norm": 3.1413280963897705, "learning_rate": 3.933913997187118e-06, "loss": 0.04099689, "memory(GiB)": 13.7, "step": 62925, "train_speed(iter/s)": 1.529139 }, { "acc": 0.9729166, "epoch": 29.496133114600422, "grad_norm": 5.448449611663818, "learning_rate": 3.933156742956478e-06, "loss": 0.0640879, "memory(GiB)": 13.7, "step": 62930, "train_speed(iter/s)": 1.529143 }, { "acc": 0.98708334, "epoch": 29.498476681509256, "grad_norm": 3.458765983581543, "learning_rate": 3.932399514381265e-06, "loss": 0.03157449, "memory(GiB)": 13.7, "step": 62935, "train_speed(iter/s)": 1.529144 }, { "acc": 0.98250542, "epoch": 29.500820248418094, "grad_norm": 0.7935914993286133, "learning_rate": 3.931642311479676e-06, "loss": 0.03358906, "memory(GiB)": 13.7, "step": 62940, "train_speed(iter/s)": 1.529148 }, { "acc": 0.985322, "epoch": 29.50316381532693, "grad_norm": 1.3523694276809692, "learning_rate": 3.930885134269917e-06, "loss": 0.03037737, "memory(GiB)": 13.7, "step": 62945, "train_speed(iter/s)": 1.52915 }, { "acc": 0.9746727, "epoch": 29.505507382235763, "grad_norm": 6.092356204986572, "learning_rate": 3.9301279827701824e-06, "loss": 0.07515822, "memory(GiB)": 13.7, "step": 62950, "train_speed(iter/s)": 1.529155 }, { "acc": 0.96112061, "epoch": 29.507850949144597, "grad_norm": 5.378627300262451, "learning_rate": 3.929370856998677e-06, "loss": 0.13153074, "memory(GiB)": 13.7, "step": 62955, "train_speed(iter/s)": 1.529155 }, { "acc": 0.98094692, "epoch": 29.510194516053435, "grad_norm": 2.1790876388549805, "learning_rate": 3.928613756973597e-06, "loss": 0.0592267, "memory(GiB)": 13.7, "step": 62960, "train_speed(iter/s)": 1.529161 }, { "acc": 0.9802083, "epoch": 29.51253808296227, "grad_norm": 2.9616189002990723, "learning_rate": 3.927856682713139e-06, "loss": 0.03347327, "memory(GiB)": 13.7, "step": 62965, "train_speed(iter/s)": 1.529166 }, { "acc": 0.98520832, "epoch": 29.514881649871104, "grad_norm": 4.210878372192383, "learning_rate": 3.927099634235505e-06, "loss": 0.05194393, "memory(GiB)": 13.7, "step": 62970, "train_speed(iter/s)": 1.529165 }, { "acc": 0.98604164, "epoch": 29.517225216779938, "grad_norm": 3.757434368133545, "learning_rate": 3.926342611558889e-06, "loss": 0.04603141, "memory(GiB)": 13.7, "step": 62975, "train_speed(iter/s)": 1.529167 }, { "acc": 0.99196434, "epoch": 29.519568783688776, "grad_norm": 0.6760329604148865, "learning_rate": 3.925585614701487e-06, "loss": 0.027753, "memory(GiB)": 13.7, "step": 62980, "train_speed(iter/s)": 1.529165 }, { "acc": 0.97791672, "epoch": 29.52191235059761, "grad_norm": 0.9127902388572693, "learning_rate": 3.9248286436814955e-06, "loss": 0.04829465, "memory(GiB)": 13.7, "step": 62985, "train_speed(iter/s)": 1.529171 }, { "acc": 0.98614578, "epoch": 29.524255917506444, "grad_norm": 0.8213880658149719, "learning_rate": 3.92407169851711e-06, "loss": 0.02959052, "memory(GiB)": 13.7, "step": 62990, "train_speed(iter/s)": 1.529171 }, { "acc": 0.97540178, "epoch": 29.52659948441528, "grad_norm": 4.555996894836426, "learning_rate": 3.923314779226523e-06, "loss": 0.05258434, "memory(GiB)": 13.7, "step": 62995, "train_speed(iter/s)": 1.529177 }, { "acc": 0.98467264, "epoch": 29.528943051324116, "grad_norm": 0.0010433245915919542, "learning_rate": 3.9225578858279315e-06, "loss": 0.0265162, "memory(GiB)": 13.7, "step": 63000, "train_speed(iter/s)": 1.529184 }, { "acc": 0.99363098, "epoch": 29.53128661823295, "grad_norm": 2.2579545974731445, "learning_rate": 3.921801018339528e-06, "loss": 0.02168443, "memory(GiB)": 13.7, "step": 63005, "train_speed(iter/s)": 1.529185 }, { "acc": 0.99196434, "epoch": 29.533630185141785, "grad_norm": 1.7500252723693848, "learning_rate": 3.921044176779501e-06, "loss": 0.02671705, "memory(GiB)": 13.7, "step": 63010, "train_speed(iter/s)": 1.529186 }, { "acc": 0.97791672, "epoch": 29.53597375205062, "grad_norm": 3.3168907165527344, "learning_rate": 3.920287361166048e-06, "loss": 0.03277843, "memory(GiB)": 13.7, "step": 63015, "train_speed(iter/s)": 1.529192 }, { "acc": 0.99363098, "epoch": 29.538317318959457, "grad_norm": 2.1407952308654785, "learning_rate": 3.919530571517359e-06, "loss": 0.03713367, "memory(GiB)": 13.7, "step": 63020, "train_speed(iter/s)": 1.529199 }, { "acc": 0.96052094, "epoch": 29.54066088586829, "grad_norm": 7.214844226837158, "learning_rate": 3.918773807851622e-06, "loss": 0.0972627, "memory(GiB)": 13.7, "step": 63025, "train_speed(iter/s)": 1.529204 }, { "acc": 0.9792078, "epoch": 29.543004452777126, "grad_norm": 3.3812754154205322, "learning_rate": 3.918017070187029e-06, "loss": 0.06009348, "memory(GiB)": 13.7, "step": 63030, "train_speed(iter/s)": 1.529204 }, { "acc": 0.9839962, "epoch": 29.545348019685964, "grad_norm": 4.433620452880859, "learning_rate": 3.91726035854177e-06, "loss": 0.07838178, "memory(GiB)": 13.7, "step": 63035, "train_speed(iter/s)": 1.529206 }, { "acc": 0.97719698, "epoch": 29.547691586594798, "grad_norm": 3.5957870483398438, "learning_rate": 3.916503672934033e-06, "loss": 0.08398567, "memory(GiB)": 13.7, "step": 63040, "train_speed(iter/s)": 1.529207 }, { "acc": 0.98078327, "epoch": 29.550035153503632, "grad_norm": 0.2006266564130783, "learning_rate": 3.915747013382007e-06, "loss": 0.05638121, "memory(GiB)": 13.7, "step": 63045, "train_speed(iter/s)": 1.529212 }, { "acc": 0.97683277, "epoch": 29.552378720412467, "grad_norm": 2.9666850566864014, "learning_rate": 3.914990379903876e-06, "loss": 0.09440181, "memory(GiB)": 13.7, "step": 63050, "train_speed(iter/s)": 1.52921 }, { "acc": 0.99258928, "epoch": 29.554722287321304, "grad_norm": 1.5410912036895752, "learning_rate": 3.9142337725178334e-06, "loss": 0.0229313, "memory(GiB)": 13.7, "step": 63055, "train_speed(iter/s)": 1.529213 }, { "acc": 0.98383932, "epoch": 29.55706585423014, "grad_norm": 5.562313556671143, "learning_rate": 3.913477191242063e-06, "loss": 0.06389192, "memory(GiB)": 13.7, "step": 63060, "train_speed(iter/s)": 1.529219 }, { "acc": 0.98812504, "epoch": 29.559409421138973, "grad_norm": 2.3373208045959473, "learning_rate": 3.912720636094748e-06, "loss": 0.02133075, "memory(GiB)": 13.7, "step": 63065, "train_speed(iter/s)": 1.529223 }, { "acc": 0.99437504, "epoch": 29.561752988047807, "grad_norm": 0.0264165960252285, "learning_rate": 3.911964107094075e-06, "loss": 0.01042111, "memory(GiB)": 13.7, "step": 63070, "train_speed(iter/s)": 1.529222 }, { "acc": 0.98230896, "epoch": 29.564096554956645, "grad_norm": 2.5171234607696533, "learning_rate": 3.911207604258231e-06, "loss": 0.11198077, "memory(GiB)": 13.7, "step": 63075, "train_speed(iter/s)": 1.529234 }, { "acc": 0.9791666, "epoch": 29.56644012186548, "grad_norm": 3.6529648303985596, "learning_rate": 3.910451127605395e-06, "loss": 0.04308416, "memory(GiB)": 13.7, "step": 63080, "train_speed(iter/s)": 1.529238 }, { "acc": 0.9844162, "epoch": 29.568783688774314, "grad_norm": 4.295818328857422, "learning_rate": 3.909694677153753e-06, "loss": 0.07384697, "memory(GiB)": 13.7, "step": 63085, "train_speed(iter/s)": 1.52924 }, { "acc": 0.98059216, "epoch": 29.571127255683148, "grad_norm": 2.622407913208008, "learning_rate": 3.908938252921489e-06, "loss": 0.06411427, "memory(GiB)": 13.7, "step": 63090, "train_speed(iter/s)": 1.529248 }, { "acc": 0.97432537, "epoch": 29.573470822591986, "grad_norm": 4.40825080871582, "learning_rate": 3.908181854926781e-06, "loss": 0.07951896, "memory(GiB)": 13.7, "step": 63095, "train_speed(iter/s)": 1.52925 }, { "acc": 0.98790188, "epoch": 29.57581438950082, "grad_norm": 1.9166864156723022, "learning_rate": 3.907425483187815e-06, "loss": 0.03997496, "memory(GiB)": 13.7, "step": 63100, "train_speed(iter/s)": 1.529255 }, { "acc": 0.98238096, "epoch": 29.578157956409655, "grad_norm": 2.3547847270965576, "learning_rate": 3.906669137722769e-06, "loss": 0.05103176, "memory(GiB)": 13.7, "step": 63105, "train_speed(iter/s)": 1.529256 }, { "acc": 0.98301468, "epoch": 29.58050152331849, "grad_norm": 4.855216979980469, "learning_rate": 3.905912818549823e-06, "loss": 0.03246825, "memory(GiB)": 13.7, "step": 63110, "train_speed(iter/s)": 1.529257 }, { "acc": 0.99165173, "epoch": 29.582845090227327, "grad_norm": 1.8053576946258545, "learning_rate": 3.905156525687156e-06, "loss": 0.0318756, "memory(GiB)": 13.7, "step": 63115, "train_speed(iter/s)": 1.529262 }, { "acc": 0.9645834, "epoch": 29.58518865713616, "grad_norm": 5.26121187210083, "learning_rate": 3.904400259152949e-06, "loss": 0.0653963, "memory(GiB)": 13.7, "step": 63120, "train_speed(iter/s)": 1.529253 }, { "acc": 0.9854167, "epoch": 29.587532224044995, "grad_norm": 3.7469677925109863, "learning_rate": 3.903644018965378e-06, "loss": 0.05454553, "memory(GiB)": 13.7, "step": 63125, "train_speed(iter/s)": 1.52926 }, { "acc": 0.96957798, "epoch": 29.589875790953833, "grad_norm": 4.170806884765625, "learning_rate": 3.902887805142621e-06, "loss": 0.1175963, "memory(GiB)": 13.7, "step": 63130, "train_speed(iter/s)": 1.529258 }, { "acc": 0.9927083, "epoch": 29.592219357862668, "grad_norm": 0.0019082656363025308, "learning_rate": 3.902131617702856e-06, "loss": 0.02316551, "memory(GiB)": 13.7, "step": 63135, "train_speed(iter/s)": 1.52926 }, { "acc": 0.9739584, "epoch": 29.594562924771502, "grad_norm": 8.056221008300781, "learning_rate": 3.901375456664257e-06, "loss": 0.06201178, "memory(GiB)": 13.7, "step": 63140, "train_speed(iter/s)": 1.529262 }, { "acc": 0.98207798, "epoch": 29.596906491680336, "grad_norm": 5.336615562438965, "learning_rate": 3.9006193220450045e-06, "loss": 0.04861824, "memory(GiB)": 13.7, "step": 63145, "train_speed(iter/s)": 1.529266 }, { "acc": 0.9864583, "epoch": 29.599250058589174, "grad_norm": 1.9324208498001099, "learning_rate": 3.899863213863268e-06, "loss": 0.02590097, "memory(GiB)": 13.7, "step": 63150, "train_speed(iter/s)": 1.529272 }, { "acc": 0.97583332, "epoch": 29.60159362549801, "grad_norm": 4.663598537445068, "learning_rate": 3.899107132137223e-06, "loss": 0.06260783, "memory(GiB)": 13.7, "step": 63155, "train_speed(iter/s)": 1.529278 }, { "acc": 0.9831439, "epoch": 29.603937192406843, "grad_norm": 2.6867785453796387, "learning_rate": 3.898351076885045e-06, "loss": 0.0704139, "memory(GiB)": 13.7, "step": 63160, "train_speed(iter/s)": 1.529277 }, { "acc": 0.98966351, "epoch": 29.606280759315677, "grad_norm": 3.9090263843536377, "learning_rate": 3.897595048124907e-06, "loss": 0.03789231, "memory(GiB)": 13.7, "step": 63165, "train_speed(iter/s)": 1.52928 }, { "acc": 0.9791666, "epoch": 29.608624326224515, "grad_norm": 0.015397537499666214, "learning_rate": 3.89683904587498e-06, "loss": 0.05512586, "memory(GiB)": 13.7, "step": 63170, "train_speed(iter/s)": 1.529278 }, { "acc": 0.99120045, "epoch": 29.61096789313335, "grad_norm": 0.019970089197158813, "learning_rate": 3.896083070153436e-06, "loss": 0.05791939, "memory(GiB)": 13.7, "step": 63175, "train_speed(iter/s)": 1.529275 }, { "acc": 0.98458328, "epoch": 29.613311460042183, "grad_norm": 6.574028491973877, "learning_rate": 3.8953271209784475e-06, "loss": 0.03047335, "memory(GiB)": 13.7, "step": 63180, "train_speed(iter/s)": 1.529268 }, { "acc": 0.98946428, "epoch": 29.615655026951018, "grad_norm": 5.468748569488525, "learning_rate": 3.8945711983681865e-06, "loss": 0.06477835, "memory(GiB)": 13.7, "step": 63185, "train_speed(iter/s)": 1.529273 }, { "acc": 0.96625004, "epoch": 29.617998593859856, "grad_norm": 3.755056142807007, "learning_rate": 3.893815302340817e-06, "loss": 0.07892594, "memory(GiB)": 13.7, "step": 63190, "train_speed(iter/s)": 1.52928 }, { "acc": 0.98648815, "epoch": 29.62034216076869, "grad_norm": 4.736595630645752, "learning_rate": 3.893059432914513e-06, "loss": 0.05561839, "memory(GiB)": 13.7, "step": 63195, "train_speed(iter/s)": 1.529292 }, { "acc": 0.98980656, "epoch": 29.622685727677524, "grad_norm": 3.6475472450256348, "learning_rate": 3.8923035901074416e-06, "loss": 0.07555862, "memory(GiB)": 13.7, "step": 63200, "train_speed(iter/s)": 1.529289 }, { "acc": 0.97875004, "epoch": 29.625029294586362, "grad_norm": 3.822953701019287, "learning_rate": 3.891547773937772e-06, "loss": 0.06250473, "memory(GiB)": 13.7, "step": 63205, "train_speed(iter/s)": 1.529295 }, { "acc": 0.97104168, "epoch": 29.627372861495196, "grad_norm": 2.998563051223755, "learning_rate": 3.8907919844236704e-06, "loss": 0.05957332, "memory(GiB)": 13.7, "step": 63210, "train_speed(iter/s)": 1.529294 }, { "acc": 0.98159723, "epoch": 29.62971642840403, "grad_norm": 4.704882621765137, "learning_rate": 3.8900362215833025e-06, "loss": 0.05560209, "memory(GiB)": 13.7, "step": 63215, "train_speed(iter/s)": 1.529292 }, { "acc": 0.98083334, "epoch": 29.632059995312865, "grad_norm": 5.939140796661377, "learning_rate": 3.8892804854348375e-06, "loss": 0.04786196, "memory(GiB)": 13.7, "step": 63220, "train_speed(iter/s)": 1.52929 }, { "acc": 0.978125, "epoch": 29.634403562221703, "grad_norm": 8.940642356872559, "learning_rate": 3.888524775996439e-06, "loss": 0.07196053, "memory(GiB)": 13.7, "step": 63225, "train_speed(iter/s)": 1.529288 }, { "acc": 0.99124994, "epoch": 29.636747129130537, "grad_norm": 0.9226203560829163, "learning_rate": 3.887769093286271e-06, "loss": 0.02125775, "memory(GiB)": 13.7, "step": 63230, "train_speed(iter/s)": 1.529291 }, { "acc": 0.98988094, "epoch": 29.63909069603937, "grad_norm": 0.01956774853169918, "learning_rate": 3.887013437322499e-06, "loss": 0.04345849, "memory(GiB)": 13.7, "step": 63235, "train_speed(iter/s)": 1.529292 }, { "acc": 0.978125, "epoch": 29.641434262948206, "grad_norm": 5.408104419708252, "learning_rate": 3.886257808123286e-06, "loss": 0.03926343, "memory(GiB)": 13.7, "step": 63240, "train_speed(iter/s)": 1.529295 }, { "acc": 0.97104168, "epoch": 29.643777829857044, "grad_norm": 10.533892631530762, "learning_rate": 3.885502205706794e-06, "loss": 0.09455836, "memory(GiB)": 13.7, "step": 63245, "train_speed(iter/s)": 1.529292 }, { "acc": 0.98732376, "epoch": 29.646121396765878, "grad_norm": 1.9994386434555054, "learning_rate": 3.884746630091186e-06, "loss": 0.04423666, "memory(GiB)": 13.7, "step": 63250, "train_speed(iter/s)": 1.529299 }, { "acc": 0.99239082, "epoch": 29.648464963674712, "grad_norm": 1.0684839487075806, "learning_rate": 3.883991081294624e-06, "loss": 0.02374913, "memory(GiB)": 13.7, "step": 63255, "train_speed(iter/s)": 1.529299 }, { "acc": 0.98005209, "epoch": 29.650808530583546, "grad_norm": 4.955013275146484, "learning_rate": 3.883235559335267e-06, "loss": 0.10199697, "memory(GiB)": 13.7, "step": 63260, "train_speed(iter/s)": 1.529299 }, { "acc": 0.99250002, "epoch": 29.653152097492384, "grad_norm": 2.475311040878296, "learning_rate": 3.882480064231278e-06, "loss": 0.02680861, "memory(GiB)": 13.7, "step": 63265, "train_speed(iter/s)": 1.529304 }, { "acc": 0.96300592, "epoch": 29.65549566440122, "grad_norm": 5.593172073364258, "learning_rate": 3.881724596000817e-06, "loss": 0.06973348, "memory(GiB)": 13.7, "step": 63270, "train_speed(iter/s)": 1.529307 }, { "acc": 0.9817709, "epoch": 29.657839231310053, "grad_norm": 5.877792835235596, "learning_rate": 3.880969154662038e-06, "loss": 0.04807339, "memory(GiB)": 13.7, "step": 63275, "train_speed(iter/s)": 1.529314 }, { "acc": 0.99008923, "epoch": 29.66018279821889, "grad_norm": 5.0699663162231445, "learning_rate": 3.880213740233104e-06, "loss": 0.02883855, "memory(GiB)": 13.7, "step": 63280, "train_speed(iter/s)": 1.529322 }, { "acc": 0.99080353, "epoch": 29.662526365127725, "grad_norm": 2.55350399017334, "learning_rate": 3.8794583527321715e-06, "loss": 0.051652, "memory(GiB)": 13.7, "step": 63285, "train_speed(iter/s)": 1.529318 }, { "acc": 0.97448864, "epoch": 29.66486993203656, "grad_norm": 3.1557421684265137, "learning_rate": 3.878702992177396e-06, "loss": 0.06733782, "memory(GiB)": 13.7, "step": 63290, "train_speed(iter/s)": 1.529323 }, { "acc": 0.9742857, "epoch": 29.667213498945394, "grad_norm": 2.2843987941741943, "learning_rate": 3.877947658586937e-06, "loss": 0.11191261, "memory(GiB)": 13.7, "step": 63295, "train_speed(iter/s)": 1.529325 }, { "acc": 0.97354164, "epoch": 29.66955706585423, "grad_norm": 1.2112096548080444, "learning_rate": 3.877192351978945e-06, "loss": 0.07451377, "memory(GiB)": 13.7, "step": 63300, "train_speed(iter/s)": 1.529326 }, { "acc": 0.9823801, "epoch": 29.671900632763066, "grad_norm": 1.0203934907913208, "learning_rate": 3.876437072371582e-06, "loss": 0.05178816, "memory(GiB)": 13.7, "step": 63305, "train_speed(iter/s)": 1.529334 }, { "acc": 0.98552084, "epoch": 29.6742441996719, "grad_norm": 0.0018962292233482003, "learning_rate": 3.875681819782998e-06, "loss": 0.03171948, "memory(GiB)": 13.7, "step": 63310, "train_speed(iter/s)": 1.529342 }, { "acc": 0.98145828, "epoch": 29.676587766580734, "grad_norm": 11.322787284851074, "learning_rate": 3.87492659423135e-06, "loss": 0.05150048, "memory(GiB)": 13.7, "step": 63315, "train_speed(iter/s)": 1.52934 }, { "acc": 0.99249992, "epoch": 29.678931333489572, "grad_norm": 0.004536398686468601, "learning_rate": 3.874171395734785e-06, "loss": 0.0248887, "memory(GiB)": 13.7, "step": 63320, "train_speed(iter/s)": 1.529343 }, { "acc": 0.99821434, "epoch": 29.681274900398407, "grad_norm": 0.031008509919047356, "learning_rate": 3.873416224311461e-06, "loss": 0.00635982, "memory(GiB)": 13.7, "step": 63325, "train_speed(iter/s)": 1.529349 }, { "acc": 0.97531252, "epoch": 29.68361846730724, "grad_norm": 8.015398025512695, "learning_rate": 3.8726610799795285e-06, "loss": 0.08210974, "memory(GiB)": 13.7, "step": 63330, "train_speed(iter/s)": 1.529355 }, { "acc": 0.97114582, "epoch": 29.685962034216075, "grad_norm": 9.385128021240234, "learning_rate": 3.871905962757137e-06, "loss": 0.10029691, "memory(GiB)": 13.7, "step": 63335, "train_speed(iter/s)": 1.529364 }, { "acc": 0.98245392, "epoch": 29.688305601124913, "grad_norm": 3.316905975341797, "learning_rate": 3.87115087266244e-06, "loss": 0.10079798, "memory(GiB)": 13.7, "step": 63340, "train_speed(iter/s)": 1.52937 }, { "acc": 0.98154755, "epoch": 29.690649168033747, "grad_norm": 1.4810841083526611, "learning_rate": 3.870395809713584e-06, "loss": 0.03611659, "memory(GiB)": 13.7, "step": 63345, "train_speed(iter/s)": 1.529375 }, { "acc": 0.99375, "epoch": 29.69299273494258, "grad_norm": 2.9346108436584473, "learning_rate": 3.869640773928722e-06, "loss": 0.03991579, "memory(GiB)": 13.7, "step": 63350, "train_speed(iter/s)": 1.529382 }, { "acc": 0.9757143, "epoch": 29.69533630185142, "grad_norm": 1.9203695058822632, "learning_rate": 3.868885765326001e-06, "loss": 0.07012185, "memory(GiB)": 13.7, "step": 63355, "train_speed(iter/s)": 1.529381 }, { "acc": 0.98708334, "epoch": 29.697679868760254, "grad_norm": 2.5099635124206543, "learning_rate": 3.868130783923567e-06, "loss": 0.03723873, "memory(GiB)": 13.7, "step": 63360, "train_speed(iter/s)": 1.529389 }, { "acc": 0.996875, "epoch": 29.700023435669088, "grad_norm": 3.1815590858459473, "learning_rate": 3.86737582973957e-06, "loss": 0.01443323, "memory(GiB)": 13.7, "step": 63365, "train_speed(iter/s)": 1.529391 }, { "acc": 0.98500004, "epoch": 29.702367002577923, "grad_norm": 3.962428331375122, "learning_rate": 3.866620902792154e-06, "loss": 0.02235937, "memory(GiB)": 13.7, "step": 63370, "train_speed(iter/s)": 1.529392 }, { "acc": 0.97639885, "epoch": 29.70471056948676, "grad_norm": 2.490989923477173, "learning_rate": 3.865866003099467e-06, "loss": 0.05862123, "memory(GiB)": 13.7, "step": 63375, "train_speed(iter/s)": 1.529394 }, { "acc": 0.98239584, "epoch": 29.707054136395595, "grad_norm": 1.9210965633392334, "learning_rate": 3.865111130679655e-06, "loss": 0.04396517, "memory(GiB)": 13.7, "step": 63380, "train_speed(iter/s)": 1.529395 }, { "acc": 0.97770824, "epoch": 29.70939770330443, "grad_norm": 3.463959217071533, "learning_rate": 3.86435628555086e-06, "loss": 0.05390124, "memory(GiB)": 13.7, "step": 63385, "train_speed(iter/s)": 1.529404 }, { "acc": 0.98113976, "epoch": 29.711741270213263, "grad_norm": 3.250950336456299, "learning_rate": 3.863601467731227e-06, "loss": 0.05578383, "memory(GiB)": 13.7, "step": 63390, "train_speed(iter/s)": 1.529409 }, { "acc": 0.98777771, "epoch": 29.7140848371221, "grad_norm": 3.0538041591644287, "learning_rate": 3.862846677238901e-06, "loss": 0.03840104, "memory(GiB)": 13.7, "step": 63395, "train_speed(iter/s)": 1.529409 }, { "acc": 0.9916667, "epoch": 29.716428404030935, "grad_norm": 4.067080020904541, "learning_rate": 3.862091914092024e-06, "loss": 0.02158449, "memory(GiB)": 13.7, "step": 63400, "train_speed(iter/s)": 1.529415 }, { "acc": 0.98458328, "epoch": 29.71877197093977, "grad_norm": 4.179263591766357, "learning_rate": 3.861337178308735e-06, "loss": 0.05118757, "memory(GiB)": 13.7, "step": 63405, "train_speed(iter/s)": 1.529416 }, { "acc": 0.96757441, "epoch": 29.721115537848604, "grad_norm": 6.75609827041626, "learning_rate": 3.860582469907179e-06, "loss": 0.09706234, "memory(GiB)": 13.7, "step": 63410, "train_speed(iter/s)": 1.529422 }, { "acc": 0.97666664, "epoch": 29.723459104757442, "grad_norm": 2.4533677101135254, "learning_rate": 3.8598277889054955e-06, "loss": 0.08853024, "memory(GiB)": 13.7, "step": 63415, "train_speed(iter/s)": 1.529425 }, { "acc": 0.98812504, "epoch": 29.725802671666276, "grad_norm": 4.784453868865967, "learning_rate": 3.859073135321824e-06, "loss": 0.03777612, "memory(GiB)": 13.7, "step": 63420, "train_speed(iter/s)": 1.529426 }, { "acc": 0.98361111, "epoch": 29.72814623857511, "grad_norm": 4.642398834228516, "learning_rate": 3.858318509174305e-06, "loss": 0.03609871, "memory(GiB)": 13.7, "step": 63425, "train_speed(iter/s)": 1.529436 }, { "acc": 0.98050594, "epoch": 29.73048980548395, "grad_norm": 0.010726472362875938, "learning_rate": 3.857563910481077e-06, "loss": 0.0518248, "memory(GiB)": 13.7, "step": 63430, "train_speed(iter/s)": 1.529433 }, { "acc": 0.97205353, "epoch": 29.732833372392783, "grad_norm": 6.211528778076172, "learning_rate": 3.8568093392602765e-06, "loss": 0.09235748, "memory(GiB)": 13.7, "step": 63435, "train_speed(iter/s)": 1.529441 }, { "acc": 0.98277779, "epoch": 29.735176939301617, "grad_norm": 5.8260908126831055, "learning_rate": 3.856054795530045e-06, "loss": 0.03891488, "memory(GiB)": 13.7, "step": 63440, "train_speed(iter/s)": 1.529446 }, { "acc": 0.97321424, "epoch": 29.73752050621045, "grad_norm": 3.438835859298706, "learning_rate": 3.855300279308515e-06, "loss": 0.07264592, "memory(GiB)": 13.7, "step": 63445, "train_speed(iter/s)": 1.529449 }, { "acc": 0.98395824, "epoch": 29.73986407311929, "grad_norm": 1.0136698484420776, "learning_rate": 3.854545790613822e-06, "loss": 0.04069332, "memory(GiB)": 13.7, "step": 63450, "train_speed(iter/s)": 1.529445 }, { "acc": 0.97780714, "epoch": 29.742207640028123, "grad_norm": 3.2270379066467285, "learning_rate": 3.853791329464106e-06, "loss": 0.08690795, "memory(GiB)": 13.7, "step": 63455, "train_speed(iter/s)": 1.52945 }, { "acc": 0.98715277, "epoch": 29.744551206936958, "grad_norm": 6.974382400512695, "learning_rate": 3.853036895877498e-06, "loss": 0.03977966, "memory(GiB)": 13.7, "step": 63460, "train_speed(iter/s)": 1.529447 }, { "acc": 0.9895834, "epoch": 29.746894773845792, "grad_norm": 8.846026420593262, "learning_rate": 3.852282489872134e-06, "loss": 0.03921199, "memory(GiB)": 13.7, "step": 63465, "train_speed(iter/s)": 1.529451 }, { "acc": 0.94524994, "epoch": 29.74923834075463, "grad_norm": 4.5421366691589355, "learning_rate": 3.851528111466145e-06, "loss": 0.13694937, "memory(GiB)": 13.7, "step": 63470, "train_speed(iter/s)": 1.529458 }, { "acc": 0.984375, "epoch": 29.751581907663464, "grad_norm": 0.054753199219703674, "learning_rate": 3.850773760677667e-06, "loss": 0.05934582, "memory(GiB)": 13.7, "step": 63475, "train_speed(iter/s)": 1.529462 }, { "acc": 0.99613094, "epoch": 29.7539254745723, "grad_norm": 1.8784151077270508, "learning_rate": 3.8500194375248314e-06, "loss": 0.0267498, "memory(GiB)": 13.7, "step": 63480, "train_speed(iter/s)": 1.529458 }, { "acc": 0.98833332, "epoch": 29.756269041481133, "grad_norm": 2.8282065391540527, "learning_rate": 3.849265142025768e-06, "loss": 0.0572358, "memory(GiB)": 13.7, "step": 63485, "train_speed(iter/s)": 1.529463 }, { "acc": 0.99375, "epoch": 29.75861260838997, "grad_norm": 3.0748629570007324, "learning_rate": 3.848510874198609e-06, "loss": 0.02972438, "memory(GiB)": 13.7, "step": 63490, "train_speed(iter/s)": 1.529467 }, { "acc": 0.9885417, "epoch": 29.760956175298805, "grad_norm": 2.9577889442443848, "learning_rate": 3.847756634061483e-06, "loss": 0.03850127, "memory(GiB)": 13.7, "step": 63495, "train_speed(iter/s)": 1.529466 }, { "acc": 0.990625, "epoch": 29.76329974220764, "grad_norm": 0.0008526794845238328, "learning_rate": 3.847002421632521e-06, "loss": 0.03446809, "memory(GiB)": 13.7, "step": 63500, "train_speed(iter/s)": 1.529468 }, { "acc": 0.98249998, "epoch": 29.765643309116474, "grad_norm": 3.825906753540039, "learning_rate": 3.846248236929851e-06, "loss": 0.03438752, "memory(GiB)": 13.7, "step": 63505, "train_speed(iter/s)": 1.529475 }, { "acc": 0.97666664, "epoch": 29.76798687602531, "grad_norm": 0.003797628451138735, "learning_rate": 3.845494079971601e-06, "loss": 0.07690613, "memory(GiB)": 13.7, "step": 63510, "train_speed(iter/s)": 1.529477 }, { "acc": 0.98258934, "epoch": 29.770330442934146, "grad_norm": 2.6598854064941406, "learning_rate": 3.844739950775899e-06, "loss": 0.04106949, "memory(GiB)": 13.7, "step": 63515, "train_speed(iter/s)": 1.529478 }, { "acc": 0.98091345, "epoch": 29.77267400984298, "grad_norm": 0.9402626156806946, "learning_rate": 3.843985849360874e-06, "loss": 0.06451942, "memory(GiB)": 13.7, "step": 63520, "train_speed(iter/s)": 1.529483 }, { "acc": 0.9813942, "epoch": 29.775017576751818, "grad_norm": 4.789305210113525, "learning_rate": 3.843231775744647e-06, "loss": 0.08455071, "memory(GiB)": 13.7, "step": 63525, "train_speed(iter/s)": 1.529488 }, { "acc": 0.9645833, "epoch": 29.777361143660652, "grad_norm": 4.348776340484619, "learning_rate": 3.842477729945347e-06, "loss": 0.0823092, "memory(GiB)": 13.7, "step": 63530, "train_speed(iter/s)": 1.529493 }, { "acc": 0.97770824, "epoch": 29.779704710569487, "grad_norm": 4.647748947143555, "learning_rate": 3.841723711981099e-06, "loss": 0.03704616, "memory(GiB)": 13.7, "step": 63535, "train_speed(iter/s)": 1.529502 }, { "acc": 0.97701397, "epoch": 29.78204827747832, "grad_norm": 1.6620041131973267, "learning_rate": 3.840969721870023e-06, "loss": 0.07938287, "memory(GiB)": 13.7, "step": 63540, "train_speed(iter/s)": 1.529511 }, { "acc": 0.97900238, "epoch": 29.78439184438716, "grad_norm": 3.115434169769287, "learning_rate": 3.840215759630248e-06, "loss": 0.05406682, "memory(GiB)": 13.7, "step": 63545, "train_speed(iter/s)": 1.529517 }, { "acc": 0.9763195, "epoch": 29.786735411295993, "grad_norm": 6.548984050750732, "learning_rate": 3.839461825279893e-06, "loss": 0.06074171, "memory(GiB)": 13.7, "step": 63550, "train_speed(iter/s)": 1.52952 }, { "acc": 0.99383011, "epoch": 29.789078978204827, "grad_norm": 3.689713716506958, "learning_rate": 3.838707918837083e-06, "loss": 0.03616909, "memory(GiB)": 13.7, "step": 63555, "train_speed(iter/s)": 1.529521 }, { "acc": 1.0, "epoch": 29.79142254511366, "grad_norm": 2.284282922744751, "learning_rate": 3.837954040319937e-06, "loss": 0.03661947, "memory(GiB)": 13.7, "step": 63560, "train_speed(iter/s)": 1.529518 }, { "acc": 0.97738972, "epoch": 29.7937661120225, "grad_norm": 5.882724761962891, "learning_rate": 3.837200189746577e-06, "loss": 0.0636269, "memory(GiB)": 13.7, "step": 63565, "train_speed(iter/s)": 1.529535 }, { "acc": 0.9833333, "epoch": 29.796109678931334, "grad_norm": 2.078672409057617, "learning_rate": 3.8364463671351225e-06, "loss": 0.05313046, "memory(GiB)": 13.7, "step": 63570, "train_speed(iter/s)": 1.529534 }, { "acc": 0.99125004, "epoch": 29.798453245840168, "grad_norm": 2.8184969425201416, "learning_rate": 3.835692572503693e-06, "loss": 0.03789334, "memory(GiB)": 13.7, "step": 63575, "train_speed(iter/s)": 1.529535 }, { "acc": 0.98251991, "epoch": 29.800796812749002, "grad_norm": 4.117167949676514, "learning_rate": 3.834938805870407e-06, "loss": 0.05873027, "memory(GiB)": 13.7, "step": 63580, "train_speed(iter/s)": 1.529539 }, { "acc": 0.97919483, "epoch": 29.80314037965784, "grad_norm": 3.0622189044952393, "learning_rate": 3.834185067253384e-06, "loss": 0.05562371, "memory(GiB)": 13.7, "step": 63585, "train_speed(iter/s)": 1.529547 }, { "acc": 0.97593746, "epoch": 29.805483946566675, "grad_norm": 3.445014476776123, "learning_rate": 3.83343135667074e-06, "loss": 0.0597308, "memory(GiB)": 13.7, "step": 63590, "train_speed(iter/s)": 1.529549 }, { "acc": 0.98711309, "epoch": 29.80782751347551, "grad_norm": 2.578598737716675, "learning_rate": 3.83267767414059e-06, "loss": 0.03597804, "memory(GiB)": 13.7, "step": 63595, "train_speed(iter/s)": 1.52956 }, { "acc": 0.98208332, "epoch": 29.810171080384343, "grad_norm": 0.018267158418893814, "learning_rate": 3.831924019681054e-06, "loss": 0.06717337, "memory(GiB)": 13.7, "step": 63600, "train_speed(iter/s)": 1.529566 }, { "acc": 0.98666668, "epoch": 29.81251464729318, "grad_norm": 2.0976173877716064, "learning_rate": 3.831170393310247e-06, "loss": 0.05782462, "memory(GiB)": 13.7, "step": 63605, "train_speed(iter/s)": 1.529565 }, { "acc": 0.96788378, "epoch": 29.814858214202015, "grad_norm": 5.059609413146973, "learning_rate": 3.830416795046281e-06, "loss": 0.08588064, "memory(GiB)": 13.7, "step": 63610, "train_speed(iter/s)": 1.529572 }, { "acc": 0.98619785, "epoch": 29.81720178111085, "grad_norm": 1.344442367553711, "learning_rate": 3.829663224907269e-06, "loss": 0.08015434, "memory(GiB)": 13.7, "step": 63615, "train_speed(iter/s)": 1.529576 }, { "acc": 0.96694441, "epoch": 29.819545348019687, "grad_norm": 7.437169551849365, "learning_rate": 3.82890968291133e-06, "loss": 0.06618953, "memory(GiB)": 13.7, "step": 63620, "train_speed(iter/s)": 1.529579 }, { "acc": 0.984375, "epoch": 29.821888914928522, "grad_norm": 3.201678991317749, "learning_rate": 3.828156169076571e-06, "loss": 0.02596306, "memory(GiB)": 13.7, "step": 63625, "train_speed(iter/s)": 1.52958 }, { "acc": 0.98145828, "epoch": 29.824232481837356, "grad_norm": 3.7843339443206787, "learning_rate": 3.827402683421107e-06, "loss": 0.06436196, "memory(GiB)": 13.7, "step": 63630, "train_speed(iter/s)": 1.529589 }, { "acc": 0.97622032, "epoch": 29.82657604874619, "grad_norm": 4.473394870758057, "learning_rate": 3.826649225963049e-06, "loss": 0.08076791, "memory(GiB)": 13.7, "step": 63635, "train_speed(iter/s)": 1.529584 }, { "acc": 0.98125, "epoch": 29.82891961565503, "grad_norm": 2.443324327468872, "learning_rate": 3.8258957967205045e-06, "loss": 0.09758492, "memory(GiB)": 13.7, "step": 63640, "train_speed(iter/s)": 1.529593 }, { "acc": 0.97270832, "epoch": 29.831263182563863, "grad_norm": 1.7209975719451904, "learning_rate": 3.825142395711589e-06, "loss": 0.057197, "memory(GiB)": 13.7, "step": 63645, "train_speed(iter/s)": 1.529596 }, { "acc": 0.99034729, "epoch": 29.833606749472697, "grad_norm": 3.9288535118103027, "learning_rate": 3.824389022954409e-06, "loss": 0.02969857, "memory(GiB)": 13.7, "step": 63650, "train_speed(iter/s)": 1.529604 }, { "acc": 0.9927083, "epoch": 29.83595031638153, "grad_norm": 1.027342677116394, "learning_rate": 3.823635678467073e-06, "loss": 0.03008726, "memory(GiB)": 13.7, "step": 63655, "train_speed(iter/s)": 1.529607 }, { "acc": 0.97458334, "epoch": 29.83829388329037, "grad_norm": 5.5468339920043945, "learning_rate": 3.822882362267688e-06, "loss": 0.10173697, "memory(GiB)": 13.7, "step": 63660, "train_speed(iter/s)": 1.529608 }, { "acc": 0.98383923, "epoch": 29.840637450199203, "grad_norm": 4.657070159912109, "learning_rate": 3.822129074374362e-06, "loss": 0.03738104, "memory(GiB)": 13.7, "step": 63665, "train_speed(iter/s)": 1.529613 }, { "acc": 0.98367529, "epoch": 29.842981017108038, "grad_norm": 5.899132251739502, "learning_rate": 3.821375814805202e-06, "loss": 0.05232639, "memory(GiB)": 13.7, "step": 63670, "train_speed(iter/s)": 1.529615 }, { "acc": 0.98450394, "epoch": 29.845324584016872, "grad_norm": 0.01241637859493494, "learning_rate": 3.820622583578315e-06, "loss": 0.03958937, "memory(GiB)": 13.7, "step": 63675, "train_speed(iter/s)": 1.529616 }, { "acc": 0.9791667, "epoch": 29.84766815092571, "grad_norm": 6.208694934844971, "learning_rate": 3.819869380711804e-06, "loss": 0.0550266, "memory(GiB)": 13.7, "step": 63680, "train_speed(iter/s)": 1.529626 }, { "acc": 0.97979164, "epoch": 29.850011717834544, "grad_norm": 5.741518974304199, "learning_rate": 3.819116206223773e-06, "loss": 0.03661752, "memory(GiB)": 13.7, "step": 63685, "train_speed(iter/s)": 1.529628 }, { "acc": 0.98217258, "epoch": 29.85235528474338, "grad_norm": 6.141251564025879, "learning_rate": 3.81836306013233e-06, "loss": 0.07998333, "memory(GiB)": 13.7, "step": 63690, "train_speed(iter/s)": 1.529626 }, { "acc": 0.9916667, "epoch": 29.854698851652216, "grad_norm": 2.5442309379577637, "learning_rate": 3.8176099424555745e-06, "loss": 0.0190645, "memory(GiB)": 13.7, "step": 63695, "train_speed(iter/s)": 1.529627 }, { "acc": 0.98741322, "epoch": 29.85704241856105, "grad_norm": 3.340822219848633, "learning_rate": 3.816856853211608e-06, "loss": 0.06741441, "memory(GiB)": 13.7, "step": 63700, "train_speed(iter/s)": 1.529633 }, { "acc": 0.99187498, "epoch": 29.859385985469885, "grad_norm": 0.013421044684946537, "learning_rate": 3.816103792418536e-06, "loss": 0.0326601, "memory(GiB)": 13.7, "step": 63705, "train_speed(iter/s)": 1.529643 }, { "acc": 0.97749386, "epoch": 29.86172955237872, "grad_norm": 4.861176490783691, "learning_rate": 3.815350760094459e-06, "loss": 0.06690836, "memory(GiB)": 13.7, "step": 63710, "train_speed(iter/s)": 1.529642 }, { "acc": 0.97843752, "epoch": 29.864073119287557, "grad_norm": 4.519562721252441, "learning_rate": 3.814597756257473e-06, "loss": 0.07132252, "memory(GiB)": 13.7, "step": 63715, "train_speed(iter/s)": 1.529649 }, { "acc": 0.97238102, "epoch": 29.86641668619639, "grad_norm": 1.0066611766815186, "learning_rate": 3.813844780925683e-06, "loss": 0.09829534, "memory(GiB)": 13.7, "step": 63720, "train_speed(iter/s)": 1.529653 }, { "acc": 0.9763195, "epoch": 29.868760253105226, "grad_norm": 4.850347518920898, "learning_rate": 3.8130918341171842e-06, "loss": 0.08281394, "memory(GiB)": 13.7, "step": 63725, "train_speed(iter/s)": 1.529651 }, { "acc": 0.9769886, "epoch": 29.87110382001406, "grad_norm": 5.898172378540039, "learning_rate": 3.8123389158500794e-06, "loss": 0.06094587, "memory(GiB)": 13.7, "step": 63730, "train_speed(iter/s)": 1.529655 }, { "acc": 0.97770834, "epoch": 29.873447386922898, "grad_norm": 5.594351291656494, "learning_rate": 3.8115860261424633e-06, "loss": 0.06619097, "memory(GiB)": 13.7, "step": 63735, "train_speed(iter/s)": 1.529656 }, { "acc": 0.97666664, "epoch": 29.875790953831732, "grad_norm": 6.502513885498047, "learning_rate": 3.8108331650124327e-06, "loss": 0.06382148, "memory(GiB)": 13.7, "step": 63740, "train_speed(iter/s)": 1.52966 }, { "acc": 0.97687502, "epoch": 29.878134520740566, "grad_norm": 7.546570301055908, "learning_rate": 3.8100803324780827e-06, "loss": 0.08937017, "memory(GiB)": 13.7, "step": 63745, "train_speed(iter/s)": 1.529663 }, { "acc": 1.0, "epoch": 29.8804780876494, "grad_norm": 0.44208163022994995, "learning_rate": 3.8093275285575124e-06, "loss": 0.0204114, "memory(GiB)": 13.7, "step": 63750, "train_speed(iter/s)": 1.529661 }, { "acc": 0.9916667, "epoch": 29.88282165455824, "grad_norm": 2.284266233444214, "learning_rate": 3.808574753268815e-06, "loss": 0.04162205, "memory(GiB)": 13.7, "step": 63755, "train_speed(iter/s)": 1.529662 }, { "acc": 0.99541664, "epoch": 29.885165221467073, "grad_norm": 1.960521936416626, "learning_rate": 3.807822006630083e-06, "loss": 0.01314086, "memory(GiB)": 13.7, "step": 63760, "train_speed(iter/s)": 1.529663 }, { "acc": 0.98611107, "epoch": 29.887508788375907, "grad_norm": 0.4247561991214752, "learning_rate": 3.8070692886594142e-06, "loss": 0.0306671, "memory(GiB)": 13.7, "step": 63765, "train_speed(iter/s)": 1.529665 }, { "acc": 0.99707794, "epoch": 29.889852355284745, "grad_norm": 1.4674036502838135, "learning_rate": 3.806316599374897e-06, "loss": 0.01470101, "memory(GiB)": 13.7, "step": 63770, "train_speed(iter/s)": 1.529668 }, { "acc": 0.98008928, "epoch": 29.89219592219358, "grad_norm": 2.4622905254364014, "learning_rate": 3.8055639387946273e-06, "loss": 0.07164428, "memory(GiB)": 13.7, "step": 63775, "train_speed(iter/s)": 1.529673 }, { "acc": 0.98812504, "epoch": 29.894539489102414, "grad_norm": 1.050763726234436, "learning_rate": 3.8048113069366945e-06, "loss": 0.02931996, "memory(GiB)": 13.7, "step": 63780, "train_speed(iter/s)": 1.529681 }, { "acc": 0.99125004, "epoch": 29.896883056011248, "grad_norm": 1.867897391319275, "learning_rate": 3.8040587038191896e-06, "loss": 0.03382342, "memory(GiB)": 13.7, "step": 63785, "train_speed(iter/s)": 1.529682 }, { "acc": 0.99375, "epoch": 29.899226622920086, "grad_norm": 0.004711986985057592, "learning_rate": 3.803306129460201e-06, "loss": 0.0217137, "memory(GiB)": 13.7, "step": 63790, "train_speed(iter/s)": 1.529687 }, { "acc": 0.99437504, "epoch": 29.90157018982892, "grad_norm": 2.6341378688812256, "learning_rate": 3.8025535838778223e-06, "loss": 0.04188256, "memory(GiB)": 13.7, "step": 63795, "train_speed(iter/s)": 1.529687 }, { "acc": 0.984375, "epoch": 29.903913756737754, "grad_norm": 4.404118061065674, "learning_rate": 3.8018010670901378e-06, "loss": 0.04486638, "memory(GiB)": 13.7, "step": 63800, "train_speed(iter/s)": 1.529695 }, { "acc": 0.98309031, "epoch": 29.90625732364659, "grad_norm": 4.062219619750977, "learning_rate": 3.801048579115239e-06, "loss": 0.05429968, "memory(GiB)": 13.7, "step": 63805, "train_speed(iter/s)": 1.529703 }, { "acc": 0.98185101, "epoch": 29.908600890555427, "grad_norm": 3.755474805831909, "learning_rate": 3.8002961199712116e-06, "loss": 0.08871515, "memory(GiB)": 13.7, "step": 63810, "train_speed(iter/s)": 1.529703 }, { "acc": 0.990625, "epoch": 29.91094445746426, "grad_norm": 0.005372744519263506, "learning_rate": 3.7995436896761447e-06, "loss": 0.03348916, "memory(GiB)": 13.7, "step": 63815, "train_speed(iter/s)": 1.529712 }, { "acc": 0.97717266, "epoch": 29.913288024373095, "grad_norm": 2.879835367202759, "learning_rate": 3.7987912882481185e-06, "loss": 0.07790246, "memory(GiB)": 13.7, "step": 63820, "train_speed(iter/s)": 1.529719 }, { "acc": 0.99187498, "epoch": 29.91563159128193, "grad_norm": 0.015084155835211277, "learning_rate": 3.7980389157052237e-06, "loss": 0.04504879, "memory(GiB)": 13.7, "step": 63825, "train_speed(iter/s)": 1.529724 }, { "acc": 0.9822916, "epoch": 29.917975158190767, "grad_norm": 3.3795666694641113, "learning_rate": 3.797286572065542e-06, "loss": 0.04502911, "memory(GiB)": 13.7, "step": 63830, "train_speed(iter/s)": 1.529728 }, { "acc": 0.98467255, "epoch": 29.9203187250996, "grad_norm": 0.053984351456165314, "learning_rate": 3.796534257347159e-06, "loss": 0.0383874, "memory(GiB)": 13.7, "step": 63835, "train_speed(iter/s)": 1.529736 }, { "acc": 0.97122631, "epoch": 29.922662292008436, "grad_norm": 0.4049832820892334, "learning_rate": 3.7957819715681567e-06, "loss": 0.09890978, "memory(GiB)": 13.7, "step": 63840, "train_speed(iter/s)": 1.529741 }, { "acc": 0.97937498, "epoch": 29.925005858917274, "grad_norm": 2.4832122325897217, "learning_rate": 3.795029714746618e-06, "loss": 0.04340065, "memory(GiB)": 13.7, "step": 63845, "train_speed(iter/s)": 1.529743 }, { "acc": 0.97977619, "epoch": 29.927349425826108, "grad_norm": 2.9683399200439453, "learning_rate": 3.794277486900625e-06, "loss": 0.05544496, "memory(GiB)": 13.7, "step": 63850, "train_speed(iter/s)": 1.529743 }, { "acc": 0.98250008, "epoch": 29.929692992734942, "grad_norm": 7.168578147888184, "learning_rate": 3.793525288048259e-06, "loss": 0.05825261, "memory(GiB)": 13.7, "step": 63855, "train_speed(iter/s)": 1.529745 }, { "acc": 0.95625, "epoch": 29.932036559643777, "grad_norm": 2.183366537094116, "learning_rate": 3.7927731182075993e-06, "loss": 0.08422561, "memory(GiB)": 13.7, "step": 63860, "train_speed(iter/s)": 1.529749 }, { "acc": 0.98145828, "epoch": 29.934380126552615, "grad_norm": 1.212984561920166, "learning_rate": 3.792020977396726e-06, "loss": 0.06387981, "memory(GiB)": 13.7, "step": 63865, "train_speed(iter/s)": 1.529753 }, { "acc": 0.97937498, "epoch": 29.93672369346145, "grad_norm": 3.951211452484131, "learning_rate": 3.7912688656337188e-06, "loss": 0.05970587, "memory(GiB)": 13.7, "step": 63870, "train_speed(iter/s)": 1.529751 }, { "acc": 0.98767853, "epoch": 29.939067260370283, "grad_norm": 3.9822540283203125, "learning_rate": 3.7905167829366533e-06, "loss": 0.04230605, "memory(GiB)": 13.7, "step": 63875, "train_speed(iter/s)": 1.529747 }, { "acc": 0.9782238, "epoch": 29.941410827279118, "grad_norm": 1.9991414546966553, "learning_rate": 3.789764729323611e-06, "loss": 0.09611186, "memory(GiB)": 13.7, "step": 63880, "train_speed(iter/s)": 1.529747 }, { "acc": 0.98698864, "epoch": 29.943754394187955, "grad_norm": 6.155022621154785, "learning_rate": 3.7890127048126675e-06, "loss": 0.02816055, "memory(GiB)": 13.7, "step": 63885, "train_speed(iter/s)": 1.529751 }, { "acc": 0.98467264, "epoch": 29.94609796109679, "grad_norm": 7.850793361663818, "learning_rate": 3.7882607094218977e-06, "loss": 0.04312162, "memory(GiB)": 13.7, "step": 63890, "train_speed(iter/s)": 1.529756 }, { "acc": 0.981007, "epoch": 29.948441528005624, "grad_norm": 5.069724082946777, "learning_rate": 3.787508743169378e-06, "loss": 0.1095113, "memory(GiB)": 13.7, "step": 63895, "train_speed(iter/s)": 1.529757 }, { "acc": 0.98708324, "epoch": 29.95078509491446, "grad_norm": 2.625628709793091, "learning_rate": 3.7867568060731846e-06, "loss": 0.0377679, "memory(GiB)": 13.7, "step": 63900, "train_speed(iter/s)": 1.529759 }, { "acc": 0.988447, "epoch": 29.953128661823296, "grad_norm": 0.005802072584629059, "learning_rate": 3.786004898151388e-06, "loss": 0.03301759, "memory(GiB)": 13.7, "step": 63905, "train_speed(iter/s)": 1.529757 }, { "acc": 0.98133926, "epoch": 29.95547222873213, "grad_norm": 8.85898208618164, "learning_rate": 3.785253019422065e-06, "loss": 0.0721653, "memory(GiB)": 13.7, "step": 63910, "train_speed(iter/s)": 1.529763 }, { "acc": 0.9708334, "epoch": 29.957815795640965, "grad_norm": 6.827716827392578, "learning_rate": 3.7845011699032868e-06, "loss": 0.05381559, "memory(GiB)": 13.7, "step": 63915, "train_speed(iter/s)": 1.529768 }, { "acc": 0.98589859, "epoch": 29.960159362549803, "grad_norm": 0.11691377311944962, "learning_rate": 3.7837493496131244e-06, "loss": 0.05837126, "memory(GiB)": 13.7, "step": 63920, "train_speed(iter/s)": 1.529765 }, { "acc": 0.990625, "epoch": 29.962502929458637, "grad_norm": 1.3907442092895508, "learning_rate": 3.7829975585696515e-06, "loss": 0.02070709, "memory(GiB)": 13.7, "step": 63925, "train_speed(iter/s)": 1.529761 }, { "acc": 0.98571434, "epoch": 29.96484649636747, "grad_norm": 7.8175048828125, "learning_rate": 3.782245796790937e-06, "loss": 0.0583753, "memory(GiB)": 13.7, "step": 63930, "train_speed(iter/s)": 1.529768 }, { "acc": 0.98888893, "epoch": 29.967190063276306, "grad_norm": 1.470572590827942, "learning_rate": 3.781494064295051e-06, "loss": 0.04533879, "memory(GiB)": 13.7, "step": 63935, "train_speed(iter/s)": 1.529761 }, { "acc": 0.98084822, "epoch": 29.969533630185143, "grad_norm": 1.5493736267089844, "learning_rate": 3.780742361100065e-06, "loss": 0.08809178, "memory(GiB)": 13.7, "step": 63940, "train_speed(iter/s)": 1.52976 }, { "acc": 0.98486118, "epoch": 29.971877197093978, "grad_norm": 0.5132201910018921, "learning_rate": 3.779990687224044e-06, "loss": 0.05739982, "memory(GiB)": 13.7, "step": 63945, "train_speed(iter/s)": 1.529763 }, { "acc": 0.97666664, "epoch": 29.974220764002812, "grad_norm": 0.05572831258177757, "learning_rate": 3.7792390426850562e-06, "loss": 0.03702745, "memory(GiB)": 13.7, "step": 63950, "train_speed(iter/s)": 1.529766 }, { "acc": 0.9916667, "epoch": 29.976564330911646, "grad_norm": 1.1393481492996216, "learning_rate": 3.778487427501171e-06, "loss": 0.03819223, "memory(GiB)": 13.7, "step": 63955, "train_speed(iter/s)": 1.529774 }, { "acc": 0.98874998, "epoch": 29.978907897820484, "grad_norm": 0.5608384609222412, "learning_rate": 3.777735841690453e-06, "loss": 0.02770206, "memory(GiB)": 13.7, "step": 63960, "train_speed(iter/s)": 1.529779 }, { "acc": 1.0, "epoch": 29.98125146472932, "grad_norm": 4.862892150878906, "learning_rate": 3.776984285270967e-06, "loss": 0.03565198, "memory(GiB)": 13.7, "step": 63965, "train_speed(iter/s)": 1.529779 }, { "acc": 0.98217258, "epoch": 29.983595031638153, "grad_norm": 0.0050179618410766125, "learning_rate": 3.7762327582607815e-06, "loss": 0.03434218, "memory(GiB)": 13.7, "step": 63970, "train_speed(iter/s)": 1.529786 }, { "acc": 0.96821432, "epoch": 29.985938598546987, "grad_norm": 6.289765357971191, "learning_rate": 3.775481260677956e-06, "loss": 0.07759035, "memory(GiB)": 13.7, "step": 63975, "train_speed(iter/s)": 1.529795 }, { "acc": 0.98344698, "epoch": 29.988282165455825, "grad_norm": 1.158250331878662, "learning_rate": 3.7747297925405583e-06, "loss": 0.09991021, "memory(GiB)": 13.7, "step": 63980, "train_speed(iter/s)": 1.529795 }, { "acc": 0.9895834, "epoch": 29.99062573236466, "grad_norm": 3.24161434173584, "learning_rate": 3.77397835386665e-06, "loss": 0.0204599, "memory(GiB)": 13.7, "step": 63985, "train_speed(iter/s)": 1.529801 }, { "acc": 0.98156242, "epoch": 29.992969299273494, "grad_norm": 3.8213913440704346, "learning_rate": 3.7732269446742925e-06, "loss": 0.08148079, "memory(GiB)": 13.7, "step": 63990, "train_speed(iter/s)": 1.529809 }, { "acc": 0.99624996, "epoch": 29.995312866182328, "grad_norm": 0.0005693163257092237, "learning_rate": 3.7724755649815458e-06, "loss": 0.02039263, "memory(GiB)": 13.7, "step": 63995, "train_speed(iter/s)": 1.529811 }, { "acc": 0.97645836, "epoch": 29.997656433091166, "grad_norm": 4.601892948150635, "learning_rate": 3.7717242148064726e-06, "loss": 0.06853777, "memory(GiB)": 13.7, "step": 64000, "train_speed(iter/s)": 1.529814 }, { "acc": 0.98566923, "epoch": 30.0, "grad_norm": 7.8439717292785645, "learning_rate": 3.770972894167132e-06, "loss": 0.07352266, "memory(GiB)": 13.7, "step": 64005, "train_speed(iter/s)": 1.529801 }, { "acc": 0.98864584, "epoch": 30.002343566908834, "grad_norm": 1.883190631866455, "learning_rate": 3.7702216030815846e-06, "loss": 0.02533415, "memory(GiB)": 13.7, "step": 64010, "train_speed(iter/s)": 1.529793 }, { "acc": 0.98041668, "epoch": 30.004687133817672, "grad_norm": 4.686326503753662, "learning_rate": 3.7694703415678885e-06, "loss": 0.02960481, "memory(GiB)": 13.7, "step": 64015, "train_speed(iter/s)": 1.529797 }, { "acc": 0.98083334, "epoch": 30.007030700726506, "grad_norm": 2.269850969314575, "learning_rate": 3.7687191096440993e-06, "loss": 0.0221538, "memory(GiB)": 13.7, "step": 64020, "train_speed(iter/s)": 1.529796 }, { "acc": 0.9836607, "epoch": 30.00937426763534, "grad_norm": 0.015340888872742653, "learning_rate": 3.767967907328279e-06, "loss": 0.0516009, "memory(GiB)": 13.7, "step": 64025, "train_speed(iter/s)": 1.529798 }, { "acc": 0.98395834, "epoch": 30.011717834544175, "grad_norm": 3.057988405227661, "learning_rate": 3.767216734638479e-06, "loss": 0.03447444, "memory(GiB)": 13.7, "step": 64030, "train_speed(iter/s)": 1.529805 }, { "acc": 0.97625008, "epoch": 30.014061401453013, "grad_norm": 2.113532304763794, "learning_rate": 3.766465591592758e-06, "loss": 0.06034614, "memory(GiB)": 13.7, "step": 64035, "train_speed(iter/s)": 1.529811 }, { "acc": 0.99312496, "epoch": 30.016404968361847, "grad_norm": 5.140698432922363, "learning_rate": 3.765714478209168e-06, "loss": 0.03127727, "memory(GiB)": 13.7, "step": 64040, "train_speed(iter/s)": 1.529809 }, { "acc": 0.98458328, "epoch": 30.01874853527068, "grad_norm": 2.7197420597076416, "learning_rate": 3.764963394505767e-06, "loss": 0.04751786, "memory(GiB)": 13.7, "step": 64045, "train_speed(iter/s)": 1.529814 }, { "acc": 0.98455353, "epoch": 30.021092102179516, "grad_norm": 2.1929001808166504, "learning_rate": 3.7642123405006064e-06, "loss": 0.02948175, "memory(GiB)": 13.7, "step": 64050, "train_speed(iter/s)": 1.529813 }, { "acc": 0.990625, "epoch": 30.023435669088354, "grad_norm": 0.8740476369857788, "learning_rate": 3.76346131621174e-06, "loss": 0.01540896, "memory(GiB)": 13.7, "step": 64055, "train_speed(iter/s)": 1.529814 }, { "acc": 0.99094067, "epoch": 30.025779235997188, "grad_norm": 4.066282749176025, "learning_rate": 3.7627103216572193e-06, "loss": 0.01514656, "memory(GiB)": 13.7, "step": 64060, "train_speed(iter/s)": 1.529815 }, { "acc": 0.97666664, "epoch": 30.028122802906022, "grad_norm": 1.336933970451355, "learning_rate": 3.7619593568550976e-06, "loss": 0.07862834, "memory(GiB)": 13.7, "step": 64065, "train_speed(iter/s)": 1.529821 }, { "acc": 0.98169651, "epoch": 30.030466369814857, "grad_norm": 0.14314018189907074, "learning_rate": 3.761208421823422e-06, "loss": 0.04054785, "memory(GiB)": 13.7, "step": 64070, "train_speed(iter/s)": 1.529816 }, { "acc": 0.98395834, "epoch": 30.032809936723694, "grad_norm": 2.3141090869903564, "learning_rate": 3.7604575165802454e-06, "loss": 0.05241513, "memory(GiB)": 13.7, "step": 64075, "train_speed(iter/s)": 1.529824 }, { "acc": 0.98036861, "epoch": 30.03515350363253, "grad_norm": 5.060593605041504, "learning_rate": 3.759706641143615e-06, "loss": 0.08923255, "memory(GiB)": 13.7, "step": 64080, "train_speed(iter/s)": 1.529822 }, { "acc": 0.99248514, "epoch": 30.037497070541363, "grad_norm": 0.6088466048240662, "learning_rate": 3.758955795531582e-06, "loss": 0.01290096, "memory(GiB)": 13.7, "step": 64085, "train_speed(iter/s)": 1.529827 }, { "acc": 0.98601189, "epoch": 30.0398406374502, "grad_norm": 2.9879441261291504, "learning_rate": 3.7582049797621927e-06, "loss": 0.06277135, "memory(GiB)": 13.7, "step": 64090, "train_speed(iter/s)": 1.529827 }, { "acc": 0.99090805, "epoch": 30.042184204359035, "grad_norm": 3.7906463146209717, "learning_rate": 3.7574541938534935e-06, "loss": 0.04034272, "memory(GiB)": 13.7, "step": 64095, "train_speed(iter/s)": 1.529829 }, { "acc": 0.95630455, "epoch": 30.04452777126787, "grad_norm": 6.88255500793457, "learning_rate": 3.7567034378235325e-06, "loss": 0.11852822, "memory(GiB)": 13.7, "step": 64100, "train_speed(iter/s)": 1.529835 }, { "acc": 0.99406252, "epoch": 30.046871338176704, "grad_norm": 1.3241257667541504, "learning_rate": 3.755952711690355e-06, "loss": 0.02143335, "memory(GiB)": 13.7, "step": 64105, "train_speed(iter/s)": 1.529831 }, { "acc": 0.98519344, "epoch": 30.04921490508554, "grad_norm": 4.835328102111816, "learning_rate": 3.755202015472005e-06, "loss": 0.06827328, "memory(GiB)": 13.7, "step": 64110, "train_speed(iter/s)": 1.529837 }, { "acc": 0.990625, "epoch": 30.051558471994376, "grad_norm": 3.0070338249206543, "learning_rate": 3.7544513491865288e-06, "loss": 0.02177026, "memory(GiB)": 13.7, "step": 64115, "train_speed(iter/s)": 1.529837 }, { "acc": 0.96958332, "epoch": 30.05390203890321, "grad_norm": 4.601891994476318, "learning_rate": 3.7537007128519675e-06, "loss": 0.08640909, "memory(GiB)": 13.7, "step": 64120, "train_speed(iter/s)": 1.529845 }, { "acc": 0.98973217, "epoch": 30.056245605812045, "grad_norm": 2.9200804233551025, "learning_rate": 3.7529501064863646e-06, "loss": 0.05577093, "memory(GiB)": 13.7, "step": 64125, "train_speed(iter/s)": 1.529849 }, { "acc": 0.99383011, "epoch": 30.058589172720882, "grad_norm": 0.7955514788627625, "learning_rate": 3.752199530107763e-06, "loss": 0.03362559, "memory(GiB)": 13.7, "step": 64130, "train_speed(iter/s)": 1.529856 }, { "acc": 0.99437504, "epoch": 30.060932739629717, "grad_norm": 0.006309447810053825, "learning_rate": 3.751448983734204e-06, "loss": 0.04077978, "memory(GiB)": 13.7, "step": 64135, "train_speed(iter/s)": 1.529861 }, { "acc": 0.990625, "epoch": 30.06327630653855, "grad_norm": 0.009318594820797443, "learning_rate": 3.750698467383727e-06, "loss": 0.01766143, "memory(GiB)": 13.7, "step": 64140, "train_speed(iter/s)": 1.529867 }, { "acc": 0.98125, "epoch": 30.065619873447385, "grad_norm": 5.47904109954834, "learning_rate": 3.749947981074374e-06, "loss": 0.05756412, "memory(GiB)": 13.7, "step": 64145, "train_speed(iter/s)": 1.529876 }, { "acc": 0.99562502, "epoch": 30.067963440356223, "grad_norm": 2.384920835494995, "learning_rate": 3.749197524824184e-06, "loss": 0.02508121, "memory(GiB)": 13.7, "step": 64150, "train_speed(iter/s)": 1.529883 }, { "acc": 0.9885416, "epoch": 30.070307007265058, "grad_norm": 0.4873909056186676, "learning_rate": 3.748447098651193e-06, "loss": 0.04020478, "memory(GiB)": 13.7, "step": 64155, "train_speed(iter/s)": 1.529886 }, { "acc": 0.9895834, "epoch": 30.072650574173892, "grad_norm": 0.005141416564583778, "learning_rate": 3.7476967025734407e-06, "loss": 0.01967005, "memory(GiB)": 13.7, "step": 64160, "train_speed(iter/s)": 1.52989 }, { "acc": 0.98113098, "epoch": 30.074994141082726, "grad_norm": 1.731148600578308, "learning_rate": 3.7469463366089646e-06, "loss": 0.03487996, "memory(GiB)": 13.7, "step": 64165, "train_speed(iter/s)": 1.52989 }, { "acc": 0.98083334, "epoch": 30.077337707991564, "grad_norm": 0.0014190657529979944, "learning_rate": 3.7461960007757993e-06, "loss": 0.03661615, "memory(GiB)": 13.7, "step": 64170, "train_speed(iter/s)": 1.529888 }, { "acc": 0.97514877, "epoch": 30.0796812749004, "grad_norm": 5.620906352996826, "learning_rate": 3.7454456950919814e-06, "loss": 0.07535721, "memory(GiB)": 13.7, "step": 64175, "train_speed(iter/s)": 1.529886 }, { "acc": 0.9885416, "epoch": 30.082024841809233, "grad_norm": 6.306610107421875, "learning_rate": 3.7446954195755474e-06, "loss": 0.04780997, "memory(GiB)": 13.7, "step": 64180, "train_speed(iter/s)": 1.529889 }, { "acc": 0.98708334, "epoch": 30.08436840871807, "grad_norm": 2.5025501251220703, "learning_rate": 3.7439451742445283e-06, "loss": 0.03021627, "memory(GiB)": 13.7, "step": 64185, "train_speed(iter/s)": 1.529887 }, { "acc": 0.975, "epoch": 30.086711975626905, "grad_norm": 0.014264839701354504, "learning_rate": 3.743194959116959e-06, "loss": 0.05730094, "memory(GiB)": 13.7, "step": 64190, "train_speed(iter/s)": 1.529895 }, { "acc": 0.98796883, "epoch": 30.08905554253574, "grad_norm": 6.244643211364746, "learning_rate": 3.7424447742108747e-06, "loss": 0.04542465, "memory(GiB)": 13.7, "step": 64195, "train_speed(iter/s)": 1.529902 }, { "acc": 0.9666666, "epoch": 30.091399109444573, "grad_norm": 3.262930393218994, "learning_rate": 3.7416946195443027e-06, "loss": 0.08015037, "memory(GiB)": 13.7, "step": 64200, "train_speed(iter/s)": 1.529908 }, { "acc": 0.98988094, "epoch": 30.09374267635341, "grad_norm": 0.0030677670147269964, "learning_rate": 3.740944495135277e-06, "loss": 0.04914579, "memory(GiB)": 13.7, "step": 64205, "train_speed(iter/s)": 1.529914 }, { "acc": 0.97430553, "epoch": 30.096086243262246, "grad_norm": 5.198098182678223, "learning_rate": 3.740194401001828e-06, "loss": 0.04325707, "memory(GiB)": 13.7, "step": 64210, "train_speed(iter/s)": 1.529919 }, { "acc": 0.99437504, "epoch": 30.09842981017108, "grad_norm": 2.1273529529571533, "learning_rate": 3.7394443371619844e-06, "loss": 0.04267282, "memory(GiB)": 13.7, "step": 64215, "train_speed(iter/s)": 1.529916 }, { "acc": 0.9875, "epoch": 30.100773377079914, "grad_norm": 7.137444019317627, "learning_rate": 3.738694303633777e-06, "loss": 0.04909216, "memory(GiB)": 13.7, "step": 64220, "train_speed(iter/s)": 1.529921 }, { "acc": 0.97842264, "epoch": 30.103116943988752, "grad_norm": 1.4773472547531128, "learning_rate": 3.7379443004352322e-06, "loss": 0.06113119, "memory(GiB)": 13.7, "step": 64225, "train_speed(iter/s)": 1.529928 }, { "acc": 0.98891945, "epoch": 30.105460510897586, "grad_norm": 3.4284517765045166, "learning_rate": 3.7371943275843793e-06, "loss": 0.07145131, "memory(GiB)": 13.7, "step": 64230, "train_speed(iter/s)": 1.529931 }, { "acc": 0.98064728, "epoch": 30.10780407780642, "grad_norm": 3.4984216690063477, "learning_rate": 3.7364443850992455e-06, "loss": 0.05452988, "memory(GiB)": 13.7, "step": 64235, "train_speed(iter/s)": 1.529932 }, { "acc": 0.9926136, "epoch": 30.110147644715255, "grad_norm": 4.672125816345215, "learning_rate": 3.735694472997855e-06, "loss": 0.0260929, "memory(GiB)": 13.7, "step": 64240, "train_speed(iter/s)": 1.529931 }, { "acc": 0.984375, "epoch": 30.112491211624093, "grad_norm": 0.40797820687294006, "learning_rate": 3.7349445912982328e-06, "loss": 0.04028935, "memory(GiB)": 13.7, "step": 64245, "train_speed(iter/s)": 1.529938 }, { "acc": 0.9927083, "epoch": 30.114834778532927, "grad_norm": 1.3938510417938232, "learning_rate": 3.734194740018407e-06, "loss": 0.0137421, "memory(GiB)": 13.7, "step": 64250, "train_speed(iter/s)": 1.529938 }, { "acc": 0.990625, "epoch": 30.11717834544176, "grad_norm": 2.140540361404419, "learning_rate": 3.7334449191763977e-06, "loss": 0.02656618, "memory(GiB)": 13.7, "step": 64255, "train_speed(iter/s)": 1.529941 }, { "acc": 0.97811012, "epoch": 30.1195219123506, "grad_norm": 7.355782985687256, "learning_rate": 3.7326951287902313e-06, "loss": 0.06537684, "memory(GiB)": 13.7, "step": 64260, "train_speed(iter/s)": 1.529947 }, { "acc": 0.96281242, "epoch": 30.121865479259434, "grad_norm": 6.174746036529541, "learning_rate": 3.731945368877929e-06, "loss": 0.08020277, "memory(GiB)": 13.7, "step": 64265, "train_speed(iter/s)": 1.529958 }, { "acc": 0.98169641, "epoch": 30.124209046168268, "grad_norm": 5.882724285125732, "learning_rate": 3.731195639457511e-06, "loss": 0.1077453, "memory(GiB)": 13.7, "step": 64270, "train_speed(iter/s)": 1.529961 }, { "acc": 0.9866477, "epoch": 30.126552613077102, "grad_norm": 1.972166895866394, "learning_rate": 3.730445940547002e-06, "loss": 0.08509637, "memory(GiB)": 13.7, "step": 64275, "train_speed(iter/s)": 1.529967 }, { "acc": 0.9860714, "epoch": 30.12889617998594, "grad_norm": 3.147123336791992, "learning_rate": 3.7296962721644193e-06, "loss": 0.05758775, "memory(GiB)": 13.7, "step": 64280, "train_speed(iter/s)": 1.529975 }, { "acc": 0.9770833, "epoch": 30.131239746894774, "grad_norm": 3.14853572845459, "learning_rate": 3.7289466343277835e-06, "loss": 0.04172493, "memory(GiB)": 13.7, "step": 64285, "train_speed(iter/s)": 1.529973 }, { "acc": 0.9864583, "epoch": 30.13358331380361, "grad_norm": 0.8340328335762024, "learning_rate": 3.7281970270551114e-06, "loss": 0.03528976, "memory(GiB)": 13.7, "step": 64290, "train_speed(iter/s)": 1.529972 }, { "acc": 0.98154764, "epoch": 30.135926880712443, "grad_norm": 4.476137161254883, "learning_rate": 3.7274474503644254e-06, "loss": 0.02795115, "memory(GiB)": 13.7, "step": 64295, "train_speed(iter/s)": 1.529971 }, { "acc": 0.98869047, "epoch": 30.13827044762128, "grad_norm": 5.042039394378662, "learning_rate": 3.7266979042737384e-06, "loss": 0.0276152, "memory(GiB)": 13.7, "step": 64300, "train_speed(iter/s)": 1.529975 }, { "acc": 0.9927084, "epoch": 30.140614014530115, "grad_norm": 4.050948143005371, "learning_rate": 3.72594838880107e-06, "loss": 0.02080686, "memory(GiB)": 13.7, "step": 64305, "train_speed(iter/s)": 1.529975 }, { "acc": 0.99020824, "epoch": 30.14295758143895, "grad_norm": 4.34712553024292, "learning_rate": 3.725198903964435e-06, "loss": 0.04524663, "memory(GiB)": 13.7, "step": 64310, "train_speed(iter/s)": 1.529971 }, { "acc": 0.9916667, "epoch": 30.145301148347784, "grad_norm": 3.1174228191375732, "learning_rate": 3.724449449781848e-06, "loss": 0.02517755, "memory(GiB)": 13.7, "step": 64315, "train_speed(iter/s)": 1.529976 }, { "acc": 0.99174681, "epoch": 30.14764471525662, "grad_norm": 0.7262611389160156, "learning_rate": 3.7237000262713257e-06, "loss": 0.02406453, "memory(GiB)": 13.7, "step": 64320, "train_speed(iter/s)": 1.529977 }, { "acc": 0.9739584, "epoch": 30.149988282165456, "grad_norm": 3.6158416271209717, "learning_rate": 3.7229506334508796e-06, "loss": 0.07251358, "memory(GiB)": 13.7, "step": 64325, "train_speed(iter/s)": 1.529982 }, { "acc": 0.9833333, "epoch": 30.15233184907429, "grad_norm": 1.113273024559021, "learning_rate": 3.722201271338522e-06, "loss": 0.0370251, "memory(GiB)": 13.7, "step": 64330, "train_speed(iter/s)": 1.529983 }, { "acc": 0.99082794, "epoch": 30.154675415983128, "grad_norm": 0.916106641292572, "learning_rate": 3.721451939952268e-06, "loss": 0.02570868, "memory(GiB)": 13.7, "step": 64335, "train_speed(iter/s)": 1.52998 }, { "acc": 0.9791666, "epoch": 30.157018982891962, "grad_norm": 3.1162972450256348, "learning_rate": 3.7207026393101265e-06, "loss": 0.0813839, "memory(GiB)": 13.7, "step": 64340, "train_speed(iter/s)": 1.52998 }, { "acc": 0.99000006, "epoch": 30.159362549800797, "grad_norm": 2.890566825866699, "learning_rate": 3.7199533694301078e-06, "loss": 0.0276652, "memory(GiB)": 13.7, "step": 64345, "train_speed(iter/s)": 1.529985 }, { "acc": 0.97633934, "epoch": 30.16170611670963, "grad_norm": 3.6142444610595703, "learning_rate": 3.719204130330225e-06, "loss": 0.03810056, "memory(GiB)": 13.7, "step": 64350, "train_speed(iter/s)": 1.52999 }, { "acc": 0.9875, "epoch": 30.16404968361847, "grad_norm": 3.112178325653076, "learning_rate": 3.718454922028485e-06, "loss": 0.04109436, "memory(GiB)": 13.7, "step": 64355, "train_speed(iter/s)": 1.529992 }, { "acc": 0.99330359, "epoch": 30.166393250527303, "grad_norm": 3.4817357063293457, "learning_rate": 3.717705744542897e-06, "loss": 0.04213793, "memory(GiB)": 13.7, "step": 64360, "train_speed(iter/s)": 1.529993 }, { "acc": 0.97987642, "epoch": 30.168736817436137, "grad_norm": 6.419026851654053, "learning_rate": 3.7169565978914685e-06, "loss": 0.04771208, "memory(GiB)": 13.7, "step": 64365, "train_speed(iter/s)": 1.529995 }, { "acc": 0.9885417, "epoch": 30.17108038434497, "grad_norm": 2.4970650672912598, "learning_rate": 3.7162074820922067e-06, "loss": 0.05472333, "memory(GiB)": 13.7, "step": 64370, "train_speed(iter/s)": 1.529997 }, { "acc": 0.984375, "epoch": 30.17342395125381, "grad_norm": 8.687644958496094, "learning_rate": 3.7154583971631165e-06, "loss": 0.04081065, "memory(GiB)": 13.7, "step": 64375, "train_speed(iter/s)": 1.53 }, { "acc": 0.97872028, "epoch": 30.175767518162644, "grad_norm": 5.409793376922607, "learning_rate": 3.7147093431222064e-06, "loss": 0.05145499, "memory(GiB)": 13.7, "step": 64380, "train_speed(iter/s)": 1.529998 }, { "acc": 0.9885416, "epoch": 30.178111085071478, "grad_norm": 3.0057027339935303, "learning_rate": 3.713960319987479e-06, "loss": 0.02073035, "memory(GiB)": 13.7, "step": 64385, "train_speed(iter/s)": 1.529995 }, { "acc": 0.98152781, "epoch": 30.180454651980313, "grad_norm": 9.65245246887207, "learning_rate": 3.713211327776939e-06, "loss": 0.06780101, "memory(GiB)": 13.7, "step": 64390, "train_speed(iter/s)": 1.529999 }, { "acc": 0.98598213, "epoch": 30.18279821888915, "grad_norm": 4.948935031890869, "learning_rate": 3.7124623665085896e-06, "loss": 0.06780661, "memory(GiB)": 13.7, "step": 64395, "train_speed(iter/s)": 1.530001 }, { "acc": 0.97937508, "epoch": 30.185141785797985, "grad_norm": 1.5914380550384521, "learning_rate": 3.711713436200432e-06, "loss": 0.05017438, "memory(GiB)": 13.7, "step": 64400, "train_speed(iter/s)": 1.530003 }, { "acc": 0.985322, "epoch": 30.18748535270682, "grad_norm": 2.539252281188965, "learning_rate": 3.710964536870472e-06, "loss": 0.03791461, "memory(GiB)": 13.7, "step": 64405, "train_speed(iter/s)": 1.530005 }, { "acc": 0.97729168, "epoch": 30.189828919615653, "grad_norm": 3.7939248085021973, "learning_rate": 3.7102156685367073e-06, "loss": 0.0831729, "memory(GiB)": 13.7, "step": 64410, "train_speed(iter/s)": 1.530012 }, { "acc": 0.98031254, "epoch": 30.19217248652449, "grad_norm": 4.04884147644043, "learning_rate": 3.7094668312171385e-06, "loss": 0.07560797, "memory(GiB)": 13.7, "step": 64415, "train_speed(iter/s)": 1.530011 }, { "acc": 0.9739584, "epoch": 30.194516053433325, "grad_norm": 4.854429244995117, "learning_rate": 3.7087180249297643e-06, "loss": 0.05800014, "memory(GiB)": 13.7, "step": 64420, "train_speed(iter/s)": 1.530012 }, { "acc": 0.976894, "epoch": 30.19685962034216, "grad_norm": 3.347911834716797, "learning_rate": 3.707969249692587e-06, "loss": 0.08891711, "memory(GiB)": 13.7, "step": 64425, "train_speed(iter/s)": 1.530007 }, { "acc": 0.99494057, "epoch": 30.199203187250998, "grad_norm": 1.334456443786621, "learning_rate": 3.7072205055236027e-06, "loss": 0.0212941, "memory(GiB)": 13.7, "step": 64430, "train_speed(iter/s)": 1.530014 }, { "acc": 0.990625, "epoch": 30.201546754159832, "grad_norm": 1.1054717302322388, "learning_rate": 3.7064717924408067e-06, "loss": 0.031246, "memory(GiB)": 13.7, "step": 64435, "train_speed(iter/s)": 1.530014 }, { "acc": 0.98987179, "epoch": 30.203890321068666, "grad_norm": 4.515166759490967, "learning_rate": 3.7057231104621993e-06, "loss": 0.09033521, "memory(GiB)": 13.7, "step": 64440, "train_speed(iter/s)": 1.530011 }, { "acc": 0.98416672, "epoch": 30.2062338879775, "grad_norm": 1.777331829071045, "learning_rate": 3.704974459605776e-06, "loss": 0.0435342, "memory(GiB)": 13.7, "step": 64445, "train_speed(iter/s)": 1.530017 }, { "acc": 0.96624994, "epoch": 30.20857745488634, "grad_norm": 3.0049803256988525, "learning_rate": 3.704225839889527e-06, "loss": 0.07773386, "memory(GiB)": 13.7, "step": 64450, "train_speed(iter/s)": 1.53002 }, { "acc": 0.98450394, "epoch": 30.210921021795173, "grad_norm": 3.038648843765259, "learning_rate": 3.703477251331453e-06, "loss": 0.07552425, "memory(GiB)": 13.7, "step": 64455, "train_speed(iter/s)": 1.530026 }, { "acc": 0.99184523, "epoch": 30.213264588704007, "grad_norm": 5.275463104248047, "learning_rate": 3.7027286939495438e-06, "loss": 0.04634402, "memory(GiB)": 13.7, "step": 64460, "train_speed(iter/s)": 1.530026 }, { "acc": 0.99624996, "epoch": 30.21560815561284, "grad_norm": 2.6903951168060303, "learning_rate": 3.7019801677617922e-06, "loss": 0.01680991, "memory(GiB)": 13.7, "step": 64465, "train_speed(iter/s)": 1.530028 }, { "acc": 0.978125, "epoch": 30.21795172252168, "grad_norm": 2.003448247909546, "learning_rate": 3.701231672786193e-06, "loss": 0.04785565, "memory(GiB)": 13.7, "step": 64470, "train_speed(iter/s)": 1.530033 }, { "acc": 0.97041664, "epoch": 30.220295289430513, "grad_norm": 6.686974048614502, "learning_rate": 3.7004832090407346e-06, "loss": 0.07405646, "memory(GiB)": 13.7, "step": 64475, "train_speed(iter/s)": 1.530035 }, { "acc": 0.9850893, "epoch": 30.222638856339348, "grad_norm": 0.8599353432655334, "learning_rate": 3.6997347765434105e-06, "loss": 0.0420911, "memory(GiB)": 13.7, "step": 64480, "train_speed(iter/s)": 1.53004 }, { "acc": 0.97967262, "epoch": 30.224982423248182, "grad_norm": 2.683164119720459, "learning_rate": 3.698986375312209e-06, "loss": 0.05756356, "memory(GiB)": 13.7, "step": 64485, "train_speed(iter/s)": 1.530038 }, { "acc": 0.99197302, "epoch": 30.22732599015702, "grad_norm": 2.532313108444214, "learning_rate": 3.698238005365119e-06, "loss": 0.02489513, "memory(GiB)": 13.7, "step": 64490, "train_speed(iter/s)": 1.530044 }, { "acc": 0.98706236, "epoch": 30.229669557065854, "grad_norm": 2.814051866531372, "learning_rate": 3.697489666720128e-06, "loss": 0.03746757, "memory(GiB)": 13.7, "step": 64495, "train_speed(iter/s)": 1.530041 }, { "acc": 0.9864583, "epoch": 30.23201312397469, "grad_norm": 0.04166163504123688, "learning_rate": 3.6967413593952255e-06, "loss": 0.037747, "memory(GiB)": 13.7, "step": 64500, "train_speed(iter/s)": 1.530041 }, { "acc": 0.98440475, "epoch": 30.234356690883526, "grad_norm": 5.869378089904785, "learning_rate": 3.6959930834083968e-06, "loss": 0.03436861, "memory(GiB)": 13.7, "step": 64505, "train_speed(iter/s)": 1.530041 }, { "acc": 0.9927083, "epoch": 30.23670025779236, "grad_norm": 0.7354653477668762, "learning_rate": 3.695244838777629e-06, "loss": 0.02068643, "memory(GiB)": 13.7, "step": 64510, "train_speed(iter/s)": 1.530048 }, { "acc": 0.96644344, "epoch": 30.239043824701195, "grad_norm": 6.0854034423828125, "learning_rate": 3.694496625520908e-06, "loss": 0.08365023, "memory(GiB)": 13.7, "step": 64515, "train_speed(iter/s)": 1.530051 }, { "acc": 0.98708324, "epoch": 30.24138739161003, "grad_norm": 1.0855810642242432, "learning_rate": 3.6937484436562167e-06, "loss": 0.0422993, "memory(GiB)": 13.7, "step": 64520, "train_speed(iter/s)": 1.530055 }, { "acc": 0.97870913, "epoch": 30.243730958518867, "grad_norm": 3.817344903945923, "learning_rate": 3.6930002932015402e-06, "loss": 0.11508317, "memory(GiB)": 13.7, "step": 64525, "train_speed(iter/s)": 1.530058 }, { "acc": 0.99236107, "epoch": 30.2460745254277, "grad_norm": 1.4262166023254395, "learning_rate": 3.6922521741748625e-06, "loss": 0.05527775, "memory(GiB)": 13.7, "step": 64530, "train_speed(iter/s)": 1.53006 }, { "acc": 0.98782816, "epoch": 30.248418092336536, "grad_norm": 1.3716896772384644, "learning_rate": 3.691504086594163e-06, "loss": 0.0917555, "memory(GiB)": 13.7, "step": 64535, "train_speed(iter/s)": 1.530059 }, { "acc": 0.9850893, "epoch": 30.25076165924537, "grad_norm": 3.1064140796661377, "learning_rate": 3.690756030477425e-06, "loss": 0.04819084, "memory(GiB)": 13.7, "step": 64540, "train_speed(iter/s)": 1.53006 }, { "acc": 0.98500004, "epoch": 30.253105226154208, "grad_norm": 1.5156290531158447, "learning_rate": 3.690008005842629e-06, "loss": 0.03993748, "memory(GiB)": 13.7, "step": 64545, "train_speed(iter/s)": 1.530058 }, { "acc": 0.97891026, "epoch": 30.255448793063042, "grad_norm": 1.750587821006775, "learning_rate": 3.6892600127077555e-06, "loss": 0.04554287, "memory(GiB)": 13.7, "step": 64550, "train_speed(iter/s)": 1.530067 }, { "acc": 0.98604164, "epoch": 30.257792359971877, "grad_norm": 0.7750796675682068, "learning_rate": 3.6885120510907834e-06, "loss": 0.03004045, "memory(GiB)": 13.7, "step": 64555, "train_speed(iter/s)": 1.530063 }, { "acc": 0.98205395, "epoch": 30.26013592688071, "grad_norm": 2.905219078063965, "learning_rate": 3.687764121009692e-06, "loss": 0.06612689, "memory(GiB)": 13.7, "step": 64560, "train_speed(iter/s)": 1.530061 }, { "acc": 0.99611111, "epoch": 30.26247949378955, "grad_norm": 1.9654117822647095, "learning_rate": 3.6870162224824575e-06, "loss": 0.02909569, "memory(GiB)": 13.7, "step": 64565, "train_speed(iter/s)": 1.530058 }, { "acc": 0.9708004, "epoch": 30.264823060698383, "grad_norm": 6.4878716468811035, "learning_rate": 3.686268355527059e-06, "loss": 0.07163242, "memory(GiB)": 13.7, "step": 64570, "train_speed(iter/s)": 1.530063 }, { "acc": 0.97592258, "epoch": 30.267166627607217, "grad_norm": 2.4394829273223877, "learning_rate": 3.685520520161471e-06, "loss": 0.06371058, "memory(GiB)": 13.7, "step": 64575, "train_speed(iter/s)": 1.530068 }, { "acc": 0.97498722, "epoch": 30.269510194516055, "grad_norm": 2.405317783355713, "learning_rate": 3.68477271640367e-06, "loss": 0.06771547, "memory(GiB)": 13.7, "step": 64580, "train_speed(iter/s)": 1.530075 }, { "acc": 0.9927083, "epoch": 30.27185376142489, "grad_norm": 0.07715894281864166, "learning_rate": 3.6840249442716304e-06, "loss": 0.02090048, "memory(GiB)": 13.7, "step": 64585, "train_speed(iter/s)": 1.530085 }, { "acc": 0.98779755, "epoch": 30.274197328333724, "grad_norm": 1.201446533203125, "learning_rate": 3.6832772037833263e-06, "loss": 0.04228947, "memory(GiB)": 13.7, "step": 64590, "train_speed(iter/s)": 1.530088 }, { "acc": 0.9958334, "epoch": 30.276540895242558, "grad_norm": 0.014406929723918438, "learning_rate": 3.682529494956729e-06, "loss": 0.01760096, "memory(GiB)": 13.7, "step": 64595, "train_speed(iter/s)": 1.530096 }, { "acc": 0.98705359, "epoch": 30.278884462151396, "grad_norm": 3.6833035945892334, "learning_rate": 3.681781817809815e-06, "loss": 0.03350199, "memory(GiB)": 13.7, "step": 64600, "train_speed(iter/s)": 1.530099 }, { "acc": 0.9739584, "epoch": 30.28122802906023, "grad_norm": 2.6760027408599854, "learning_rate": 3.681034172360553e-06, "loss": 0.05025198, "memory(GiB)": 13.7, "step": 64605, "train_speed(iter/s)": 1.530103 }, { "acc": 0.98883924, "epoch": 30.283571595969065, "grad_norm": 1.7816965579986572, "learning_rate": 3.680286558626915e-06, "loss": 0.02802683, "memory(GiB)": 13.7, "step": 64610, "train_speed(iter/s)": 1.530101 }, { "acc": 0.96958332, "epoch": 30.2859151628779, "grad_norm": 1.0303800106048584, "learning_rate": 3.679538976626871e-06, "loss": 0.07947483, "memory(GiB)": 13.7, "step": 64615, "train_speed(iter/s)": 1.5301 }, { "acc": 0.9838541, "epoch": 30.288258729786737, "grad_norm": 5.218222141265869, "learning_rate": 3.67879142637839e-06, "loss": 0.06052673, "memory(GiB)": 13.7, "step": 64620, "train_speed(iter/s)": 1.530105 }, { "acc": 0.98165178, "epoch": 30.29060229669557, "grad_norm": 3.9942786693573, "learning_rate": 3.67804390789944e-06, "loss": 0.06692995, "memory(GiB)": 13.7, "step": 64625, "train_speed(iter/s)": 1.530105 }, { "acc": 0.97699404, "epoch": 30.292945863604405, "grad_norm": 6.337641716003418, "learning_rate": 3.677296421207991e-06, "loss": 0.07058021, "memory(GiB)": 13.7, "step": 64630, "train_speed(iter/s)": 1.530105 }, { "acc": 0.97952385, "epoch": 30.29528943051324, "grad_norm": 2.711613416671753, "learning_rate": 3.6765489663220087e-06, "loss": 0.04973503, "memory(GiB)": 13.7, "step": 64635, "train_speed(iter/s)": 1.530108 }, { "acc": 0.98549109, "epoch": 30.297632997422077, "grad_norm": 4.044661045074463, "learning_rate": 3.675801543259459e-06, "loss": 0.05941364, "memory(GiB)": 13.7, "step": 64640, "train_speed(iter/s)": 1.530113 }, { "acc": 0.9697917, "epoch": 30.299976564330912, "grad_norm": 0.9223458766937256, "learning_rate": 3.6750541520383095e-06, "loss": 0.07678719, "memory(GiB)": 13.7, "step": 64645, "train_speed(iter/s)": 1.530119 }, { "acc": 0.98812504, "epoch": 30.302320131239746, "grad_norm": 3.5488626956939697, "learning_rate": 3.674306792676522e-06, "loss": 0.03458983, "memory(GiB)": 13.7, "step": 64650, "train_speed(iter/s)": 1.530119 }, { "acc": 0.96932545, "epoch": 30.30466369814858, "grad_norm": 4.60987663269043, "learning_rate": 3.673559465192065e-06, "loss": 0.08192565, "memory(GiB)": 13.7, "step": 64655, "train_speed(iter/s)": 1.530125 }, { "acc": 0.97736111, "epoch": 30.30700726505742, "grad_norm": 4.322269916534424, "learning_rate": 3.672812169602898e-06, "loss": 0.05371318, "memory(GiB)": 13.7, "step": 64660, "train_speed(iter/s)": 1.530126 }, { "acc": 0.97776051, "epoch": 30.309350831966253, "grad_norm": 0.8272148966789246, "learning_rate": 3.672064905926985e-06, "loss": 0.05511804, "memory(GiB)": 13.7, "step": 64665, "train_speed(iter/s)": 1.530129 }, { "acc": 0.98239584, "epoch": 30.311694398875087, "grad_norm": 6.222501277923584, "learning_rate": 3.6713176741822847e-06, "loss": 0.06290126, "memory(GiB)": 13.7, "step": 64670, "train_speed(iter/s)": 1.530127 }, { "acc": 0.98343754, "epoch": 30.314037965783925, "grad_norm": 3.5813488960266113, "learning_rate": 3.6705704743867636e-06, "loss": 0.05043168, "memory(GiB)": 13.7, "step": 64675, "train_speed(iter/s)": 1.530133 }, { "acc": 0.98041668, "epoch": 30.31638153269276, "grad_norm": 6.652472019195557, "learning_rate": 3.6698233065583773e-06, "loss": 0.05619349, "memory(GiB)": 13.7, "step": 64680, "train_speed(iter/s)": 1.530141 }, { "acc": 0.98687496, "epoch": 30.318725099601593, "grad_norm": 1.9539257287979126, "learning_rate": 3.669076170715089e-06, "loss": 0.04454157, "memory(GiB)": 13.7, "step": 64685, "train_speed(iter/s)": 1.530148 }, { "acc": 0.9811553, "epoch": 30.321068666510428, "grad_norm": 4.867204189300537, "learning_rate": 3.6683290668748545e-06, "loss": 0.06398149, "memory(GiB)": 13.7, "step": 64690, "train_speed(iter/s)": 1.530151 }, { "acc": 0.99375, "epoch": 30.323412233419266, "grad_norm": 2.24630069732666, "learning_rate": 3.6675819950556342e-06, "loss": 0.01614237, "memory(GiB)": 13.7, "step": 64695, "train_speed(iter/s)": 1.530157 }, { "acc": 0.99090271, "epoch": 30.3257558003281, "grad_norm": 2.3740766048431396, "learning_rate": 3.666834955275382e-06, "loss": 0.07201929, "memory(GiB)": 13.7, "step": 64700, "train_speed(iter/s)": 1.530159 }, { "acc": 0.98529758, "epoch": 30.328099367236934, "grad_norm": 2.825165033340454, "learning_rate": 3.666087947552056e-06, "loss": 0.05305901, "memory(GiB)": 13.7, "step": 64705, "train_speed(iter/s)": 1.530165 }, { "acc": 0.97437496, "epoch": 30.33044293414577, "grad_norm": 5.478896617889404, "learning_rate": 3.665340971903613e-06, "loss": 0.05925655, "memory(GiB)": 13.7, "step": 64710, "train_speed(iter/s)": 1.530167 }, { "acc": 0.9740922, "epoch": 30.332786501054606, "grad_norm": 1.66592276096344, "learning_rate": 3.6645940283480047e-06, "loss": 0.09016732, "memory(GiB)": 13.7, "step": 64715, "train_speed(iter/s)": 1.530171 }, { "acc": 0.98666668, "epoch": 30.33513006796344, "grad_norm": 2.0619006156921387, "learning_rate": 3.6638471169031885e-06, "loss": 0.04131101, "memory(GiB)": 13.7, "step": 64720, "train_speed(iter/s)": 1.530184 }, { "acc": 0.99541664, "epoch": 30.337473634872275, "grad_norm": 2.149470806121826, "learning_rate": 3.6631002375871163e-06, "loss": 0.02847953, "memory(GiB)": 13.7, "step": 64725, "train_speed(iter/s)": 1.530192 }, { "acc": 0.98208332, "epoch": 30.33981720178111, "grad_norm": 1.3691731691360474, "learning_rate": 3.6623533904177405e-06, "loss": 0.07979768, "memory(GiB)": 13.7, "step": 64730, "train_speed(iter/s)": 1.530194 }, { "acc": 0.98236113, "epoch": 30.342160768689947, "grad_norm": 1.6163253784179688, "learning_rate": 3.6616065754130137e-06, "loss": 0.03865297, "memory(GiB)": 13.7, "step": 64735, "train_speed(iter/s)": 1.530202 }, { "acc": 0.98398809, "epoch": 30.34450433559878, "grad_norm": 4.172962188720703, "learning_rate": 3.6608597925908874e-06, "loss": 0.0516523, "memory(GiB)": 13.7, "step": 64740, "train_speed(iter/s)": 1.530206 }, { "acc": 0.996875, "epoch": 30.346847902507616, "grad_norm": 2.004411458969116, "learning_rate": 3.6601130419693078e-06, "loss": 0.02881832, "memory(GiB)": 13.7, "step": 64745, "train_speed(iter/s)": 1.530214 }, { "acc": 0.99627972, "epoch": 30.349191469416454, "grad_norm": 0.0018454005476087332, "learning_rate": 3.6593663235662284e-06, "loss": 0.01479698, "memory(GiB)": 13.7, "step": 64750, "train_speed(iter/s)": 1.530219 }, { "acc": 0.99082794, "epoch": 30.351535036325288, "grad_norm": 2.6180596351623535, "learning_rate": 3.658619637399596e-06, "loss": 0.03366739, "memory(GiB)": 13.7, "step": 64755, "train_speed(iter/s)": 1.530225 }, { "acc": 0.990625, "epoch": 30.353878603234122, "grad_norm": 1.6813651323318481, "learning_rate": 3.6578729834873604e-06, "loss": 0.03163685, "memory(GiB)": 13.7, "step": 64760, "train_speed(iter/s)": 1.530225 }, { "acc": 0.9895833, "epoch": 30.356222170142956, "grad_norm": 3.0611531734466553, "learning_rate": 3.657126361847466e-06, "loss": 0.05496545, "memory(GiB)": 13.7, "step": 64765, "train_speed(iter/s)": 1.530224 }, { "acc": 0.97094707, "epoch": 30.358565737051794, "grad_norm": 2.9696645736694336, "learning_rate": 3.656379772497861e-06, "loss": 0.09231638, "memory(GiB)": 13.7, "step": 64770, "train_speed(iter/s)": 1.530228 }, { "acc": 0.97979164, "epoch": 30.36090930396063, "grad_norm": 1.2046949863433838, "learning_rate": 3.655633215456491e-06, "loss": 0.05666845, "memory(GiB)": 13.7, "step": 64775, "train_speed(iter/s)": 1.53023 }, { "acc": 0.98500004, "epoch": 30.363252870869463, "grad_norm": 3.4631130695343018, "learning_rate": 3.654886690741301e-06, "loss": 0.04206063, "memory(GiB)": 13.7, "step": 64780, "train_speed(iter/s)": 1.530234 }, { "acc": 0.98519344, "epoch": 30.365596437778297, "grad_norm": 2.2714686393737793, "learning_rate": 3.654140198370233e-06, "loss": 0.04177603, "memory(GiB)": 13.7, "step": 64785, "train_speed(iter/s)": 1.530234 }, { "acc": 0.97354164, "epoch": 30.367940004687135, "grad_norm": 3.8140714168548584, "learning_rate": 3.6533937383612316e-06, "loss": 0.08374126, "memory(GiB)": 13.7, "step": 64790, "train_speed(iter/s)": 1.530242 }, { "acc": 0.9833334, "epoch": 30.37028357159597, "grad_norm": 4.6854119300842285, "learning_rate": 3.6526473107322395e-06, "loss": 0.04612111, "memory(GiB)": 13.7, "step": 64795, "train_speed(iter/s)": 1.530243 }, { "acc": 0.97717171, "epoch": 30.372627138504804, "grad_norm": 0.7062471508979797, "learning_rate": 3.6519009155011976e-06, "loss": 0.06861987, "memory(GiB)": 13.7, "step": 64800, "train_speed(iter/s)": 1.530242 }, { "acc": 0.97598429, "epoch": 30.374970705413638, "grad_norm": 0.0009318477241322398, "learning_rate": 3.651154552686047e-06, "loss": 0.058744, "memory(GiB)": 13.7, "step": 64805, "train_speed(iter/s)": 1.530242 }, { "acc": 0.97696428, "epoch": 30.377314272322476, "grad_norm": 3.2786788940429688, "learning_rate": 3.6504082223047284e-06, "loss": 0.05293831, "memory(GiB)": 13.7, "step": 64810, "train_speed(iter/s)": 1.53024 }, { "acc": 0.98999996, "epoch": 30.37965783923131, "grad_norm": 4.635365962982178, "learning_rate": 3.649661924375181e-06, "loss": 0.05377167, "memory(GiB)": 13.7, "step": 64815, "train_speed(iter/s)": 1.530247 }, { "acc": 0.9936554, "epoch": 30.382001406140144, "grad_norm": 2.904163122177124, "learning_rate": 3.6489156589153434e-06, "loss": 0.04409684, "memory(GiB)": 13.7, "step": 64820, "train_speed(iter/s)": 1.530242 }, { "acc": 0.97937508, "epoch": 30.384344973048982, "grad_norm": 1.579908013343811, "learning_rate": 3.6481694259431526e-06, "loss": 0.06301208, "memory(GiB)": 13.7, "step": 64825, "train_speed(iter/s)": 1.530241 }, { "acc": 0.9833334, "epoch": 30.386688539957817, "grad_norm": 6.650137901306152, "learning_rate": 3.6474232254765457e-06, "loss": 0.06040759, "memory(GiB)": 13.7, "step": 64830, "train_speed(iter/s)": 1.530244 }, { "acc": 0.9760417, "epoch": 30.38903210686665, "grad_norm": 4.981583595275879, "learning_rate": 3.64667705753346e-06, "loss": 0.08283805, "memory(GiB)": 13.7, "step": 64835, "train_speed(iter/s)": 1.53024 }, { "acc": 0.99750004, "epoch": 30.391375673775485, "grad_norm": 0.0010081474902108312, "learning_rate": 3.6459309221318295e-06, "loss": 0.00957065, "memory(GiB)": 13.7, "step": 64840, "train_speed(iter/s)": 1.530235 }, { "acc": 0.98916664, "epoch": 30.393719240684323, "grad_norm": 7.473283290863037, "learning_rate": 3.645184819289589e-06, "loss": 0.03608137, "memory(GiB)": 13.7, "step": 64845, "train_speed(iter/s)": 1.530239 }, { "acc": 0.98353634, "epoch": 30.396062807593157, "grad_norm": 0.011635887436568737, "learning_rate": 3.6444387490246737e-06, "loss": 0.04688786, "memory(GiB)": 13.7, "step": 64850, "train_speed(iter/s)": 1.53024 }, { "acc": 0.98050594, "epoch": 30.39840637450199, "grad_norm": 1.8421574831008911, "learning_rate": 3.6436927113550164e-06, "loss": 0.08711786, "memory(GiB)": 13.7, "step": 64855, "train_speed(iter/s)": 1.530239 }, { "acc": 0.98343754, "epoch": 30.400749941410826, "grad_norm": 3.8618996143341064, "learning_rate": 3.6429467062985475e-06, "loss": 0.04688038, "memory(GiB)": 13.7, "step": 64860, "train_speed(iter/s)": 1.530239 }, { "acc": 0.99204445, "epoch": 30.403093508319664, "grad_norm": 3.526116371154785, "learning_rate": 3.6422007338732017e-06, "loss": 0.03580042, "memory(GiB)": 13.7, "step": 64865, "train_speed(iter/s)": 1.530245 }, { "acc": 0.98562498, "epoch": 30.405437075228498, "grad_norm": 4.029200077056885, "learning_rate": 3.641454794096907e-06, "loss": 0.04172982, "memory(GiB)": 13.7, "step": 64870, "train_speed(iter/s)": 1.530247 }, { "acc": 0.98187504, "epoch": 30.407780642137332, "grad_norm": 2.564295768737793, "learning_rate": 3.6407088869875934e-06, "loss": 0.05838611, "memory(GiB)": 13.7, "step": 64875, "train_speed(iter/s)": 1.530247 }, { "acc": 0.98114586, "epoch": 30.410124209046167, "grad_norm": 2.581271171569824, "learning_rate": 3.6399630125631913e-06, "loss": 0.06608549, "memory(GiB)": 13.7, "step": 64880, "train_speed(iter/s)": 1.530256 }, { "acc": 0.97636909, "epoch": 30.412467775955005, "grad_norm": 6.484419822692871, "learning_rate": 3.6392171708416298e-06, "loss": 0.13301237, "memory(GiB)": 13.7, "step": 64885, "train_speed(iter/s)": 1.530254 }, { "acc": 0.98885412, "epoch": 30.41481134286384, "grad_norm": 2.9396533966064453, "learning_rate": 3.6384713618408337e-06, "loss": 0.04320616, "memory(GiB)": 13.7, "step": 64890, "train_speed(iter/s)": 1.530257 }, { "acc": 0.97877979, "epoch": 30.417154909772673, "grad_norm": 2.657738447189331, "learning_rate": 3.6377255855787326e-06, "loss": 0.07356222, "memory(GiB)": 13.7, "step": 64895, "train_speed(iter/s)": 1.530262 }, { "acc": 0.9885417, "epoch": 30.419498476681508, "grad_norm": 2.043951988220215, "learning_rate": 3.6369798420732493e-06, "loss": 0.086803, "memory(GiB)": 13.7, "step": 64900, "train_speed(iter/s)": 1.530252 }, { "acc": 0.99092264, "epoch": 30.421842043590345, "grad_norm": 2.737581968307495, "learning_rate": 3.6362341313423142e-06, "loss": 0.04893925, "memory(GiB)": 13.7, "step": 64905, "train_speed(iter/s)": 1.530256 }, { "acc": 0.97979164, "epoch": 30.42418561049918, "grad_norm": 7.166528224945068, "learning_rate": 3.6354884534038476e-06, "loss": 0.05256543, "memory(GiB)": 13.7, "step": 64910, "train_speed(iter/s)": 1.530254 }, { "acc": 0.98083334, "epoch": 30.426529177408014, "grad_norm": 5.4527201652526855, "learning_rate": 3.634742808275774e-06, "loss": 0.03366733, "memory(GiB)": 13.7, "step": 64915, "train_speed(iter/s)": 1.530259 }, { "acc": 0.97133923, "epoch": 30.428872744316852, "grad_norm": 0.17128488421440125, "learning_rate": 3.6339971959760157e-06, "loss": 0.06036171, "memory(GiB)": 13.7, "step": 64920, "train_speed(iter/s)": 1.530265 }, { "acc": 0.98798618, "epoch": 30.431216311225686, "grad_norm": 2.4473423957824707, "learning_rate": 3.6332516165224963e-06, "loss": 0.03540698, "memory(GiB)": 13.7, "step": 64925, "train_speed(iter/s)": 1.530268 }, { "acc": 0.9864584, "epoch": 30.43355987813452, "grad_norm": 0.010090394876897335, "learning_rate": 3.632506069933135e-06, "loss": 0.04694481, "memory(GiB)": 13.7, "step": 64930, "train_speed(iter/s)": 1.530266 }, { "acc": 0.98090286, "epoch": 30.435903445043355, "grad_norm": 3.2792348861694336, "learning_rate": 3.631760556225855e-06, "loss": 0.0465976, "memory(GiB)": 13.7, "step": 64935, "train_speed(iter/s)": 1.530272 }, { "acc": 0.99012499, "epoch": 30.438247011952193, "grad_norm": 4.951673984527588, "learning_rate": 3.6310150754185742e-06, "loss": 0.03424956, "memory(GiB)": 13.7, "step": 64940, "train_speed(iter/s)": 1.530272 }, { "acc": 0.990625, "epoch": 30.440590578861027, "grad_norm": 2.152514934539795, "learning_rate": 3.6302696275292108e-06, "loss": 0.06392322, "memory(GiB)": 13.7, "step": 64945, "train_speed(iter/s)": 1.530273 }, { "acc": 0.98675594, "epoch": 30.44293414576986, "grad_norm": 5.954095363616943, "learning_rate": 3.6295242125756863e-06, "loss": 0.03812808, "memory(GiB)": 13.7, "step": 64950, "train_speed(iter/s)": 1.530274 }, { "acc": 0.98425598, "epoch": 30.445277712678696, "grad_norm": 4.748636245727539, "learning_rate": 3.6287788305759146e-06, "loss": 0.0506224, "memory(GiB)": 13.7, "step": 64955, "train_speed(iter/s)": 1.530273 }, { "acc": 0.98291664, "epoch": 30.447621279587533, "grad_norm": 6.572736740112305, "learning_rate": 3.628033481547814e-06, "loss": 0.04381708, "memory(GiB)": 13.7, "step": 64960, "train_speed(iter/s)": 1.530276 }, { "acc": 0.98118057, "epoch": 30.449964846496368, "grad_norm": 4.047602653503418, "learning_rate": 3.6272881655092973e-06, "loss": 0.06320624, "memory(GiB)": 13.7, "step": 64965, "train_speed(iter/s)": 1.530282 }, { "acc": 0.97988968, "epoch": 30.452308413405202, "grad_norm": 4.106302738189697, "learning_rate": 3.6265428824782833e-06, "loss": 0.04933584, "memory(GiB)": 13.7, "step": 64970, "train_speed(iter/s)": 1.530283 }, { "acc": 0.97333336, "epoch": 30.454651980314036, "grad_norm": 5.5233635902404785, "learning_rate": 3.6257976324726846e-06, "loss": 0.0793963, "memory(GiB)": 13.7, "step": 64975, "train_speed(iter/s)": 1.530287 }, { "acc": 0.9808279, "epoch": 30.456995547222874, "grad_norm": 2.8474090099334717, "learning_rate": 3.6250524155104145e-06, "loss": 0.05948005, "memory(GiB)": 13.7, "step": 64980, "train_speed(iter/s)": 1.53029 }, { "acc": 0.9791338, "epoch": 30.45933911413171, "grad_norm": 3.742611885070801, "learning_rate": 3.624307231609386e-06, "loss": 0.03423859, "memory(GiB)": 13.7, "step": 64985, "train_speed(iter/s)": 1.530287 }, { "acc": 0.9942709, "epoch": 30.461682681040543, "grad_norm": 2.6486265659332275, "learning_rate": 3.623562080787511e-06, "loss": 0.01952551, "memory(GiB)": 13.7, "step": 64990, "train_speed(iter/s)": 1.530294 }, { "acc": 0.98102684, "epoch": 30.46402624794938, "grad_norm": 6.130033016204834, "learning_rate": 3.6228169630626987e-06, "loss": 0.04983581, "memory(GiB)": 13.7, "step": 64995, "train_speed(iter/s)": 1.530302 }, { "acc": 0.98319025, "epoch": 30.466369814858215, "grad_norm": 5.916995525360107, "learning_rate": 3.622071878452861e-06, "loss": 0.07237998, "memory(GiB)": 13.7, "step": 65000, "train_speed(iter/s)": 1.530309 }, { "acc": 0.99722223, "epoch": 30.46871338176705, "grad_norm": 1.5355501174926758, "learning_rate": 3.621326826975906e-06, "loss": 0.0313424, "memory(GiB)": 13.7, "step": 65005, "train_speed(iter/s)": 1.53032 }, { "acc": 0.98145828, "epoch": 30.471056948675884, "grad_norm": 3.4980340003967285, "learning_rate": 3.620581808649744e-06, "loss": 0.03143082, "memory(GiB)": 13.7, "step": 65010, "train_speed(iter/s)": 1.530325 }, { "acc": 0.98559027, "epoch": 30.47340051558472, "grad_norm": 2.267077922821045, "learning_rate": 3.6198368234922827e-06, "loss": 0.04881188, "memory(GiB)": 13.7, "step": 65015, "train_speed(iter/s)": 1.530321 }, { "acc": 0.97383928, "epoch": 30.475744082493556, "grad_norm": 8.203841209411621, "learning_rate": 3.6190918715214263e-06, "loss": 0.05873009, "memory(GiB)": 13.7, "step": 65020, "train_speed(iter/s)": 1.530323 }, { "acc": 0.99453125, "epoch": 30.47808764940239, "grad_norm": 4.448745250701904, "learning_rate": 3.618346952755085e-06, "loss": 0.05328559, "memory(GiB)": 13.7, "step": 65025, "train_speed(iter/s)": 1.530324 }, { "acc": 0.98291664, "epoch": 30.480431216311224, "grad_norm": 9.832395553588867, "learning_rate": 3.6176020672111626e-06, "loss": 0.06368899, "memory(GiB)": 13.7, "step": 65030, "train_speed(iter/s)": 1.530328 }, { "acc": 0.98506947, "epoch": 30.482774783220062, "grad_norm": 2.0000174045562744, "learning_rate": 3.616857214907562e-06, "loss": 0.05751911, "memory(GiB)": 13.7, "step": 65035, "train_speed(iter/s)": 1.530327 }, { "acc": 0.98500004, "epoch": 30.485118350128896, "grad_norm": 3.42887020111084, "learning_rate": 3.616112395862188e-06, "loss": 0.02394576, "memory(GiB)": 13.7, "step": 65040, "train_speed(iter/s)": 1.530326 }, { "acc": 0.97312498, "epoch": 30.48746191703773, "grad_norm": 3.168207883834839, "learning_rate": 3.6153676100929447e-06, "loss": 0.07558757, "memory(GiB)": 13.7, "step": 65045, "train_speed(iter/s)": 1.530335 }, { "acc": 0.99017859, "epoch": 30.489805483946565, "grad_norm": 4.806088447570801, "learning_rate": 3.614622857617733e-06, "loss": 0.04684564, "memory(GiB)": 13.7, "step": 65050, "train_speed(iter/s)": 1.530342 }, { "acc": 0.9926136, "epoch": 30.492149050855403, "grad_norm": 0.44571638107299805, "learning_rate": 3.613878138454455e-06, "loss": 0.0264012, "memory(GiB)": 13.7, "step": 65055, "train_speed(iter/s)": 1.530342 }, { "acc": 0.98403988, "epoch": 30.494492617764237, "grad_norm": 0.005445010028779507, "learning_rate": 3.613133452621012e-06, "loss": 0.06142305, "memory(GiB)": 13.7, "step": 65060, "train_speed(iter/s)": 1.530349 }, { "acc": 0.98633938, "epoch": 30.49683618467307, "grad_norm": 3.4410560131073, "learning_rate": 3.612388800135301e-06, "loss": 0.0853349, "memory(GiB)": 13.7, "step": 65065, "train_speed(iter/s)": 1.530353 }, { "acc": 0.97534723, "epoch": 30.499179751581906, "grad_norm": 6.293158054351807, "learning_rate": 3.611644181015225e-06, "loss": 0.16869569, "memory(GiB)": 13.7, "step": 65070, "train_speed(iter/s)": 1.530356 }, { "acc": 0.98738098, "epoch": 30.501523318490744, "grad_norm": 2.451420545578003, "learning_rate": 3.6108995952786805e-06, "loss": 0.02698378, "memory(GiB)": 13.7, "step": 65075, "train_speed(iter/s)": 1.530362 }, { "acc": 0.98758011, "epoch": 30.503866885399578, "grad_norm": 3.5727360248565674, "learning_rate": 3.610155042943563e-06, "loss": 0.03658393, "memory(GiB)": 13.7, "step": 65080, "train_speed(iter/s)": 1.530364 }, { "acc": 0.98125, "epoch": 30.506210452308412, "grad_norm": 1.0191031694412231, "learning_rate": 3.6094105240277706e-06, "loss": 0.0711457, "memory(GiB)": 13.7, "step": 65085, "train_speed(iter/s)": 1.530373 }, { "acc": 0.98187494, "epoch": 30.50855401921725, "grad_norm": 1.467550277709961, "learning_rate": 3.6086660385492006e-06, "loss": 0.03426267, "memory(GiB)": 13.7, "step": 65090, "train_speed(iter/s)": 1.530378 }, { "acc": 0.97416668, "epoch": 30.510897586126084, "grad_norm": 4.794677257537842, "learning_rate": 3.6079215865257444e-06, "loss": 0.09877231, "memory(GiB)": 13.7, "step": 65095, "train_speed(iter/s)": 1.530382 }, { "acc": 0.98722229, "epoch": 30.51324115303492, "grad_norm": 4.308042049407959, "learning_rate": 3.6071771679752994e-06, "loss": 0.05172738, "memory(GiB)": 13.7, "step": 65100, "train_speed(iter/s)": 1.530385 }, { "acc": 0.9833334, "epoch": 30.515584719943753, "grad_norm": 1.9851216077804565, "learning_rate": 3.6064327829157587e-06, "loss": 0.055855, "memory(GiB)": 13.7, "step": 65105, "train_speed(iter/s)": 1.530389 }, { "acc": 0.99125004, "epoch": 30.51792828685259, "grad_norm": 1.430646538734436, "learning_rate": 3.6056884313650125e-06, "loss": 0.02288846, "memory(GiB)": 13.7, "step": 65110, "train_speed(iter/s)": 1.530394 }, { "acc": 0.96863098, "epoch": 30.520271853761425, "grad_norm": 6.811441421508789, "learning_rate": 3.604944113340955e-06, "loss": 0.09466543, "memory(GiB)": 13.7, "step": 65115, "train_speed(iter/s)": 1.530396 }, { "acc": 0.99187498, "epoch": 30.52261542067026, "grad_norm": 2.6191444396972656, "learning_rate": 3.604199828861477e-06, "loss": 0.0316508, "memory(GiB)": 13.7, "step": 65120, "train_speed(iter/s)": 1.530399 }, { "acc": 0.97354164, "epoch": 30.524958987579094, "grad_norm": 3.3508317470550537, "learning_rate": 3.603455577944466e-06, "loss": 0.04214284, "memory(GiB)": 13.7, "step": 65125, "train_speed(iter/s)": 1.5304 }, { "acc": 0.99219704, "epoch": 30.52730255448793, "grad_norm": 0.5058659315109253, "learning_rate": 3.6027113606078147e-06, "loss": 0.04040673, "memory(GiB)": 13.7, "step": 65130, "train_speed(iter/s)": 1.53041 }, { "acc": 0.96385422, "epoch": 30.529646121396766, "grad_norm": 1.366344690322876, "learning_rate": 3.60196717686941e-06, "loss": 0.08034617, "memory(GiB)": 13.7, "step": 65135, "train_speed(iter/s)": 1.530411 }, { "acc": 0.98322296, "epoch": 30.5319896883056, "grad_norm": 2.6478800773620605, "learning_rate": 3.6012230267471383e-06, "loss": 0.04165645, "memory(GiB)": 13.7, "step": 65140, "train_speed(iter/s)": 1.530411 }, { "acc": 0.96875, "epoch": 30.534333255214435, "grad_norm": 1.2300208806991577, "learning_rate": 3.6004789102588893e-06, "loss": 0.11387784, "memory(GiB)": 13.7, "step": 65145, "train_speed(iter/s)": 1.530416 }, { "acc": 0.984375, "epoch": 30.536676822123273, "grad_norm": 1.831897258758545, "learning_rate": 3.5997348274225473e-06, "loss": 0.04194318, "memory(GiB)": 13.7, "step": 65150, "train_speed(iter/s)": 1.530416 }, { "acc": 0.99309216, "epoch": 30.539020389032107, "grad_norm": 2.2025787830352783, "learning_rate": 3.598990778256e-06, "loss": 0.0224796, "memory(GiB)": 13.7, "step": 65155, "train_speed(iter/s)": 1.530416 }, { "acc": 0.97597227, "epoch": 30.54136395594094, "grad_norm": 0.7992088794708252, "learning_rate": 3.5982467627771287e-06, "loss": 0.05041232, "memory(GiB)": 13.7, "step": 65160, "train_speed(iter/s)": 1.530421 }, { "acc": 0.9953125, "epoch": 30.54370752284978, "grad_norm": 2.0661191940307617, "learning_rate": 3.59750278100382e-06, "loss": 0.01690594, "memory(GiB)": 13.7, "step": 65165, "train_speed(iter/s)": 1.530429 }, { "acc": 0.9979167, "epoch": 30.546051089758613, "grad_norm": 0.004274637904018164, "learning_rate": 3.596758832953952e-06, "loss": 0.0137011, "memory(GiB)": 13.7, "step": 65170, "train_speed(iter/s)": 1.530428 }, { "acc": 0.97729168, "epoch": 30.548394656667448, "grad_norm": 4.753841876983643, "learning_rate": 3.5960149186454125e-06, "loss": 0.05394096, "memory(GiB)": 13.7, "step": 65175, "train_speed(iter/s)": 1.530431 }, { "acc": 0.98760414, "epoch": 30.550738223576282, "grad_norm": 3.440206289291382, "learning_rate": 3.595271038096079e-06, "loss": 0.04676618, "memory(GiB)": 13.7, "step": 65180, "train_speed(iter/s)": 1.53043 }, { "acc": 0.97058535, "epoch": 30.55308179048512, "grad_norm": 4.197824478149414, "learning_rate": 3.594527191323834e-06, "loss": 0.0921822, "memory(GiB)": 13.7, "step": 65185, "train_speed(iter/s)": 1.530435 }, { "acc": 0.98816967, "epoch": 30.555425357393954, "grad_norm": 3.6826276779174805, "learning_rate": 3.5937833783465558e-06, "loss": 0.03776973, "memory(GiB)": 13.7, "step": 65190, "train_speed(iter/s)": 1.530441 }, { "acc": 0.97911701, "epoch": 30.55776892430279, "grad_norm": 0.164270281791687, "learning_rate": 3.593039599182124e-06, "loss": 0.06343105, "memory(GiB)": 13.7, "step": 65195, "train_speed(iter/s)": 1.530437 }, { "acc": 0.97511368, "epoch": 30.560112491211623, "grad_norm": 2.6192376613616943, "learning_rate": 3.592295853848417e-06, "loss": 0.07564276, "memory(GiB)": 13.7, "step": 65200, "train_speed(iter/s)": 1.530444 }, { "acc": 0.975, "epoch": 30.56245605812046, "grad_norm": 4.932958126068115, "learning_rate": 3.5915521423633114e-06, "loss": 0.08966041, "memory(GiB)": 13.7, "step": 65205, "train_speed(iter/s)": 1.530452 }, { "acc": 0.98208332, "epoch": 30.564799625029295, "grad_norm": 0.007249176036566496, "learning_rate": 3.5908084647446815e-06, "loss": 0.03447403, "memory(GiB)": 13.7, "step": 65210, "train_speed(iter/s)": 1.530454 }, { "acc": 0.96794109, "epoch": 30.56714319193813, "grad_norm": 6.544258117675781, "learning_rate": 3.5900648210104074e-06, "loss": 0.10731189, "memory(GiB)": 13.7, "step": 65215, "train_speed(iter/s)": 1.530463 }, { "acc": 0.97468758, "epoch": 30.569486758846963, "grad_norm": 2.826747179031372, "learning_rate": 3.5893212111783602e-06, "loss": 0.07051488, "memory(GiB)": 13.7, "step": 65220, "train_speed(iter/s)": 1.530463 }, { "acc": 0.99145832, "epoch": 30.5718303257558, "grad_norm": 4.5992889404296875, "learning_rate": 3.5885776352664144e-06, "loss": 0.02404006, "memory(GiB)": 13.7, "step": 65225, "train_speed(iter/s)": 1.530476 }, { "acc": 0.98653851, "epoch": 30.574173892664636, "grad_norm": 4.4683837890625, "learning_rate": 3.587834093292445e-06, "loss": 0.04140407, "memory(GiB)": 13.7, "step": 65230, "train_speed(iter/s)": 1.530479 }, { "acc": 0.9927084, "epoch": 30.57651745957347, "grad_norm": 5.266326427459717, "learning_rate": 3.5870905852743226e-06, "loss": 0.01951735, "memory(GiB)": 13.7, "step": 65235, "train_speed(iter/s)": 1.53048 }, { "acc": 0.99125004, "epoch": 30.578861026482308, "grad_norm": 2.4398224353790283, "learning_rate": 3.586347111229921e-06, "loss": 0.03437756, "memory(GiB)": 13.7, "step": 65240, "train_speed(iter/s)": 1.530483 }, { "acc": 0.98490534, "epoch": 30.581204593391142, "grad_norm": 2.137575149536133, "learning_rate": 3.585603671177106e-06, "loss": 0.05541326, "memory(GiB)": 13.7, "step": 65245, "train_speed(iter/s)": 1.530478 }, { "acc": 0.97437496, "epoch": 30.583548160299976, "grad_norm": 3.7449162006378174, "learning_rate": 3.5848602651337515e-06, "loss": 0.06535801, "memory(GiB)": 13.7, "step": 65250, "train_speed(iter/s)": 1.530475 }, { "acc": 0.97545013, "epoch": 30.58589172720881, "grad_norm": 10.150565147399902, "learning_rate": 3.5841168931177243e-06, "loss": 0.09746997, "memory(GiB)": 13.7, "step": 65255, "train_speed(iter/s)": 1.530481 }, { "acc": 0.98258934, "epoch": 30.58823529411765, "grad_norm": 0.4891996681690216, "learning_rate": 3.5833735551468956e-06, "loss": 0.05410055, "memory(GiB)": 13.7, "step": 65260, "train_speed(iter/s)": 1.530488 }, { "acc": 0.98031254, "epoch": 30.590578861026483, "grad_norm": 3.943962335586548, "learning_rate": 3.5826302512391305e-06, "loss": 0.07163954, "memory(GiB)": 13.7, "step": 65265, "train_speed(iter/s)": 1.53049 }, { "acc": 0.97354164, "epoch": 30.592922427935317, "grad_norm": 5.606632709503174, "learning_rate": 3.5818869814122955e-06, "loss": 0.0793278, "memory(GiB)": 13.7, "step": 65270, "train_speed(iter/s)": 1.530491 }, { "acc": 0.98247032, "epoch": 30.59526599484415, "grad_norm": 4.107895374298096, "learning_rate": 3.5811437456842574e-06, "loss": 0.04213558, "memory(GiB)": 13.7, "step": 65275, "train_speed(iter/s)": 1.530492 }, { "acc": 0.97192707, "epoch": 30.59760956175299, "grad_norm": 2.4688680171966553, "learning_rate": 3.580400544072881e-06, "loss": 0.0830754, "memory(GiB)": 13.7, "step": 65280, "train_speed(iter/s)": 1.530494 }, { "acc": 0.9894886, "epoch": 30.599953128661824, "grad_norm": 0.9641708135604858, "learning_rate": 3.57965737659603e-06, "loss": 0.02953773, "memory(GiB)": 13.7, "step": 65285, "train_speed(iter/s)": 1.530497 }, { "acc": 0.9864522, "epoch": 30.602296695570658, "grad_norm": 0.7595985531806946, "learning_rate": 3.578914243271568e-06, "loss": 0.04031787, "memory(GiB)": 13.7, "step": 65290, "train_speed(iter/s)": 1.530499 }, { "acc": 0.97842255, "epoch": 30.604640262479492, "grad_norm": 4.356091022491455, "learning_rate": 3.578171144117357e-06, "loss": 0.05791649, "memory(GiB)": 13.7, "step": 65295, "train_speed(iter/s)": 1.530504 }, { "acc": 0.97302084, "epoch": 30.60698382938833, "grad_norm": 0.4710407555103302, "learning_rate": 3.577428079151259e-06, "loss": 0.04608199, "memory(GiB)": 13.7, "step": 65300, "train_speed(iter/s)": 1.530514 }, { "acc": 0.98604164, "epoch": 30.609327396297164, "grad_norm": 2.8559281826019287, "learning_rate": 3.5766850483911344e-06, "loss": 0.04168116, "memory(GiB)": 13.7, "step": 65305, "train_speed(iter/s)": 1.530513 }, { "acc": 0.98738098, "epoch": 30.611670963206, "grad_norm": 2.503838539123535, "learning_rate": 3.5759420518548444e-06, "loss": 0.05581526, "memory(GiB)": 13.7, "step": 65310, "train_speed(iter/s)": 1.530509 }, { "acc": 0.98125, "epoch": 30.614014530114837, "grad_norm": 5.296031475067139, "learning_rate": 3.5751990895602463e-06, "loss": 0.05321313, "memory(GiB)": 13.7, "step": 65315, "train_speed(iter/s)": 1.530504 }, { "acc": 0.98812504, "epoch": 30.61635809702367, "grad_norm": 3.9209165573120117, "learning_rate": 3.5744561615252006e-06, "loss": 0.02846571, "memory(GiB)": 13.7, "step": 65320, "train_speed(iter/s)": 1.530507 }, { "acc": 0.9942708, "epoch": 30.618701663932505, "grad_norm": 1.1431726217269897, "learning_rate": 3.5737132677675653e-06, "loss": 0.02202669, "memory(GiB)": 13.7, "step": 65325, "train_speed(iter/s)": 1.530511 }, { "acc": 0.98552084, "epoch": 30.62104523084134, "grad_norm": 1.6420278549194336, "learning_rate": 3.5729704083051925e-06, "loss": 0.03599365, "memory(GiB)": 13.7, "step": 65330, "train_speed(iter/s)": 1.530518 }, { "acc": 0.99092264, "epoch": 30.623388797750177, "grad_norm": 2.207986354827881, "learning_rate": 3.572227583155944e-06, "loss": 0.02923409, "memory(GiB)": 13.7, "step": 65335, "train_speed(iter/s)": 1.530521 }, { "acc": 0.98445654, "epoch": 30.62573236465901, "grad_norm": 2.5128352642059326, "learning_rate": 3.5714847923376713e-06, "loss": 0.0585814, "memory(GiB)": 13.7, "step": 65340, "train_speed(iter/s)": 1.530524 }, { "acc": 0.98479176, "epoch": 30.628075931567846, "grad_norm": 4.740480422973633, "learning_rate": 3.5707420358682283e-06, "loss": 0.05515622, "memory(GiB)": 13.7, "step": 65345, "train_speed(iter/s)": 1.530525 }, { "acc": 0.98576641, "epoch": 30.63041949847668, "grad_norm": 2.3742523193359375, "learning_rate": 3.569999313765471e-06, "loss": 0.07882216, "memory(GiB)": 13.7, "step": 65350, "train_speed(iter/s)": 1.530525 }, { "acc": 0.99333334, "epoch": 30.632763065385518, "grad_norm": 3.4556334018707275, "learning_rate": 3.569256626047249e-06, "loss": 0.02717831, "memory(GiB)": 13.7, "step": 65355, "train_speed(iter/s)": 1.530534 }, { "acc": 0.98733568, "epoch": 30.635106632294352, "grad_norm": 3.3518259525299072, "learning_rate": 3.5685139727314173e-06, "loss": 0.02934288, "memory(GiB)": 13.7, "step": 65360, "train_speed(iter/s)": 1.530535 }, { "acc": 0.97069454, "epoch": 30.637450199203187, "grad_norm": 0.8771743178367615, "learning_rate": 3.567771353835826e-06, "loss": 0.06141801, "memory(GiB)": 13.7, "step": 65365, "train_speed(iter/s)": 1.530539 }, { "acc": 0.98166122, "epoch": 30.63979376611202, "grad_norm": 2.7765543460845947, "learning_rate": 3.567028769378324e-06, "loss": 0.11528517, "memory(GiB)": 13.7, "step": 65370, "train_speed(iter/s)": 1.530542 }, { "acc": 0.97562504, "epoch": 30.64213733302086, "grad_norm": 5.968421936035156, "learning_rate": 3.5662862193767596e-06, "loss": 0.07227399, "memory(GiB)": 13.7, "step": 65375, "train_speed(iter/s)": 1.530549 }, { "acc": 0.9854166, "epoch": 30.644480899929693, "grad_norm": 2.5191805362701416, "learning_rate": 3.565543703848983e-06, "loss": 0.02841111, "memory(GiB)": 13.7, "step": 65380, "train_speed(iter/s)": 1.530545 }, { "acc": 0.97342262, "epoch": 30.646824466838527, "grad_norm": 10.176491737365723, "learning_rate": 3.564801222812843e-06, "loss": 0.08299586, "memory(GiB)": 13.7, "step": 65385, "train_speed(iter/s)": 1.530553 }, { "acc": 0.98050594, "epoch": 30.649168033747365, "grad_norm": 0.9821087718009949, "learning_rate": 3.5640587762861835e-06, "loss": 0.0525063, "memory(GiB)": 13.7, "step": 65390, "train_speed(iter/s)": 1.530554 }, { "acc": 0.98458328, "epoch": 30.6515116006562, "grad_norm": 3.8985798358917236, "learning_rate": 3.5633163642868523e-06, "loss": 0.05297896, "memory(GiB)": 13.7, "step": 65395, "train_speed(iter/s)": 1.530561 }, { "acc": 0.9802084, "epoch": 30.653855167565034, "grad_norm": 3.1513381004333496, "learning_rate": 3.562573986832694e-06, "loss": 0.04891763, "memory(GiB)": 13.7, "step": 65400, "train_speed(iter/s)": 1.530566 }, { "acc": 0.9645834, "epoch": 30.65619873447387, "grad_norm": 5.394706726074219, "learning_rate": 3.5618316439415534e-06, "loss": 0.08243425, "memory(GiB)": 13.7, "step": 65405, "train_speed(iter/s)": 1.530564 }, { "acc": 0.96312504, "epoch": 30.658542301382706, "grad_norm": 6.637418746948242, "learning_rate": 3.5610893356312755e-06, "loss": 0.07863691, "memory(GiB)": 13.7, "step": 65410, "train_speed(iter/s)": 1.530565 }, { "acc": 0.97961311, "epoch": 30.66088586829154, "grad_norm": 1.1324498653411865, "learning_rate": 3.5603470619196994e-06, "loss": 0.05423528, "memory(GiB)": 13.7, "step": 65415, "train_speed(iter/s)": 1.530575 }, { "acc": 0.9927083, "epoch": 30.663229435200375, "grad_norm": 1.882535696029663, "learning_rate": 3.5596048228246676e-06, "loss": 0.0251852, "memory(GiB)": 13.7, "step": 65420, "train_speed(iter/s)": 1.530577 }, { "acc": 0.98874998, "epoch": 30.66557300210921, "grad_norm": 4.013803005218506, "learning_rate": 3.5588626183640235e-06, "loss": 0.03136098, "memory(GiB)": 13.7, "step": 65425, "train_speed(iter/s)": 1.530578 }, { "acc": 0.9895834, "epoch": 30.667916569018047, "grad_norm": 3.099818706512451, "learning_rate": 3.558120448555605e-06, "loss": 0.04902968, "memory(GiB)": 13.7, "step": 65430, "train_speed(iter/s)": 1.530581 }, { "acc": 0.97833328, "epoch": 30.67026013592688, "grad_norm": 3.711057186126709, "learning_rate": 3.5573783134172535e-06, "loss": 0.06464689, "memory(GiB)": 13.7, "step": 65435, "train_speed(iter/s)": 1.530586 }, { "acc": 0.9864584, "epoch": 30.672603702835715, "grad_norm": 2.028705358505249, "learning_rate": 3.556636212966806e-06, "loss": 0.05692219, "memory(GiB)": 13.7, "step": 65440, "train_speed(iter/s)": 1.530585 }, { "acc": 0.984375, "epoch": 30.67494726974455, "grad_norm": 0.00579691119492054, "learning_rate": 3.5558941472220994e-06, "loss": 0.0406865, "memory(GiB)": 13.7, "step": 65445, "train_speed(iter/s)": 1.530589 }, { "acc": 0.97644348, "epoch": 30.677290836653388, "grad_norm": 1.8987524509429932, "learning_rate": 3.5551521162009745e-06, "loss": 0.04090858, "memory(GiB)": 13.7, "step": 65450, "train_speed(iter/s)": 1.530589 }, { "acc": 0.99187498, "epoch": 30.679634403562222, "grad_norm": 6.711606025695801, "learning_rate": 3.5544101199212616e-06, "loss": 0.03400722, "memory(GiB)": 13.7, "step": 65455, "train_speed(iter/s)": 1.530593 }, { "acc": 0.97739582, "epoch": 30.681977970471056, "grad_norm": 1.4516347646713257, "learning_rate": 3.5536681584007993e-06, "loss": 0.03818548, "memory(GiB)": 13.7, "step": 65460, "train_speed(iter/s)": 1.530596 }, { "acc": 0.97383938, "epoch": 30.68432153737989, "grad_norm": 4.513697624206543, "learning_rate": 3.5529262316574215e-06, "loss": 0.05781103, "memory(GiB)": 13.7, "step": 65465, "train_speed(iter/s)": 1.530598 }, { "acc": 0.98912773, "epoch": 30.68666510428873, "grad_norm": 1.2312781810760498, "learning_rate": 3.552184339708961e-06, "loss": 0.03487587, "memory(GiB)": 13.7, "step": 65470, "train_speed(iter/s)": 1.530606 }, { "acc": 0.98895836, "epoch": 30.689008671197563, "grad_norm": 2.5134387016296387, "learning_rate": 3.551442482573251e-06, "loss": 0.03746622, "memory(GiB)": 13.7, "step": 65475, "train_speed(iter/s)": 1.530604 }, { "acc": 0.97424679, "epoch": 30.691352238106397, "grad_norm": 4.831058979034424, "learning_rate": 3.5507006602681236e-06, "loss": 0.06271645, "memory(GiB)": 13.7, "step": 65480, "train_speed(iter/s)": 1.5306 }, { "acc": 0.98612175, "epoch": 30.693695805015235, "grad_norm": 2.859363317489624, "learning_rate": 3.5499588728114095e-06, "loss": 0.04981627, "memory(GiB)": 13.7, "step": 65485, "train_speed(iter/s)": 1.530606 }, { "acc": 0.98969698, "epoch": 30.69603937192407, "grad_norm": 2.2685554027557373, "learning_rate": 3.5492171202209376e-06, "loss": 0.04721619, "memory(GiB)": 13.7, "step": 65490, "train_speed(iter/s)": 1.530612 }, { "acc": 0.98500004, "epoch": 30.698382938832903, "grad_norm": 2.1557302474975586, "learning_rate": 3.5484754025145394e-06, "loss": 0.03375882, "memory(GiB)": 13.7, "step": 65495, "train_speed(iter/s)": 1.530614 }, { "acc": 0.9942708, "epoch": 30.700726505741738, "grad_norm": 0.7843119502067566, "learning_rate": 3.547733719710043e-06, "loss": 0.02555311, "memory(GiB)": 13.7, "step": 65500, "train_speed(iter/s)": 1.530616 }, { "acc": 0.9791667, "epoch": 30.703070072650576, "grad_norm": 3.298161745071411, "learning_rate": 3.5469920718252738e-06, "loss": 0.0721359, "memory(GiB)": 13.7, "step": 65505, "train_speed(iter/s)": 1.530621 }, { "acc": 0.98708334, "epoch": 30.70541363955941, "grad_norm": 2.8585264682769775, "learning_rate": 3.5462504588780605e-06, "loss": 0.0293815, "memory(GiB)": 13.7, "step": 65510, "train_speed(iter/s)": 1.530628 }, { "acc": 0.9640625, "epoch": 30.707757206468244, "grad_norm": 5.819393157958984, "learning_rate": 3.5455088808862288e-06, "loss": 0.08692611, "memory(GiB)": 13.7, "step": 65515, "train_speed(iter/s)": 1.530635 }, { "acc": 0.98848219, "epoch": 30.71010077337708, "grad_norm": 4.409381866455078, "learning_rate": 3.5447673378676022e-06, "loss": 0.02820335, "memory(GiB)": 13.7, "step": 65520, "train_speed(iter/s)": 1.53064 }, { "acc": 0.9864583, "epoch": 30.712444340285916, "grad_norm": 3.180750846862793, "learning_rate": 3.5440258298400075e-06, "loss": 0.03720892, "memory(GiB)": 13.7, "step": 65525, "train_speed(iter/s)": 1.530641 }, { "acc": 0.97881947, "epoch": 30.71478790719475, "grad_norm": 2.8986928462982178, "learning_rate": 3.5432843568212675e-06, "loss": 0.05441486, "memory(GiB)": 13.7, "step": 65530, "train_speed(iter/s)": 1.530642 }, { "acc": 0.98604164, "epoch": 30.717131474103585, "grad_norm": 2.9944567680358887, "learning_rate": 3.5425429188292038e-06, "loss": 0.02215586, "memory(GiB)": 13.7, "step": 65535, "train_speed(iter/s)": 1.530645 }, { "acc": 0.98269348, "epoch": 30.71947504101242, "grad_norm": 4.3794426918029785, "learning_rate": 3.5418015158816395e-06, "loss": 0.05760128, "memory(GiB)": 13.7, "step": 65540, "train_speed(iter/s)": 1.530648 }, { "acc": 0.97438946, "epoch": 30.721818607921257, "grad_norm": 3.932385206222534, "learning_rate": 3.541060147996393e-06, "loss": 0.06872309, "memory(GiB)": 13.7, "step": 65545, "train_speed(iter/s)": 1.530649 }, { "acc": 0.96729164, "epoch": 30.72416217483009, "grad_norm": 7.382641315460205, "learning_rate": 3.5403188151912854e-06, "loss": 0.05826387, "memory(GiB)": 13.7, "step": 65550, "train_speed(iter/s)": 1.530651 }, { "acc": 0.97909088, "epoch": 30.726505741738926, "grad_norm": 4.670179843902588, "learning_rate": 3.539577517484138e-06, "loss": 0.09500323, "memory(GiB)": 13.7, "step": 65555, "train_speed(iter/s)": 1.530649 }, { "acc": 0.98361111, "epoch": 30.72884930864776, "grad_norm": 2.197087526321411, "learning_rate": 3.5388362548927676e-06, "loss": 0.06162572, "memory(GiB)": 13.7, "step": 65560, "train_speed(iter/s)": 1.530646 }, { "acc": 0.97468758, "epoch": 30.731192875556598, "grad_norm": 2.5427935123443604, "learning_rate": 3.5380950274349894e-06, "loss": 0.07006012, "memory(GiB)": 13.7, "step": 65565, "train_speed(iter/s)": 1.530644 }, { "acc": 0.98642359, "epoch": 30.733536442465432, "grad_norm": 2.4575114250183105, "learning_rate": 3.5373538351286246e-06, "loss": 0.06349051, "memory(GiB)": 13.7, "step": 65570, "train_speed(iter/s)": 1.530645 }, { "acc": 0.98496113, "epoch": 30.735880009374267, "grad_norm": 4.5058088302612305, "learning_rate": 3.536612677991488e-06, "loss": 0.07013661, "memory(GiB)": 13.7, "step": 65575, "train_speed(iter/s)": 1.530648 }, { "acc": 0.9885417, "epoch": 30.738223576283104, "grad_norm": 0.7461980581283569, "learning_rate": 3.5358715560413907e-06, "loss": 0.02958788, "memory(GiB)": 13.7, "step": 65580, "train_speed(iter/s)": 1.530649 }, { "acc": 0.9916667, "epoch": 30.74056714319194, "grad_norm": 0.9047915935516357, "learning_rate": 3.535130469296151e-06, "loss": 0.02194043, "memory(GiB)": 13.7, "step": 65585, "train_speed(iter/s)": 1.530656 }, { "acc": 0.98189487, "epoch": 30.742910710100773, "grad_norm": 4.913313388824463, "learning_rate": 3.5343894177735804e-06, "loss": 0.05331784, "memory(GiB)": 13.7, "step": 65590, "train_speed(iter/s)": 1.53066 }, { "acc": 0.9859375, "epoch": 30.745254277009607, "grad_norm": 2.0808377265930176, "learning_rate": 3.5336484014914905e-06, "loss": 0.06070402, "memory(GiB)": 13.7, "step": 65595, "train_speed(iter/s)": 1.530664 }, { "acc": 0.98676472, "epoch": 30.747597843918445, "grad_norm": 0.10016871988773346, "learning_rate": 3.532907420467695e-06, "loss": 0.03229035, "memory(GiB)": 13.7, "step": 65600, "train_speed(iter/s)": 1.530662 }, { "acc": 0.99092264, "epoch": 30.74994141082728, "grad_norm": 1.6294173002243042, "learning_rate": 3.532166474720003e-06, "loss": 0.0323665, "memory(GiB)": 13.7, "step": 65605, "train_speed(iter/s)": 1.530663 }, { "acc": 0.99437504, "epoch": 30.752284977736114, "grad_norm": 1.4664109945297241, "learning_rate": 3.531425564266226e-06, "loss": 0.03346441, "memory(GiB)": 13.7, "step": 65610, "train_speed(iter/s)": 1.530661 }, { "acc": 0.99375, "epoch": 30.754628544644948, "grad_norm": 1.5336825847625732, "learning_rate": 3.530684689124172e-06, "loss": 0.01446844, "memory(GiB)": 13.7, "step": 65615, "train_speed(iter/s)": 1.530662 }, { "acc": 0.99104166, "epoch": 30.756972111553786, "grad_norm": 1.3617730140686035, "learning_rate": 3.5299438493116505e-06, "loss": 0.05126284, "memory(GiB)": 13.7, "step": 65620, "train_speed(iter/s)": 1.530667 }, { "acc": 0.99245033, "epoch": 30.75931567846262, "grad_norm": 2.4504306316375732, "learning_rate": 3.529203044846466e-06, "loss": 0.04914398, "memory(GiB)": 13.7, "step": 65625, "train_speed(iter/s)": 1.530665 }, { "acc": 0.9894886, "epoch": 30.761659245371455, "grad_norm": 3.648232936859131, "learning_rate": 3.5284622757464273e-06, "loss": 0.02935008, "memory(GiB)": 13.7, "step": 65630, "train_speed(iter/s)": 1.530667 }, { "acc": 0.97848282, "epoch": 30.76400281228029, "grad_norm": 3.551044464111328, "learning_rate": 3.5277215420293397e-06, "loss": 0.06891397, "memory(GiB)": 13.7, "step": 65635, "train_speed(iter/s)": 1.530667 }, { "acc": 0.99437504, "epoch": 30.766346379189127, "grad_norm": 0.3164842128753662, "learning_rate": 3.5269808437130064e-06, "loss": 0.01676933, "memory(GiB)": 13.7, "step": 65640, "train_speed(iter/s)": 1.530666 }, { "acc": 0.96839008, "epoch": 30.76868994609796, "grad_norm": 3.6888952255249023, "learning_rate": 3.526240180815234e-06, "loss": 0.06262407, "memory(GiB)": 13.7, "step": 65645, "train_speed(iter/s)": 1.530668 }, { "acc": 0.97833328, "epoch": 30.771033513006795, "grad_norm": 5.133885383605957, "learning_rate": 3.5254995533538234e-06, "loss": 0.09244632, "memory(GiB)": 13.7, "step": 65650, "train_speed(iter/s)": 1.530673 }, { "acc": 0.97931547, "epoch": 30.773377079915633, "grad_norm": 4.6941070556640625, "learning_rate": 3.524758961346579e-06, "loss": 0.04443893, "memory(GiB)": 13.7, "step": 65655, "train_speed(iter/s)": 1.530677 }, { "acc": 0.97979164, "epoch": 30.775720646824468, "grad_norm": 0.8718875050544739, "learning_rate": 3.524018404811301e-06, "loss": 0.04206827, "memory(GiB)": 13.7, "step": 65660, "train_speed(iter/s)": 1.530684 }, { "acc": 0.98758154, "epoch": 30.778064213733302, "grad_norm": 0.026563020423054695, "learning_rate": 3.52327788376579e-06, "loss": 0.07120733, "memory(GiB)": 13.7, "step": 65665, "train_speed(iter/s)": 1.530685 }, { "acc": 0.98065481, "epoch": 30.780407780642136, "grad_norm": 5.240853309631348, "learning_rate": 3.5225373982278443e-06, "loss": 0.05922527, "memory(GiB)": 13.7, "step": 65670, "train_speed(iter/s)": 1.53069 }, { "acc": 0.98545141, "epoch": 30.782751347550974, "grad_norm": 0.84743332862854, "learning_rate": 3.5217969482152648e-06, "loss": 0.04821971, "memory(GiB)": 13.7, "step": 65675, "train_speed(iter/s)": 1.530685 }, { "acc": 0.98279762, "epoch": 30.78509491445981, "grad_norm": 4.109141826629639, "learning_rate": 3.5210565337458487e-06, "loss": 0.03745828, "memory(GiB)": 13.7, "step": 65680, "train_speed(iter/s)": 1.530686 }, { "acc": 0.97254782, "epoch": 30.787438481368643, "grad_norm": 1.253333568572998, "learning_rate": 3.520316154837393e-06, "loss": 0.11679727, "memory(GiB)": 13.7, "step": 65685, "train_speed(iter/s)": 1.530685 }, { "acc": 0.990625, "epoch": 30.789782048277477, "grad_norm": 0.01839272864162922, "learning_rate": 3.5195758115076944e-06, "loss": 0.02302836, "memory(GiB)": 13.7, "step": 65690, "train_speed(iter/s)": 1.530685 }, { "acc": 0.99308605, "epoch": 30.792125615186315, "grad_norm": 5.801083564758301, "learning_rate": 3.518835503774548e-06, "loss": 0.04079352, "memory(GiB)": 13.7, "step": 65695, "train_speed(iter/s)": 1.530683 }, { "acc": 0.98825502, "epoch": 30.79446918209515, "grad_norm": 0.8873744606971741, "learning_rate": 3.518095231655749e-06, "loss": 0.06410294, "memory(GiB)": 13.7, "step": 65700, "train_speed(iter/s)": 1.530686 }, { "acc": 0.98852673, "epoch": 30.796812749003983, "grad_norm": 3.0530693531036377, "learning_rate": 3.5173549951690905e-06, "loss": 0.05714384, "memory(GiB)": 13.7, "step": 65705, "train_speed(iter/s)": 1.530691 }, { "acc": 0.97175598, "epoch": 30.799156315912818, "grad_norm": 4.818700790405273, "learning_rate": 3.5166147943323643e-06, "loss": 0.11030262, "memory(GiB)": 13.7, "step": 65710, "train_speed(iter/s)": 1.530687 }, { "acc": 0.99025297, "epoch": 30.801499882821656, "grad_norm": 2.6538517475128174, "learning_rate": 3.5158746291633646e-06, "loss": 0.04095261, "memory(GiB)": 13.7, "step": 65715, "train_speed(iter/s)": 1.530685 }, { "acc": 0.97840233, "epoch": 30.80384344973049, "grad_norm": 5.296888828277588, "learning_rate": 3.5151344996798806e-06, "loss": 0.07042783, "memory(GiB)": 13.7, "step": 65720, "train_speed(iter/s)": 1.530681 }, { "acc": 0.98041668, "epoch": 30.806187016639324, "grad_norm": 1.8409991264343262, "learning_rate": 3.5143944058997033e-06, "loss": 0.02764267, "memory(GiB)": 13.7, "step": 65725, "train_speed(iter/s)": 1.530685 }, { "acc": 0.98638897, "epoch": 30.808530583548162, "grad_norm": 1.3385529518127441, "learning_rate": 3.513654347840623e-06, "loss": 0.0447672, "memory(GiB)": 13.7, "step": 65730, "train_speed(iter/s)": 1.530692 }, { "acc": 0.97645836, "epoch": 30.810874150456996, "grad_norm": 3.565268039703369, "learning_rate": 3.5129143255204277e-06, "loss": 0.05928728, "memory(GiB)": 13.7, "step": 65735, "train_speed(iter/s)": 1.530694 }, { "acc": 0.98500004, "epoch": 30.81321771736583, "grad_norm": 2.789907455444336, "learning_rate": 3.5121743389569036e-06, "loss": 0.05967193, "memory(GiB)": 13.7, "step": 65740, "train_speed(iter/s)": 1.530692 }, { "acc": 0.98812504, "epoch": 30.815561284274665, "grad_norm": 2.3943214416503906, "learning_rate": 3.511434388167841e-06, "loss": 0.03252921, "memory(GiB)": 13.7, "step": 65745, "train_speed(iter/s)": 1.530693 }, { "acc": 0.99020834, "epoch": 30.817904851183503, "grad_norm": 0.48520445823669434, "learning_rate": 3.510694473171023e-06, "loss": 0.01388933, "memory(GiB)": 13.7, "step": 65750, "train_speed(iter/s)": 1.530698 }, { "acc": 0.98579445, "epoch": 30.820248418092337, "grad_norm": 2.2926549911499023, "learning_rate": 3.5099545939842343e-06, "loss": 0.02977818, "memory(GiB)": 13.7, "step": 65755, "train_speed(iter/s)": 1.530711 }, { "acc": 0.99090271, "epoch": 30.82259198500117, "grad_norm": 3.4104058742523193, "learning_rate": 3.5092147506252612e-06, "loss": 0.02983982, "memory(GiB)": 13.7, "step": 65760, "train_speed(iter/s)": 1.530713 }, { "acc": 0.97165184, "epoch": 30.824935551910006, "grad_norm": 3.281700611114502, "learning_rate": 3.5084749431118865e-06, "loss": 0.09792392, "memory(GiB)": 13.7, "step": 65765, "train_speed(iter/s)": 1.530718 }, { "acc": 0.99738102, "epoch": 30.827279118818844, "grad_norm": 1.6864114999771118, "learning_rate": 3.5077351714618914e-06, "loss": 0.03902999, "memory(GiB)": 13.7, "step": 65770, "train_speed(iter/s)": 1.530719 }, { "acc": 0.9744791, "epoch": 30.829622685727678, "grad_norm": 0.009959852322936058, "learning_rate": 3.50699543569306e-06, "loss": 0.05101219, "memory(GiB)": 13.7, "step": 65775, "train_speed(iter/s)": 1.530715 }, { "acc": 0.98779755, "epoch": 30.831966252636512, "grad_norm": 1.645392656326294, "learning_rate": 3.5062557358231697e-06, "loss": 0.07406726, "memory(GiB)": 13.7, "step": 65780, "train_speed(iter/s)": 1.530718 }, { "acc": 0.98812504, "epoch": 30.834309819545346, "grad_norm": 2.188606023788452, "learning_rate": 3.5055160718700054e-06, "loss": 0.03279883, "memory(GiB)": 13.7, "step": 65785, "train_speed(iter/s)": 1.530723 }, { "acc": 0.98335066, "epoch": 30.836653386454184, "grad_norm": 4.424407482147217, "learning_rate": 3.504776443851342e-06, "loss": 0.03739637, "memory(GiB)": 13.7, "step": 65790, "train_speed(iter/s)": 1.530719 }, { "acc": 0.97483006, "epoch": 30.83899695336302, "grad_norm": 8.601161003112793, "learning_rate": 3.504036851784959e-06, "loss": 0.08286964, "memory(GiB)": 13.7, "step": 65795, "train_speed(iter/s)": 1.530723 }, { "acc": 0.97704859, "epoch": 30.841340520271853, "grad_norm": 3.9915146827697754, "learning_rate": 3.503297295688633e-06, "loss": 0.0469928, "memory(GiB)": 13.7, "step": 65800, "train_speed(iter/s)": 1.530729 }, { "acc": 0.98812504, "epoch": 30.84368408718069, "grad_norm": 6.341418266296387, "learning_rate": 3.502557775580142e-06, "loss": 0.05778459, "memory(GiB)": 13.7, "step": 65805, "train_speed(iter/s)": 1.530732 }, { "acc": 0.98597469, "epoch": 30.846027654089525, "grad_norm": 7.239922523498535, "learning_rate": 3.501818291477261e-06, "loss": 0.04842036, "memory(GiB)": 13.7, "step": 65810, "train_speed(iter/s)": 1.530741 }, { "acc": 0.98083334, "epoch": 30.84837122099836, "grad_norm": 0.3545130491256714, "learning_rate": 3.5010788433977644e-06, "loss": 0.03617805, "memory(GiB)": 13.7, "step": 65815, "train_speed(iter/s)": 1.530738 }, { "acc": 0.99375, "epoch": 30.850714787907194, "grad_norm": 2.661961793899536, "learning_rate": 3.5003394313594267e-06, "loss": 0.03199677, "memory(GiB)": 13.7, "step": 65820, "train_speed(iter/s)": 1.530738 }, { "acc": 0.98449202, "epoch": 30.85305835481603, "grad_norm": 4.061497688293457, "learning_rate": 3.4996000553800203e-06, "loss": 0.06686976, "memory(GiB)": 13.7, "step": 65825, "train_speed(iter/s)": 1.530737 }, { "acc": 0.9791667, "epoch": 30.855401921724866, "grad_norm": 4.030824184417725, "learning_rate": 3.4988607154773196e-06, "loss": 0.04247395, "memory(GiB)": 13.7, "step": 65830, "train_speed(iter/s)": 1.530741 }, { "acc": 0.97756453, "epoch": 30.8577454886337, "grad_norm": 3.32185435295105, "learning_rate": 3.498121411669092e-06, "loss": 0.08412663, "memory(GiB)": 13.7, "step": 65835, "train_speed(iter/s)": 1.530741 }, { "acc": 0.990625, "epoch": 30.860089055542534, "grad_norm": 0.010000033304095268, "learning_rate": 3.4973821439731116e-06, "loss": 0.02402182, "memory(GiB)": 13.7, "step": 65840, "train_speed(iter/s)": 1.530744 }, { "acc": 0.98187504, "epoch": 30.862432622451372, "grad_norm": 4.695673942565918, "learning_rate": 3.4966429124071445e-06, "loss": 0.04301917, "memory(GiB)": 13.7, "step": 65845, "train_speed(iter/s)": 1.530748 }, { "acc": 0.98467264, "epoch": 30.864776189360207, "grad_norm": 2.9213266372680664, "learning_rate": 3.495903716988962e-06, "loss": 0.05913354, "memory(GiB)": 13.7, "step": 65850, "train_speed(iter/s)": 1.530748 }, { "acc": 0.99410172, "epoch": 30.86711975626904, "grad_norm": 1.196984052658081, "learning_rate": 3.49516455773633e-06, "loss": 0.04785895, "memory(GiB)": 13.7, "step": 65855, "train_speed(iter/s)": 1.530756 }, { "acc": 0.9958333, "epoch": 30.869463323177875, "grad_norm": 1.338143229484558, "learning_rate": 3.4944254346670182e-06, "loss": 0.02283071, "memory(GiB)": 13.7, "step": 65860, "train_speed(iter/s)": 1.530761 }, { "acc": 0.990625, "epoch": 30.871806890086713, "grad_norm": 4.045966148376465, "learning_rate": 3.493686347798791e-06, "loss": 0.05901303, "memory(GiB)": 13.7, "step": 65865, "train_speed(iter/s)": 1.530767 }, { "acc": 0.9809227, "epoch": 30.874150456995547, "grad_norm": 6.085511684417725, "learning_rate": 3.4929472971494145e-06, "loss": 0.08843975, "memory(GiB)": 13.7, "step": 65870, "train_speed(iter/s)": 1.530772 }, { "acc": 0.98517857, "epoch": 30.87649402390438, "grad_norm": 0.015688873827457428, "learning_rate": 3.492208282736651e-06, "loss": 0.04122832, "memory(GiB)": 13.7, "step": 65875, "train_speed(iter/s)": 1.530777 }, { "acc": 0.98446426, "epoch": 30.87883759081322, "grad_norm": 0.921659529209137, "learning_rate": 3.491469304578266e-06, "loss": 0.03040549, "memory(GiB)": 13.7, "step": 65880, "train_speed(iter/s)": 1.530768 }, { "acc": 0.98217258, "epoch": 30.881181157722054, "grad_norm": 1.2731581926345825, "learning_rate": 3.4907303626920197e-06, "loss": 0.04544859, "memory(GiB)": 13.7, "step": 65885, "train_speed(iter/s)": 1.530771 }, { "acc": 0.99305553, "epoch": 30.883524724630888, "grad_norm": 0.5811768174171448, "learning_rate": 3.4899914570956765e-06, "loss": 0.03383551, "memory(GiB)": 13.7, "step": 65890, "train_speed(iter/s)": 1.530773 }, { "acc": 0.98101187, "epoch": 30.885868291539722, "grad_norm": 4.978233814239502, "learning_rate": 3.4892525878069965e-06, "loss": 0.07796953, "memory(GiB)": 13.7, "step": 65895, "train_speed(iter/s)": 1.53078 }, { "acc": 0.97218132, "epoch": 30.88821185844856, "grad_norm": 0.004013797268271446, "learning_rate": 3.488513754843739e-06, "loss": 0.08054153, "memory(GiB)": 13.7, "step": 65900, "train_speed(iter/s)": 1.530787 }, { "acc": 0.98145838, "epoch": 30.890555425357395, "grad_norm": 0.13564370572566986, "learning_rate": 3.4877749582236636e-06, "loss": 0.03057321, "memory(GiB)": 13.7, "step": 65905, "train_speed(iter/s)": 1.530793 }, { "acc": 0.97365532, "epoch": 30.89289899226623, "grad_norm": 1.3556039333343506, "learning_rate": 3.4870361979645305e-06, "loss": 0.06528947, "memory(GiB)": 13.7, "step": 65910, "train_speed(iter/s)": 1.530792 }, { "acc": 0.98727684, "epoch": 30.895242559175063, "grad_norm": 5.748608112335205, "learning_rate": 3.486297474084094e-06, "loss": 0.03330207, "memory(GiB)": 13.7, "step": 65915, "train_speed(iter/s)": 1.530795 }, { "acc": 0.97827377, "epoch": 30.8975861260839, "grad_norm": 5.814391136169434, "learning_rate": 3.48555878660011e-06, "loss": 0.05559697, "memory(GiB)": 13.7, "step": 65920, "train_speed(iter/s)": 1.530799 }, { "acc": 0.98916664, "epoch": 30.899929692992735, "grad_norm": 2.8510239124298096, "learning_rate": 3.4848201355303374e-06, "loss": 0.02401651, "memory(GiB)": 13.7, "step": 65925, "train_speed(iter/s)": 1.530804 }, { "acc": 0.98708334, "epoch": 30.90227325990157, "grad_norm": 5.453239440917969, "learning_rate": 3.4840815208925285e-06, "loss": 0.0341527, "memory(GiB)": 13.7, "step": 65930, "train_speed(iter/s)": 1.53081 }, { "acc": 0.9846199, "epoch": 30.904616826810404, "grad_norm": 3.2566089630126953, "learning_rate": 3.483342942704438e-06, "loss": 0.05853578, "memory(GiB)": 13.7, "step": 65935, "train_speed(iter/s)": 1.530814 }, { "acc": 0.97327385, "epoch": 30.906960393719242, "grad_norm": 3.435302257537842, "learning_rate": 3.48260440098382e-06, "loss": 0.05600115, "memory(GiB)": 13.7, "step": 65940, "train_speed(iter/s)": 1.530815 }, { "acc": 0.99444447, "epoch": 30.909303960628076, "grad_norm": 0.003834274597465992, "learning_rate": 3.481865895748424e-06, "loss": 0.01155497, "memory(GiB)": 13.7, "step": 65945, "train_speed(iter/s)": 1.530823 }, { "acc": 0.99291668, "epoch": 30.91164752753691, "grad_norm": 0.02323737181723118, "learning_rate": 3.481127427016004e-06, "loss": 0.01118849, "memory(GiB)": 13.7, "step": 65950, "train_speed(iter/s)": 1.530828 }, { "acc": 0.98857222, "epoch": 30.913991094445745, "grad_norm": 4.196619033813477, "learning_rate": 3.48038899480431e-06, "loss": 0.04183549, "memory(GiB)": 13.7, "step": 65955, "train_speed(iter/s)": 1.530829 }, { "acc": 0.97265873, "epoch": 30.916334661354583, "grad_norm": 5.847602844238281, "learning_rate": 3.4796505991310885e-06, "loss": 0.08581715, "memory(GiB)": 13.7, "step": 65960, "train_speed(iter/s)": 1.530832 }, { "acc": 0.98041668, "epoch": 30.918678228263417, "grad_norm": 3.3844802379608154, "learning_rate": 3.478912240014091e-06, "loss": 0.05625221, "memory(GiB)": 13.7, "step": 65965, "train_speed(iter/s)": 1.530833 }, { "acc": 0.9864583, "epoch": 30.92102179517225, "grad_norm": 3.8470864295959473, "learning_rate": 3.478173917471065e-06, "loss": 0.02955007, "memory(GiB)": 13.7, "step": 65970, "train_speed(iter/s)": 1.53084 }, { "acc": 0.99057541, "epoch": 30.92336536208109, "grad_norm": 0.686811089515686, "learning_rate": 3.4774356315197556e-06, "loss": 0.03554207, "memory(GiB)": 13.7, "step": 65975, "train_speed(iter/s)": 1.530846 }, { "acc": 0.99409094, "epoch": 30.925708928989923, "grad_norm": 2.4422950744628906, "learning_rate": 3.476697382177911e-06, "loss": 0.03572637, "memory(GiB)": 13.7, "step": 65980, "train_speed(iter/s)": 1.530846 }, { "acc": 0.97965279, "epoch": 30.928052495898758, "grad_norm": 5.279123783111572, "learning_rate": 3.4759591694632748e-06, "loss": 0.04770183, "memory(GiB)": 13.7, "step": 65985, "train_speed(iter/s)": 1.530847 }, { "acc": 0.98612175, "epoch": 30.930396062807592, "grad_norm": 4.008093357086182, "learning_rate": 3.475220993393591e-06, "loss": 0.03435547, "memory(GiB)": 13.7, "step": 65990, "train_speed(iter/s)": 1.530849 }, { "acc": 0.98803034, "epoch": 30.93273962971643, "grad_norm": 4.122697353363037, "learning_rate": 3.474482853986606e-06, "loss": 0.04431665, "memory(GiB)": 13.7, "step": 65995, "train_speed(iter/s)": 1.530855 }, { "acc": 0.97875004, "epoch": 30.935083196625264, "grad_norm": 4.764847278594971, "learning_rate": 3.4737447512600587e-06, "loss": 0.03496583, "memory(GiB)": 13.7, "step": 66000, "train_speed(iter/s)": 1.530859 }, { "acc": 0.98895836, "epoch": 30.9374267635341, "grad_norm": 0.9033429026603699, "learning_rate": 3.47300668523169e-06, "loss": 0.05177712, "memory(GiB)": 13.7, "step": 66005, "train_speed(iter/s)": 1.530861 }, { "acc": 0.98475704, "epoch": 30.939770330442933, "grad_norm": 1.0142333507537842, "learning_rate": 3.472268655919244e-06, "loss": 0.03471961, "memory(GiB)": 13.7, "step": 66010, "train_speed(iter/s)": 1.530861 }, { "acc": 0.99250002, "epoch": 30.94211389735177, "grad_norm": 4.283344745635986, "learning_rate": 3.471530663340459e-06, "loss": 0.02189426, "memory(GiB)": 13.7, "step": 66015, "train_speed(iter/s)": 1.530866 }, { "acc": 0.97622023, "epoch": 30.944457464260605, "grad_norm": 2.353071689605713, "learning_rate": 3.470792707513072e-06, "loss": 0.0765527, "memory(GiB)": 13.7, "step": 66020, "train_speed(iter/s)": 1.530864 }, { "acc": 0.96363106, "epoch": 30.94680103116944, "grad_norm": 4.726710796356201, "learning_rate": 3.470054788454825e-06, "loss": 0.07012349, "memory(GiB)": 13.7, "step": 66025, "train_speed(iter/s)": 1.530864 }, { "acc": 0.98006945, "epoch": 30.949144598078274, "grad_norm": 7.393517017364502, "learning_rate": 3.4693169061834513e-06, "loss": 0.08893154, "memory(GiB)": 13.7, "step": 66030, "train_speed(iter/s)": 1.530867 }, { "acc": 0.99020824, "epoch": 30.95148816498711, "grad_norm": 2.0779693126678467, "learning_rate": 3.46857906071669e-06, "loss": 0.02687724, "memory(GiB)": 13.7, "step": 66035, "train_speed(iter/s)": 1.53087 }, { "acc": 0.9958333, "epoch": 30.953831731895946, "grad_norm": 0.003323365468531847, "learning_rate": 3.4678412520722767e-06, "loss": 0.01068592, "memory(GiB)": 13.7, "step": 66040, "train_speed(iter/s)": 1.530871 }, { "acc": 0.98291664, "epoch": 30.95617529880478, "grad_norm": 3.665426015853882, "learning_rate": 3.4671034802679425e-06, "loss": 0.08408961, "memory(GiB)": 13.7, "step": 66045, "train_speed(iter/s)": 1.530877 }, { "acc": 0.98197918, "epoch": 30.958518865713614, "grad_norm": 0.0054712435230612755, "learning_rate": 3.466365745321423e-06, "loss": 0.04751412, "memory(GiB)": 13.7, "step": 66050, "train_speed(iter/s)": 1.530878 }, { "acc": 0.97805367, "epoch": 30.960862432622452, "grad_norm": 4.792297840118408, "learning_rate": 3.4656280472504517e-06, "loss": 0.07780869, "memory(GiB)": 13.7, "step": 66055, "train_speed(iter/s)": 1.530881 }, { "acc": 0.97681541, "epoch": 30.963205999531286, "grad_norm": 7.333854675292969, "learning_rate": 3.4648903860727594e-06, "loss": 0.04553896, "memory(GiB)": 13.7, "step": 66060, "train_speed(iter/s)": 1.530881 }, { "acc": 0.9958334, "epoch": 30.96554956644012, "grad_norm": 0.07372380048036575, "learning_rate": 3.4641527618060766e-06, "loss": 0.01754816, "memory(GiB)": 13.7, "step": 66065, "train_speed(iter/s)": 1.530883 }, { "acc": 0.98952389, "epoch": 30.96789313334896, "grad_norm": 3.8369977474212646, "learning_rate": 3.4634151744681356e-06, "loss": 0.03561706, "memory(GiB)": 13.7, "step": 66070, "train_speed(iter/s)": 1.530889 }, { "acc": 0.9885416, "epoch": 30.970236700257793, "grad_norm": 1.890462875366211, "learning_rate": 3.462677624076662e-06, "loss": 0.05309595, "memory(GiB)": 13.7, "step": 66075, "train_speed(iter/s)": 1.530899 }, { "acc": 0.97520828, "epoch": 30.972580267166627, "grad_norm": 0.9402983784675598, "learning_rate": 3.4619401106493887e-06, "loss": 0.09181225, "memory(GiB)": 13.7, "step": 66080, "train_speed(iter/s)": 1.530906 }, { "acc": 0.97977676, "epoch": 30.97492383407546, "grad_norm": 3.6209282875061035, "learning_rate": 3.46120263420404e-06, "loss": 0.07315539, "memory(GiB)": 13.7, "step": 66085, "train_speed(iter/s)": 1.530911 }, { "acc": 0.98585854, "epoch": 30.9772674009843, "grad_norm": 1.0825942754745483, "learning_rate": 3.460465194758342e-06, "loss": 0.03643728, "memory(GiB)": 13.7, "step": 66090, "train_speed(iter/s)": 1.530909 }, { "acc": 0.97743053, "epoch": 30.979610967893134, "grad_norm": 6.708784580230713, "learning_rate": 3.459727792330021e-06, "loss": 0.06660038, "memory(GiB)": 13.7, "step": 66095, "train_speed(iter/s)": 1.530914 }, { "acc": 0.99541664, "epoch": 30.981954534801968, "grad_norm": 0.042979270219802856, "learning_rate": 3.4589904269368037e-06, "loss": 0.00976283, "memory(GiB)": 13.7, "step": 66100, "train_speed(iter/s)": 1.530917 }, { "acc": 0.98240528, "epoch": 30.984298101710802, "grad_norm": 6.98936653137207, "learning_rate": 3.45825309859641e-06, "loss": 0.07855828, "memory(GiB)": 13.7, "step": 66105, "train_speed(iter/s)": 1.53092 }, { "acc": 0.98874998, "epoch": 30.98664166861964, "grad_norm": 3.7773303985595703, "learning_rate": 3.457515807326567e-06, "loss": 0.01633786, "memory(GiB)": 13.7, "step": 66110, "train_speed(iter/s)": 1.530919 }, { "acc": 0.97163687, "epoch": 30.988985235528475, "grad_norm": 5.696508407592773, "learning_rate": 3.4567785531449944e-06, "loss": 0.06381973, "memory(GiB)": 13.7, "step": 66115, "train_speed(iter/s)": 1.530924 }, { "acc": 0.95583334, "epoch": 30.99132880243731, "grad_norm": 5.6826300621032715, "learning_rate": 3.4560413360694152e-06, "loss": 0.09636176, "memory(GiB)": 13.7, "step": 66120, "train_speed(iter/s)": 1.53093 }, { "acc": 0.99437504, "epoch": 30.993672369346143, "grad_norm": 4.1991777420043945, "learning_rate": 3.455304156117546e-06, "loss": 0.02132128, "memory(GiB)": 13.7, "step": 66125, "train_speed(iter/s)": 1.530925 }, { "acc": 0.96988096, "epoch": 30.99601593625498, "grad_norm": 0.004824526142328978, "learning_rate": 3.4545670133071096e-06, "loss": 0.07345768, "memory(GiB)": 13.7, "step": 66130, "train_speed(iter/s)": 1.53092 }, { "acc": 0.98708334, "epoch": 30.998359503163815, "grad_norm": 0.38278740644454956, "learning_rate": 3.453829907655823e-06, "loss": 0.05593452, "memory(GiB)": 13.7, "step": 66135, "train_speed(iter/s)": 1.530925 }, { "acc": 0.97979164, "epoch": 31.00070307007265, "grad_norm": 0.9603814482688904, "learning_rate": 3.4530928391814044e-06, "loss": 0.06326483, "memory(GiB)": 13.7, "step": 66140, "train_speed(iter/s)": 1.530906 }, { "acc": 0.98812504, "epoch": 31.003046636981487, "grad_norm": 1.5682618618011475, "learning_rate": 3.4523558079015713e-06, "loss": 0.02899988, "memory(GiB)": 13.7, "step": 66145, "train_speed(iter/s)": 1.530909 }, { "acc": 0.99437504, "epoch": 31.00539020389032, "grad_norm": 0.09217271208763123, "learning_rate": 3.4516188138340366e-06, "loss": 0.03122164, "memory(GiB)": 13.7, "step": 66150, "train_speed(iter/s)": 1.530915 }, { "acc": 0.97270832, "epoch": 31.007733770799156, "grad_norm": 1.841545581817627, "learning_rate": 3.450881856996518e-06, "loss": 0.06661873, "memory(GiB)": 13.7, "step": 66155, "train_speed(iter/s)": 1.530913 }, { "acc": 0.99375, "epoch": 31.01007733770799, "grad_norm": 2.803849458694458, "learning_rate": 3.4501449374067296e-06, "loss": 0.01622739, "memory(GiB)": 13.7, "step": 66160, "train_speed(iter/s)": 1.530916 }, { "acc": 0.98663197, "epoch": 31.012420904616828, "grad_norm": 3.6891796588897705, "learning_rate": 3.4494080550823846e-06, "loss": 0.03193882, "memory(GiB)": 13.7, "step": 66165, "train_speed(iter/s)": 1.530919 }, { "acc": 0.98110123, "epoch": 31.014764471525663, "grad_norm": 1.3907811641693115, "learning_rate": 3.4486712100411913e-06, "loss": 0.03554821, "memory(GiB)": 13.7, "step": 66170, "train_speed(iter/s)": 1.530919 }, { "acc": 0.98562498, "epoch": 31.017108038434497, "grad_norm": 5.586635589599609, "learning_rate": 3.447934402300865e-06, "loss": 0.0712664, "memory(GiB)": 13.7, "step": 66175, "train_speed(iter/s)": 1.53092 }, { "acc": 0.9833333, "epoch": 31.01945160534333, "grad_norm": 7.164902687072754, "learning_rate": 3.447197631879115e-06, "loss": 0.03668921, "memory(GiB)": 13.7, "step": 66180, "train_speed(iter/s)": 1.530925 }, { "acc": 0.99093132, "epoch": 31.02179517225217, "grad_norm": 2.1644070148468018, "learning_rate": 3.446460898793651e-06, "loss": 0.04534131, "memory(GiB)": 13.7, "step": 66185, "train_speed(iter/s)": 1.530923 }, { "acc": 0.99541664, "epoch": 31.024138739161003, "grad_norm": 1.0697252750396729, "learning_rate": 3.4457242030621825e-06, "loss": 0.02077431, "memory(GiB)": 13.7, "step": 66190, "train_speed(iter/s)": 1.530924 }, { "acc": 0.98599205, "epoch": 31.026482306069838, "grad_norm": 3.4221231937408447, "learning_rate": 3.444987544702415e-06, "loss": 0.04157267, "memory(GiB)": 13.7, "step": 66195, "train_speed(iter/s)": 1.530924 }, { "acc": 0.96759806, "epoch": 31.028825872978672, "grad_norm": 9.664064407348633, "learning_rate": 3.444250923732057e-06, "loss": 0.08375191, "memory(GiB)": 13.7, "step": 66200, "train_speed(iter/s)": 1.530927 }, { "acc": 0.98125, "epoch": 31.03116943988751, "grad_norm": 0.01283396128565073, "learning_rate": 3.443514340168816e-06, "loss": 0.07490532, "memory(GiB)": 13.7, "step": 66205, "train_speed(iter/s)": 1.530931 }, { "acc": 0.98500004, "epoch": 31.033513006796344, "grad_norm": 5.361358165740967, "learning_rate": 3.4427777940303924e-06, "loss": 0.03490344, "memory(GiB)": 13.7, "step": 66210, "train_speed(iter/s)": 1.530932 }, { "acc": 0.97833328, "epoch": 31.03585657370518, "grad_norm": 3.6234288215637207, "learning_rate": 3.442041285334495e-06, "loss": 0.04026199, "memory(GiB)": 13.7, "step": 66215, "train_speed(iter/s)": 1.530936 }, { "acc": 0.98529758, "epoch": 31.038200140614016, "grad_norm": 0.04951193183660507, "learning_rate": 3.441304814098824e-06, "loss": 0.02491253, "memory(GiB)": 13.7, "step": 66220, "train_speed(iter/s)": 1.530935 }, { "acc": 0.98874998, "epoch": 31.04054370752285, "grad_norm": 1.2921913862228394, "learning_rate": 3.4405683803410826e-06, "loss": 0.05218883, "memory(GiB)": 13.7, "step": 66225, "train_speed(iter/s)": 1.530941 }, { "acc": 0.99357376, "epoch": 31.042887274431685, "grad_norm": 2.0103394985198975, "learning_rate": 3.4398319840789736e-06, "loss": 0.03136947, "memory(GiB)": 13.7, "step": 66230, "train_speed(iter/s)": 1.53095 }, { "acc": 0.99383144, "epoch": 31.04523084134052, "grad_norm": 3.140237331390381, "learning_rate": 3.439095625330196e-06, "loss": 0.0502836, "memory(GiB)": 13.7, "step": 66235, "train_speed(iter/s)": 1.530955 }, { "acc": 0.97696428, "epoch": 31.047574408249357, "grad_norm": 4.297683238983154, "learning_rate": 3.4383593041124497e-06, "loss": 0.06196692, "memory(GiB)": 13.7, "step": 66240, "train_speed(iter/s)": 1.530957 }, { "acc": 0.97564487, "epoch": 31.04991797515819, "grad_norm": 5.159426212310791, "learning_rate": 3.4376230204434357e-06, "loss": 0.07254982, "memory(GiB)": 13.7, "step": 66245, "train_speed(iter/s)": 1.530958 }, { "acc": 0.97800598, "epoch": 31.052261542067026, "grad_norm": 2.1795802116394043, "learning_rate": 3.4368867743408483e-06, "loss": 0.04882504, "memory(GiB)": 13.7, "step": 66250, "train_speed(iter/s)": 1.530958 }, { "acc": 0.99077377, "epoch": 31.05460510897586, "grad_norm": 0.592535674571991, "learning_rate": 3.4361505658223855e-06, "loss": 0.02195204, "memory(GiB)": 13.7, "step": 66255, "train_speed(iter/s)": 1.530961 }, { "acc": 0.99258928, "epoch": 31.056948675884698, "grad_norm": 3.396071672439575, "learning_rate": 3.435414394905745e-06, "loss": 0.05167564, "memory(GiB)": 13.7, "step": 66260, "train_speed(iter/s)": 1.530963 }, { "acc": 0.9936554, "epoch": 31.059292242793532, "grad_norm": 0.0019039689796045423, "learning_rate": 3.43467826160862e-06, "loss": 0.01944059, "memory(GiB)": 13.7, "step": 66265, "train_speed(iter/s)": 1.530967 }, { "acc": 0.9885416, "epoch": 31.061635809702366, "grad_norm": 5.2682576179504395, "learning_rate": 3.4339421659487042e-06, "loss": 0.09288667, "memory(GiB)": 13.7, "step": 66270, "train_speed(iter/s)": 1.530971 }, { "acc": 0.98406429, "epoch": 31.0639793766112, "grad_norm": 2.942044496536255, "learning_rate": 3.433206107943694e-06, "loss": 0.07327417, "memory(GiB)": 13.7, "step": 66275, "train_speed(iter/s)": 1.530977 }, { "acc": 0.99008923, "epoch": 31.06632294352004, "grad_norm": 1.8488328456878662, "learning_rate": 3.432470087611278e-06, "loss": 0.04831821, "memory(GiB)": 13.7, "step": 66280, "train_speed(iter/s)": 1.530984 }, { "acc": 0.9958334, "epoch": 31.068666510428873, "grad_norm": 4.1433258056640625, "learning_rate": 3.431734104969152e-06, "loss": 0.01256168, "memory(GiB)": 13.7, "step": 66285, "train_speed(iter/s)": 1.530992 }, { "acc": 0.98427086, "epoch": 31.071010077337707, "grad_norm": 3.202444314956665, "learning_rate": 3.4309981600350053e-06, "loss": 0.04333174, "memory(GiB)": 13.7, "step": 66290, "train_speed(iter/s)": 1.530993 }, { "acc": 0.97633009, "epoch": 31.073353644246545, "grad_norm": 2.788048028945923, "learning_rate": 3.430262252826525e-06, "loss": 0.07236753, "memory(GiB)": 13.7, "step": 66295, "train_speed(iter/s)": 1.531001 }, { "acc": 0.97069969, "epoch": 31.07569721115538, "grad_norm": 3.134941816329956, "learning_rate": 3.4295263833614006e-06, "loss": 0.09990322, "memory(GiB)": 13.7, "step": 66300, "train_speed(iter/s)": 1.531005 }, { "acc": 0.99125004, "epoch": 31.078040778064214, "grad_norm": 3.872013807296753, "learning_rate": 3.4287905516573215e-06, "loss": 0.0290443, "memory(GiB)": 13.7, "step": 66305, "train_speed(iter/s)": 1.531 }, { "acc": 0.98640871, "epoch": 31.080384344973048, "grad_norm": 2.368130922317505, "learning_rate": 3.428054757731974e-06, "loss": 0.04903291, "memory(GiB)": 13.7, "step": 66310, "train_speed(iter/s)": 1.531007 }, { "acc": 0.98411465, "epoch": 31.082727911881886, "grad_norm": 0.004469091538339853, "learning_rate": 3.4273190016030442e-06, "loss": 0.04051903, "memory(GiB)": 13.7, "step": 66315, "train_speed(iter/s)": 1.531011 }, { "acc": 0.96794529, "epoch": 31.08507147879072, "grad_norm": 3.6135451793670654, "learning_rate": 3.4265832832882178e-06, "loss": 0.11309547, "memory(GiB)": 13.7, "step": 66320, "train_speed(iter/s)": 1.531014 }, { "acc": 0.98395834, "epoch": 31.087415045699554, "grad_norm": 0.0020273709669709206, "learning_rate": 3.425847602805178e-06, "loss": 0.02849472, "memory(GiB)": 13.7, "step": 66325, "train_speed(iter/s)": 1.531014 }, { "acc": 0.96923609, "epoch": 31.08975861260839, "grad_norm": 7.460824012756348, "learning_rate": 3.4251119601716104e-06, "loss": 0.08711008, "memory(GiB)": 13.7, "step": 66330, "train_speed(iter/s)": 1.531017 }, { "acc": 0.9957386, "epoch": 31.092102179517227, "grad_norm": 2.4519104957580566, "learning_rate": 3.424376355405194e-06, "loss": 0.02848147, "memory(GiB)": 13.7, "step": 66335, "train_speed(iter/s)": 1.53102 }, { "acc": 0.98740082, "epoch": 31.09444574642606, "grad_norm": 2.361931085586548, "learning_rate": 3.423640788523613e-06, "loss": 0.0779191, "memory(GiB)": 13.7, "step": 66340, "train_speed(iter/s)": 1.531027 }, { "acc": 0.99437504, "epoch": 31.096789313334895, "grad_norm": 0.3523460626602173, "learning_rate": 3.422905259544546e-06, "loss": 0.01683319, "memory(GiB)": 13.7, "step": 66345, "train_speed(iter/s)": 1.531031 }, { "acc": 0.9927084, "epoch": 31.09913288024373, "grad_norm": 1.6273990869522095, "learning_rate": 3.4221697684856743e-06, "loss": 0.0396825, "memory(GiB)": 13.7, "step": 66350, "train_speed(iter/s)": 1.53103 }, { "acc": 0.9833334, "epoch": 31.101476447152567, "grad_norm": 4.361639499664307, "learning_rate": 3.4214343153646753e-06, "loss": 0.06421708, "memory(GiB)": 13.7, "step": 66355, "train_speed(iter/s)": 1.531033 }, { "acc": 0.98062496, "epoch": 31.1038200140614, "grad_norm": 5.433799743652344, "learning_rate": 3.4206989001992285e-06, "loss": 0.06595262, "memory(GiB)": 13.7, "step": 66360, "train_speed(iter/s)": 1.531033 }, { "acc": 0.96888256, "epoch": 31.106163580970236, "grad_norm": 3.3970491886138916, "learning_rate": 3.4199635230070107e-06, "loss": 0.11868267, "memory(GiB)": 13.7, "step": 66365, "train_speed(iter/s)": 1.531034 }, { "acc": 0.98571434, "epoch": 31.10850714787907, "grad_norm": 0.25933319330215454, "learning_rate": 3.4192281838056957e-06, "loss": 0.0354732, "memory(GiB)": 13.7, "step": 66370, "train_speed(iter/s)": 1.531035 }, { "acc": 0.96520834, "epoch": 31.110850714787908, "grad_norm": 5.646584510803223, "learning_rate": 3.418492882612962e-06, "loss": 0.05484966, "memory(GiB)": 13.7, "step": 66375, "train_speed(iter/s)": 1.531039 }, { "acc": 0.99437504, "epoch": 31.113194281696742, "grad_norm": 0.29737064242362976, "learning_rate": 3.4177576194464823e-06, "loss": 0.01916798, "memory(GiB)": 13.7, "step": 66380, "train_speed(iter/s)": 1.531042 }, { "acc": 0.99437504, "epoch": 31.115537848605577, "grad_norm": 2.1330044269561768, "learning_rate": 3.4170223943239276e-06, "loss": 0.03794338, "memory(GiB)": 13.7, "step": 66385, "train_speed(iter/s)": 1.531042 }, { "acc": 0.99260788, "epoch": 31.117881415514415, "grad_norm": 1.7043479681015015, "learning_rate": 3.416287207262974e-06, "loss": 0.02748864, "memory(GiB)": 13.7, "step": 66390, "train_speed(iter/s)": 1.531046 }, { "acc": 0.98536701, "epoch": 31.12022498242325, "grad_norm": 3.6643474102020264, "learning_rate": 3.415552058281291e-06, "loss": 0.04004447, "memory(GiB)": 13.7, "step": 66395, "train_speed(iter/s)": 1.531051 }, { "acc": 0.98506947, "epoch": 31.122568549332083, "grad_norm": 6.5056328773498535, "learning_rate": 3.4148169473965487e-06, "loss": 0.05232502, "memory(GiB)": 13.7, "step": 66400, "train_speed(iter/s)": 1.53105 }, { "acc": 0.990625, "epoch": 31.124912116240917, "grad_norm": 2.2533018589019775, "learning_rate": 3.414081874626419e-06, "loss": 0.04064547, "memory(GiB)": 13.7, "step": 66405, "train_speed(iter/s)": 1.531055 }, { "acc": 0.98895836, "epoch": 31.127255683149755, "grad_norm": 3.483914613723755, "learning_rate": 3.413346839988569e-06, "loss": 0.03065835, "memory(GiB)": 13.7, "step": 66410, "train_speed(iter/s)": 1.53106 }, { "acc": 0.98258934, "epoch": 31.12959925005859, "grad_norm": 0.5210438370704651, "learning_rate": 3.412611843500667e-06, "loss": 0.03753876, "memory(GiB)": 13.7, "step": 66415, "train_speed(iter/s)": 1.531064 }, { "acc": 0.9885416, "epoch": 31.131942816967424, "grad_norm": 1.6838008165359497, "learning_rate": 3.4118768851803793e-06, "loss": 0.02854401, "memory(GiB)": 13.7, "step": 66420, "train_speed(iter/s)": 1.531062 }, { "acc": 0.98488102, "epoch": 31.13428638387626, "grad_norm": 0.003749049035832286, "learning_rate": 3.4111419650453725e-06, "loss": 0.02348838, "memory(GiB)": 13.7, "step": 66425, "train_speed(iter/s)": 1.531061 }, { "acc": 0.99788532, "epoch": 31.136629950785096, "grad_norm": 0.01167680136859417, "learning_rate": 3.41040708311331e-06, "loss": 0.0380125, "memory(GiB)": 13.7, "step": 66430, "train_speed(iter/s)": 1.531066 }, { "acc": 0.98812494, "epoch": 31.13897351769393, "grad_norm": 0.1899862289428711, "learning_rate": 3.4096722394018595e-06, "loss": 0.03493846, "memory(GiB)": 13.7, "step": 66435, "train_speed(iter/s)": 1.531066 }, { "acc": 0.95994186, "epoch": 31.141317084602765, "grad_norm": 12.194941520690918, "learning_rate": 3.408937433928681e-06, "loss": 0.10754931, "memory(GiB)": 13.7, "step": 66440, "train_speed(iter/s)": 1.531073 }, { "acc": 0.99118423, "epoch": 31.1436606515116, "grad_norm": 0.004228314850479364, "learning_rate": 3.408202666711437e-06, "loss": 0.02334909, "memory(GiB)": 13.7, "step": 66445, "train_speed(iter/s)": 1.531077 }, { "acc": 0.98354168, "epoch": 31.146004218420437, "grad_norm": 5.466512680053711, "learning_rate": 3.407467937767791e-06, "loss": 0.08039171, "memory(GiB)": 13.7, "step": 66450, "train_speed(iter/s)": 1.531081 }, { "acc": 0.98833332, "epoch": 31.14834778532927, "grad_norm": 5.033187389373779, "learning_rate": 3.406733247115403e-06, "loss": 0.02972583, "memory(GiB)": 13.7, "step": 66455, "train_speed(iter/s)": 1.531089 }, { "acc": 0.98217258, "epoch": 31.150691352238105, "grad_norm": 0.02437693625688553, "learning_rate": 3.4059985947719298e-06, "loss": 0.04963407, "memory(GiB)": 13.7, "step": 66460, "train_speed(iter/s)": 1.531091 }, { "acc": 0.9791667, "epoch": 31.153034919146943, "grad_norm": 0.3856769800186157, "learning_rate": 3.4052639807550338e-06, "loss": 0.03304861, "memory(GiB)": 13.7, "step": 66465, "train_speed(iter/s)": 1.531096 }, { "acc": 0.99458332, "epoch": 31.155378486055778, "grad_norm": 1.6858490705490112, "learning_rate": 3.4045294050823697e-06, "loss": 0.01382385, "memory(GiB)": 13.7, "step": 66470, "train_speed(iter/s)": 1.531095 }, { "acc": 0.9729166, "epoch": 31.157722052964612, "grad_norm": 1.5913732051849365, "learning_rate": 3.4037948677715954e-06, "loss": 0.05337127, "memory(GiB)": 13.7, "step": 66475, "train_speed(iter/s)": 1.531095 }, { "acc": 0.98328123, "epoch": 31.160065619873446, "grad_norm": 0.05074707791209221, "learning_rate": 3.4030603688403684e-06, "loss": 0.03763404, "memory(GiB)": 13.7, "step": 66480, "train_speed(iter/s)": 1.5311 }, { "acc": 0.98738098, "epoch": 31.162409186782284, "grad_norm": 5.190632343292236, "learning_rate": 3.402325908306341e-06, "loss": 0.0668065, "memory(GiB)": 13.7, "step": 66485, "train_speed(iter/s)": 1.531105 }, { "acc": 0.97840271, "epoch": 31.16475275369112, "grad_norm": 2.122244119644165, "learning_rate": 3.401591486187168e-06, "loss": 0.04139288, "memory(GiB)": 13.7, "step": 66490, "train_speed(iter/s)": 1.53111 }, { "acc": 0.98599205, "epoch": 31.167096320599953, "grad_norm": 2.239964246749878, "learning_rate": 3.4008571025005037e-06, "loss": 0.06077368, "memory(GiB)": 13.7, "step": 66495, "train_speed(iter/s)": 1.53111 }, { "acc": 0.96307545, "epoch": 31.169439887508787, "grad_norm": 16.807096481323242, "learning_rate": 3.400122757264e-06, "loss": 0.07932066, "memory(GiB)": 13.7, "step": 66500, "train_speed(iter/s)": 1.531115 }, { "acc": 0.98041668, "epoch": 31.171783454417625, "grad_norm": 5.826572418212891, "learning_rate": 3.399388450495306e-06, "loss": 0.08399732, "memory(GiB)": 13.7, "step": 66505, "train_speed(iter/s)": 1.531121 }, { "acc": 0.99204445, "epoch": 31.17412702132646, "grad_norm": 1.3992552757263184, "learning_rate": 3.398654182212075e-06, "loss": 0.02748058, "memory(GiB)": 13.7, "step": 66510, "train_speed(iter/s)": 1.531121 }, { "acc": 0.98833332, "epoch": 31.176470588235293, "grad_norm": 5.319697856903076, "learning_rate": 3.397919952431954e-06, "loss": 0.04805304, "memory(GiB)": 13.7, "step": 66515, "train_speed(iter/s)": 1.531126 }, { "acc": 0.97583332, "epoch": 31.178814155144128, "grad_norm": 0.9603935480117798, "learning_rate": 3.397185761172592e-06, "loss": 0.04902533, "memory(GiB)": 13.7, "step": 66520, "train_speed(iter/s)": 1.531125 }, { "acc": 0.97479172, "epoch": 31.181157722052966, "grad_norm": 5.556033134460449, "learning_rate": 3.396451608451638e-06, "loss": 0.06308724, "memory(GiB)": 13.7, "step": 66525, "train_speed(iter/s)": 1.531127 }, { "acc": 0.98675594, "epoch": 31.1835012889618, "grad_norm": 3.5351309776306152, "learning_rate": 3.3957174942867364e-06, "loss": 0.07604203, "memory(GiB)": 13.7, "step": 66530, "train_speed(iter/s)": 1.531129 }, { "acc": 0.98477678, "epoch": 31.185844855870634, "grad_norm": 2.1886165142059326, "learning_rate": 3.3949834186955355e-06, "loss": 0.04716522, "memory(GiB)": 13.7, "step": 66535, "train_speed(iter/s)": 1.531134 }, { "acc": 0.98562498, "epoch": 31.18818842277947, "grad_norm": 4.1028923988342285, "learning_rate": 3.3942493816956795e-06, "loss": 0.05304777, "memory(GiB)": 13.7, "step": 66540, "train_speed(iter/s)": 1.531138 }, { "acc": 0.9927084, "epoch": 31.190531989688306, "grad_norm": 4.14574670791626, "learning_rate": 3.3935153833048105e-06, "loss": 0.03244329, "memory(GiB)": 13.7, "step": 66545, "train_speed(iter/s)": 1.53114 }, { "acc": 0.97303028, "epoch": 31.19287555659714, "grad_norm": 0.9367280602455139, "learning_rate": 3.392781423540571e-06, "loss": 0.05958637, "memory(GiB)": 13.7, "step": 66550, "train_speed(iter/s)": 1.531141 }, { "acc": 0.99375, "epoch": 31.195219123505975, "grad_norm": 1.4922808408737183, "learning_rate": 3.3920475024206057e-06, "loss": 0.01382023, "memory(GiB)": 13.7, "step": 66555, "train_speed(iter/s)": 1.531141 }, { "acc": 0.98083334, "epoch": 31.197562690414813, "grad_norm": 3.3038926124572754, "learning_rate": 3.391313619962553e-06, "loss": 0.04737706, "memory(GiB)": 13.7, "step": 66560, "train_speed(iter/s)": 1.531141 }, { "acc": 0.9760088, "epoch": 31.199906257323647, "grad_norm": 6.280139923095703, "learning_rate": 3.3905797761840557e-06, "loss": 0.08288899, "memory(GiB)": 13.7, "step": 66565, "train_speed(iter/s)": 1.531142 }, { "acc": 0.99375, "epoch": 31.20224982423248, "grad_norm": 3.270883560180664, "learning_rate": 3.3898459711027516e-06, "loss": 0.03892409, "memory(GiB)": 13.7, "step": 66570, "train_speed(iter/s)": 1.53114 }, { "acc": 0.98125, "epoch": 31.204593391141316, "grad_norm": 6.026435852050781, "learning_rate": 3.389112204736277e-06, "loss": 0.06045461, "memory(GiB)": 13.7, "step": 66575, "train_speed(iter/s)": 1.531144 }, { "acc": 0.97208328, "epoch": 31.206936958050154, "grad_norm": 3.528886318206787, "learning_rate": 3.388378477102273e-06, "loss": 0.08361529, "memory(GiB)": 13.7, "step": 66580, "train_speed(iter/s)": 1.531148 }, { "acc": 0.99296875, "epoch": 31.209280524958988, "grad_norm": 2.8171417713165283, "learning_rate": 3.3876447882183755e-06, "loss": 0.05000125, "memory(GiB)": 13.7, "step": 66585, "train_speed(iter/s)": 1.531143 }, { "acc": 0.99125004, "epoch": 31.211624091867822, "grad_norm": 1.977912425994873, "learning_rate": 3.3869111381022175e-06, "loss": 0.03013645, "memory(GiB)": 13.7, "step": 66590, "train_speed(iter/s)": 1.531141 }, { "acc": 0.97562504, "epoch": 31.213967658776657, "grad_norm": 3.6719534397125244, "learning_rate": 3.3861775267714326e-06, "loss": 0.05974427, "memory(GiB)": 13.7, "step": 66595, "train_speed(iter/s)": 1.531141 }, { "acc": 0.98520832, "epoch": 31.216311225685494, "grad_norm": 5.461551189422607, "learning_rate": 3.3854439542436586e-06, "loss": 0.05439991, "memory(GiB)": 13.7, "step": 66600, "train_speed(iter/s)": 1.531145 }, { "acc": 0.98459282, "epoch": 31.21865479259433, "grad_norm": 2.9270942211151123, "learning_rate": 3.3847104205365243e-06, "loss": 0.05076261, "memory(GiB)": 13.7, "step": 66605, "train_speed(iter/s)": 1.53115 }, { "acc": 0.98083334, "epoch": 31.220998359503163, "grad_norm": 3.8068554401397705, "learning_rate": 3.3839769256676646e-06, "loss": 0.02852494, "memory(GiB)": 13.7, "step": 66610, "train_speed(iter/s)": 1.531153 }, { "acc": 0.9885417, "epoch": 31.223341926411997, "grad_norm": 3.897976875305176, "learning_rate": 3.383243469654709e-06, "loss": 0.02645148, "memory(GiB)": 13.7, "step": 66615, "train_speed(iter/s)": 1.531156 }, { "acc": 0.98187504, "epoch": 31.225685493320835, "grad_norm": 1.2487455606460571, "learning_rate": 3.3825100525152864e-06, "loss": 0.0452168, "memory(GiB)": 13.7, "step": 66620, "train_speed(iter/s)": 1.531159 }, { "acc": 0.97550592, "epoch": 31.22802906022967, "grad_norm": 0.021336616948246956, "learning_rate": 3.3817766742670282e-06, "loss": 0.06980386, "memory(GiB)": 13.7, "step": 66625, "train_speed(iter/s)": 1.531164 }, { "acc": 0.99083328, "epoch": 31.230372627138504, "grad_norm": 3.6176116466522217, "learning_rate": 3.3810433349275606e-06, "loss": 0.03192392, "memory(GiB)": 13.7, "step": 66630, "train_speed(iter/s)": 1.531169 }, { "acc": 0.97895832, "epoch": 31.23271619404734, "grad_norm": 2.6407485008239746, "learning_rate": 3.380310034514509e-06, "loss": 0.09172519, "memory(GiB)": 13.7, "step": 66635, "train_speed(iter/s)": 1.531175 }, { "acc": 0.9929018, "epoch": 31.235059760956176, "grad_norm": 1.6464983224868774, "learning_rate": 3.379576773045503e-06, "loss": 0.0289284, "memory(GiB)": 13.7, "step": 66640, "train_speed(iter/s)": 1.531176 }, { "acc": 0.9947917, "epoch": 31.23740332786501, "grad_norm": 0.9693289995193481, "learning_rate": 3.378843550538166e-06, "loss": 0.02518818, "memory(GiB)": 13.7, "step": 66645, "train_speed(iter/s)": 1.531176 }, { "acc": 0.96681547, "epoch": 31.239746894773845, "grad_norm": 6.754671573638916, "learning_rate": 3.3781103670101224e-06, "loss": 0.09876322, "memory(GiB)": 13.7, "step": 66650, "train_speed(iter/s)": 1.531181 }, { "acc": 0.98410721, "epoch": 31.242090461682682, "grad_norm": 5.8270721435546875, "learning_rate": 3.3773772224789952e-06, "loss": 0.03188113, "memory(GiB)": 13.7, "step": 66655, "train_speed(iter/s)": 1.531187 }, { "acc": 0.97900867, "epoch": 31.244434028591517, "grad_norm": 5.316017150878906, "learning_rate": 3.376644116962408e-06, "loss": 0.06483853, "memory(GiB)": 13.7, "step": 66660, "train_speed(iter/s)": 1.53119 }, { "acc": 0.98666668, "epoch": 31.24677759550035, "grad_norm": 5.891927719116211, "learning_rate": 3.3759110504779813e-06, "loss": 0.05129, "memory(GiB)": 13.7, "step": 66665, "train_speed(iter/s)": 1.531196 }, { "acc": 0.98490534, "epoch": 31.249121162409185, "grad_norm": 3.3970046043395996, "learning_rate": 3.375178023043335e-06, "loss": 0.09777288, "memory(GiB)": 13.7, "step": 66670, "train_speed(iter/s)": 1.531207 }, { "acc": 0.97767859, "epoch": 31.251464729318023, "grad_norm": 2.4008290767669678, "learning_rate": 3.3744450346760906e-06, "loss": 0.07769537, "memory(GiB)": 13.7, "step": 66675, "train_speed(iter/s)": 1.531207 }, { "acc": 0.98249998, "epoch": 31.253808296226858, "grad_norm": 3.151494026184082, "learning_rate": 3.3737120853938636e-06, "loss": 0.03321041, "memory(GiB)": 13.7, "step": 66680, "train_speed(iter/s)": 1.531208 }, { "acc": 0.9958334, "epoch": 31.256151863135692, "grad_norm": 0.009181381203234196, "learning_rate": 3.3729791752142753e-06, "loss": 0.03137792, "memory(GiB)": 13.7, "step": 66685, "train_speed(iter/s)": 1.531208 }, { "acc": 0.98041668, "epoch": 31.258495430044526, "grad_norm": 3.2280313968658447, "learning_rate": 3.372246304154941e-06, "loss": 0.05908276, "memory(GiB)": 13.7, "step": 66690, "train_speed(iter/s)": 1.531211 }, { "acc": 0.99229164, "epoch": 31.260838996953364, "grad_norm": 4.115399360656738, "learning_rate": 3.371513472233475e-06, "loss": 0.05498376, "memory(GiB)": 13.7, "step": 66695, "train_speed(iter/s)": 1.531223 }, { "acc": 0.98999996, "epoch": 31.2631825638622, "grad_norm": 1.2786173820495605, "learning_rate": 3.3707806794674935e-06, "loss": 0.04412857, "memory(GiB)": 13.7, "step": 66700, "train_speed(iter/s)": 1.531222 }, { "acc": 0.9885416, "epoch": 31.265526130771033, "grad_norm": 0.41806212067604065, "learning_rate": 3.370047925874611e-06, "loss": 0.05899804, "memory(GiB)": 13.7, "step": 66705, "train_speed(iter/s)": 1.53122 }, { "acc": 0.99065475, "epoch": 31.26786969767987, "grad_norm": 1.5292648077011108, "learning_rate": 3.369315211472441e-06, "loss": 0.03248276, "memory(GiB)": 13.7, "step": 66710, "train_speed(iter/s)": 1.531222 }, { "acc": 0.9874671, "epoch": 31.270213264588705, "grad_norm": 3.2273824214935303, "learning_rate": 3.368582536278594e-06, "loss": 0.07669584, "memory(GiB)": 13.7, "step": 66715, "train_speed(iter/s)": 1.53122 }, { "acc": 0.9763195, "epoch": 31.27255683149754, "grad_norm": 5.034430980682373, "learning_rate": 3.3678499003106814e-06, "loss": 0.07719573, "memory(GiB)": 13.7, "step": 66720, "train_speed(iter/s)": 1.53122 }, { "acc": 1.0, "epoch": 31.274900398406373, "grad_norm": 0.007062874268740416, "learning_rate": 3.367117303586312e-06, "loss": 0.01198409, "memory(GiB)": 13.7, "step": 66725, "train_speed(iter/s)": 1.531222 }, { "acc": 0.97682161, "epoch": 31.27724396531521, "grad_norm": 4.016053676605225, "learning_rate": 3.366384746123098e-06, "loss": 0.07112778, "memory(GiB)": 13.7, "step": 66730, "train_speed(iter/s)": 1.531219 }, { "acc": 0.97727175, "epoch": 31.279587532224046, "grad_norm": 4.251003742218018, "learning_rate": 3.3656522279386466e-06, "loss": 0.06523215, "memory(GiB)": 13.7, "step": 66735, "train_speed(iter/s)": 1.531228 }, { "acc": 0.9958334, "epoch": 31.28193109913288, "grad_norm": 5.7202863693237305, "learning_rate": 3.3649197490505638e-06, "loss": 0.03294935, "memory(GiB)": 13.7, "step": 66740, "train_speed(iter/s)": 1.53123 }, { "acc": 0.99611111, "epoch": 31.284274666041714, "grad_norm": 0.001921052928082645, "learning_rate": 3.3641873094764572e-06, "loss": 0.02091734, "memory(GiB)": 13.7, "step": 66745, "train_speed(iter/s)": 1.531238 }, { "acc": 0.97504606, "epoch": 31.286618232950552, "grad_norm": 5.941891193389893, "learning_rate": 3.3634549092339326e-06, "loss": 0.08108671, "memory(GiB)": 13.7, "step": 66750, "train_speed(iter/s)": 1.53124 }, { "acc": 0.98453522, "epoch": 31.288961799859386, "grad_norm": 1.8981802463531494, "learning_rate": 3.362722548340593e-06, "loss": 0.04834294, "memory(GiB)": 13.7, "step": 66755, "train_speed(iter/s)": 1.531244 }, { "acc": 0.996875, "epoch": 31.29130536676822, "grad_norm": 3.141944646835327, "learning_rate": 3.3619902268140424e-06, "loss": 0.02671046, "memory(GiB)": 13.7, "step": 66760, "train_speed(iter/s)": 1.531244 }, { "acc": 0.99092264, "epoch": 31.293648933677055, "grad_norm": 2.85803484916687, "learning_rate": 3.361257944671886e-06, "loss": 0.04618813, "memory(GiB)": 13.7, "step": 66765, "train_speed(iter/s)": 1.531248 }, { "acc": 0.98631945, "epoch": 31.295992500585893, "grad_norm": 0.6660735011100769, "learning_rate": 3.3605257019317204e-06, "loss": 0.0632197, "memory(GiB)": 13.7, "step": 66770, "train_speed(iter/s)": 1.531254 }, { "acc": 0.99395828, "epoch": 31.298336067494727, "grad_norm": 1.8294562101364136, "learning_rate": 3.359793498611151e-06, "loss": 0.0481723, "memory(GiB)": 13.7, "step": 66775, "train_speed(iter/s)": 1.531248 }, { "acc": 0.99278851, "epoch": 31.30067963440356, "grad_norm": 3.332048177719116, "learning_rate": 3.3590613347277744e-06, "loss": 0.01956102, "memory(GiB)": 13.7, "step": 66780, "train_speed(iter/s)": 1.531249 }, { "acc": 0.96670456, "epoch": 31.3030232013124, "grad_norm": 5.58969259262085, "learning_rate": 3.358329210299192e-06, "loss": 0.11781332, "memory(GiB)": 13.7, "step": 66785, "train_speed(iter/s)": 1.531253 }, { "acc": 0.98154755, "epoch": 31.305366768221234, "grad_norm": 2.7107677459716797, "learning_rate": 3.3575971253430012e-06, "loss": 0.04418107, "memory(GiB)": 13.7, "step": 66790, "train_speed(iter/s)": 1.531253 }, { "acc": 0.98916664, "epoch": 31.307710335130068, "grad_norm": 3.0649542808532715, "learning_rate": 3.3568650798767977e-06, "loss": 0.04139928, "memory(GiB)": 13.7, "step": 66795, "train_speed(iter/s)": 1.531257 }, { "acc": 0.99222755, "epoch": 31.310053902038902, "grad_norm": 2.6169819831848145, "learning_rate": 3.3561330739181765e-06, "loss": 0.03145718, "memory(GiB)": 13.7, "step": 66800, "train_speed(iter/s)": 1.531262 }, { "acc": 0.9697916, "epoch": 31.31239746894774, "grad_norm": 7.12312126159668, "learning_rate": 3.355401107484735e-06, "loss": 0.04965511, "memory(GiB)": 13.7, "step": 66805, "train_speed(iter/s)": 1.531263 }, { "acc": 0.98046875, "epoch": 31.314741035856574, "grad_norm": 1.8406119346618652, "learning_rate": 3.354669180594065e-06, "loss": 0.06040825, "memory(GiB)": 13.7, "step": 66810, "train_speed(iter/s)": 1.531265 }, { "acc": 0.9822916, "epoch": 31.31708460276541, "grad_norm": 7.0020341873168945, "learning_rate": 3.3539372932637625e-06, "loss": 0.06839582, "memory(GiB)": 13.7, "step": 66815, "train_speed(iter/s)": 1.531263 }, { "acc": 0.9979167, "epoch": 31.319428169674243, "grad_norm": 3.4751436710357666, "learning_rate": 3.3532054455114183e-06, "loss": 0.01648848, "memory(GiB)": 13.7, "step": 66820, "train_speed(iter/s)": 1.531266 }, { "acc": 0.9921875, "epoch": 31.32177173658308, "grad_norm": 3.459679365158081, "learning_rate": 3.352473637354622e-06, "loss": 0.01823867, "memory(GiB)": 13.7, "step": 66825, "train_speed(iter/s)": 1.531269 }, { "acc": 0.98865623, "epoch": 31.324115303491915, "grad_norm": 6.358877182006836, "learning_rate": 3.351741868810966e-06, "loss": 0.05489464, "memory(GiB)": 13.7, "step": 66830, "train_speed(iter/s)": 1.531272 }, { "acc": 0.996875, "epoch": 31.32645887040075, "grad_norm": 0.8900787234306335, "learning_rate": 3.3510101398980407e-06, "loss": 0.01814332, "memory(GiB)": 13.7, "step": 66835, "train_speed(iter/s)": 1.531275 }, { "acc": 0.98354168, "epoch": 31.328802437309584, "grad_norm": 4.824779033660889, "learning_rate": 3.3502784506334306e-06, "loss": 0.05019698, "memory(GiB)": 13.7, "step": 66840, "train_speed(iter/s)": 1.531279 }, { "acc": 0.98857136, "epoch": 31.33114600421842, "grad_norm": 0.005225155036896467, "learning_rate": 3.3495468010347247e-06, "loss": 0.05569482, "memory(GiB)": 13.7, "step": 66845, "train_speed(iter/s)": 1.531279 }, { "acc": 0.96550598, "epoch": 31.333489571127256, "grad_norm": 3.359419822692871, "learning_rate": 3.348815191119511e-06, "loss": 0.13174236, "memory(GiB)": 13.7, "step": 66850, "train_speed(iter/s)": 1.531278 }, { "acc": 0.990625, "epoch": 31.33583313803609, "grad_norm": 0.9908991456031799, "learning_rate": 3.3480836209053724e-06, "loss": 0.03120678, "memory(GiB)": 13.7, "step": 66855, "train_speed(iter/s)": 1.531283 }, { "acc": 0.98623514, "epoch": 31.338176704944924, "grad_norm": 3.2734718322753906, "learning_rate": 3.3473520904098966e-06, "loss": 0.04046677, "memory(GiB)": 13.7, "step": 66860, "train_speed(iter/s)": 1.531287 }, { "acc": 0.98288689, "epoch": 31.340520271853762, "grad_norm": 0.5482867956161499, "learning_rate": 3.346620599650665e-06, "loss": 0.08876958, "memory(GiB)": 13.7, "step": 66865, "train_speed(iter/s)": 1.531291 }, { "acc": 0.99072914, "epoch": 31.342863838762597, "grad_norm": 2.503166437149048, "learning_rate": 3.3458891486452604e-06, "loss": 0.03167501, "memory(GiB)": 13.7, "step": 66870, "train_speed(iter/s)": 1.531293 }, { "acc": 0.99229164, "epoch": 31.34520740567143, "grad_norm": 2.270794153213501, "learning_rate": 3.3451577374112663e-06, "loss": 0.02030712, "memory(GiB)": 13.7, "step": 66875, "train_speed(iter/s)": 1.531296 }, { "acc": 0.990625, "epoch": 31.34755097258027, "grad_norm": 1.2889554500579834, "learning_rate": 3.3444263659662612e-06, "loss": 0.02572767, "memory(GiB)": 13.7, "step": 66880, "train_speed(iter/s)": 1.531301 }, { "acc": 0.98239584, "epoch": 31.349894539489103, "grad_norm": 5.483858585357666, "learning_rate": 3.3436950343278245e-06, "loss": 0.04482273, "memory(GiB)": 13.7, "step": 66885, "train_speed(iter/s)": 1.531303 }, { "acc": 0.98676472, "epoch": 31.352238106397937, "grad_norm": 5.085337162017822, "learning_rate": 3.3429637425135374e-06, "loss": 0.0330249, "memory(GiB)": 13.7, "step": 66890, "train_speed(iter/s)": 1.531311 }, { "acc": 0.97270832, "epoch": 31.35458167330677, "grad_norm": 5.651975631713867, "learning_rate": 3.3422324905409765e-06, "loss": 0.08384172, "memory(GiB)": 13.7, "step": 66895, "train_speed(iter/s)": 1.531319 }, { "acc": 0.990625, "epoch": 31.35692524021561, "grad_norm": 1.4805058240890503, "learning_rate": 3.341501278427717e-06, "loss": 0.02116172, "memory(GiB)": 13.7, "step": 66900, "train_speed(iter/s)": 1.531325 }, { "acc": 0.98071423, "epoch": 31.359268807124444, "grad_norm": 2.572152614593506, "learning_rate": 3.340770106191339e-06, "loss": 0.03537966, "memory(GiB)": 13.7, "step": 66905, "train_speed(iter/s)": 1.531322 }, { "acc": 0.98520832, "epoch": 31.361612374033278, "grad_norm": 4.398787021636963, "learning_rate": 3.340038973849414e-06, "loss": 0.06590426, "memory(GiB)": 13.7, "step": 66910, "train_speed(iter/s)": 1.531331 }, { "acc": 0.97369051, "epoch": 31.363955940942112, "grad_norm": 2.1828181743621826, "learning_rate": 3.3393078814195156e-06, "loss": 0.10116695, "memory(GiB)": 13.7, "step": 66915, "train_speed(iter/s)": 1.531329 }, { "acc": 0.97446423, "epoch": 31.36629950785095, "grad_norm": 5.070863723754883, "learning_rate": 3.3385768289192212e-06, "loss": 0.06142941, "memory(GiB)": 13.7, "step": 66920, "train_speed(iter/s)": 1.531335 }, { "acc": 0.98529758, "epoch": 31.368643074759785, "grad_norm": 4.038718223571777, "learning_rate": 3.337845816366098e-06, "loss": 0.06817834, "memory(GiB)": 13.7, "step": 66925, "train_speed(iter/s)": 1.531333 }, { "acc": 0.99041672, "epoch": 31.37098664166862, "grad_norm": 3.837627649307251, "learning_rate": 3.3371148437777185e-06, "loss": 0.03982593, "memory(GiB)": 13.7, "step": 66930, "train_speed(iter/s)": 1.531337 }, { "acc": 0.97686958, "epoch": 31.373330208577453, "grad_norm": 4.327943801879883, "learning_rate": 3.336383911171655e-06, "loss": 0.06221889, "memory(GiB)": 13.7, "step": 66935, "train_speed(iter/s)": 1.531342 }, { "acc": 0.97446346, "epoch": 31.37567377548629, "grad_norm": 9.016170501708984, "learning_rate": 3.335653018565475e-06, "loss": 0.0938949, "memory(GiB)": 13.7, "step": 66940, "train_speed(iter/s)": 1.531345 }, { "acc": 0.98791676, "epoch": 31.378017342395125, "grad_norm": 3.3690030574798584, "learning_rate": 3.334922165976746e-06, "loss": 0.03431405, "memory(GiB)": 13.7, "step": 66945, "train_speed(iter/s)": 1.531347 }, { "acc": 0.99446859, "epoch": 31.38036090930396, "grad_norm": 4.269958019256592, "learning_rate": 3.3341913534230375e-06, "loss": 0.04395141, "memory(GiB)": 13.7, "step": 66950, "train_speed(iter/s)": 1.531345 }, { "acc": 0.9979167, "epoch": 31.382704476212798, "grad_norm": 0.04108527675271034, "learning_rate": 3.3334605809219133e-06, "loss": 0.01015782, "memory(GiB)": 13.7, "step": 66955, "train_speed(iter/s)": 1.531347 }, { "acc": 0.9822917, "epoch": 31.385048043121632, "grad_norm": 4.364016532897949, "learning_rate": 3.332729848490942e-06, "loss": 0.0411242, "memory(GiB)": 13.7, "step": 66960, "train_speed(iter/s)": 1.531349 }, { "acc": 0.99444447, "epoch": 31.387391610030466, "grad_norm": 1.4884768724441528, "learning_rate": 3.3319991561476856e-06, "loss": 0.02046032, "memory(GiB)": 13.7, "step": 66965, "train_speed(iter/s)": 1.531351 }, { "acc": 0.996875, "epoch": 31.3897351769393, "grad_norm": 0.013211402110755444, "learning_rate": 3.331268503909707e-06, "loss": 0.02699497, "memory(GiB)": 13.7, "step": 66970, "train_speed(iter/s)": 1.531357 }, { "acc": 0.99291668, "epoch": 31.39207874384814, "grad_norm": 0.03127771243453026, "learning_rate": 3.3305378917945695e-06, "loss": 0.02212129, "memory(GiB)": 13.7, "step": 66975, "train_speed(iter/s)": 1.531359 }, { "acc": 0.97833328, "epoch": 31.394422310756973, "grad_norm": 4.657836437225342, "learning_rate": 3.3298073198198354e-06, "loss": 0.08318608, "memory(GiB)": 13.7, "step": 66980, "train_speed(iter/s)": 1.531363 }, { "acc": 0.98008928, "epoch": 31.396765877665807, "grad_norm": 2.634608268737793, "learning_rate": 3.3290767880030642e-06, "loss": 0.04848001, "memory(GiB)": 13.7, "step": 66985, "train_speed(iter/s)": 1.531363 }, { "acc": 0.98120365, "epoch": 31.39910944457464, "grad_norm": 4.6080780029296875, "learning_rate": 3.3283462963618167e-06, "loss": 0.06341274, "memory(GiB)": 13.7, "step": 66990, "train_speed(iter/s)": 1.531363 }, { "acc": 0.98374996, "epoch": 31.40145301148348, "grad_norm": 2.680602550506592, "learning_rate": 3.3276158449136512e-06, "loss": 0.0336038, "memory(GiB)": 13.7, "step": 66995, "train_speed(iter/s)": 1.531371 }, { "acc": 0.98458328, "epoch": 31.403796578392313, "grad_norm": 3.655278444290161, "learning_rate": 3.3268854336761253e-06, "loss": 0.03834205, "memory(GiB)": 13.7, "step": 67000, "train_speed(iter/s)": 1.531373 }, { "acc": 0.9842803, "epoch": 31.406140145301148, "grad_norm": 4.253831386566162, "learning_rate": 3.326155062666794e-06, "loss": 0.05445416, "memory(GiB)": 13.7, "step": 67005, "train_speed(iter/s)": 1.531373 }, { "acc": 0.97707796, "epoch": 31.408483712209982, "grad_norm": 3.1504929065704346, "learning_rate": 3.325424731903215e-06, "loss": 0.03651886, "memory(GiB)": 13.7, "step": 67010, "train_speed(iter/s)": 1.53137 }, { "acc": 0.98363972, "epoch": 31.41082727911882, "grad_norm": 2.497349500656128, "learning_rate": 3.3246944414029426e-06, "loss": 0.05170507, "memory(GiB)": 13.7, "step": 67015, "train_speed(iter/s)": 1.531377 }, { "acc": 0.98604164, "epoch": 31.413170846027654, "grad_norm": 2.9655349254608154, "learning_rate": 3.3239641911835293e-06, "loss": 0.03781357, "memory(GiB)": 13.7, "step": 67020, "train_speed(iter/s)": 1.531381 }, { "acc": 0.9864583, "epoch": 31.41551441293649, "grad_norm": 3.0929155349731445, "learning_rate": 3.323233981262529e-06, "loss": 0.05084293, "memory(GiB)": 13.7, "step": 67025, "train_speed(iter/s)": 1.53138 }, { "acc": 0.99125004, "epoch": 31.417857979845323, "grad_norm": 3.6057212352752686, "learning_rate": 3.322503811657493e-06, "loss": 0.02423783, "memory(GiB)": 13.7, "step": 67030, "train_speed(iter/s)": 1.531388 }, { "acc": 0.98562498, "epoch": 31.42020154675416, "grad_norm": 4.8556294441223145, "learning_rate": 3.321773682385975e-06, "loss": 0.04210899, "memory(GiB)": 13.7, "step": 67035, "train_speed(iter/s)": 1.531389 }, { "acc": 0.98979168, "epoch": 31.422545113662995, "grad_norm": 0.7982640862464905, "learning_rate": 3.321043593465521e-06, "loss": 0.03841426, "memory(GiB)": 13.7, "step": 67040, "train_speed(iter/s)": 1.531391 }, { "acc": 0.9916667, "epoch": 31.42488868057183, "grad_norm": 1.577068567276001, "learning_rate": 3.3203135449136836e-06, "loss": 0.0282522, "memory(GiB)": 13.7, "step": 67045, "train_speed(iter/s)": 1.531395 }, { "acc": 0.9729167, "epoch": 31.427232247480667, "grad_norm": 4.829746723175049, "learning_rate": 3.3195835367480063e-06, "loss": 0.07044285, "memory(GiB)": 13.7, "step": 67050, "train_speed(iter/s)": 1.531408 }, { "acc": 0.97833328, "epoch": 31.4295758143895, "grad_norm": 4.007284164428711, "learning_rate": 3.31885356898604e-06, "loss": 0.06194473, "memory(GiB)": 13.7, "step": 67055, "train_speed(iter/s)": 1.531407 }, { "acc": 0.97374458, "epoch": 31.431919381298336, "grad_norm": 0.9698230028152466, "learning_rate": 3.318123641645328e-06, "loss": 0.05313968, "memory(GiB)": 13.7, "step": 67060, "train_speed(iter/s)": 1.531409 }, { "acc": 0.99300594, "epoch": 31.43426294820717, "grad_norm": 0.05220389366149902, "learning_rate": 3.317393754743417e-06, "loss": 0.05054325, "memory(GiB)": 13.7, "step": 67065, "train_speed(iter/s)": 1.531413 }, { "acc": 0.98729162, "epoch": 31.436606515116008, "grad_norm": 3.857163190841675, "learning_rate": 3.3166639082978514e-06, "loss": 0.0455208, "memory(GiB)": 13.7, "step": 67070, "train_speed(iter/s)": 1.531417 }, { "acc": 0.98485126, "epoch": 31.438950082024842, "grad_norm": 0.00035906146513298154, "learning_rate": 3.3159341023261726e-06, "loss": 0.05458514, "memory(GiB)": 13.7, "step": 67075, "train_speed(iter/s)": 1.531422 }, { "acc": 0.98291664, "epoch": 31.441293648933677, "grad_norm": 0.004369405563920736, "learning_rate": 3.315204336845925e-06, "loss": 0.06581282, "memory(GiB)": 13.7, "step": 67080, "train_speed(iter/s)": 1.531422 }, { "acc": 0.99092264, "epoch": 31.44363721584251, "grad_norm": 0.0156144630163908, "learning_rate": 3.314474611874649e-06, "loss": 0.04474227, "memory(GiB)": 13.7, "step": 67085, "train_speed(iter/s)": 1.531427 }, { "acc": 0.98687496, "epoch": 31.44598078275135, "grad_norm": 3.90395450592041, "learning_rate": 3.313744927429882e-06, "loss": 0.03954914, "memory(GiB)": 13.7, "step": 67090, "train_speed(iter/s)": 1.531428 }, { "acc": 0.99666672, "epoch": 31.448324349660183, "grad_norm": 2.447706937789917, "learning_rate": 3.3130152835291673e-06, "loss": 0.02683489, "memory(GiB)": 13.7, "step": 67095, "train_speed(iter/s)": 1.531431 }, { "acc": 0.97476196, "epoch": 31.450667916569017, "grad_norm": 5.143670082092285, "learning_rate": 3.312285680190041e-06, "loss": 0.05426636, "memory(GiB)": 13.7, "step": 67100, "train_speed(iter/s)": 1.531432 }, { "acc": 0.98395834, "epoch": 31.45301148347785, "grad_norm": 3.1574225425720215, "learning_rate": 3.3115561174300396e-06, "loss": 0.04316896, "memory(GiB)": 13.7, "step": 67105, "train_speed(iter/s)": 1.531435 }, { "acc": 0.98500004, "epoch": 31.45535505038669, "grad_norm": 1.266236424446106, "learning_rate": 3.3108265952667016e-06, "loss": 0.03825819, "memory(GiB)": 13.7, "step": 67110, "train_speed(iter/s)": 1.53144 }, { "acc": 0.9864583, "epoch": 31.457698617295524, "grad_norm": 0.23121526837348938, "learning_rate": 3.3100971137175613e-06, "loss": 0.02973913, "memory(GiB)": 13.7, "step": 67115, "train_speed(iter/s)": 1.531441 }, { "acc": 0.97277775, "epoch": 31.460042184204358, "grad_norm": 3.640238046646118, "learning_rate": 3.3093676728001523e-06, "loss": 0.07998532, "memory(GiB)": 13.7, "step": 67120, "train_speed(iter/s)": 1.531445 }, { "acc": 0.98923607, "epoch": 31.462385751113196, "grad_norm": 0.0024704537354409695, "learning_rate": 3.3086382725320094e-06, "loss": 0.03296981, "memory(GiB)": 13.7, "step": 67125, "train_speed(iter/s)": 1.531444 }, { "acc": 0.97701387, "epoch": 31.46472931802203, "grad_norm": 3.047013282775879, "learning_rate": 3.307908912930665e-06, "loss": 0.05402989, "memory(GiB)": 13.7, "step": 67130, "train_speed(iter/s)": 1.531447 }, { "acc": 0.98883934, "epoch": 31.467072884930865, "grad_norm": 0.008809209801256657, "learning_rate": 3.3071795940136476e-06, "loss": 0.04799064, "memory(GiB)": 13.7, "step": 67135, "train_speed(iter/s)": 1.53145 }, { "acc": 0.98104172, "epoch": 31.4694164518397, "grad_norm": 6.009537696838379, "learning_rate": 3.306450315798491e-06, "loss": 0.05252122, "memory(GiB)": 13.7, "step": 67140, "train_speed(iter/s)": 1.531453 }, { "acc": 0.98458328, "epoch": 31.471760018748537, "grad_norm": 0.7694525718688965, "learning_rate": 3.3057210783027236e-06, "loss": 0.0195903, "memory(GiB)": 13.7, "step": 67145, "train_speed(iter/s)": 1.531456 }, { "acc": 0.99375, "epoch": 31.47410358565737, "grad_norm": 1.295498251914978, "learning_rate": 3.304991881543873e-06, "loss": 0.03666788, "memory(GiB)": 13.7, "step": 67150, "train_speed(iter/s)": 1.531456 }, { "acc": 0.98104162, "epoch": 31.476447152566205, "grad_norm": 4.641827583312988, "learning_rate": 3.3042627255394677e-06, "loss": 0.03821672, "memory(GiB)": 13.7, "step": 67155, "train_speed(iter/s)": 1.531457 }, { "acc": 0.98916664, "epoch": 31.47879071947504, "grad_norm": 2.5426785945892334, "learning_rate": 3.303533610307034e-06, "loss": 0.02720212, "memory(GiB)": 13.7, "step": 67160, "train_speed(iter/s)": 1.531456 }, { "acc": 0.99020824, "epoch": 31.481134286383877, "grad_norm": 0.8287060856819153, "learning_rate": 3.302804535864097e-06, "loss": 0.03157915, "memory(GiB)": 13.7, "step": 67165, "train_speed(iter/s)": 1.531454 }, { "acc": 0.99020834, "epoch": 31.48347785329271, "grad_norm": 0.03347962349653244, "learning_rate": 3.302075502228183e-06, "loss": 0.01849793, "memory(GiB)": 13.7, "step": 67170, "train_speed(iter/s)": 1.531456 }, { "acc": 0.99562502, "epoch": 31.485821420201546, "grad_norm": 1.9091122150421143, "learning_rate": 3.3013465094168136e-06, "loss": 0.02712044, "memory(GiB)": 13.7, "step": 67175, "train_speed(iter/s)": 1.531451 }, { "acc": 0.98386364, "epoch": 31.48816498711038, "grad_norm": 5.860531330108643, "learning_rate": 3.3006175574475106e-06, "loss": 0.04607504, "memory(GiB)": 13.7, "step": 67180, "train_speed(iter/s)": 1.531455 }, { "acc": 0.98425598, "epoch": 31.490508554019218, "grad_norm": 3.4943528175354004, "learning_rate": 3.299888646337798e-06, "loss": 0.05300912, "memory(GiB)": 13.7, "step": 67185, "train_speed(iter/s)": 1.531463 }, { "acc": 0.98979168, "epoch": 31.492852120928053, "grad_norm": 0.005056716036051512, "learning_rate": 3.299159776105195e-06, "loss": 0.03608336, "memory(GiB)": 13.7, "step": 67190, "train_speed(iter/s)": 1.531465 }, { "acc": 0.9880208, "epoch": 31.495195687836887, "grad_norm": 0.5979735851287842, "learning_rate": 3.298430946767222e-06, "loss": 0.04620976, "memory(GiB)": 13.7, "step": 67195, "train_speed(iter/s)": 1.53147 }, { "acc": 0.98664265, "epoch": 31.497539254745725, "grad_norm": 4.592175006866455, "learning_rate": 3.2977021583413973e-06, "loss": 0.0452675, "memory(GiB)": 13.7, "step": 67200, "train_speed(iter/s)": 1.531469 }, { "acc": 0.99875002, "epoch": 31.49988282165456, "grad_norm": 1.7981585264205933, "learning_rate": 3.296973410845237e-06, "loss": 0.01854357, "memory(GiB)": 13.7, "step": 67205, "train_speed(iter/s)": 1.531471 }, { "acc": 0.97833328, "epoch": 31.502226388563393, "grad_norm": 8.925804138183594, "learning_rate": 3.2962447042962627e-06, "loss": 0.09896878, "memory(GiB)": 13.7, "step": 67210, "train_speed(iter/s)": 1.53147 }, { "acc": 0.9947917, "epoch": 31.504569955472228, "grad_norm": 1.5945720672607422, "learning_rate": 3.295516038711985e-06, "loss": 0.02866743, "memory(GiB)": 13.7, "step": 67215, "train_speed(iter/s)": 1.531478 }, { "acc": 0.98500004, "epoch": 31.506913522381065, "grad_norm": 3.2737529277801514, "learning_rate": 3.294787414109921e-06, "loss": 0.03126138, "memory(GiB)": 13.7, "step": 67220, "train_speed(iter/s)": 1.531476 }, { "acc": 0.98166122, "epoch": 31.5092570892899, "grad_norm": 3.6061365604400635, "learning_rate": 3.2940588305075823e-06, "loss": 0.03149867, "memory(GiB)": 13.7, "step": 67225, "train_speed(iter/s)": 1.531476 }, { "acc": 0.98583336, "epoch": 31.511600656198734, "grad_norm": 5.844050407409668, "learning_rate": 3.2933302879224843e-06, "loss": 0.02953306, "memory(GiB)": 13.7, "step": 67230, "train_speed(iter/s)": 1.531478 }, { "acc": 0.9916667, "epoch": 31.51394422310757, "grad_norm": 1.966516137123108, "learning_rate": 3.2926017863721367e-06, "loss": 0.0197186, "memory(GiB)": 13.7, "step": 67235, "train_speed(iter/s)": 1.531477 }, { "acc": 0.9838541, "epoch": 31.516287790016406, "grad_norm": 2.553293228149414, "learning_rate": 3.291873325874052e-06, "loss": 0.04715813, "memory(GiB)": 13.7, "step": 67240, "train_speed(iter/s)": 1.531481 }, { "acc": 0.98812504, "epoch": 31.51863135692524, "grad_norm": 6.2746686935424805, "learning_rate": 3.2911449064457403e-06, "loss": 0.04177653, "memory(GiB)": 13.7, "step": 67245, "train_speed(iter/s)": 1.531489 }, { "acc": 0.98812504, "epoch": 31.520974923834075, "grad_norm": 1.6452579498291016, "learning_rate": 3.2904165281047074e-06, "loss": 0.04428874, "memory(GiB)": 13.7, "step": 67250, "train_speed(iter/s)": 1.531495 }, { "acc": 0.98104172, "epoch": 31.52331849074291, "grad_norm": 3.8724112510681152, "learning_rate": 3.289688190868465e-06, "loss": 0.06009139, "memory(GiB)": 13.7, "step": 67255, "train_speed(iter/s)": 1.531498 }, { "acc": 0.99750004, "epoch": 31.525662057651747, "grad_norm": 2.6472158432006836, "learning_rate": 3.288959894754518e-06, "loss": 0.02561039, "memory(GiB)": 13.7, "step": 67260, "train_speed(iter/s)": 1.531501 }, { "acc": 0.96550598, "epoch": 31.52800562456058, "grad_norm": 3.1065540313720703, "learning_rate": 3.2882316397803717e-06, "loss": 0.09361676, "memory(GiB)": 13.7, "step": 67265, "train_speed(iter/s)": 1.531506 }, { "acc": 0.9885417, "epoch": 31.530349191469416, "grad_norm": 1.260231375694275, "learning_rate": 3.287503425963531e-06, "loss": 0.04049758, "memory(GiB)": 13.7, "step": 67270, "train_speed(iter/s)": 1.531511 }, { "acc": 0.98395834, "epoch": 31.532692758378253, "grad_norm": 1.6457467079162598, "learning_rate": 3.2867752533215e-06, "loss": 0.02820568, "memory(GiB)": 13.7, "step": 67275, "train_speed(iter/s)": 1.531514 }, { "acc": 0.984375, "epoch": 31.535036325287088, "grad_norm": 0.6723731160163879, "learning_rate": 3.286047121871782e-06, "loss": 0.05578731, "memory(GiB)": 13.7, "step": 67280, "train_speed(iter/s)": 1.531525 }, { "acc": 0.9802084, "epoch": 31.537379892195922, "grad_norm": 1.322008728981018, "learning_rate": 3.2853190316318796e-06, "loss": 0.04713837, "memory(GiB)": 13.7, "step": 67285, "train_speed(iter/s)": 1.531534 }, { "acc": 0.98803034, "epoch": 31.539723459104756, "grad_norm": 5.697359561920166, "learning_rate": 3.284590982619293e-06, "loss": 0.06590698, "memory(GiB)": 13.7, "step": 67290, "train_speed(iter/s)": 1.531545 }, { "acc": 0.98656254, "epoch": 31.542067026013594, "grad_norm": 1.8109279870986938, "learning_rate": 3.283862974851523e-06, "loss": 0.05537354, "memory(GiB)": 13.7, "step": 67295, "train_speed(iter/s)": 1.531549 }, { "acc": 0.98812504, "epoch": 31.54441059292243, "grad_norm": 0.02267337404191494, "learning_rate": 3.283135008346065e-06, "loss": 0.01268905, "memory(GiB)": 13.7, "step": 67300, "train_speed(iter/s)": 1.531551 }, { "acc": 0.9885417, "epoch": 31.546754159831263, "grad_norm": 4.537913799285889, "learning_rate": 3.2824070831204215e-06, "loss": 0.03204764, "memory(GiB)": 13.7, "step": 67305, "train_speed(iter/s)": 1.531552 }, { "acc": 0.97458334, "epoch": 31.549097726740097, "grad_norm": 6.48788595199585, "learning_rate": 3.281679199192085e-06, "loss": 0.05946255, "memory(GiB)": 13.7, "step": 67310, "train_speed(iter/s)": 1.531555 }, { "acc": 0.96479168, "epoch": 31.551441293648935, "grad_norm": 0.8538147211074829, "learning_rate": 3.280951356578555e-06, "loss": 0.06288847, "memory(GiB)": 13.7, "step": 67315, "train_speed(iter/s)": 1.531562 }, { "acc": 0.98937492, "epoch": 31.55378486055777, "grad_norm": 4.674624919891357, "learning_rate": 3.280223555297326e-06, "loss": 0.03849945, "memory(GiB)": 13.7, "step": 67320, "train_speed(iter/s)": 1.531565 }, { "acc": 0.97583332, "epoch": 31.556128427466604, "grad_norm": 2.8857240676879883, "learning_rate": 3.2794957953658896e-06, "loss": 0.03593324, "memory(GiB)": 13.7, "step": 67325, "train_speed(iter/s)": 1.531571 }, { "acc": 0.9864584, "epoch": 31.558471994375438, "grad_norm": 4.860183238983154, "learning_rate": 3.2787680768017418e-06, "loss": 0.04926647, "memory(GiB)": 13.7, "step": 67330, "train_speed(iter/s)": 1.531573 }, { "acc": 0.990625, "epoch": 31.560815561284276, "grad_norm": 0.05155720189213753, "learning_rate": 3.2780403996223735e-06, "loss": 0.01839329, "memory(GiB)": 13.7, "step": 67335, "train_speed(iter/s)": 1.531577 }, { "acc": 0.97895832, "epoch": 31.56315912819311, "grad_norm": 2.3713858127593994, "learning_rate": 3.277312763845274e-06, "loss": 0.02549107, "memory(GiB)": 13.7, "step": 67340, "train_speed(iter/s)": 1.53158 }, { "acc": 0.98872604, "epoch": 31.565502695101944, "grad_norm": 0.9331409335136414, "learning_rate": 3.2765851694879346e-06, "loss": 0.03034794, "memory(GiB)": 13.7, "step": 67345, "train_speed(iter/s)": 1.531589 }, { "acc": 0.9829402, "epoch": 31.56784626201078, "grad_norm": 1.603677749633789, "learning_rate": 3.275857616567845e-06, "loss": 0.06631236, "memory(GiB)": 13.7, "step": 67350, "train_speed(iter/s)": 1.53159 }, { "acc": 0.97874994, "epoch": 31.570189828919617, "grad_norm": 0.07150733470916748, "learning_rate": 3.2751301051024914e-06, "loss": 0.02959241, "memory(GiB)": 13.7, "step": 67355, "train_speed(iter/s)": 1.531589 }, { "acc": 0.98258934, "epoch": 31.57253339582845, "grad_norm": 6.197556972503662, "learning_rate": 3.274402635109362e-06, "loss": 0.0306914, "memory(GiB)": 13.7, "step": 67360, "train_speed(iter/s)": 1.531591 }, { "acc": 0.99050598, "epoch": 31.574876962737285, "grad_norm": 1.0121382474899292, "learning_rate": 3.2736752066059434e-06, "loss": 0.03582881, "memory(GiB)": 13.7, "step": 67365, "train_speed(iter/s)": 1.531594 }, { "acc": 0.98479166, "epoch": 31.577220529646123, "grad_norm": 2.766953468322754, "learning_rate": 3.272947819609719e-06, "loss": 0.04406654, "memory(GiB)": 13.7, "step": 67370, "train_speed(iter/s)": 1.5316 }, { "acc": 0.99375, "epoch": 31.579564096554957, "grad_norm": 0.3401328921318054, "learning_rate": 3.2722204741381737e-06, "loss": 0.03085477, "memory(GiB)": 13.7, "step": 67375, "train_speed(iter/s)": 1.5316 }, { "acc": 0.96885414, "epoch": 31.58190766346379, "grad_norm": 4.406409740447998, "learning_rate": 3.2714931702087914e-06, "loss": 0.06567014, "memory(GiB)": 13.7, "step": 67380, "train_speed(iter/s)": 1.531604 }, { "acc": 0.99187498, "epoch": 31.584251230372626, "grad_norm": 4.833829402923584, "learning_rate": 3.270765907839052e-06, "loss": 0.03153111, "memory(GiB)": 13.7, "step": 67385, "train_speed(iter/s)": 1.531607 }, { "acc": 0.98311014, "epoch": 31.586594797281464, "grad_norm": 5.173568248748779, "learning_rate": 3.270038687046438e-06, "loss": 0.05761908, "memory(GiB)": 13.7, "step": 67390, "train_speed(iter/s)": 1.531609 }, { "acc": 0.98680916, "epoch": 31.588938364190298, "grad_norm": 3.587906837463379, "learning_rate": 3.269311507848429e-06, "loss": 0.03434795, "memory(GiB)": 13.7, "step": 67395, "train_speed(iter/s)": 1.53161 }, { "acc": 0.97250004, "epoch": 31.591281931099132, "grad_norm": 4.751775741577148, "learning_rate": 3.2685843702625025e-06, "loss": 0.09143882, "memory(GiB)": 13.7, "step": 67400, "train_speed(iter/s)": 1.531612 }, { "acc": 0.9895833, "epoch": 31.593625498007967, "grad_norm": 3.751978874206543, "learning_rate": 3.267857274306139e-06, "loss": 0.02943388, "memory(GiB)": 13.7, "step": 67405, "train_speed(iter/s)": 1.531613 }, { "acc": 0.98395834, "epoch": 31.595969064916805, "grad_norm": 4.487793445587158, "learning_rate": 3.267130219996815e-06, "loss": 0.04318442, "memory(GiB)": 13.7, "step": 67410, "train_speed(iter/s)": 1.531614 }, { "acc": 0.98125, "epoch": 31.59831263182564, "grad_norm": 1.236458659172058, "learning_rate": 3.2664032073520054e-06, "loss": 0.061893, "memory(GiB)": 13.7, "step": 67415, "train_speed(iter/s)": 1.531619 }, { "acc": 0.9864584, "epoch": 31.600656198734473, "grad_norm": 1.609106183052063, "learning_rate": 3.2656762363891863e-06, "loss": 0.02441509, "memory(GiB)": 13.7, "step": 67420, "train_speed(iter/s)": 1.531621 }, { "acc": 0.98083334, "epoch": 31.602999765643307, "grad_norm": 2.3219029903411865, "learning_rate": 3.2649493071258315e-06, "loss": 0.03094749, "memory(GiB)": 13.7, "step": 67425, "train_speed(iter/s)": 1.531623 }, { "acc": 0.98874998, "epoch": 31.605343332552145, "grad_norm": 0.009983137249946594, "learning_rate": 3.2642224195794113e-06, "loss": 0.02414979, "memory(GiB)": 13.7, "step": 67430, "train_speed(iter/s)": 1.531629 }, { "acc": 0.9854167, "epoch": 31.60768689946098, "grad_norm": 2.1072115898132324, "learning_rate": 3.2634955737674022e-06, "loss": 0.02158461, "memory(GiB)": 13.7, "step": 67435, "train_speed(iter/s)": 1.531634 }, { "acc": 0.98386364, "epoch": 31.610030466369814, "grad_norm": 3.56402850151062, "learning_rate": 3.2627687697072725e-06, "loss": 0.04831024, "memory(GiB)": 13.7, "step": 67440, "train_speed(iter/s)": 1.531638 }, { "acc": 0.98190556, "epoch": 31.612374033278652, "grad_norm": 1.6219234466552734, "learning_rate": 3.2620420074164923e-06, "loss": 0.04407905, "memory(GiB)": 13.7, "step": 67445, "train_speed(iter/s)": 1.531641 }, { "acc": 0.98464279, "epoch": 31.614717600187486, "grad_norm": 2.4431354999542236, "learning_rate": 3.261315286912531e-06, "loss": 0.05943358, "memory(GiB)": 13.7, "step": 67450, "train_speed(iter/s)": 1.531648 }, { "acc": 0.99215279, "epoch": 31.61706116709632, "grad_norm": 3.9554359912872314, "learning_rate": 3.260588608212856e-06, "loss": 0.03459929, "memory(GiB)": 13.7, "step": 67455, "train_speed(iter/s)": 1.531651 }, { "acc": 0.99196434, "epoch": 31.619404734005155, "grad_norm": 2.3482048511505127, "learning_rate": 3.2598619713349356e-06, "loss": 0.03958978, "memory(GiB)": 13.7, "step": 67460, "train_speed(iter/s)": 1.531653 }, { "acc": 0.9864584, "epoch": 31.621748300913993, "grad_norm": 2.736525535583496, "learning_rate": 3.2591353762962365e-06, "loss": 0.03605599, "memory(GiB)": 13.7, "step": 67465, "train_speed(iter/s)": 1.531653 }, { "acc": 0.984375, "epoch": 31.624091867822827, "grad_norm": 3.2901008129119873, "learning_rate": 3.2584088231142206e-06, "loss": 0.03263795, "memory(GiB)": 13.7, "step": 67470, "train_speed(iter/s)": 1.53165 }, { "acc": 0.97690477, "epoch": 31.62643543473166, "grad_norm": 4.733397960662842, "learning_rate": 3.257682311806352e-06, "loss": 0.05459796, "memory(GiB)": 13.7, "step": 67475, "train_speed(iter/s)": 1.531655 }, { "acc": 0.98343754, "epoch": 31.628779001640495, "grad_norm": 6.364009857177734, "learning_rate": 3.2569558423900972e-06, "loss": 0.06377991, "memory(GiB)": 13.7, "step": 67480, "train_speed(iter/s)": 1.531658 }, { "acc": 0.98788376, "epoch": 31.631122568549333, "grad_norm": 3.3327646255493164, "learning_rate": 3.256229414882914e-06, "loss": 0.03913393, "memory(GiB)": 13.7, "step": 67485, "train_speed(iter/s)": 1.531668 }, { "acc": 0.98946428, "epoch": 31.633466135458168, "grad_norm": 2.3349499702453613, "learning_rate": 3.2555030293022665e-06, "loss": 0.05010435, "memory(GiB)": 13.7, "step": 67490, "train_speed(iter/s)": 1.531667 }, { "acc": 0.9833334, "epoch": 31.635809702367002, "grad_norm": 1.1249969005584717, "learning_rate": 3.254776685665614e-06, "loss": 0.0346086, "memory(GiB)": 13.7, "step": 67495, "train_speed(iter/s)": 1.531674 }, { "acc": 0.97042618, "epoch": 31.638153269275836, "grad_norm": 3.104959011077881, "learning_rate": 3.254050383990413e-06, "loss": 0.06811627, "memory(GiB)": 13.7, "step": 67500, "train_speed(iter/s)": 1.531675 }, { "acc": 0.99444447, "epoch": 31.640496836184674, "grad_norm": 0.005526977591216564, "learning_rate": 3.2533241242941255e-06, "loss": 0.01587061, "memory(GiB)": 13.7, "step": 67505, "train_speed(iter/s)": 1.531678 }, { "acc": 0.98779755, "epoch": 31.64284040309351, "grad_norm": 6.843539714813232, "learning_rate": 3.252597906594206e-06, "loss": 0.05446355, "memory(GiB)": 13.7, "step": 67510, "train_speed(iter/s)": 1.531675 }, { "acc": 0.99375, "epoch": 31.645183970002343, "grad_norm": 5.149538516998291, "learning_rate": 3.251871730908108e-06, "loss": 0.02523192, "memory(GiB)": 13.7, "step": 67515, "train_speed(iter/s)": 1.531678 }, { "acc": 0.98354168, "epoch": 31.647527536911177, "grad_norm": 0.9222227931022644, "learning_rate": 3.2511455972532906e-06, "loss": 0.03252838, "memory(GiB)": 13.7, "step": 67520, "train_speed(iter/s)": 1.531683 }, { "acc": 0.9895833, "epoch": 31.649871103820015, "grad_norm": 3.559481143951416, "learning_rate": 3.2504195056472066e-06, "loss": 0.05599229, "memory(GiB)": 13.7, "step": 67525, "train_speed(iter/s)": 1.531686 }, { "acc": 0.98904762, "epoch": 31.65221467072885, "grad_norm": 0.0002740601194091141, "learning_rate": 3.2496934561073068e-06, "loss": 0.03258173, "memory(GiB)": 13.7, "step": 67530, "train_speed(iter/s)": 1.531693 }, { "acc": 0.98675594, "epoch": 31.654558237637684, "grad_norm": 0.04212617129087448, "learning_rate": 3.2489674486510446e-06, "loss": 0.05976065, "memory(GiB)": 13.7, "step": 67535, "train_speed(iter/s)": 1.531691 }, { "acc": 0.99219704, "epoch": 31.65690180454652, "grad_norm": 0.0025911612901836634, "learning_rate": 3.248241483295872e-06, "loss": 0.04314584, "memory(GiB)": 13.7, "step": 67540, "train_speed(iter/s)": 1.531693 }, { "acc": 1.0, "epoch": 31.659245371455356, "grad_norm": 5.469929218292236, "learning_rate": 3.2475155600592385e-06, "loss": 0.06150206, "memory(GiB)": 13.7, "step": 67545, "train_speed(iter/s)": 1.5317 }, { "acc": 0.98178034, "epoch": 31.66158893836419, "grad_norm": 1.649801254272461, "learning_rate": 3.2467896789585885e-06, "loss": 0.0800572, "memory(GiB)": 13.7, "step": 67550, "train_speed(iter/s)": 1.531703 }, { "acc": 0.98354168, "epoch": 31.663932505273024, "grad_norm": 1.9481388330459595, "learning_rate": 3.2460638400113755e-06, "loss": 0.03122475, "memory(GiB)": 13.7, "step": 67555, "train_speed(iter/s)": 1.531704 }, { "acc": 0.99300594, "epoch": 31.666276072181862, "grad_norm": 2.192610740661621, "learning_rate": 3.2453380432350424e-06, "loss": 0.03518045, "memory(GiB)": 13.7, "step": 67560, "train_speed(iter/s)": 1.531702 }, { "acc": 0.98529758, "epoch": 31.668619639090696, "grad_norm": 3.139202833175659, "learning_rate": 3.2446122886470382e-06, "loss": 0.04578703, "memory(GiB)": 13.7, "step": 67565, "train_speed(iter/s)": 1.531707 }, { "acc": 0.98447914, "epoch": 31.67096320599953, "grad_norm": 2.148160219192505, "learning_rate": 3.2438865762648054e-06, "loss": 0.04583071, "memory(GiB)": 13.7, "step": 67570, "train_speed(iter/s)": 1.531717 }, { "acc": 0.97312498, "epoch": 31.673306772908365, "grad_norm": 0.6707635521888733, "learning_rate": 3.2431609061057876e-06, "loss": 0.07328761, "memory(GiB)": 13.7, "step": 67575, "train_speed(iter/s)": 1.531725 }, { "acc": 0.9875, "epoch": 31.675650339817203, "grad_norm": 0.8403788208961487, "learning_rate": 3.2424352781874297e-06, "loss": 0.0470422, "memory(GiB)": 13.7, "step": 67580, "train_speed(iter/s)": 1.531717 }, { "acc": 0.9863637, "epoch": 31.677993906726037, "grad_norm": 1.5356895923614502, "learning_rate": 3.241709692527171e-06, "loss": 0.0501054, "memory(GiB)": 13.7, "step": 67585, "train_speed(iter/s)": 1.531724 }, { "acc": 0.98777781, "epoch": 31.68033747363487, "grad_norm": 2.3817999362945557, "learning_rate": 3.240984149142455e-06, "loss": 0.05998875, "memory(GiB)": 13.7, "step": 67590, "train_speed(iter/s)": 1.531733 }, { "acc": 0.9895834, "epoch": 31.682681040543706, "grad_norm": 1.6481597423553467, "learning_rate": 3.2402586480507174e-06, "loss": 0.02449469, "memory(GiB)": 13.7, "step": 67595, "train_speed(iter/s)": 1.531736 }, { "acc": 0.978125, "epoch": 31.685024607452544, "grad_norm": 0.03908615559339523, "learning_rate": 3.2395331892694006e-06, "loss": 0.08599298, "memory(GiB)": 13.7, "step": 67600, "train_speed(iter/s)": 1.531735 }, { "acc": 0.99083328, "epoch": 31.687368174361378, "grad_norm": 2.429043769836426, "learning_rate": 3.238807772815939e-06, "loss": 0.01940003, "memory(GiB)": 13.7, "step": 67605, "train_speed(iter/s)": 1.531743 }, { "acc": 0.98592262, "epoch": 31.689711741270212, "grad_norm": 0.12598009407520294, "learning_rate": 3.2380823987077725e-06, "loss": 0.01811388, "memory(GiB)": 13.7, "step": 67610, "train_speed(iter/s)": 1.531745 }, { "acc": 0.97517357, "epoch": 31.69205530817905, "grad_norm": 8.610623359680176, "learning_rate": 3.237357066962335e-06, "loss": 0.08910307, "memory(GiB)": 13.7, "step": 67615, "train_speed(iter/s)": 1.531748 }, { "acc": 0.98239584, "epoch": 31.694398875087884, "grad_norm": 3.291550874710083, "learning_rate": 3.23663177759706e-06, "loss": 0.06055379, "memory(GiB)": 13.7, "step": 67620, "train_speed(iter/s)": 1.531753 }, { "acc": 0.98279762, "epoch": 31.69674244199672, "grad_norm": 7.343364238739014, "learning_rate": 3.2359065306293836e-06, "loss": 0.03921599, "memory(GiB)": 13.7, "step": 67625, "train_speed(iter/s)": 1.531757 }, { "acc": 0.98083334, "epoch": 31.699086008905553, "grad_norm": 3.252593755722046, "learning_rate": 3.235181326076737e-06, "loss": 0.02848371, "memory(GiB)": 13.7, "step": 67630, "train_speed(iter/s)": 1.531758 }, { "acc": 0.98842258, "epoch": 31.70142957581439, "grad_norm": 3.46338152885437, "learning_rate": 3.234456163956551e-06, "loss": 0.05640298, "memory(GiB)": 13.7, "step": 67635, "train_speed(iter/s)": 1.53176 }, { "acc": 0.97901783, "epoch": 31.703773142723225, "grad_norm": 4.291989326477051, "learning_rate": 3.2337310442862573e-06, "loss": 0.06394168, "memory(GiB)": 13.7, "step": 67640, "train_speed(iter/s)": 1.531766 }, { "acc": 0.9822917, "epoch": 31.70611670963206, "grad_norm": 1.1789546012878418, "learning_rate": 3.2330059670832857e-06, "loss": 0.05288334, "memory(GiB)": 13.7, "step": 67645, "train_speed(iter/s)": 1.531771 }, { "acc": 0.98656254, "epoch": 31.708460276540894, "grad_norm": 1.19509756565094, "learning_rate": 3.232280932365062e-06, "loss": 0.11106706, "memory(GiB)": 13.7, "step": 67650, "train_speed(iter/s)": 1.531772 }, { "acc": 0.99258928, "epoch": 31.71080384344973, "grad_norm": 0.02928713709115982, "learning_rate": 3.231555940149018e-06, "loss": 0.01471814, "memory(GiB)": 13.7, "step": 67655, "train_speed(iter/s)": 1.531779 }, { "acc": 0.9822916, "epoch": 31.713147410358566, "grad_norm": 5.90981388092041, "learning_rate": 3.2308309904525757e-06, "loss": 0.04701246, "memory(GiB)": 13.7, "step": 67660, "train_speed(iter/s)": 1.531784 }, { "acc": 0.9888195, "epoch": 31.7154909772674, "grad_norm": 0.0031853977125138044, "learning_rate": 3.230106083293164e-06, "loss": 0.02641608, "memory(GiB)": 13.7, "step": 67665, "train_speed(iter/s)": 1.531787 }, { "acc": 0.97486296, "epoch": 31.717834544176235, "grad_norm": 4.977469444274902, "learning_rate": 3.2293812186882056e-06, "loss": 0.09771929, "memory(GiB)": 13.7, "step": 67670, "train_speed(iter/s)": 1.531786 }, { "acc": 0.98311958, "epoch": 31.720178111085072, "grad_norm": 4.7301154136657715, "learning_rate": 3.2286563966551253e-06, "loss": 0.06945474, "memory(GiB)": 13.7, "step": 67675, "train_speed(iter/s)": 1.531791 }, { "acc": 0.97666664, "epoch": 31.722521677993907, "grad_norm": 4.375322341918945, "learning_rate": 3.227931617211342e-06, "loss": 0.04776828, "memory(GiB)": 13.7, "step": 67680, "train_speed(iter/s)": 1.531795 }, { "acc": 0.97873392, "epoch": 31.72486524490274, "grad_norm": 4.2071309089660645, "learning_rate": 3.2272068803742807e-06, "loss": 0.0721624, "memory(GiB)": 13.7, "step": 67685, "train_speed(iter/s)": 1.531801 }, { "acc": 0.987887, "epoch": 31.72720881181158, "grad_norm": 2.6878716945648193, "learning_rate": 3.2264821861613597e-06, "loss": 0.05070784, "memory(GiB)": 13.7, "step": 67690, "train_speed(iter/s)": 1.531806 }, { "acc": 0.98569441, "epoch": 31.729552378720413, "grad_norm": 0.0020169776398688555, "learning_rate": 3.2257575345899983e-06, "loss": 0.0246468, "memory(GiB)": 13.7, "step": 67695, "train_speed(iter/s)": 1.531805 }, { "acc": 0.99321423, "epoch": 31.731895945629248, "grad_norm": 1.009890079498291, "learning_rate": 3.2250329256776166e-06, "loss": 0.02282778, "memory(GiB)": 13.7, "step": 67700, "train_speed(iter/s)": 1.531797 }, { "acc": 0.98187504, "epoch": 31.734239512538082, "grad_norm": 2.8505585193634033, "learning_rate": 3.2243083594416287e-06, "loss": 0.05594789, "memory(GiB)": 13.7, "step": 67705, "train_speed(iter/s)": 1.531799 }, { "acc": 0.98562498, "epoch": 31.73658307944692, "grad_norm": 4.2223801612854, "learning_rate": 3.2235838358994544e-06, "loss": 0.03000539, "memory(GiB)": 13.7, "step": 67710, "train_speed(iter/s)": 1.531806 }, { "acc": 0.9782692, "epoch": 31.738926646355754, "grad_norm": 0.4808048903942108, "learning_rate": 3.2228593550685073e-06, "loss": 0.04517274, "memory(GiB)": 13.7, "step": 67715, "train_speed(iter/s)": 1.531808 }, { "acc": 0.99930553, "epoch": 31.74127021326459, "grad_norm": 0.002698224503546953, "learning_rate": 3.222134916966201e-06, "loss": 0.00919887, "memory(GiB)": 13.7, "step": 67720, "train_speed(iter/s)": 1.531807 }, { "acc": 0.96883926, "epoch": 31.743613780173423, "grad_norm": 5.9693779945373535, "learning_rate": 3.2214105216099477e-06, "loss": 0.0824944, "memory(GiB)": 13.7, "step": 67725, "train_speed(iter/s)": 1.531814 }, { "acc": 0.98093748, "epoch": 31.74595734708226, "grad_norm": 4.457394123077393, "learning_rate": 3.220686169017161e-06, "loss": 0.07350978, "memory(GiB)": 13.7, "step": 67730, "train_speed(iter/s)": 1.531819 }, { "acc": 0.98708324, "epoch": 31.748300913991095, "grad_norm": 3.714257001876831, "learning_rate": 3.2199618592052517e-06, "loss": 0.04594921, "memory(GiB)": 13.7, "step": 67735, "train_speed(iter/s)": 1.531826 }, { "acc": 0.99428024, "epoch": 31.75064448089993, "grad_norm": 1.7400791645050049, "learning_rate": 3.21923759219163e-06, "loss": 0.04953484, "memory(GiB)": 13.7, "step": 67740, "train_speed(iter/s)": 1.531828 }, { "acc": 0.96988087, "epoch": 31.752988047808763, "grad_norm": 4.642609119415283, "learning_rate": 3.218513367993705e-06, "loss": 0.07853746, "memory(GiB)": 13.7, "step": 67745, "train_speed(iter/s)": 1.531838 }, { "acc": 0.97979164, "epoch": 31.7553316147176, "grad_norm": 5.899588108062744, "learning_rate": 3.217789186628883e-06, "loss": 0.04804341, "memory(GiB)": 13.7, "step": 67750, "train_speed(iter/s)": 1.531836 }, { "acc": 0.98135414, "epoch": 31.757675181626436, "grad_norm": 4.365206718444824, "learning_rate": 3.2170650481145747e-06, "loss": 0.03967725, "memory(GiB)": 13.7, "step": 67755, "train_speed(iter/s)": 1.531838 }, { "acc": 0.98083344, "epoch": 31.76001874853527, "grad_norm": 4.356658458709717, "learning_rate": 3.2163409524681834e-06, "loss": 0.04502087, "memory(GiB)": 13.7, "step": 67760, "train_speed(iter/s)": 1.531842 }, { "acc": 0.99041672, "epoch": 31.762362315444108, "grad_norm": 3.9445998668670654, "learning_rate": 3.2156168997071113e-06, "loss": 0.04482921, "memory(GiB)": 13.7, "step": 67765, "train_speed(iter/s)": 1.531847 }, { "acc": 0.98384466, "epoch": 31.764705882352942, "grad_norm": 3.4848499298095703, "learning_rate": 3.2148928898487675e-06, "loss": 0.04080918, "memory(GiB)": 13.7, "step": 67770, "train_speed(iter/s)": 1.531848 }, { "acc": 0.98024807, "epoch": 31.767049449261776, "grad_norm": 3.3302195072174072, "learning_rate": 3.214168922910552e-06, "loss": 0.06342461, "memory(GiB)": 13.7, "step": 67775, "train_speed(iter/s)": 1.53185 }, { "acc": 0.98791666, "epoch": 31.76939301617061, "grad_norm": 2.91475772857666, "learning_rate": 3.2134449989098667e-06, "loss": 0.02876016, "memory(GiB)": 13.7, "step": 67780, "train_speed(iter/s)": 1.53185 }, { "acc": 0.96312504, "epoch": 31.77173658307945, "grad_norm": 5.887421607971191, "learning_rate": 3.2127211178641125e-06, "loss": 0.07412803, "memory(GiB)": 13.7, "step": 67785, "train_speed(iter/s)": 1.531856 }, { "acc": 0.98946428, "epoch": 31.774080149988283, "grad_norm": 4.0414228439331055, "learning_rate": 3.21199727979069e-06, "loss": 0.03193004, "memory(GiB)": 13.7, "step": 67790, "train_speed(iter/s)": 1.531856 }, { "acc": 0.97333336, "epoch": 31.776423716897117, "grad_norm": 1.9470314979553223, "learning_rate": 3.2112734847069964e-06, "loss": 0.04408157, "memory(GiB)": 13.7, "step": 67795, "train_speed(iter/s)": 1.531863 }, { "acc": 0.98371105, "epoch": 31.77876728380595, "grad_norm": 5.382812023162842, "learning_rate": 3.2105497326304326e-06, "loss": 0.05407227, "memory(GiB)": 13.7, "step": 67800, "train_speed(iter/s)": 1.531866 }, { "acc": 0.98249998, "epoch": 31.78111085071479, "grad_norm": 3.1673200130462646, "learning_rate": 3.2098260235783907e-06, "loss": 0.03415351, "memory(GiB)": 13.7, "step": 67805, "train_speed(iter/s)": 1.531872 }, { "acc": 0.98624992, "epoch": 31.783454417623624, "grad_norm": 3.2543728351593018, "learning_rate": 3.209102357568268e-06, "loss": 0.05025303, "memory(GiB)": 13.7, "step": 67810, "train_speed(iter/s)": 1.531871 }, { "acc": 0.97666664, "epoch": 31.785797984532458, "grad_norm": 7.2255778312683105, "learning_rate": 3.20837873461746e-06, "loss": 0.06718296, "memory(GiB)": 13.7, "step": 67815, "train_speed(iter/s)": 1.531873 }, { "acc": 0.9822916, "epoch": 31.788141551441292, "grad_norm": 0.492776483297348, "learning_rate": 3.20765515474336e-06, "loss": 0.03426705, "memory(GiB)": 13.7, "step": 67820, "train_speed(iter/s)": 1.531881 }, { "acc": 0.9838541, "epoch": 31.79048511835013, "grad_norm": 3.495297908782959, "learning_rate": 3.2069316179633585e-06, "loss": 0.05531188, "memory(GiB)": 13.7, "step": 67825, "train_speed(iter/s)": 1.531884 }, { "acc": 0.98788376, "epoch": 31.792828685258964, "grad_norm": 4.2147417068481445, "learning_rate": 3.20620812429485e-06, "loss": 0.02589914, "memory(GiB)": 13.7, "step": 67830, "train_speed(iter/s)": 1.531884 }, { "acc": 0.9864584, "epoch": 31.7951722521678, "grad_norm": 2.5036425590515137, "learning_rate": 3.2054846737552237e-06, "loss": 0.05389295, "memory(GiB)": 13.7, "step": 67835, "train_speed(iter/s)": 1.531884 }, { "acc": 0.9791667, "epoch": 31.797515819076633, "grad_norm": 7.834686756134033, "learning_rate": 3.2047612663618687e-06, "loss": 0.0736407, "memory(GiB)": 13.7, "step": 67840, "train_speed(iter/s)": 1.531886 }, { "acc": 0.99298611, "epoch": 31.79985938598547, "grad_norm": 1.2832690477371216, "learning_rate": 3.2040379021321734e-06, "loss": 0.02004082, "memory(GiB)": 13.7, "step": 67845, "train_speed(iter/s)": 1.53189 }, { "acc": 0.98395834, "epoch": 31.802202952894305, "grad_norm": 2.849527359008789, "learning_rate": 3.203314581083525e-06, "loss": 0.03897484, "memory(GiB)": 13.7, "step": 67850, "train_speed(iter/s)": 1.531888 }, { "acc": 0.98937502, "epoch": 31.80454651980314, "grad_norm": 4.3512091636657715, "learning_rate": 3.2025913032333084e-06, "loss": 0.03948169, "memory(GiB)": 13.7, "step": 67855, "train_speed(iter/s)": 1.531895 }, { "acc": 0.9621727, "epoch": 31.806890086711977, "grad_norm": 7.745965480804443, "learning_rate": 3.2018680685989124e-06, "loss": 0.06018504, "memory(GiB)": 13.7, "step": 67860, "train_speed(iter/s)": 1.531905 }, { "acc": 0.98187504, "epoch": 31.80923365362081, "grad_norm": 5.877301216125488, "learning_rate": 3.2011448771977184e-06, "loss": 0.05795299, "memory(GiB)": 13.7, "step": 67865, "train_speed(iter/s)": 1.531907 }, { "acc": 0.97997971, "epoch": 31.811577220529646, "grad_norm": 1.4534193277359009, "learning_rate": 3.2004217290471084e-06, "loss": 0.06895845, "memory(GiB)": 13.7, "step": 67870, "train_speed(iter/s)": 1.531908 }, { "acc": 0.97979164, "epoch": 31.81392078743848, "grad_norm": 3.874685525894165, "learning_rate": 3.1996986241644685e-06, "loss": 0.08165814, "memory(GiB)": 13.7, "step": 67875, "train_speed(iter/s)": 1.531908 }, { "acc": 0.98291664, "epoch": 31.816264354347318, "grad_norm": 5.154394149780273, "learning_rate": 3.1989755625671786e-06, "loss": 0.03216617, "memory(GiB)": 13.7, "step": 67880, "train_speed(iter/s)": 1.531909 }, { "acc": 0.99050598, "epoch": 31.818607921256152, "grad_norm": 2.282362461090088, "learning_rate": 3.198252544272615e-06, "loss": 0.03312405, "memory(GiB)": 13.7, "step": 67885, "train_speed(iter/s)": 1.531909 }, { "acc": 0.9927083, "epoch": 31.820951488164987, "grad_norm": 1.03126060962677, "learning_rate": 3.1975295692981606e-06, "loss": 0.02944159, "memory(GiB)": 13.7, "step": 67890, "train_speed(iter/s)": 1.531909 }, { "acc": 0.97041664, "epoch": 31.82329505507382, "grad_norm": 5.108860015869141, "learning_rate": 3.196806637661192e-06, "loss": 0.05078452, "memory(GiB)": 13.7, "step": 67895, "train_speed(iter/s)": 1.531917 }, { "acc": 0.98145828, "epoch": 31.82563862198266, "grad_norm": 3.9874675273895264, "learning_rate": 3.196083749379086e-06, "loss": 0.03271898, "memory(GiB)": 13.7, "step": 67900, "train_speed(iter/s)": 1.531915 }, { "acc": 0.99385414, "epoch": 31.827982188891493, "grad_norm": 3.5682787895202637, "learning_rate": 3.19536090446922e-06, "loss": 0.01787097, "memory(GiB)": 13.7, "step": 67905, "train_speed(iter/s)": 1.531916 }, { "acc": 0.97198868, "epoch": 31.830325755800327, "grad_norm": 4.514060974121094, "learning_rate": 3.1946381029489655e-06, "loss": 0.09912428, "memory(GiB)": 13.7, "step": 67910, "train_speed(iter/s)": 1.531925 }, { "acc": 0.97986107, "epoch": 31.83266932270916, "grad_norm": 3.364041805267334, "learning_rate": 3.193915344835699e-06, "loss": 0.05742329, "memory(GiB)": 13.7, "step": 67915, "train_speed(iter/s)": 1.531924 }, { "acc": 0.97407198, "epoch": 31.835012889618, "grad_norm": 5.817129135131836, "learning_rate": 3.193192630146793e-06, "loss": 0.04625241, "memory(GiB)": 13.7, "step": 67920, "train_speed(iter/s)": 1.531931 }, { "acc": 0.98872032, "epoch": 31.837356456526834, "grad_norm": 1.7595382928848267, "learning_rate": 3.19246995889962e-06, "loss": 0.03033006, "memory(GiB)": 13.7, "step": 67925, "train_speed(iter/s)": 1.531932 }, { "acc": 0.98520832, "epoch": 31.839700023435668, "grad_norm": 2.7577903270721436, "learning_rate": 3.1917473311115468e-06, "loss": 0.03058865, "memory(GiB)": 13.7, "step": 67930, "train_speed(iter/s)": 1.531934 }, { "acc": 0.9791666, "epoch": 31.842043590344506, "grad_norm": 1.1583251953125, "learning_rate": 3.1910247467999466e-06, "loss": 0.08090643, "memory(GiB)": 13.7, "step": 67935, "train_speed(iter/s)": 1.531937 }, { "acc": 0.9770833, "epoch": 31.84438715725334, "grad_norm": 7.7414350509643555, "learning_rate": 3.1903022059821877e-06, "loss": 0.10017681, "memory(GiB)": 13.7, "step": 67940, "train_speed(iter/s)": 1.531936 }, { "acc": 0.98225269, "epoch": 31.846730724162175, "grad_norm": 2.241145372390747, "learning_rate": 3.1895797086756354e-06, "loss": 0.05193383, "memory(GiB)": 13.7, "step": 67945, "train_speed(iter/s)": 1.531931 }, { "acc": 0.97104168, "epoch": 31.84907429107101, "grad_norm": 7.932825565338135, "learning_rate": 3.188857254897658e-06, "loss": 0.0712878, "memory(GiB)": 13.7, "step": 67950, "train_speed(iter/s)": 1.531933 }, { "acc": 0.97770834, "epoch": 31.851417857979847, "grad_norm": 2.544748306274414, "learning_rate": 3.1881348446656195e-06, "loss": 0.05068454, "memory(GiB)": 13.7, "step": 67955, "train_speed(iter/s)": 1.531938 }, { "acc": 0.99708328, "epoch": 31.85376142488868, "grad_norm": 1.8032467365264893, "learning_rate": 3.1874124779968866e-06, "loss": 0.04441204, "memory(GiB)": 13.7, "step": 67960, "train_speed(iter/s)": 1.531943 }, { "acc": 0.9791666, "epoch": 31.856104991797515, "grad_norm": 5.6245598793029785, "learning_rate": 3.1866901549088223e-06, "loss": 0.04740151, "memory(GiB)": 13.7, "step": 67965, "train_speed(iter/s)": 1.531948 }, { "acc": 0.98029766, "epoch": 31.85844855870635, "grad_norm": 5.394438743591309, "learning_rate": 3.185967875418787e-06, "loss": 0.06115668, "memory(GiB)": 13.7, "step": 67970, "train_speed(iter/s)": 1.531951 }, { "acc": 0.98666668, "epoch": 31.860792125615188, "grad_norm": 0.8319130539894104, "learning_rate": 3.1852456395441417e-06, "loss": 0.02703183, "memory(GiB)": 13.7, "step": 67975, "train_speed(iter/s)": 1.531956 }, { "acc": 0.98687496, "epoch": 31.863135692524022, "grad_norm": 3.330517530441284, "learning_rate": 3.184523447302249e-06, "loss": 0.04842524, "memory(GiB)": 13.7, "step": 67980, "train_speed(iter/s)": 1.531956 }, { "acc": 0.99077377, "epoch": 31.865479259432856, "grad_norm": 0.00011685278150252998, "learning_rate": 3.183801298710465e-06, "loss": 0.03449243, "memory(GiB)": 13.7, "step": 67985, "train_speed(iter/s)": 1.531959 }, { "acc": 0.99375, "epoch": 31.86782282634169, "grad_norm": 1.0044578313827515, "learning_rate": 3.183079193786151e-06, "loss": 0.01582266, "memory(GiB)": 13.7, "step": 67990, "train_speed(iter/s)": 1.531963 }, { "acc": 0.990625, "epoch": 31.87016639325053, "grad_norm": 1.0669326782226562, "learning_rate": 3.1823571325466617e-06, "loss": 0.03116649, "memory(GiB)": 13.7, "step": 67995, "train_speed(iter/s)": 1.531965 }, { "acc": 0.98500004, "epoch": 31.872509960159363, "grad_norm": 2.4246902465820312, "learning_rate": 3.181635115009353e-06, "loss": 0.04485484, "memory(GiB)": 13.7, "step": 68000, "train_speed(iter/s)": 1.531961 }, { "acc": 0.98282633, "epoch": 31.874853527068197, "grad_norm": 4.207035064697266, "learning_rate": 3.180913141191583e-06, "loss": 0.0713977, "memory(GiB)": 13.7, "step": 68005, "train_speed(iter/s)": 1.531966 }, { "acc": 0.98395824, "epoch": 31.87719709397703, "grad_norm": 2.425830841064453, "learning_rate": 3.180191211110703e-06, "loss": 0.02479237, "memory(GiB)": 13.7, "step": 68010, "train_speed(iter/s)": 1.531971 }, { "acc": 0.97364578, "epoch": 31.87954066088587, "grad_norm": 1.7802616357803345, "learning_rate": 3.179469324784065e-06, "loss": 0.06985418, "memory(GiB)": 13.7, "step": 68015, "train_speed(iter/s)": 1.531977 }, { "acc": 0.98286705, "epoch": 31.881884227794703, "grad_norm": 0.4067055583000183, "learning_rate": 3.1787474822290223e-06, "loss": 0.05059447, "memory(GiB)": 13.7, "step": 68020, "train_speed(iter/s)": 1.531977 }, { "acc": 0.9864584, "epoch": 31.884227794703538, "grad_norm": 2.967268943786621, "learning_rate": 3.178025683462925e-06, "loss": 0.03972747, "memory(GiB)": 13.7, "step": 68025, "train_speed(iter/s)": 1.531983 }, { "acc": 0.97558613, "epoch": 31.886571361612376, "grad_norm": 1.4597762823104858, "learning_rate": 3.1773039285031222e-06, "loss": 0.05978898, "memory(GiB)": 13.7, "step": 68030, "train_speed(iter/s)": 1.53198 }, { "acc": 0.9875, "epoch": 31.88891492852121, "grad_norm": 0.6042797565460205, "learning_rate": 3.1765822173669643e-06, "loss": 0.0265147, "memory(GiB)": 13.7, "step": 68035, "train_speed(iter/s)": 1.531979 }, { "acc": 0.98916664, "epoch": 31.891258495430044, "grad_norm": 0.34306469559669495, "learning_rate": 3.175860550071798e-06, "loss": 0.03035299, "memory(GiB)": 13.7, "step": 68040, "train_speed(iter/s)": 1.531982 }, { "acc": 0.99202118, "epoch": 31.89360206233888, "grad_norm": 6.06425142288208, "learning_rate": 3.175138926634968e-06, "loss": 0.04891591, "memory(GiB)": 13.7, "step": 68045, "train_speed(iter/s)": 1.531988 }, { "acc": 0.98820515, "epoch": 31.895945629247716, "grad_norm": 3.6092417240142822, "learning_rate": 3.1744173470738244e-06, "loss": 0.05585246, "memory(GiB)": 13.7, "step": 68050, "train_speed(iter/s)": 1.531987 }, { "acc": 0.98760414, "epoch": 31.89828919615655, "grad_norm": 3.976752519607544, "learning_rate": 3.1736958114057075e-06, "loss": 0.04496405, "memory(GiB)": 13.7, "step": 68055, "train_speed(iter/s)": 1.53199 }, { "acc": 0.97737179, "epoch": 31.900632763065385, "grad_norm": 4.698248863220215, "learning_rate": 3.17297431964796e-06, "loss": 0.06383417, "memory(GiB)": 13.7, "step": 68060, "train_speed(iter/s)": 1.531988 }, { "acc": 0.99057541, "epoch": 31.90297632997422, "grad_norm": 1.016694188117981, "learning_rate": 3.1722528718179276e-06, "loss": 0.03192507, "memory(GiB)": 13.7, "step": 68065, "train_speed(iter/s)": 1.531996 }, { "acc": 0.9864583, "epoch": 31.905319896883057, "grad_norm": 2.9886317253112793, "learning_rate": 3.17153146793295e-06, "loss": 0.02128226, "memory(GiB)": 13.7, "step": 68070, "train_speed(iter/s)": 1.531996 }, { "acc": 0.98990173, "epoch": 31.90766346379189, "grad_norm": 2.1123199462890625, "learning_rate": 3.170810108010366e-06, "loss": 0.04977484, "memory(GiB)": 13.7, "step": 68075, "train_speed(iter/s)": 1.531998 }, { "acc": 0.991572, "epoch": 31.910007030700726, "grad_norm": 5.28359317779541, "learning_rate": 3.1700887920675174e-06, "loss": 0.03760665, "memory(GiB)": 13.7, "step": 68080, "train_speed(iter/s)": 1.532005 }, { "acc": 0.98433037, "epoch": 31.91235059760956, "grad_norm": 2.839454412460327, "learning_rate": 3.1693675201217394e-06, "loss": 0.08490457, "memory(GiB)": 13.7, "step": 68085, "train_speed(iter/s)": 1.532006 }, { "acc": 0.98559532, "epoch": 31.914694164518398, "grad_norm": 2.323050022125244, "learning_rate": 3.1686462921903726e-06, "loss": 0.05447214, "memory(GiB)": 13.7, "step": 68090, "train_speed(iter/s)": 1.532014 }, { "acc": 0.9875, "epoch": 31.917037731427232, "grad_norm": 2.479433298110962, "learning_rate": 3.1679251082907498e-06, "loss": 0.01551229, "memory(GiB)": 13.7, "step": 68095, "train_speed(iter/s)": 1.532019 }, { "acc": 0.98270836, "epoch": 31.919381298336067, "grad_norm": 0.0059877242892980576, "learning_rate": 3.1672039684402068e-06, "loss": 0.04301758, "memory(GiB)": 13.7, "step": 68100, "train_speed(iter/s)": 1.532021 }, { "acc": 0.98916664, "epoch": 31.921724865244904, "grad_norm": 2.931706428527832, "learning_rate": 3.1664828726560764e-06, "loss": 0.03987062, "memory(GiB)": 13.7, "step": 68105, "train_speed(iter/s)": 1.53202 }, { "acc": 0.97618046, "epoch": 31.92406843215374, "grad_norm": 2.2812108993530273, "learning_rate": 3.1657618209556944e-06, "loss": 0.06716313, "memory(GiB)": 13.7, "step": 68110, "train_speed(iter/s)": 1.532024 }, { "acc": 0.98395834, "epoch": 31.926411999062573, "grad_norm": 1.9793511629104614, "learning_rate": 3.16504081335639e-06, "loss": 0.04740289, "memory(GiB)": 13.7, "step": 68115, "train_speed(iter/s)": 1.53202 }, { "acc": 0.98500004, "epoch": 31.928755565971407, "grad_norm": 0.9197005033493042, "learning_rate": 3.1643198498754935e-06, "loss": 0.03636557, "memory(GiB)": 13.7, "step": 68120, "train_speed(iter/s)": 1.532019 }, { "acc": 0.9842803, "epoch": 31.931099132880245, "grad_norm": 0.5085486769676208, "learning_rate": 3.163598930530336e-06, "loss": 0.06285629, "memory(GiB)": 13.7, "step": 68125, "train_speed(iter/s)": 1.532024 }, { "acc": 0.98288383, "epoch": 31.93344269978908, "grad_norm": 2.8957419395446777, "learning_rate": 3.1628780553382456e-06, "loss": 0.03468332, "memory(GiB)": 13.7, "step": 68130, "train_speed(iter/s)": 1.532029 }, { "acc": 0.990625, "epoch": 31.935786266697914, "grad_norm": 2.789153575897217, "learning_rate": 3.162157224316551e-06, "loss": 0.01947422, "memory(GiB)": 13.7, "step": 68135, "train_speed(iter/s)": 1.532032 }, { "acc": 0.98348427, "epoch": 31.938129833606748, "grad_norm": 3.22723388671875, "learning_rate": 3.161436437482577e-06, "loss": 0.0367874, "memory(GiB)": 13.7, "step": 68140, "train_speed(iter/s)": 1.532036 }, { "acc": 0.9809659, "epoch": 31.940473400515586, "grad_norm": 4.12851095199585, "learning_rate": 3.1607156948536487e-06, "loss": 0.04595847, "memory(GiB)": 13.7, "step": 68145, "train_speed(iter/s)": 1.532034 }, { "acc": 0.99020834, "epoch": 31.94281696742442, "grad_norm": 2.550158977508545, "learning_rate": 3.1599949964470893e-06, "loss": 0.01912205, "memory(GiB)": 13.7, "step": 68150, "train_speed(iter/s)": 1.532033 }, { "acc": 0.98217258, "epoch": 31.945160534333255, "grad_norm": 7.233752727508545, "learning_rate": 3.159274342280225e-06, "loss": 0.03454102, "memory(GiB)": 13.7, "step": 68155, "train_speed(iter/s)": 1.532031 }, { "acc": 0.98571434, "epoch": 31.94750410124209, "grad_norm": 4.824951171875, "learning_rate": 3.158553732370377e-06, "loss": 0.04325134, "memory(GiB)": 13.7, "step": 68160, "train_speed(iter/s)": 1.532032 }, { "acc": 0.9791667, "epoch": 31.949847668150927, "grad_norm": 0.00838142167776823, "learning_rate": 3.1578331667348653e-06, "loss": 0.11460159, "memory(GiB)": 13.7, "step": 68165, "train_speed(iter/s)": 1.53204 }, { "acc": 0.98812504, "epoch": 31.95219123505976, "grad_norm": 1.9475622177124023, "learning_rate": 3.157112645391012e-06, "loss": 0.07318116, "memory(GiB)": 13.7, "step": 68170, "train_speed(iter/s)": 1.532038 }, { "acc": 0.95258932, "epoch": 31.954534801968595, "grad_norm": 5.330197334289551, "learning_rate": 3.156392168356135e-06, "loss": 0.11895537, "memory(GiB)": 13.7, "step": 68175, "train_speed(iter/s)": 1.532039 }, { "acc": 0.98501892, "epoch": 31.956878368877433, "grad_norm": 2.6467597484588623, "learning_rate": 3.1556717356475497e-06, "loss": 0.03724058, "memory(GiB)": 13.7, "step": 68180, "train_speed(iter/s)": 1.53204 }, { "acc": 0.98321428, "epoch": 31.959221935786267, "grad_norm": 1.3455095291137695, "learning_rate": 3.1549513472825766e-06, "loss": 0.06866499, "memory(GiB)": 13.7, "step": 68185, "train_speed(iter/s)": 1.532038 }, { "acc": 0.97071428, "epoch": 31.9615655026951, "grad_norm": 0.47544506192207336, "learning_rate": 3.1542310032785283e-06, "loss": 0.06272824, "memory(GiB)": 13.7, "step": 68190, "train_speed(iter/s)": 1.532041 }, { "acc": 0.98624992, "epoch": 31.963909069603936, "grad_norm": 5.709923267364502, "learning_rate": 3.1535107036527222e-06, "loss": 0.03669909, "memory(GiB)": 13.7, "step": 68195, "train_speed(iter/s)": 1.532041 }, { "acc": 0.97583332, "epoch": 31.966252636512774, "grad_norm": 2.570727586746216, "learning_rate": 3.1527904484224727e-06, "loss": 0.064083, "memory(GiB)": 13.7, "step": 68200, "train_speed(iter/s)": 1.532044 }, { "acc": 0.98936014, "epoch": 31.96859620342161, "grad_norm": 1.6093443632125854, "learning_rate": 3.1520702376050875e-06, "loss": 0.03276444, "memory(GiB)": 13.7, "step": 68205, "train_speed(iter/s)": 1.532044 }, { "acc": 0.99477711, "epoch": 31.970939770330443, "grad_norm": 2.0541534423828125, "learning_rate": 3.1513500712178834e-06, "loss": 0.02780873, "memory(GiB)": 13.7, "step": 68210, "train_speed(iter/s)": 1.532045 }, { "acc": 0.9791338, "epoch": 31.973283337239277, "grad_norm": 8.817048072814941, "learning_rate": 3.150629949278169e-06, "loss": 0.09016438, "memory(GiB)": 13.7, "step": 68215, "train_speed(iter/s)": 1.532048 }, { "acc": 0.98187504, "epoch": 31.975626904148115, "grad_norm": 0.03484572470188141, "learning_rate": 3.1499098718032532e-06, "loss": 0.04948872, "memory(GiB)": 13.7, "step": 68220, "train_speed(iter/s)": 1.53206 }, { "acc": 0.996875, "epoch": 31.97797047105695, "grad_norm": 0.003148708725348115, "learning_rate": 3.149189838810443e-06, "loss": 0.01322084, "memory(GiB)": 13.7, "step": 68225, "train_speed(iter/s)": 1.532067 }, { "acc": 0.97375002, "epoch": 31.980314037965783, "grad_norm": 3.7104978561401367, "learning_rate": 3.1484698503170475e-06, "loss": 0.07341656, "memory(GiB)": 13.7, "step": 68230, "train_speed(iter/s)": 1.532064 }, { "acc": 0.97399807, "epoch": 31.982657604874618, "grad_norm": 4.979562759399414, "learning_rate": 3.147749906340372e-06, "loss": 0.05951862, "memory(GiB)": 13.7, "step": 68235, "train_speed(iter/s)": 1.532069 }, { "acc": 0.9947917, "epoch": 31.985001171783455, "grad_norm": 2.3759548664093018, "learning_rate": 3.1470300068977227e-06, "loss": 0.01896625, "memory(GiB)": 13.7, "step": 68240, "train_speed(iter/s)": 1.532073 }, { "acc": 0.96895838, "epoch": 31.98734473869229, "grad_norm": 4.200494766235352, "learning_rate": 3.1463101520064034e-06, "loss": 0.06866318, "memory(GiB)": 13.7, "step": 68245, "train_speed(iter/s)": 1.532081 }, { "acc": 0.98747025, "epoch": 31.989688305601124, "grad_norm": 4.028447151184082, "learning_rate": 3.1455903416837153e-06, "loss": 0.07727296, "memory(GiB)": 13.7, "step": 68250, "train_speed(iter/s)": 1.532087 }, { "acc": 0.9802084, "epoch": 31.992031872509962, "grad_norm": 5.340128421783447, "learning_rate": 3.1448705759469623e-06, "loss": 0.09583031, "memory(GiB)": 13.7, "step": 68255, "train_speed(iter/s)": 1.532095 }, { "acc": 0.98500004, "epoch": 31.994375439418796, "grad_norm": 11.291277885437012, "learning_rate": 3.1441508548134453e-06, "loss": 0.04875803, "memory(GiB)": 13.7, "step": 68260, "train_speed(iter/s)": 1.5321 }, { "acc": 0.984375, "epoch": 31.99671900632763, "grad_norm": 5.03689432144165, "learning_rate": 3.143431178300462e-06, "loss": 0.06741034, "memory(GiB)": 13.7, "step": 68265, "train_speed(iter/s)": 1.532101 }, { "acc": 0.9867857, "epoch": 31.999062573236465, "grad_norm": 2.444648027420044, "learning_rate": 3.1427115464253128e-06, "loss": 0.03707474, "memory(GiB)": 13.7, "step": 68270, "train_speed(iter/s)": 1.532103 }, { "acc": 0.97312508, "epoch": 32.0014061401453, "grad_norm": 4.424084663391113, "learning_rate": 3.141991959205294e-06, "loss": 0.04383861, "memory(GiB)": 13.7, "step": 68275, "train_speed(iter/s)": 1.532085 }, { "acc": 0.984375, "epoch": 32.00374970705413, "grad_norm": 5.960166931152344, "learning_rate": 3.141272416657703e-06, "loss": 0.0492781, "memory(GiB)": 13.7, "step": 68280, "train_speed(iter/s)": 1.532088 }, { "acc": 0.98883934, "epoch": 32.006093273962975, "grad_norm": 2.50148868560791, "learning_rate": 3.140552918799835e-06, "loss": 0.02783216, "memory(GiB)": 13.7, "step": 68285, "train_speed(iter/s)": 1.532092 }, { "acc": 0.98791666, "epoch": 32.00843684087181, "grad_norm": 0.9433653950691223, "learning_rate": 3.1398334656489854e-06, "loss": 0.021492, "memory(GiB)": 13.7, "step": 68290, "train_speed(iter/s)": 1.532102 }, { "acc": 0.990625, "epoch": 32.01078040778064, "grad_norm": 6.395859718322754, "learning_rate": 3.139114057222445e-06, "loss": 0.03109314, "memory(GiB)": 13.7, "step": 68295, "train_speed(iter/s)": 1.532097 }, { "acc": 0.99437504, "epoch": 32.01312397468948, "grad_norm": 0.009565776214003563, "learning_rate": 3.1383946935375098e-06, "loss": 0.02606375, "memory(GiB)": 13.7, "step": 68300, "train_speed(iter/s)": 1.532101 }, { "acc": 0.97416668, "epoch": 32.01546754159831, "grad_norm": 6.309881210327148, "learning_rate": 3.137675374611467e-06, "loss": 0.06895871, "memory(GiB)": 13.7, "step": 68305, "train_speed(iter/s)": 1.532106 }, { "acc": 0.9850893, "epoch": 32.017811108507146, "grad_norm": 1.8354014158248901, "learning_rate": 3.1369561004616077e-06, "loss": 0.06032513, "memory(GiB)": 13.7, "step": 68310, "train_speed(iter/s)": 1.532113 }, { "acc": 0.97676544, "epoch": 32.02015467541598, "grad_norm": 4.219703674316406, "learning_rate": 3.136236871105223e-06, "loss": 0.09810559, "memory(GiB)": 13.7, "step": 68315, "train_speed(iter/s)": 1.532117 }, { "acc": 0.97510414, "epoch": 32.022498242324815, "grad_norm": 5.43057107925415, "learning_rate": 3.135517686559599e-06, "loss": 0.11247481, "memory(GiB)": 13.7, "step": 68320, "train_speed(iter/s)": 1.532117 }, { "acc": 0.97801466, "epoch": 32.024841809233656, "grad_norm": 4.7467122077941895, "learning_rate": 3.1347985468420215e-06, "loss": 0.0531943, "memory(GiB)": 13.7, "step": 68325, "train_speed(iter/s)": 1.532111 }, { "acc": 0.98666668, "epoch": 32.02718537614249, "grad_norm": 4.812737941741943, "learning_rate": 3.1340794519697793e-06, "loss": 0.0248024, "memory(GiB)": 13.7, "step": 68330, "train_speed(iter/s)": 1.53211 }, { "acc": 0.98833332, "epoch": 32.029528943051325, "grad_norm": 0.594135582447052, "learning_rate": 3.133360401960154e-06, "loss": 0.03950986, "memory(GiB)": 13.7, "step": 68335, "train_speed(iter/s)": 1.532112 }, { "acc": 0.996875, "epoch": 32.03187250996016, "grad_norm": 0.014146738685667515, "learning_rate": 3.132641396830432e-06, "loss": 0.00900564, "memory(GiB)": 13.7, "step": 68340, "train_speed(iter/s)": 1.532112 }, { "acc": 0.98445654, "epoch": 32.034216076868994, "grad_norm": 8.489912033081055, "learning_rate": 3.1319224365978944e-06, "loss": 0.06338858, "memory(GiB)": 13.7, "step": 68345, "train_speed(iter/s)": 1.532116 }, { "acc": 0.9822917, "epoch": 32.03655964377783, "grad_norm": 0.002514735097065568, "learning_rate": 3.131203521279823e-06, "loss": 0.05514175, "memory(GiB)": 13.7, "step": 68350, "train_speed(iter/s)": 1.532123 }, { "acc": 0.98090286, "epoch": 32.03890321068666, "grad_norm": 4.385135173797607, "learning_rate": 3.130484650893496e-06, "loss": 0.07499087, "memory(GiB)": 13.7, "step": 68355, "train_speed(iter/s)": 1.532128 }, { "acc": 0.990625, "epoch": 32.041246777595504, "grad_norm": 2.4909493923187256, "learning_rate": 3.1297658254561964e-06, "loss": 0.02322097, "memory(GiB)": 13.7, "step": 68360, "train_speed(iter/s)": 1.532133 }, { "acc": 1.0, "epoch": 32.04359034450434, "grad_norm": 0.06600268930196762, "learning_rate": 3.1290470449851997e-06, "loss": 0.01353816, "memory(GiB)": 13.7, "step": 68365, "train_speed(iter/s)": 1.532131 }, { "acc": 0.97946434, "epoch": 32.04593391141317, "grad_norm": 2.979084014892578, "learning_rate": 3.128328309497783e-06, "loss": 0.05599651, "memory(GiB)": 13.7, "step": 68370, "train_speed(iter/s)": 1.532137 }, { "acc": 0.98708334, "epoch": 32.04827747832201, "grad_norm": 6.4172773361206055, "learning_rate": 3.1276096190112242e-06, "loss": 0.03317256, "memory(GiB)": 13.7, "step": 68375, "train_speed(iter/s)": 1.53214 }, { "acc": 0.99360695, "epoch": 32.05062104523084, "grad_norm": 1.8314108848571777, "learning_rate": 3.126890973542796e-06, "loss": 0.02920438, "memory(GiB)": 13.7, "step": 68380, "train_speed(iter/s)": 1.53214 }, { "acc": 0.97999992, "epoch": 32.052964612139675, "grad_norm": 3.8289849758148193, "learning_rate": 3.1261723731097753e-06, "loss": 0.06119061, "memory(GiB)": 13.7, "step": 68385, "train_speed(iter/s)": 1.532138 }, { "acc": 0.99131947, "epoch": 32.05530817904851, "grad_norm": 3.1417784690856934, "learning_rate": 3.1254538177294326e-06, "loss": 0.03010347, "memory(GiB)": 13.7, "step": 68390, "train_speed(iter/s)": 1.532146 }, { "acc": 0.98342266, "epoch": 32.057651745957344, "grad_norm": 3.868515968322754, "learning_rate": 3.1247353074190405e-06, "loss": 0.04044004, "memory(GiB)": 13.7, "step": 68395, "train_speed(iter/s)": 1.532149 }, { "acc": 0.97675591, "epoch": 32.059995312866185, "grad_norm": 2.1619739532470703, "learning_rate": 3.124016842195867e-06, "loss": 0.05719938, "memory(GiB)": 13.7, "step": 68400, "train_speed(iter/s)": 1.532149 }, { "acc": 0.9953125, "epoch": 32.06233887977502, "grad_norm": 2.257389545440674, "learning_rate": 3.1232984220771865e-06, "loss": 0.0387872, "memory(GiB)": 13.7, "step": 68405, "train_speed(iter/s)": 1.532155 }, { "acc": 0.98065443, "epoch": 32.064682446683854, "grad_norm": 0.16991829872131348, "learning_rate": 3.1225800470802633e-06, "loss": 0.05743241, "memory(GiB)": 13.7, "step": 68410, "train_speed(iter/s)": 1.532165 }, { "acc": 0.98666668, "epoch": 32.06702601359269, "grad_norm": 1.551560640335083, "learning_rate": 3.1218617172223665e-06, "loss": 0.04475983, "memory(GiB)": 13.7, "step": 68415, "train_speed(iter/s)": 1.532166 }, { "acc": 0.98465281, "epoch": 32.06936958050152, "grad_norm": 7.25130033493042, "learning_rate": 3.1211434325207635e-06, "loss": 0.04280057, "memory(GiB)": 13.7, "step": 68420, "train_speed(iter/s)": 1.532167 }, { "acc": 0.97203369, "epoch": 32.07171314741036, "grad_norm": 2.1155848503112793, "learning_rate": 3.120425192992718e-06, "loss": 0.06602215, "memory(GiB)": 13.7, "step": 68425, "train_speed(iter/s)": 1.532169 }, { "acc": 0.9958334, "epoch": 32.07405671431919, "grad_norm": 6.741515159606934, "learning_rate": 3.1197069986554927e-06, "loss": 0.03157831, "memory(GiB)": 13.7, "step": 68430, "train_speed(iter/s)": 1.532173 }, { "acc": 0.99541664, "epoch": 32.07640028122803, "grad_norm": 3.049490213394165, "learning_rate": 3.1189888495263533e-06, "loss": 0.02550944, "memory(GiB)": 13.7, "step": 68435, "train_speed(iter/s)": 1.532171 }, { "acc": 0.99258928, "epoch": 32.07874384813687, "grad_norm": 3.0676612854003906, "learning_rate": 3.1182707456225603e-06, "loss": 0.03403718, "memory(GiB)": 13.7, "step": 68440, "train_speed(iter/s)": 1.532174 }, { "acc": 0.98353624, "epoch": 32.0810874150457, "grad_norm": 4.069998264312744, "learning_rate": 3.1175526869613753e-06, "loss": 0.07560825, "memory(GiB)": 13.7, "step": 68445, "train_speed(iter/s)": 1.532174 }, { "acc": 0.95666666, "epoch": 32.083430981954535, "grad_norm": 7.793324947357178, "learning_rate": 3.116834673560058e-06, "loss": 0.08482438, "memory(GiB)": 13.7, "step": 68450, "train_speed(iter/s)": 1.532173 }, { "acc": 0.9761632, "epoch": 32.08577454886337, "grad_norm": 2.043179750442505, "learning_rate": 3.1161167054358654e-06, "loss": 0.08486037, "memory(GiB)": 13.7, "step": 68455, "train_speed(iter/s)": 1.532178 }, { "acc": 0.99050598, "epoch": 32.088118115772204, "grad_norm": 0.00568497646600008, "learning_rate": 3.115398782606058e-06, "loss": 0.02531693, "memory(GiB)": 13.7, "step": 68460, "train_speed(iter/s)": 1.532177 }, { "acc": 0.9848115, "epoch": 32.09046168268104, "grad_norm": 1.8519673347473145, "learning_rate": 3.114680905087891e-06, "loss": 0.0591098, "memory(GiB)": 13.7, "step": 68465, "train_speed(iter/s)": 1.532172 }, { "acc": 0.97488098, "epoch": 32.09280524958987, "grad_norm": 7.1789774894714355, "learning_rate": 3.11396307289862e-06, "loss": 0.09331121, "memory(GiB)": 13.7, "step": 68470, "train_speed(iter/s)": 1.532173 }, { "acc": 0.9813942, "epoch": 32.095148816498714, "grad_norm": 2.768798828125, "learning_rate": 3.1132452860554974e-06, "loss": 0.03843792, "memory(GiB)": 13.7, "step": 68475, "train_speed(iter/s)": 1.532176 }, { "acc": 0.9811554, "epoch": 32.09749238340755, "grad_norm": 4.441550254821777, "learning_rate": 3.112527544575779e-06, "loss": 0.09493302, "memory(GiB)": 13.7, "step": 68480, "train_speed(iter/s)": 1.532176 }, { "acc": 0.97833328, "epoch": 32.09983595031638, "grad_norm": 5.415164470672607, "learning_rate": 3.1118098484767145e-06, "loss": 0.064832, "memory(GiB)": 13.7, "step": 68485, "train_speed(iter/s)": 1.532171 }, { "acc": 0.9957386, "epoch": 32.10217951722522, "grad_norm": 0.10714981704950333, "learning_rate": 3.1110921977755583e-06, "loss": 0.02160349, "memory(GiB)": 13.7, "step": 68490, "train_speed(iter/s)": 1.532176 }, { "acc": 0.98785791, "epoch": 32.10452308413405, "grad_norm": 3.0190131664276123, "learning_rate": 3.110374592489557e-06, "loss": 0.03724529, "memory(GiB)": 13.7, "step": 68495, "train_speed(iter/s)": 1.532177 }, { "acc": 0.98247032, "epoch": 32.106866651042886, "grad_norm": 4.3023457527160645, "learning_rate": 3.109657032635962e-06, "loss": 0.06050363, "memory(GiB)": 13.7, "step": 68500, "train_speed(iter/s)": 1.53218 }, { "acc": 0.98572302, "epoch": 32.10921021795172, "grad_norm": 0.04878208786249161, "learning_rate": 3.1089395182320205e-06, "loss": 0.05219537, "memory(GiB)": 13.7, "step": 68505, "train_speed(iter/s)": 1.532176 }, { "acc": 0.98354168, "epoch": 32.11155378486056, "grad_norm": 4.759126663208008, "learning_rate": 3.1082220492949784e-06, "loss": 0.04060676, "memory(GiB)": 13.7, "step": 68510, "train_speed(iter/s)": 1.532179 }, { "acc": 0.98291664, "epoch": 32.113897351769396, "grad_norm": 2.3438053131103516, "learning_rate": 3.1075046258420804e-06, "loss": 0.03904418, "memory(GiB)": 13.7, "step": 68515, "train_speed(iter/s)": 1.532183 }, { "acc": 0.9930398, "epoch": 32.11624091867823, "grad_norm": 2.975222110748291, "learning_rate": 3.1067872478905735e-06, "loss": 0.04139967, "memory(GiB)": 13.7, "step": 68520, "train_speed(iter/s)": 1.53219 }, { "acc": 0.9947916, "epoch": 32.118584485587064, "grad_norm": 3.596885919570923, "learning_rate": 3.106069915457699e-06, "loss": 0.02394692, "memory(GiB)": 13.7, "step": 68525, "train_speed(iter/s)": 1.532199 }, { "acc": 0.97354164, "epoch": 32.1209280524959, "grad_norm": 6.024199485778809, "learning_rate": 3.1053526285607e-06, "loss": 0.06004573, "memory(GiB)": 13.7, "step": 68530, "train_speed(iter/s)": 1.532199 }, { "acc": 0.98654766, "epoch": 32.12327161940473, "grad_norm": 0.0016811491223052144, "learning_rate": 3.1046353872168184e-06, "loss": 0.03778934, "memory(GiB)": 13.7, "step": 68535, "train_speed(iter/s)": 1.532204 }, { "acc": 0.98738098, "epoch": 32.12561518631357, "grad_norm": 3.880143642425537, "learning_rate": 3.1039181914432935e-06, "loss": 0.03850021, "memory(GiB)": 13.7, "step": 68540, "train_speed(iter/s)": 1.53221 }, { "acc": 0.99187498, "epoch": 32.1279587532224, "grad_norm": 2.907987117767334, "learning_rate": 3.1032010412573642e-06, "loss": 0.02923789, "memory(GiB)": 13.7, "step": 68545, "train_speed(iter/s)": 1.532218 }, { "acc": 0.97778845, "epoch": 32.13030232013124, "grad_norm": 6.671728610992432, "learning_rate": 3.1024839366762687e-06, "loss": 0.07261334, "memory(GiB)": 13.7, "step": 68550, "train_speed(iter/s)": 1.532225 }, { "acc": 0.99354172, "epoch": 32.13264588704008, "grad_norm": 3.500804901123047, "learning_rate": 3.1017668777172456e-06, "loss": 0.04073417, "memory(GiB)": 13.7, "step": 68555, "train_speed(iter/s)": 1.532234 }, { "acc": 0.9875, "epoch": 32.13498945394891, "grad_norm": 6.260157585144043, "learning_rate": 3.1010498643975275e-06, "loss": 0.06982914, "memory(GiB)": 13.7, "step": 68560, "train_speed(iter/s)": 1.532236 }, { "acc": 0.9864584, "epoch": 32.137333020857746, "grad_norm": 3.288365364074707, "learning_rate": 3.1003328967343497e-06, "loss": 0.05508168, "memory(GiB)": 13.7, "step": 68565, "train_speed(iter/s)": 1.532244 }, { "acc": 0.9895834, "epoch": 32.13967658776658, "grad_norm": 2.7385127544403076, "learning_rate": 3.099615974744948e-06, "loss": 0.02654955, "memory(GiB)": 13.7, "step": 68570, "train_speed(iter/s)": 1.532249 }, { "acc": 0.97444439, "epoch": 32.142020154675414, "grad_norm": 2.338928699493408, "learning_rate": 3.0988990984465515e-06, "loss": 0.06497695, "memory(GiB)": 13.7, "step": 68575, "train_speed(iter/s)": 1.532253 }, { "acc": 0.97121105, "epoch": 32.14436372158425, "grad_norm": 5.909012317657471, "learning_rate": 3.0981822678563956e-06, "loss": 0.04854873, "memory(GiB)": 13.7, "step": 68580, "train_speed(iter/s)": 1.532255 }, { "acc": 0.98363094, "epoch": 32.14670728849309, "grad_norm": 2.2907416820526123, "learning_rate": 3.0974654829917074e-06, "loss": 0.03609726, "memory(GiB)": 13.7, "step": 68585, "train_speed(iter/s)": 1.53226 }, { "acc": 0.98178024, "epoch": 32.149050855401924, "grad_norm": 4.554051876068115, "learning_rate": 3.0967487438697163e-06, "loss": 0.04667979, "memory(GiB)": 13.7, "step": 68590, "train_speed(iter/s)": 1.532261 }, { "acc": 0.99027786, "epoch": 32.15139442231076, "grad_norm": 3.554288625717163, "learning_rate": 3.0960320505076536e-06, "loss": 0.03559066, "memory(GiB)": 13.7, "step": 68595, "train_speed(iter/s)": 1.532265 }, { "acc": 0.99192715, "epoch": 32.15373798921959, "grad_norm": 4.219694137573242, "learning_rate": 3.0953154029227424e-06, "loss": 0.0244362, "memory(GiB)": 13.7, "step": 68600, "train_speed(iter/s)": 1.532268 }, { "acc": 0.98874998, "epoch": 32.15608155612843, "grad_norm": 0.006437778472900391, "learning_rate": 3.094598801132208e-06, "loss": 0.02930423, "memory(GiB)": 13.7, "step": 68605, "train_speed(iter/s)": 1.532271 }, { "acc": 0.990625, "epoch": 32.15842512303726, "grad_norm": 4.101722717285156, "learning_rate": 3.093882245153279e-06, "loss": 0.02451907, "memory(GiB)": 13.7, "step": 68610, "train_speed(iter/s)": 1.532275 }, { "acc": 0.97550592, "epoch": 32.160768689946096, "grad_norm": 5.9230732917785645, "learning_rate": 3.093165735003176e-06, "loss": 0.06561508, "memory(GiB)": 13.7, "step": 68615, "train_speed(iter/s)": 1.53228 }, { "acc": 0.96204901, "epoch": 32.16311225685493, "grad_norm": 7.9312920570373535, "learning_rate": 3.0924492706991232e-06, "loss": 0.09965515, "memory(GiB)": 13.7, "step": 68620, "train_speed(iter/s)": 1.532285 }, { "acc": 0.99152966, "epoch": 32.16545582376377, "grad_norm": 1.9432778358459473, "learning_rate": 3.0917328522583422e-06, "loss": 0.04066909, "memory(GiB)": 13.7, "step": 68625, "train_speed(iter/s)": 1.532291 }, { "acc": 0.97749996, "epoch": 32.167799390672606, "grad_norm": 1.9912714958190918, "learning_rate": 3.091016479698051e-06, "loss": 0.0466585, "memory(GiB)": 13.7, "step": 68630, "train_speed(iter/s)": 1.532298 }, { "acc": 0.98354168, "epoch": 32.17014295758144, "grad_norm": 0.05732268467545509, "learning_rate": 3.0903001530354732e-06, "loss": 0.04836747, "memory(GiB)": 13.7, "step": 68635, "train_speed(iter/s)": 1.532296 }, { "acc": 0.98708334, "epoch": 32.172486524490274, "grad_norm": 5.42936897277832, "learning_rate": 3.089583872287823e-06, "loss": 0.05926771, "memory(GiB)": 13.7, "step": 68640, "train_speed(iter/s)": 1.532305 }, { "acc": 0.98315477, "epoch": 32.17483009139911, "grad_norm": 1.663338541984558, "learning_rate": 3.0888676374723187e-06, "loss": 0.04508335, "memory(GiB)": 13.7, "step": 68645, "train_speed(iter/s)": 1.532311 }, { "acc": 0.98898811, "epoch": 32.17717365830794, "grad_norm": 4.9783148765563965, "learning_rate": 3.088151448606175e-06, "loss": 0.03571818, "memory(GiB)": 13.7, "step": 68650, "train_speed(iter/s)": 1.532311 }, { "acc": 0.98383923, "epoch": 32.17951722521678, "grad_norm": 4.362336158752441, "learning_rate": 3.087435305706609e-06, "loss": 0.05530322, "memory(GiB)": 13.7, "step": 68655, "train_speed(iter/s)": 1.532323 }, { "acc": 0.9833333, "epoch": 32.18186079212561, "grad_norm": 4.226898818160407e-05, "learning_rate": 3.0867192087908317e-06, "loss": 0.05959505, "memory(GiB)": 13.7, "step": 68660, "train_speed(iter/s)": 1.532322 }, { "acc": 0.9936553, "epoch": 32.18420435903445, "grad_norm": 3.582557201385498, "learning_rate": 3.086003157876059e-06, "loss": 0.02596144, "memory(GiB)": 13.7, "step": 68665, "train_speed(iter/s)": 1.532328 }, { "acc": 0.98592262, "epoch": 32.18654792594329, "grad_norm": 4.799172878265381, "learning_rate": 3.0852871529795008e-06, "loss": 0.02315887, "memory(GiB)": 13.7, "step": 68670, "train_speed(iter/s)": 1.532322 }, { "acc": 0.98354168, "epoch": 32.18889149285212, "grad_norm": 2.596644878387451, "learning_rate": 3.0845711941183665e-06, "loss": 0.03580826, "memory(GiB)": 13.7, "step": 68675, "train_speed(iter/s)": 1.532321 }, { "acc": 0.98729172, "epoch": 32.191235059760956, "grad_norm": 3.5433919429779053, "learning_rate": 3.083855281309868e-06, "loss": 0.03117702, "memory(GiB)": 13.7, "step": 68680, "train_speed(iter/s)": 1.532327 }, { "acc": 0.98874998, "epoch": 32.19357862666979, "grad_norm": 1.9383130073547363, "learning_rate": 3.0831394145712113e-06, "loss": 0.04777669, "memory(GiB)": 13.7, "step": 68685, "train_speed(iter/s)": 1.532326 }, { "acc": 0.97875004, "epoch": 32.195922193578625, "grad_norm": 6.181979179382324, "learning_rate": 3.0824235939196023e-06, "loss": 0.05146495, "memory(GiB)": 13.7, "step": 68690, "train_speed(iter/s)": 1.532325 }, { "acc": 0.99258928, "epoch": 32.19826576048746, "grad_norm": 2.4363534450531006, "learning_rate": 3.0817078193722512e-06, "loss": 0.0356212, "memory(GiB)": 13.7, "step": 68695, "train_speed(iter/s)": 1.532329 }, { "acc": 0.990625, "epoch": 32.2006093273963, "grad_norm": 0.015348690561950207, "learning_rate": 3.080992090946359e-06, "loss": 0.03498811, "memory(GiB)": 13.7, "step": 68700, "train_speed(iter/s)": 1.532336 }, { "acc": 0.97321424, "epoch": 32.202952894305135, "grad_norm": 2.4827022552490234, "learning_rate": 3.080276408659131e-06, "loss": 0.06683128, "memory(GiB)": 13.7, "step": 68705, "train_speed(iter/s)": 1.532339 }, { "acc": 0.98392859, "epoch": 32.20529646121397, "grad_norm": 6.091379165649414, "learning_rate": 3.0795607725277694e-06, "loss": 0.0487037, "memory(GiB)": 13.7, "step": 68710, "train_speed(iter/s)": 1.532346 }, { "acc": 0.99375, "epoch": 32.2076400281228, "grad_norm": 0.0864124670624733, "learning_rate": 3.0788451825694766e-06, "loss": 0.02835769, "memory(GiB)": 13.7, "step": 68715, "train_speed(iter/s)": 1.532345 }, { "acc": 0.99520836, "epoch": 32.20998359503164, "grad_norm": 0.08102573454380035, "learning_rate": 3.078129638801453e-06, "loss": 0.01860716, "memory(GiB)": 13.7, "step": 68720, "train_speed(iter/s)": 1.532349 }, { "acc": 0.96906185, "epoch": 32.21232716194047, "grad_norm": 2.7685177326202393, "learning_rate": 3.077414141240897e-06, "loss": 0.10234416, "memory(GiB)": 13.7, "step": 68725, "train_speed(iter/s)": 1.532357 }, { "acc": 0.98440475, "epoch": 32.214670728849306, "grad_norm": 6.182498455047607, "learning_rate": 3.076698689905007e-06, "loss": 0.0559294, "memory(GiB)": 13.7, "step": 68730, "train_speed(iter/s)": 1.532362 }, { "acc": 0.98442707, "epoch": 32.21701429575814, "grad_norm": 5.824293613433838, "learning_rate": 3.0759832848109796e-06, "loss": 0.03781488, "memory(GiB)": 13.7, "step": 68735, "train_speed(iter/s)": 1.532363 }, { "acc": 0.98500004, "epoch": 32.21935786266698, "grad_norm": 5.169734954833984, "learning_rate": 3.075267925976013e-06, "loss": 0.04157262, "memory(GiB)": 13.7, "step": 68740, "train_speed(iter/s)": 1.532368 }, { "acc": 0.98743057, "epoch": 32.221701429575816, "grad_norm": 2.0177431106567383, "learning_rate": 3.0745526134172994e-06, "loss": 0.02470741, "memory(GiB)": 13.7, "step": 68745, "train_speed(iter/s)": 1.532373 }, { "acc": 0.9875, "epoch": 32.22404499648465, "grad_norm": 0.12394493818283081, "learning_rate": 3.0738373471520337e-06, "loss": 0.02985073, "memory(GiB)": 13.7, "step": 68750, "train_speed(iter/s)": 1.532374 }, { "acc": 0.99136362, "epoch": 32.226388563393485, "grad_norm": 3.3300392627716064, "learning_rate": 3.0731221271974093e-06, "loss": 0.03241706, "memory(GiB)": 13.7, "step": 68755, "train_speed(iter/s)": 1.53238 }, { "acc": 0.9838541, "epoch": 32.22873213030232, "grad_norm": 1.892901062965393, "learning_rate": 3.0724069535706157e-06, "loss": 0.04145278, "memory(GiB)": 13.7, "step": 68760, "train_speed(iter/s)": 1.532385 }, { "acc": 0.98916664, "epoch": 32.23107569721115, "grad_norm": 1.675288438796997, "learning_rate": 3.0716918262888475e-06, "loss": 0.02873584, "memory(GiB)": 13.7, "step": 68765, "train_speed(iter/s)": 1.532396 }, { "acc": 0.97375002, "epoch": 32.23341926411999, "grad_norm": 6.118265151977539, "learning_rate": 3.070976745369289e-06, "loss": 0.06187606, "memory(GiB)": 13.7, "step": 68770, "train_speed(iter/s)": 1.532399 }, { "acc": 0.98363094, "epoch": 32.23576283102883, "grad_norm": 1.573757529258728, "learning_rate": 3.0702617108291316e-06, "loss": 0.05829884, "memory(GiB)": 13.7, "step": 68775, "train_speed(iter/s)": 1.532404 }, { "acc": 0.98619041, "epoch": 32.23810639793766, "grad_norm": 5.246403694152832, "learning_rate": 3.0695467226855604e-06, "loss": 0.02888803, "memory(GiB)": 13.7, "step": 68780, "train_speed(iter/s)": 1.532398 }, { "acc": 0.9874855, "epoch": 32.2404499648465, "grad_norm": 1.1312198638916016, "learning_rate": 3.0688317809557626e-06, "loss": 0.02536495, "memory(GiB)": 13.7, "step": 68785, "train_speed(iter/s)": 1.532406 }, { "acc": 0.97928028, "epoch": 32.24279353175533, "grad_norm": 0.0060220579616725445, "learning_rate": 3.0681168856569232e-06, "loss": 0.06571867, "memory(GiB)": 13.7, "step": 68790, "train_speed(iter/s)": 1.532411 }, { "acc": 0.990625, "epoch": 32.245137098664166, "grad_norm": 2.6986582279205322, "learning_rate": 3.067402036806224e-06, "loss": 0.02526867, "memory(GiB)": 13.7, "step": 68795, "train_speed(iter/s)": 1.532403 }, { "acc": 0.9916667, "epoch": 32.247480665573, "grad_norm": 0.023606732487678528, "learning_rate": 3.0666872344208503e-06, "loss": 0.02446931, "memory(GiB)": 13.7, "step": 68800, "train_speed(iter/s)": 1.532405 }, { "acc": 0.9927083, "epoch": 32.249824232481835, "grad_norm": 1.1370675563812256, "learning_rate": 3.0659724785179834e-06, "loss": 0.03739117, "memory(GiB)": 13.7, "step": 68805, "train_speed(iter/s)": 1.532405 }, { "acc": 0.98663692, "epoch": 32.25216779939067, "grad_norm": 0.9420175552368164, "learning_rate": 3.0652577691148015e-06, "loss": 0.03649254, "memory(GiB)": 13.7, "step": 68810, "train_speed(iter/s)": 1.532409 }, { "acc": 0.98217268, "epoch": 32.25451136629951, "grad_norm": 2.366306781768799, "learning_rate": 3.0645431062284854e-06, "loss": 0.05681198, "memory(GiB)": 13.7, "step": 68815, "train_speed(iter/s)": 1.532411 }, { "acc": 0.9890625, "epoch": 32.256854933208345, "grad_norm": 4.391768455505371, "learning_rate": 3.0638284898762133e-06, "loss": 0.03931279, "memory(GiB)": 13.7, "step": 68820, "train_speed(iter/s)": 1.532408 }, { "acc": 0.9775568, "epoch": 32.25919850011718, "grad_norm": 5.417056560516357, "learning_rate": 3.06311392007516e-06, "loss": 0.09196024, "memory(GiB)": 13.7, "step": 68825, "train_speed(iter/s)": 1.53241 }, { "acc": 0.9833333, "epoch": 32.261542067026014, "grad_norm": 0.0028655969072133303, "learning_rate": 3.0623993968425047e-06, "loss": 0.02863074, "memory(GiB)": 13.7, "step": 68830, "train_speed(iter/s)": 1.532408 }, { "acc": 0.9916667, "epoch": 32.26388563393485, "grad_norm": 4.018398761749268, "learning_rate": 3.0616849201954196e-06, "loss": 0.05039767, "memory(GiB)": 13.7, "step": 68835, "train_speed(iter/s)": 1.532413 }, { "acc": 0.98438644, "epoch": 32.26622920084368, "grad_norm": 3.3488080501556396, "learning_rate": 3.0609704901510813e-06, "loss": 0.06306776, "memory(GiB)": 13.7, "step": 68840, "train_speed(iter/s)": 1.532413 }, { "acc": 0.96735992, "epoch": 32.26857276775252, "grad_norm": 3.116028070449829, "learning_rate": 3.0602561067266602e-06, "loss": 0.0795712, "memory(GiB)": 13.7, "step": 68845, "train_speed(iter/s)": 1.532417 }, { "acc": 0.98475695, "epoch": 32.27091633466136, "grad_norm": 0.915022611618042, "learning_rate": 3.059541769939328e-06, "loss": 0.0436376, "memory(GiB)": 13.7, "step": 68850, "train_speed(iter/s)": 1.532419 }, { "acc": 0.98395824, "epoch": 32.27325990157019, "grad_norm": 3.8969645500183105, "learning_rate": 3.058827479806254e-06, "loss": 0.03205473, "memory(GiB)": 13.7, "step": 68855, "train_speed(iter/s)": 1.532419 }, { "acc": 0.99133015, "epoch": 32.27560346847903, "grad_norm": 2.71250581741333, "learning_rate": 3.0581132363446096e-06, "loss": 0.0280333, "memory(GiB)": 13.7, "step": 68860, "train_speed(iter/s)": 1.53243 }, { "acc": 0.991572, "epoch": 32.27794703538786, "grad_norm": 0.00048721785424277186, "learning_rate": 3.0573990395715606e-06, "loss": 0.02341035, "memory(GiB)": 13.7, "step": 68865, "train_speed(iter/s)": 1.532434 }, { "acc": 0.990625, "epoch": 32.280290602296695, "grad_norm": 0.6785385608673096, "learning_rate": 3.056684889504277e-06, "loss": 0.02892369, "memory(GiB)": 13.7, "step": 68870, "train_speed(iter/s)": 1.532438 }, { "acc": 0.98383923, "epoch": 32.28263416920553, "grad_norm": 5.317858695983887, "learning_rate": 3.0559707861599213e-06, "loss": 0.06311826, "memory(GiB)": 13.7, "step": 68875, "train_speed(iter/s)": 1.532437 }, { "acc": 0.98834324, "epoch": 32.284977736114364, "grad_norm": 3.20678448677063, "learning_rate": 3.0552567295556594e-06, "loss": 0.07029681, "memory(GiB)": 13.7, "step": 68880, "train_speed(iter/s)": 1.53244 }, { "acc": 0.99321423, "epoch": 32.2873213030232, "grad_norm": 3.229562759399414, "learning_rate": 3.054542719708656e-06, "loss": 0.02631391, "memory(GiB)": 13.7, "step": 68885, "train_speed(iter/s)": 1.532447 }, { "acc": 0.98228626, "epoch": 32.28966486993204, "grad_norm": 1.005987286567688, "learning_rate": 3.0538287566360735e-06, "loss": 0.08419771, "memory(GiB)": 13.7, "step": 68890, "train_speed(iter/s)": 1.532457 }, { "acc": 0.99548607, "epoch": 32.292008436840874, "grad_norm": 1.6103847026824951, "learning_rate": 3.0531148403550716e-06, "loss": 0.01518705, "memory(GiB)": 13.7, "step": 68895, "train_speed(iter/s)": 1.532455 }, { "acc": 0.98708334, "epoch": 32.29435200374971, "grad_norm": 2.600252866744995, "learning_rate": 3.0524009708828096e-06, "loss": 0.02930516, "memory(GiB)": 13.7, "step": 68900, "train_speed(iter/s)": 1.532455 }, { "acc": 0.97383928, "epoch": 32.29669557065854, "grad_norm": 9.255823135375977, "learning_rate": 3.05168714823645e-06, "loss": 0.07438521, "memory(GiB)": 13.7, "step": 68905, "train_speed(iter/s)": 1.532459 }, { "acc": 0.97458334, "epoch": 32.29903913756738, "grad_norm": 2.40022873878479, "learning_rate": 3.050973372433148e-06, "loss": 0.05951185, "memory(GiB)": 13.7, "step": 68910, "train_speed(iter/s)": 1.532462 }, { "acc": 0.97922344, "epoch": 32.30138270447621, "grad_norm": 2.359823703765869, "learning_rate": 3.0502596434900616e-06, "loss": 0.04769447, "memory(GiB)": 13.7, "step": 68915, "train_speed(iter/s)": 1.532463 }, { "acc": 0.9837595, "epoch": 32.303726271385045, "grad_norm": 2.48903226852417, "learning_rate": 3.0495459614243474e-06, "loss": 0.04318283, "memory(GiB)": 13.7, "step": 68920, "train_speed(iter/s)": 1.532467 }, { "acc": 0.98083334, "epoch": 32.30606983829389, "grad_norm": 1.4294824600219727, "learning_rate": 3.048832326253158e-06, "loss": 0.10504254, "memory(GiB)": 13.7, "step": 68925, "train_speed(iter/s)": 1.532466 }, { "acc": 0.97986603, "epoch": 32.30841340520272, "grad_norm": 4.032412052154541, "learning_rate": 3.048118737993649e-06, "loss": 0.06130513, "memory(GiB)": 13.7, "step": 68930, "train_speed(iter/s)": 1.53247 }, { "acc": 0.98258934, "epoch": 32.310756972111555, "grad_norm": 0.0024695941247045994, "learning_rate": 3.04740519666297e-06, "loss": 0.05509401, "memory(GiB)": 13.7, "step": 68935, "train_speed(iter/s)": 1.532472 }, { "acc": 0.98217258, "epoch": 32.31310053902039, "grad_norm": 3.54403018951416, "learning_rate": 3.0466917022782738e-06, "loss": 0.04861271, "memory(GiB)": 13.7, "step": 68940, "train_speed(iter/s)": 1.532472 }, { "acc": 0.98723221, "epoch": 32.315444105929224, "grad_norm": 1.1443196535110474, "learning_rate": 3.04597825485671e-06, "loss": 0.04758374, "memory(GiB)": 13.7, "step": 68945, "train_speed(iter/s)": 1.532478 }, { "acc": 0.97581844, "epoch": 32.31778767283806, "grad_norm": 4.006025791168213, "learning_rate": 3.04526485441543e-06, "loss": 0.05122584, "memory(GiB)": 13.7, "step": 68950, "train_speed(iter/s)": 1.53247 }, { "acc": 0.97527781, "epoch": 32.32013123974689, "grad_norm": 2.039853811264038, "learning_rate": 3.044551500971577e-06, "loss": 0.0391257, "memory(GiB)": 13.7, "step": 68955, "train_speed(iter/s)": 1.532471 }, { "acc": 0.9958333, "epoch": 32.32247480665573, "grad_norm": 0.0020502833649516106, "learning_rate": 3.0438381945423012e-06, "loss": 0.02277479, "memory(GiB)": 13.7, "step": 68960, "train_speed(iter/s)": 1.532472 }, { "acc": 0.96999998, "epoch": 32.32481837356457, "grad_norm": 4.548119068145752, "learning_rate": 3.0431249351447474e-06, "loss": 0.07271794, "memory(GiB)": 13.7, "step": 68965, "train_speed(iter/s)": 1.53248 }, { "acc": 0.98328371, "epoch": 32.3271619404734, "grad_norm": 0.02367180772125721, "learning_rate": 3.0424117227960588e-06, "loss": 0.04843348, "memory(GiB)": 13.7, "step": 68970, "train_speed(iter/s)": 1.532482 }, { "acc": 0.98923607, "epoch": 32.32950550738224, "grad_norm": 3.0748445987701416, "learning_rate": 3.0416985575133805e-06, "loss": 0.0409668, "memory(GiB)": 13.7, "step": 68975, "train_speed(iter/s)": 1.532483 }, { "acc": 0.99187508, "epoch": 32.33184907429107, "grad_norm": 3.672683000564575, "learning_rate": 3.0409854393138537e-06, "loss": 0.0202193, "memory(GiB)": 13.7, "step": 68980, "train_speed(iter/s)": 1.532487 }, { "acc": 0.97510414, "epoch": 32.334192641199905, "grad_norm": 8.547748565673828, "learning_rate": 3.040272368214618e-06, "loss": 0.04856913, "memory(GiB)": 13.7, "step": 68985, "train_speed(iter/s)": 1.532483 }, { "acc": 0.98604164, "epoch": 32.33653620810874, "grad_norm": 4.265880107879639, "learning_rate": 3.039559344232816e-06, "loss": 0.0369475, "memory(GiB)": 13.7, "step": 68990, "train_speed(iter/s)": 1.532488 }, { "acc": 0.984375, "epoch": 32.338879775017574, "grad_norm": 4.68506383895874, "learning_rate": 3.0388463673855844e-06, "loss": 0.04326981, "memory(GiB)": 13.7, "step": 68995, "train_speed(iter/s)": 1.532487 }, { "acc": 0.99375, "epoch": 32.341223341926415, "grad_norm": 2.2552566528320312, "learning_rate": 3.038133437690062e-06, "loss": 0.02163232, "memory(GiB)": 13.7, "step": 69000, "train_speed(iter/s)": 1.532491 }, { "acc": 0.97008934, "epoch": 32.34356690883525, "grad_norm": 5.955326557159424, "learning_rate": 3.037420555163385e-06, "loss": 0.08514939, "memory(GiB)": 13.7, "step": 69005, "train_speed(iter/s)": 1.53249 }, { "acc": 0.9905303, "epoch": 32.345910475744084, "grad_norm": 1.5012216567993164, "learning_rate": 3.0367077198226873e-06, "loss": 0.02487363, "memory(GiB)": 13.7, "step": 69010, "train_speed(iter/s)": 1.53249 }, { "acc": 0.98520832, "epoch": 32.34825404265292, "grad_norm": 4.04207181930542, "learning_rate": 3.035994931685108e-06, "loss": 0.03173113, "memory(GiB)": 13.7, "step": 69015, "train_speed(iter/s)": 1.532492 }, { "acc": 0.9860714, "epoch": 32.35059760956175, "grad_norm": 2.0225141048431396, "learning_rate": 3.0352821907677743e-06, "loss": 0.0366679, "memory(GiB)": 13.7, "step": 69020, "train_speed(iter/s)": 1.532488 }, { "acc": 0.97842264, "epoch": 32.35294117647059, "grad_norm": 3.8987529277801514, "learning_rate": 3.03456949708782e-06, "loss": 0.08370413, "memory(GiB)": 13.7, "step": 69025, "train_speed(iter/s)": 1.532489 }, { "acc": 0.98342266, "epoch": 32.35528474337942, "grad_norm": 4.0175676345825195, "learning_rate": 3.0338568506623766e-06, "loss": 0.03553222, "memory(GiB)": 13.7, "step": 69030, "train_speed(iter/s)": 1.532492 }, { "acc": 0.9885417, "epoch": 32.357628310288256, "grad_norm": 2.6600751876831055, "learning_rate": 3.0331442515085733e-06, "loss": 0.02584289, "memory(GiB)": 13.7, "step": 69035, "train_speed(iter/s)": 1.532494 }, { "acc": 0.99083338, "epoch": 32.3599718771971, "grad_norm": 0.24224461615085602, "learning_rate": 3.0324316996435403e-06, "loss": 0.06800565, "memory(GiB)": 13.7, "step": 69040, "train_speed(iter/s)": 1.532494 }, { "acc": 0.97821426, "epoch": 32.36231544410593, "grad_norm": 3.637362241744995, "learning_rate": 3.0317191950844026e-06, "loss": 0.04936837, "memory(GiB)": 13.7, "step": 69045, "train_speed(iter/s)": 1.532502 }, { "acc": 0.98562498, "epoch": 32.364659011014766, "grad_norm": 4.908435344696045, "learning_rate": 3.031006737848288e-06, "loss": 0.04039252, "memory(GiB)": 13.7, "step": 69050, "train_speed(iter/s)": 1.532501 }, { "acc": 0.99499998, "epoch": 32.3670025779236, "grad_norm": 1.9815850257873535, "learning_rate": 3.0302943279523224e-06, "loss": 0.06208838, "memory(GiB)": 13.7, "step": 69055, "train_speed(iter/s)": 1.532506 }, { "acc": 0.98258924, "epoch": 32.369346144832434, "grad_norm": 6.461029529571533, "learning_rate": 3.0295819654136255e-06, "loss": 0.04570737, "memory(GiB)": 13.7, "step": 69060, "train_speed(iter/s)": 1.532501 }, { "acc": 0.98770828, "epoch": 32.37168971174127, "grad_norm": 0.005483618471771479, "learning_rate": 3.028869650249325e-06, "loss": 0.06008999, "memory(GiB)": 13.7, "step": 69065, "train_speed(iter/s)": 1.532502 }, { "acc": 0.98062496, "epoch": 32.3740332786501, "grad_norm": 0.9690185785293579, "learning_rate": 3.028157382476541e-06, "loss": 0.02453847, "memory(GiB)": 13.7, "step": 69070, "train_speed(iter/s)": 1.532502 }, { "acc": 0.98812504, "epoch": 32.37637684555894, "grad_norm": 2.1561672687530518, "learning_rate": 3.0274451621123925e-06, "loss": 0.02420923, "memory(GiB)": 13.7, "step": 69075, "train_speed(iter/s)": 1.532503 }, { "acc": 0.9875, "epoch": 32.37872041246778, "grad_norm": 3.1637725830078125, "learning_rate": 3.0267329891740016e-06, "loss": 0.01966233, "memory(GiB)": 13.7, "step": 69080, "train_speed(iter/s)": 1.532507 }, { "acc": 0.98697405, "epoch": 32.38106397937661, "grad_norm": 4.073812484741211, "learning_rate": 3.0260208636784843e-06, "loss": 0.05785621, "memory(GiB)": 13.7, "step": 69085, "train_speed(iter/s)": 1.532505 }, { "acc": 0.98895836, "epoch": 32.38340754628545, "grad_norm": 2.0959277153015137, "learning_rate": 3.0253087856429603e-06, "loss": 0.0266046, "memory(GiB)": 13.7, "step": 69090, "train_speed(iter/s)": 1.532508 }, { "acc": 0.98562508, "epoch": 32.38575111319428, "grad_norm": 4.750718593597412, "learning_rate": 3.0245967550845428e-06, "loss": 0.04542588, "memory(GiB)": 13.7, "step": 69095, "train_speed(iter/s)": 1.53251 }, { "acc": 0.99020834, "epoch": 32.388094680103116, "grad_norm": 0.0016650696052238345, "learning_rate": 3.0238847720203497e-06, "loss": 0.02543966, "memory(GiB)": 13.7, "step": 69100, "train_speed(iter/s)": 1.53251 }, { "acc": 0.97927084, "epoch": 32.39043824701195, "grad_norm": 5.015448093414307, "learning_rate": 3.0231728364674913e-06, "loss": 0.07884231, "memory(GiB)": 13.7, "step": 69105, "train_speed(iter/s)": 1.53251 }, { "acc": 0.98864584, "epoch": 32.392781813920784, "grad_norm": 5.586566925048828, "learning_rate": 3.022460948443082e-06, "loss": 0.03153672, "memory(GiB)": 13.7, "step": 69110, "train_speed(iter/s)": 1.532511 }, { "acc": 0.98708334, "epoch": 32.395125380829626, "grad_norm": 4.641688346862793, "learning_rate": 3.0217491079642324e-06, "loss": 0.02466755, "memory(GiB)": 13.7, "step": 69115, "train_speed(iter/s)": 1.532513 }, { "acc": 0.99125004, "epoch": 32.39746894773846, "grad_norm": 0.7846806049346924, "learning_rate": 3.0210373150480555e-06, "loss": 0.02085995, "memory(GiB)": 13.7, "step": 69120, "train_speed(iter/s)": 1.532513 }, { "acc": 0.98915176, "epoch": 32.399812514647294, "grad_norm": 1.6884833574295044, "learning_rate": 3.020325569711657e-06, "loss": 0.03186304, "memory(GiB)": 13.7, "step": 69125, "train_speed(iter/s)": 1.53252 }, { "acc": 0.97969704, "epoch": 32.40215608155613, "grad_norm": 0.0036061692517250776, "learning_rate": 3.0196138719721464e-06, "loss": 0.07065974, "memory(GiB)": 13.7, "step": 69130, "train_speed(iter/s)": 1.532524 }, { "acc": 0.99333334, "epoch": 32.40449964846496, "grad_norm": 2.1620495319366455, "learning_rate": 3.018902221846632e-06, "loss": 0.02809432, "memory(GiB)": 13.7, "step": 69135, "train_speed(iter/s)": 1.532523 }, { "acc": 0.98208332, "epoch": 32.4068432153738, "grad_norm": 2.156491994857788, "learning_rate": 3.018190619352218e-06, "loss": 0.04017016, "memory(GiB)": 13.7, "step": 69140, "train_speed(iter/s)": 1.53252 }, { "acc": 0.97711306, "epoch": 32.40918678228263, "grad_norm": 6.220455646514893, "learning_rate": 3.0174790645060088e-06, "loss": 0.06669555, "memory(GiB)": 13.7, "step": 69145, "train_speed(iter/s)": 1.532522 }, { "acc": 0.9833333, "epoch": 32.411530349191466, "grad_norm": 4.907601356506348, "learning_rate": 3.016767557325107e-06, "loss": 0.03364769, "memory(GiB)": 13.7, "step": 69150, "train_speed(iter/s)": 1.53252 }, { "acc": 0.99125004, "epoch": 32.41387391610031, "grad_norm": 0.8655048608779907, "learning_rate": 3.0160560978266166e-06, "loss": 0.03395419, "memory(GiB)": 13.7, "step": 69155, "train_speed(iter/s)": 1.532522 }, { "acc": 0.98445511, "epoch": 32.41621748300914, "grad_norm": 4.1027021408081055, "learning_rate": 3.0153446860276385e-06, "loss": 0.05849102, "memory(GiB)": 13.7, "step": 69160, "train_speed(iter/s)": 1.532525 }, { "acc": 0.9697917, "epoch": 32.418561049917976, "grad_norm": 6.81882381439209, "learning_rate": 3.0146333219452722e-06, "loss": 0.07505373, "memory(GiB)": 13.7, "step": 69165, "train_speed(iter/s)": 1.532526 }, { "acc": 0.98500004, "epoch": 32.42090461682681, "grad_norm": 3.5312373638153076, "learning_rate": 3.0139220055966167e-06, "loss": 0.0331293, "memory(GiB)": 13.7, "step": 69170, "train_speed(iter/s)": 1.532525 }, { "acc": 0.98338737, "epoch": 32.423248183735645, "grad_norm": 2.5593550205230713, "learning_rate": 3.0132107369987694e-06, "loss": 0.03197806, "memory(GiB)": 13.7, "step": 69175, "train_speed(iter/s)": 1.532527 }, { "acc": 0.98291664, "epoch": 32.42559175064448, "grad_norm": 4.832849025726318, "learning_rate": 3.0124995161688292e-06, "loss": 0.03836878, "memory(GiB)": 13.7, "step": 69180, "train_speed(iter/s)": 1.53253 }, { "acc": 0.99122477, "epoch": 32.42793531755331, "grad_norm": 3.590625047683716, "learning_rate": 3.0117883431238886e-06, "loss": 0.01876301, "memory(GiB)": 13.7, "step": 69185, "train_speed(iter/s)": 1.532535 }, { "acc": 1.0, "epoch": 32.430278884462155, "grad_norm": 3.3515467643737793, "learning_rate": 3.0110772178810414e-06, "loss": 0.02546775, "memory(GiB)": 13.7, "step": 69190, "train_speed(iter/s)": 1.53254 }, { "acc": 0.98321428, "epoch": 32.43262245137099, "grad_norm": 0.7605581879615784, "learning_rate": 3.010366140457384e-06, "loss": 0.04723583, "memory(GiB)": 13.7, "step": 69195, "train_speed(iter/s)": 1.532544 }, { "acc": 0.971875, "epoch": 32.43496601827982, "grad_norm": 6.682634353637695, "learning_rate": 3.009655110870005e-06, "loss": 0.07893562, "memory(GiB)": 13.7, "step": 69200, "train_speed(iter/s)": 1.532553 }, { "acc": 0.9958334, "epoch": 32.43730958518866, "grad_norm": 1.2592127323150635, "learning_rate": 3.008944129135997e-06, "loss": 0.01464332, "memory(GiB)": 13.7, "step": 69205, "train_speed(iter/s)": 1.532557 }, { "acc": 0.98206844, "epoch": 32.43965315209749, "grad_norm": 3.3405404090881348, "learning_rate": 3.00823319527245e-06, "loss": 0.03529828, "memory(GiB)": 13.7, "step": 69210, "train_speed(iter/s)": 1.532558 }, { "acc": 0.98973217, "epoch": 32.441996719006326, "grad_norm": 0.8033279776573181, "learning_rate": 3.0075223092964513e-06, "loss": 0.03297115, "memory(GiB)": 13.7, "step": 69215, "train_speed(iter/s)": 1.532558 }, { "acc": 0.978125, "epoch": 32.44434028591516, "grad_norm": 5.940443515777588, "learning_rate": 3.0068114712250884e-06, "loss": 0.04984165, "memory(GiB)": 13.7, "step": 69220, "train_speed(iter/s)": 1.532566 }, { "acc": 0.96854172, "epoch": 32.446683852823995, "grad_norm": 5.822212219238281, "learning_rate": 3.00610068107545e-06, "loss": 0.07535836, "memory(GiB)": 13.7, "step": 69225, "train_speed(iter/s)": 1.532569 }, { "acc": 0.98500004, "epoch": 32.449027419732836, "grad_norm": 4.957740783691406, "learning_rate": 3.005389938864618e-06, "loss": 0.04841622, "memory(GiB)": 13.7, "step": 69230, "train_speed(iter/s)": 1.53257 }, { "acc": 0.98738976, "epoch": 32.45137098664167, "grad_norm": 6.156411647796631, "learning_rate": 3.004679244609676e-06, "loss": 0.05197368, "memory(GiB)": 13.7, "step": 69235, "train_speed(iter/s)": 1.532573 }, { "acc": 0.9833334, "epoch": 32.453714553550505, "grad_norm": 0.0027260195929557085, "learning_rate": 3.0039685983277096e-06, "loss": 0.04855213, "memory(GiB)": 13.7, "step": 69240, "train_speed(iter/s)": 1.532578 }, { "acc": 0.99017048, "epoch": 32.45605812045934, "grad_norm": 4.10872220993042, "learning_rate": 3.0032580000357992e-06, "loss": 0.02392757, "memory(GiB)": 13.7, "step": 69245, "train_speed(iter/s)": 1.53258 }, { "acc": 0.9845089, "epoch": 32.45840168736817, "grad_norm": 5.901179790496826, "learning_rate": 3.0025474497510236e-06, "loss": 0.05218198, "memory(GiB)": 13.7, "step": 69250, "train_speed(iter/s)": 1.532584 }, { "acc": 0.98006935, "epoch": 32.46074525427701, "grad_norm": 0.5479767918586731, "learning_rate": 3.001836947490465e-06, "loss": 0.05079618, "memory(GiB)": 13.7, "step": 69255, "train_speed(iter/s)": 1.53259 }, { "acc": 0.98416672, "epoch": 32.46308882118584, "grad_norm": 3.5892553329467773, "learning_rate": 3.0011264932711986e-06, "loss": 0.0548762, "memory(GiB)": 13.7, "step": 69260, "train_speed(iter/s)": 1.532592 }, { "acc": 0.97300606, "epoch": 32.46543238809468, "grad_norm": 1.8993152379989624, "learning_rate": 3.000416087110305e-06, "loss": 0.05236055, "memory(GiB)": 13.7, "step": 69265, "train_speed(iter/s)": 1.532594 }, { "acc": 0.99020834, "epoch": 32.46777595500352, "grad_norm": 0.875221848487854, "learning_rate": 2.9997057290248564e-06, "loss": 0.03668599, "memory(GiB)": 13.7, "step": 69270, "train_speed(iter/s)": 1.532598 }, { "acc": 0.98756943, "epoch": 32.47011952191235, "grad_norm": 1.0326415300369263, "learning_rate": 2.99899541903193e-06, "loss": 0.04328216, "memory(GiB)": 13.7, "step": 69275, "train_speed(iter/s)": 1.532599 }, { "acc": 0.98093748, "epoch": 32.472463088821186, "grad_norm": 3.893561840057373, "learning_rate": 2.998285157148597e-06, "loss": 0.07294378, "memory(GiB)": 13.7, "step": 69280, "train_speed(iter/s)": 1.532601 }, { "acc": 0.98084326, "epoch": 32.47480665573002, "grad_norm": 2.1187002658843994, "learning_rate": 2.9975749433919316e-06, "loss": 0.0616854, "memory(GiB)": 13.7, "step": 69285, "train_speed(iter/s)": 1.532599 }, { "acc": 0.98374996, "epoch": 32.477150222638855, "grad_norm": 6.063297748565674, "learning_rate": 2.996864777779004e-06, "loss": 0.05397272, "memory(GiB)": 13.7, "step": 69290, "train_speed(iter/s)": 1.532599 }, { "acc": 0.99219694, "epoch": 32.47949378954769, "grad_norm": 0.7728891372680664, "learning_rate": 2.9961546603268853e-06, "loss": 0.02558623, "memory(GiB)": 13.7, "step": 69295, "train_speed(iter/s)": 1.5326 }, { "acc": 0.97603626, "epoch": 32.48183735645652, "grad_norm": 6.282659530639648, "learning_rate": 2.9954445910526438e-06, "loss": 0.06664157, "memory(GiB)": 13.7, "step": 69300, "train_speed(iter/s)": 1.532598 }, { "acc": 0.98083334, "epoch": 32.484180923365365, "grad_norm": 0.026079658418893814, "learning_rate": 2.994734569973346e-06, "loss": 0.06033906, "memory(GiB)": 13.7, "step": 69305, "train_speed(iter/s)": 1.532598 }, { "acc": 0.97977638, "epoch": 32.4865244902742, "grad_norm": 5.219465732574463, "learning_rate": 2.9940245971060615e-06, "loss": 0.06099899, "memory(GiB)": 13.7, "step": 69310, "train_speed(iter/s)": 1.532598 }, { "acc": 0.99392853, "epoch": 32.48886805718303, "grad_norm": 0.0035619037225842476, "learning_rate": 2.9933146724678536e-06, "loss": 0.03288752, "memory(GiB)": 13.7, "step": 69315, "train_speed(iter/s)": 1.532598 }, { "acc": 0.984375, "epoch": 32.49121162409187, "grad_norm": 5.208889007568359, "learning_rate": 2.9926047960757866e-06, "loss": 0.03270072, "memory(GiB)": 13.7, "step": 69320, "train_speed(iter/s)": 1.5326 }, { "acc": 0.97703371, "epoch": 32.4935551910007, "grad_norm": 5.8731303215026855, "learning_rate": 2.991894967946923e-06, "loss": 0.07077662, "memory(GiB)": 13.7, "step": 69325, "train_speed(iter/s)": 1.532606 }, { "acc": 0.99508934, "epoch": 32.495898757909536, "grad_norm": 0.7243523001670837, "learning_rate": 2.991185188098327e-06, "loss": 0.01335548, "memory(GiB)": 13.7, "step": 69330, "train_speed(iter/s)": 1.53261 }, { "acc": 0.98027163, "epoch": 32.49824232481837, "grad_norm": 0.0007919046329334378, "learning_rate": 2.990475456547055e-06, "loss": 0.03860289, "memory(GiB)": 13.7, "step": 69335, "train_speed(iter/s)": 1.532611 }, { "acc": 0.99188986, "epoch": 32.50058589172721, "grad_norm": 0.8041293025016785, "learning_rate": 2.9897657733101724e-06, "loss": 0.04117642, "memory(GiB)": 13.7, "step": 69340, "train_speed(iter/s)": 1.532616 }, { "acc": 0.98386478, "epoch": 32.502929458636046, "grad_norm": 3.85432767868042, "learning_rate": 2.9890561384047338e-06, "loss": 0.06284345, "memory(GiB)": 13.7, "step": 69345, "train_speed(iter/s)": 1.532616 }, { "acc": 0.97287359, "epoch": 32.50527302554488, "grad_norm": 6.055419445037842, "learning_rate": 2.9883465518478e-06, "loss": 0.06751005, "memory(GiB)": 13.7, "step": 69350, "train_speed(iter/s)": 1.532616 }, { "acc": 0.98988094, "epoch": 32.507616592453715, "grad_norm": 2.8396782875061035, "learning_rate": 2.9876370136564216e-06, "loss": 0.03577333, "memory(GiB)": 13.7, "step": 69355, "train_speed(iter/s)": 1.532618 }, { "acc": 0.98475647, "epoch": 32.50996015936255, "grad_norm": 3.306500196456909, "learning_rate": 2.986927523847658e-06, "loss": 0.05779312, "memory(GiB)": 13.7, "step": 69360, "train_speed(iter/s)": 1.53262 }, { "acc": 0.9833333, "epoch": 32.512303726271384, "grad_norm": 3.699444532394409, "learning_rate": 2.9862180824385602e-06, "loss": 0.04251044, "memory(GiB)": 13.7, "step": 69365, "train_speed(iter/s)": 1.532624 }, { "acc": 0.98125, "epoch": 32.51464729318022, "grad_norm": 5.567041873931885, "learning_rate": 2.985508689446183e-06, "loss": 0.03263204, "memory(GiB)": 13.7, "step": 69370, "train_speed(iter/s)": 1.532624 }, { "acc": 0.9875, "epoch": 32.51699086008905, "grad_norm": 2.2406256198883057, "learning_rate": 2.9847993448875782e-06, "loss": 0.03893777, "memory(GiB)": 13.7, "step": 69375, "train_speed(iter/s)": 1.532626 }, { "acc": 0.98041668, "epoch": 32.519334426997894, "grad_norm": 5.184568881988525, "learning_rate": 2.9840900487797936e-06, "loss": 0.03694948, "memory(GiB)": 13.7, "step": 69380, "train_speed(iter/s)": 1.532624 }, { "acc": 0.984375, "epoch": 32.52167799390673, "grad_norm": 0.12493539601564407, "learning_rate": 2.9833808011398805e-06, "loss": 0.02518176, "memory(GiB)": 13.7, "step": 69385, "train_speed(iter/s)": 1.532619 }, { "acc": 0.99402781, "epoch": 32.52402156081556, "grad_norm": 1.942442536354065, "learning_rate": 2.982671601984887e-06, "loss": 0.01295102, "memory(GiB)": 13.7, "step": 69390, "train_speed(iter/s)": 1.532628 }, { "acc": 0.98154764, "epoch": 32.5263651277244, "grad_norm": 0.040184635668992996, "learning_rate": 2.981962451331858e-06, "loss": 0.04873341, "memory(GiB)": 13.7, "step": 69395, "train_speed(iter/s)": 1.532633 }, { "acc": 0.98395824, "epoch": 32.52870869463323, "grad_norm": 6.623206615447998, "learning_rate": 2.981253349197841e-06, "loss": 0.04194715, "memory(GiB)": 13.7, "step": 69400, "train_speed(iter/s)": 1.532631 }, { "acc": 0.9895833, "epoch": 32.531052261542065, "grad_norm": 3.116590976715088, "learning_rate": 2.9805442955998793e-06, "loss": 0.02013854, "memory(GiB)": 13.7, "step": 69405, "train_speed(iter/s)": 1.532631 }, { "acc": 0.97312498, "epoch": 32.5333958284509, "grad_norm": 7.327861785888672, "learning_rate": 2.979835290555016e-06, "loss": 0.08253632, "memory(GiB)": 13.7, "step": 69410, "train_speed(iter/s)": 1.532635 }, { "acc": 0.97529764, "epoch": 32.53573939535974, "grad_norm": 5.708835601806641, "learning_rate": 2.9791263340802945e-06, "loss": 0.04070325, "memory(GiB)": 13.7, "step": 69415, "train_speed(iter/s)": 1.532643 }, { "acc": 0.97458334, "epoch": 32.538082962268575, "grad_norm": 6.335305690765381, "learning_rate": 2.9784174261927555e-06, "loss": 0.06892859, "memory(GiB)": 13.7, "step": 69420, "train_speed(iter/s)": 1.532642 }, { "acc": 0.98874998, "epoch": 32.54042652917741, "grad_norm": 3.3329696655273438, "learning_rate": 2.9777085669094375e-06, "loss": 0.05058527, "memory(GiB)": 13.7, "step": 69425, "train_speed(iter/s)": 1.532644 }, { "acc": 0.97651291, "epoch": 32.542770096086244, "grad_norm": 7.611074924468994, "learning_rate": 2.976999756247381e-06, "loss": 0.08822665, "memory(GiB)": 13.7, "step": 69430, "train_speed(iter/s)": 1.532645 }, { "acc": 0.98243055, "epoch": 32.54511366299508, "grad_norm": 2.3695337772369385, "learning_rate": 2.976290994223624e-06, "loss": 0.07607028, "memory(GiB)": 13.7, "step": 69435, "train_speed(iter/s)": 1.532644 }, { "acc": 0.98583336, "epoch": 32.54745722990391, "grad_norm": 2.652857780456543, "learning_rate": 2.9755822808551996e-06, "loss": 0.03103369, "memory(GiB)": 13.7, "step": 69440, "train_speed(iter/s)": 1.532649 }, { "acc": 0.99020834, "epoch": 32.54980079681275, "grad_norm": 0.6031724810600281, "learning_rate": 2.9748736161591457e-06, "loss": 0.02616461, "memory(GiB)": 13.7, "step": 69445, "train_speed(iter/s)": 1.532652 }, { "acc": 0.97279758, "epoch": 32.55214436372158, "grad_norm": 7.223906993865967, "learning_rate": 2.9741650001524947e-06, "loss": 0.07089556, "memory(GiB)": 13.7, "step": 69450, "train_speed(iter/s)": 1.532659 }, { "acc": 0.99312496, "epoch": 32.55448793063042, "grad_norm": 2.1077587604522705, "learning_rate": 2.973456432852279e-06, "loss": 0.02531181, "memory(GiB)": 13.7, "step": 69455, "train_speed(iter/s)": 1.532662 }, { "acc": 0.972822, "epoch": 32.55683149753926, "grad_norm": 2.9956257343292236, "learning_rate": 2.9727479142755334e-06, "loss": 0.07196219, "memory(GiB)": 13.7, "step": 69460, "train_speed(iter/s)": 1.532664 }, { "acc": 0.96955357, "epoch": 32.55917506444809, "grad_norm": 4.997478008270264, "learning_rate": 2.9720394444392853e-06, "loss": 0.1236707, "memory(GiB)": 13.7, "step": 69465, "train_speed(iter/s)": 1.532668 }, { "acc": 0.9623106, "epoch": 32.561518631356925, "grad_norm": 6.996724605560303, "learning_rate": 2.971331023360565e-06, "loss": 0.08869396, "memory(GiB)": 13.7, "step": 69470, "train_speed(iter/s)": 1.532669 }, { "acc": 0.98751488, "epoch": 32.56386219826576, "grad_norm": 4.4783244132995605, "learning_rate": 2.970622651056401e-06, "loss": 0.05434591, "memory(GiB)": 13.7, "step": 69475, "train_speed(iter/s)": 1.532674 }, { "acc": 0.96229162, "epoch": 32.566205765174594, "grad_norm": 6.921422958374023, "learning_rate": 2.9699143275438203e-06, "loss": 0.11422629, "memory(GiB)": 13.7, "step": 69480, "train_speed(iter/s)": 1.532679 }, { "acc": 0.9741518, "epoch": 32.56854933208343, "grad_norm": 1.1311397552490234, "learning_rate": 2.9692060528398465e-06, "loss": 0.07614033, "memory(GiB)": 13.7, "step": 69485, "train_speed(iter/s)": 1.532684 }, { "acc": 0.98892546, "epoch": 32.57089289899227, "grad_norm": 7.846683025360107, "learning_rate": 2.9684978269615065e-06, "loss": 0.0344562, "memory(GiB)": 13.7, "step": 69490, "train_speed(iter/s)": 1.532689 }, { "acc": 0.98363094, "epoch": 32.573236465901104, "grad_norm": 2.980618715286255, "learning_rate": 2.9677896499258233e-06, "loss": 0.06234446, "memory(GiB)": 13.7, "step": 69495, "train_speed(iter/s)": 1.532692 }, { "acc": 0.97868309, "epoch": 32.57558003280994, "grad_norm": 6.3404130935668945, "learning_rate": 2.9670815217498187e-06, "loss": 0.06197806, "memory(GiB)": 13.7, "step": 69500, "train_speed(iter/s)": 1.53269 }, { "acc": 0.97740536, "epoch": 32.57792359971877, "grad_norm": 4.6324992179870605, "learning_rate": 2.966373442450514e-06, "loss": 0.0501449, "memory(GiB)": 13.7, "step": 69505, "train_speed(iter/s)": 1.532687 }, { "acc": 0.98812504, "epoch": 32.58026716662761, "grad_norm": 4.671541690826416, "learning_rate": 2.965665412044929e-06, "loss": 0.02842022, "memory(GiB)": 13.7, "step": 69510, "train_speed(iter/s)": 1.53269 }, { "acc": 0.99348211, "epoch": 32.58261073353644, "grad_norm": 7.0138654708862305, "learning_rate": 2.964957430550084e-06, "loss": 0.07205447, "memory(GiB)": 13.7, "step": 69515, "train_speed(iter/s)": 1.53269 }, { "acc": 0.98479166, "epoch": 32.584954300445276, "grad_norm": 4.165108680725098, "learning_rate": 2.9642494979829942e-06, "loss": 0.03861244, "memory(GiB)": 13.7, "step": 69520, "train_speed(iter/s)": 1.532694 }, { "acc": 0.97857637, "epoch": 32.58729786735411, "grad_norm": 3.746272563934326, "learning_rate": 2.9635416143606773e-06, "loss": 0.08551211, "memory(GiB)": 13.7, "step": 69525, "train_speed(iter/s)": 1.532701 }, { "acc": 0.96674824, "epoch": 32.58964143426295, "grad_norm": 2.870284080505371, "learning_rate": 2.962833779700146e-06, "loss": 0.09764056, "memory(GiB)": 13.7, "step": 69530, "train_speed(iter/s)": 1.532704 }, { "acc": 0.99333324, "epoch": 32.591985001171786, "grad_norm": 0.8442707061767578, "learning_rate": 2.9621259940184184e-06, "loss": 0.02339536, "memory(GiB)": 13.7, "step": 69535, "train_speed(iter/s)": 1.532707 }, { "acc": 0.97672348, "epoch": 32.59432856808062, "grad_norm": 1.3627489805221558, "learning_rate": 2.961418257332504e-06, "loss": 0.0533155, "memory(GiB)": 13.7, "step": 69540, "train_speed(iter/s)": 1.532709 }, { "acc": 0.97208338, "epoch": 32.596672134989454, "grad_norm": 1.277077317237854, "learning_rate": 2.9607105696594164e-06, "loss": 0.06701735, "memory(GiB)": 13.7, "step": 69545, "train_speed(iter/s)": 1.532714 }, { "acc": 0.97354164, "epoch": 32.59901570189829, "grad_norm": 6.487968921661377, "learning_rate": 2.9600029310161667e-06, "loss": 0.05940043, "memory(GiB)": 13.7, "step": 69550, "train_speed(iter/s)": 1.532716 }, { "acc": 1.0, "epoch": 32.60135926880712, "grad_norm": 3.780320882797241, "learning_rate": 2.9592953414197604e-06, "loss": 0.02069252, "memory(GiB)": 13.7, "step": 69555, "train_speed(iter/s)": 1.532721 }, { "acc": 0.98427944, "epoch": 32.60370283571596, "grad_norm": 0.7414024472236633, "learning_rate": 2.9585878008872117e-06, "loss": 0.04755754, "memory(GiB)": 13.7, "step": 69560, "train_speed(iter/s)": 1.532721 }, { "acc": 0.9854167, "epoch": 32.6060464026248, "grad_norm": 4.553589344024658, "learning_rate": 2.957880309435522e-06, "loss": 0.03609208, "memory(GiB)": 13.7, "step": 69565, "train_speed(iter/s)": 1.532724 }, { "acc": 0.98894348, "epoch": 32.60838996953363, "grad_norm": 0.020447801798582077, "learning_rate": 2.9571728670816995e-06, "loss": 0.06955061, "memory(GiB)": 13.7, "step": 69570, "train_speed(iter/s)": 1.532722 }, { "acc": 0.98579369, "epoch": 32.61073353644247, "grad_norm": 6.508991241455078, "learning_rate": 2.9564654738427465e-06, "loss": 0.03096097, "memory(GiB)": 13.7, "step": 69575, "train_speed(iter/s)": 1.532727 }, { "acc": 0.97331839, "epoch": 32.6130771033513, "grad_norm": 4.789071083068848, "learning_rate": 2.9557581297356708e-06, "loss": 0.05827045, "memory(GiB)": 13.7, "step": 69580, "train_speed(iter/s)": 1.532728 }, { "acc": 0.98819447, "epoch": 32.615420670260136, "grad_norm": 1.2461633682250977, "learning_rate": 2.955050834777471e-06, "loss": 0.03351853, "memory(GiB)": 13.7, "step": 69585, "train_speed(iter/s)": 1.532731 }, { "acc": 0.9848958, "epoch": 32.61776423716897, "grad_norm": 2.9363484382629395, "learning_rate": 2.9543435889851497e-06, "loss": 0.03426763, "memory(GiB)": 13.7, "step": 69590, "train_speed(iter/s)": 1.532734 }, { "acc": 0.98500004, "epoch": 32.620107804077804, "grad_norm": 2.9416728019714355, "learning_rate": 2.953636392375706e-06, "loss": 0.02629769, "memory(GiB)": 13.7, "step": 69595, "train_speed(iter/s)": 1.532735 }, { "acc": 0.98048611, "epoch": 32.62245137098664, "grad_norm": 5.448093891143799, "learning_rate": 2.9529292449661405e-06, "loss": 0.05183635, "memory(GiB)": 13.7, "step": 69600, "train_speed(iter/s)": 1.532745 }, { "acc": 0.97979164, "epoch": 32.62479493789548, "grad_norm": 1.3953083753585815, "learning_rate": 2.9522221467734473e-06, "loss": 0.04430874, "memory(GiB)": 13.7, "step": 69605, "train_speed(iter/s)": 1.532747 }, { "acc": 0.98601189, "epoch": 32.627138504804314, "grad_norm": 2.815525770187378, "learning_rate": 2.9515150978146264e-06, "loss": 0.07680433, "memory(GiB)": 13.7, "step": 69610, "train_speed(iter/s)": 1.53275 }, { "acc": 0.98666668, "epoch": 32.62948207171315, "grad_norm": 2.0187602043151855, "learning_rate": 2.9508080981066683e-06, "loss": 0.03308371, "memory(GiB)": 13.7, "step": 69615, "train_speed(iter/s)": 1.532749 }, { "acc": 0.96737175, "epoch": 32.63182563862198, "grad_norm": 5.166128158569336, "learning_rate": 2.9501011476665713e-06, "loss": 0.06323522, "memory(GiB)": 13.7, "step": 69620, "train_speed(iter/s)": 1.532756 }, { "acc": 0.9885417, "epoch": 32.63416920553082, "grad_norm": 6.18150520324707, "learning_rate": 2.9493942465113273e-06, "loss": 0.0274136, "memory(GiB)": 13.7, "step": 69625, "train_speed(iter/s)": 1.532757 }, { "acc": 0.97770834, "epoch": 32.63651277243965, "grad_norm": 3.6717967987060547, "learning_rate": 2.9486873946579257e-06, "loss": 0.06226094, "memory(GiB)": 13.7, "step": 69630, "train_speed(iter/s)": 1.532754 }, { "acc": 0.97277966, "epoch": 32.638856339348486, "grad_norm": 2.1163296699523926, "learning_rate": 2.94798059212336e-06, "loss": 0.08182721, "memory(GiB)": 13.7, "step": 69635, "train_speed(iter/s)": 1.532758 }, { "acc": 0.9854167, "epoch": 32.64119990625732, "grad_norm": 1.3198951482772827, "learning_rate": 2.947273838924617e-06, "loss": 0.03610297, "memory(GiB)": 13.7, "step": 69640, "train_speed(iter/s)": 1.532764 }, { "acc": 0.9875, "epoch": 32.64354347316616, "grad_norm": 2.093907356262207, "learning_rate": 2.946567135078686e-06, "loss": 0.02443979, "memory(GiB)": 13.7, "step": 69645, "train_speed(iter/s)": 1.532767 }, { "acc": 0.98091354, "epoch": 32.645887040074996, "grad_norm": 5.683613300323486, "learning_rate": 2.9458604806025537e-06, "loss": 0.05186038, "memory(GiB)": 13.7, "step": 69650, "train_speed(iter/s)": 1.532765 }, { "acc": 0.98395834, "epoch": 32.64823060698383, "grad_norm": 2.461606025695801, "learning_rate": 2.945153875513205e-06, "loss": 0.02795537, "memory(GiB)": 13.7, "step": 69655, "train_speed(iter/s)": 1.532765 }, { "acc": 0.97758932, "epoch": 32.650574173892664, "grad_norm": 3.4056801795959473, "learning_rate": 2.9444473198276236e-06, "loss": 0.04951477, "memory(GiB)": 13.7, "step": 69660, "train_speed(iter/s)": 1.532766 }, { "acc": 0.98653851, "epoch": 32.6529177408015, "grad_norm": 4.428588390350342, "learning_rate": 2.9437408135627954e-06, "loss": 0.04254798, "memory(GiB)": 13.7, "step": 69665, "train_speed(iter/s)": 1.532767 }, { "acc": 0.98125, "epoch": 32.65526130771033, "grad_norm": 2.9798219203948975, "learning_rate": 2.9430343567357008e-06, "loss": 0.06208108, "memory(GiB)": 13.7, "step": 69670, "train_speed(iter/s)": 1.532765 }, { "acc": 0.9717803, "epoch": 32.65760487461917, "grad_norm": 3.5546681880950928, "learning_rate": 2.94232794936332e-06, "loss": 0.0635062, "memory(GiB)": 13.7, "step": 69675, "train_speed(iter/s)": 1.53277 }, { "acc": 0.99050598, "epoch": 32.65994844152801, "grad_norm": 2.072963237762451, "learning_rate": 2.9416215914626346e-06, "loss": 0.03444734, "memory(GiB)": 13.7, "step": 69680, "train_speed(iter/s)": 1.532775 }, { "acc": 0.9947917, "epoch": 32.66229200843684, "grad_norm": 0.00104809133335948, "learning_rate": 2.940915283050623e-06, "loss": 0.01369539, "memory(GiB)": 13.7, "step": 69685, "train_speed(iter/s)": 1.532775 }, { "acc": 0.98520832, "epoch": 32.66463557534568, "grad_norm": 3.6971168518066406, "learning_rate": 2.9402090241442607e-06, "loss": 0.03505805, "memory(GiB)": 13.7, "step": 69690, "train_speed(iter/s)": 1.53278 }, { "acc": 0.99541664, "epoch": 32.66697914225451, "grad_norm": 2.045616865158081, "learning_rate": 2.9395028147605253e-06, "loss": 0.03172118, "memory(GiB)": 13.7, "step": 69695, "train_speed(iter/s)": 1.532777 }, { "acc": 0.98973217, "epoch": 32.669322709163346, "grad_norm": 3.0402214527130127, "learning_rate": 2.938796654916391e-06, "loss": 0.04399359, "memory(GiB)": 13.7, "step": 69700, "train_speed(iter/s)": 1.532779 }, { "acc": 0.98022175, "epoch": 32.67166627607218, "grad_norm": 3.0413639545440674, "learning_rate": 2.9380905446288304e-06, "loss": 0.05462949, "memory(GiB)": 13.7, "step": 69705, "train_speed(iter/s)": 1.532784 }, { "acc": 0.99571428, "epoch": 32.674009842981015, "grad_norm": 1.5723328590393066, "learning_rate": 2.9373844839148203e-06, "loss": 0.03586365, "memory(GiB)": 13.7, "step": 69710, "train_speed(iter/s)": 1.532791 }, { "acc": 0.9875, "epoch": 32.67635340988985, "grad_norm": 2.8675873279571533, "learning_rate": 2.936678472791328e-06, "loss": 0.0421053, "memory(GiB)": 13.7, "step": 69715, "train_speed(iter/s)": 1.532797 }, { "acc": 0.9864583, "epoch": 32.67869697679869, "grad_norm": 0.9647718071937561, "learning_rate": 2.935972511275324e-06, "loss": 0.03783341, "memory(GiB)": 13.7, "step": 69720, "train_speed(iter/s)": 1.532799 }, { "acc": 0.98113098, "epoch": 32.681040543707525, "grad_norm": 3.803868293762207, "learning_rate": 2.9352665993837815e-06, "loss": 0.05301261, "memory(GiB)": 13.7, "step": 69725, "train_speed(iter/s)": 1.532801 }, { "acc": 0.98625002, "epoch": 32.68338411061636, "grad_norm": 2.8111186027526855, "learning_rate": 2.934560737133663e-06, "loss": 0.02769583, "memory(GiB)": 13.7, "step": 69730, "train_speed(iter/s)": 1.532801 }, { "acc": 0.97923613, "epoch": 32.68572767752519, "grad_norm": 1.5690184831619263, "learning_rate": 2.9338549245419367e-06, "loss": 0.05643332, "memory(GiB)": 13.7, "step": 69735, "train_speed(iter/s)": 1.5328 }, { "acc": 0.99392853, "epoch": 32.68807124443403, "grad_norm": 3.016728162765503, "learning_rate": 2.9331491616255693e-06, "loss": 0.01732706, "memory(GiB)": 13.7, "step": 69740, "train_speed(iter/s)": 1.532808 }, { "acc": 0.96664906, "epoch": 32.69041481134286, "grad_norm": 5.921335697174072, "learning_rate": 2.9324434484015247e-06, "loss": 0.06350125, "memory(GiB)": 13.7, "step": 69745, "train_speed(iter/s)": 1.532809 }, { "acc": 0.9822916, "epoch": 32.692758378251696, "grad_norm": 0.7845150232315063, "learning_rate": 2.931737784886764e-06, "loss": 0.04263372, "memory(GiB)": 13.7, "step": 69750, "train_speed(iter/s)": 1.532812 }, { "acc": 0.99508934, "epoch": 32.69510194516054, "grad_norm": 0.5114882588386536, "learning_rate": 2.9310321710982515e-06, "loss": 0.00906357, "memory(GiB)": 13.7, "step": 69755, "train_speed(iter/s)": 1.532806 }, { "acc": 0.98164139, "epoch": 32.69744551206937, "grad_norm": 6.123640060424805, "learning_rate": 2.930326607052946e-06, "loss": 0.04072171, "memory(GiB)": 13.7, "step": 69760, "train_speed(iter/s)": 1.532807 }, { "acc": 0.9963542, "epoch": 32.699789078978206, "grad_norm": 2.5375261306762695, "learning_rate": 2.9296210927678093e-06, "loss": 0.02312403, "memory(GiB)": 13.7, "step": 69765, "train_speed(iter/s)": 1.532808 }, { "acc": 0.98581848, "epoch": 32.70213264588704, "grad_norm": 3.942366123199463, "learning_rate": 2.928915628259799e-06, "loss": 0.06664915, "memory(GiB)": 13.7, "step": 69770, "train_speed(iter/s)": 1.532809 }, { "acc": 0.97927084, "epoch": 32.704476212795875, "grad_norm": 3.3687610626220703, "learning_rate": 2.9282102135458707e-06, "loss": 0.03899994, "memory(GiB)": 13.7, "step": 69775, "train_speed(iter/s)": 1.532812 }, { "acc": 0.97612181, "epoch": 32.70681977970471, "grad_norm": 2.066211462020874, "learning_rate": 2.9275048486429786e-06, "loss": 0.10182569, "memory(GiB)": 13.7, "step": 69780, "train_speed(iter/s)": 1.532817 }, { "acc": 0.99375, "epoch": 32.70916334661354, "grad_norm": 4.9750657081604, "learning_rate": 2.9267995335680817e-06, "loss": 0.03254398, "memory(GiB)": 13.7, "step": 69785, "train_speed(iter/s)": 1.532817 }, { "acc": 0.96758938, "epoch": 32.71150691352238, "grad_norm": 8.38948917388916, "learning_rate": 2.92609426833813e-06, "loss": 0.06437666, "memory(GiB)": 13.7, "step": 69790, "train_speed(iter/s)": 1.532816 }, { "acc": 0.99196434, "epoch": 32.71385048043122, "grad_norm": 1.5007089376449585, "learning_rate": 2.925389052970078e-06, "loss": 0.02539164, "memory(GiB)": 13.7, "step": 69795, "train_speed(iter/s)": 1.532816 }, { "acc": 0.98052082, "epoch": 32.71619404734005, "grad_norm": 0.15092721581459045, "learning_rate": 2.9246838874808765e-06, "loss": 0.04469547, "memory(GiB)": 13.7, "step": 69800, "train_speed(iter/s)": 1.532823 }, { "acc": 0.98656254, "epoch": 32.71853761424889, "grad_norm": 5.67509651184082, "learning_rate": 2.923978771887473e-06, "loss": 0.05050007, "memory(GiB)": 13.7, "step": 69805, "train_speed(iter/s)": 1.532833 }, { "acc": 0.97215281, "epoch": 32.72088118115772, "grad_norm": 3.987271308898926, "learning_rate": 2.92327370620682e-06, "loss": 0.05648862, "memory(GiB)": 13.7, "step": 69810, "train_speed(iter/s)": 1.532839 }, { "acc": 0.98038692, "epoch": 32.723224748066556, "grad_norm": 5.19142484664917, "learning_rate": 2.9225686904558624e-06, "loss": 0.05209378, "memory(GiB)": 13.7, "step": 69815, "train_speed(iter/s)": 1.532841 }, { "acc": 0.97895832, "epoch": 32.72556831497539, "grad_norm": 2.4498131275177, "learning_rate": 2.921863724651546e-06, "loss": 0.04072957, "memory(GiB)": 13.7, "step": 69820, "train_speed(iter/s)": 1.532853 }, { "acc": 0.98500004, "epoch": 32.727911881884225, "grad_norm": 4.0920209884643555, "learning_rate": 2.9211588088108174e-06, "loss": 0.02666177, "memory(GiB)": 13.7, "step": 69825, "train_speed(iter/s)": 1.532855 }, { "acc": 0.98249998, "epoch": 32.730255448793066, "grad_norm": 2.1762382984161377, "learning_rate": 2.9204539429506194e-06, "loss": 0.04349913, "memory(GiB)": 13.7, "step": 69830, "train_speed(iter/s)": 1.532855 }, { "acc": 0.9926136, "epoch": 32.7325990157019, "grad_norm": 2.78074312210083, "learning_rate": 2.919749127087895e-06, "loss": 0.0235921, "memory(GiB)": 13.7, "step": 69835, "train_speed(iter/s)": 1.532857 }, { "acc": 0.98812504, "epoch": 32.734942582610735, "grad_norm": 0.6834130883216858, "learning_rate": 2.9190443612395856e-06, "loss": 0.03372202, "memory(GiB)": 13.7, "step": 69840, "train_speed(iter/s)": 1.532859 }, { "acc": 0.98113098, "epoch": 32.73728614951957, "grad_norm": 3.017596960067749, "learning_rate": 2.9183396454226337e-06, "loss": 0.04970489, "memory(GiB)": 13.7, "step": 69845, "train_speed(iter/s)": 1.532861 }, { "acc": 0.98571434, "epoch": 32.739629716428404, "grad_norm": 1.244787335395813, "learning_rate": 2.9176349796539744e-06, "loss": 0.02937724, "memory(GiB)": 13.7, "step": 69850, "train_speed(iter/s)": 1.532865 }, { "acc": 0.9885417, "epoch": 32.74197328333724, "grad_norm": 2.133876323699951, "learning_rate": 2.9169303639505485e-06, "loss": 0.0619092, "memory(GiB)": 13.7, "step": 69855, "train_speed(iter/s)": 1.53287 }, { "acc": 0.98923616, "epoch": 32.74431685024607, "grad_norm": 2.2753703594207764, "learning_rate": 2.9162257983292903e-06, "loss": 0.03902198, "memory(GiB)": 13.7, "step": 69860, "train_speed(iter/s)": 1.532874 }, { "acc": 0.99298611, "epoch": 32.74666041715491, "grad_norm": 3.5505454540252686, "learning_rate": 2.9155212828071387e-06, "loss": 0.02399895, "memory(GiB)": 13.7, "step": 69865, "train_speed(iter/s)": 1.532881 }, { "acc": 0.98883934, "epoch": 32.74900398406375, "grad_norm": 0.0016156653873622417, "learning_rate": 2.9148168174010235e-06, "loss": 0.03090523, "memory(GiB)": 13.7, "step": 69870, "train_speed(iter/s)": 1.532887 }, { "acc": 0.98374996, "epoch": 32.75134755097258, "grad_norm": 0.002091037342324853, "learning_rate": 2.91411240212788e-06, "loss": 0.04513606, "memory(GiB)": 13.7, "step": 69875, "train_speed(iter/s)": 1.53289 }, { "acc": 0.98468752, "epoch": 32.75369111788142, "grad_norm": 1.8270999193191528, "learning_rate": 2.9134080370046423e-06, "loss": 0.04137023, "memory(GiB)": 13.7, "step": 69880, "train_speed(iter/s)": 1.532896 }, { "acc": 0.98182001, "epoch": 32.75603468479025, "grad_norm": 2.995258331298828, "learning_rate": 2.9127037220482367e-06, "loss": 0.06408127, "memory(GiB)": 13.7, "step": 69885, "train_speed(iter/s)": 1.532897 }, { "acc": 0.98946428, "epoch": 32.758378251699085, "grad_norm": 4.2899169921875, "learning_rate": 2.9119994572755943e-06, "loss": 0.03036579, "memory(GiB)": 13.7, "step": 69890, "train_speed(iter/s)": 1.5329 }, { "acc": 0.9864584, "epoch": 32.76072181860792, "grad_norm": 0.4333207607269287, "learning_rate": 2.9112952427036462e-06, "loss": 0.03095225, "memory(GiB)": 13.7, "step": 69895, "train_speed(iter/s)": 1.532903 }, { "acc": 0.99260416, "epoch": 32.763065385516754, "grad_norm": 1.761325716972351, "learning_rate": 2.910591078349317e-06, "loss": 0.03251942, "memory(GiB)": 13.7, "step": 69900, "train_speed(iter/s)": 1.532904 }, { "acc": 0.98342266, "epoch": 32.765408952425595, "grad_norm": 4.208589553833008, "learning_rate": 2.90988696422953e-06, "loss": 0.05984299, "memory(GiB)": 13.7, "step": 69905, "train_speed(iter/s)": 1.532913 }, { "acc": 0.99541664, "epoch": 32.76775251933443, "grad_norm": 1.2838194370269775, "learning_rate": 2.9091829003612115e-06, "loss": 0.05640244, "memory(GiB)": 13.7, "step": 69910, "train_speed(iter/s)": 1.532916 }, { "acc": 0.98604164, "epoch": 32.770096086243264, "grad_norm": 3.608647346496582, "learning_rate": 2.908478886761286e-06, "loss": 0.02359234, "memory(GiB)": 13.7, "step": 69915, "train_speed(iter/s)": 1.532917 }, { "acc": 0.97701397, "epoch": 32.7724396531521, "grad_norm": 5.0365729331970215, "learning_rate": 2.907774923446678e-06, "loss": 0.07485788, "memory(GiB)": 13.7, "step": 69920, "train_speed(iter/s)": 1.532916 }, { "acc": 0.98467255, "epoch": 32.77478322006093, "grad_norm": 3.3650050163269043, "learning_rate": 2.907071010434303e-06, "loss": 0.04381836, "memory(GiB)": 13.7, "step": 69925, "train_speed(iter/s)": 1.532918 }, { "acc": 0.98967266, "epoch": 32.77712678696977, "grad_norm": 1.8389554023742676, "learning_rate": 2.9063671477410837e-06, "loss": 0.03281534, "memory(GiB)": 13.7, "step": 69930, "train_speed(iter/s)": 1.532915 }, { "acc": 0.99296875, "epoch": 32.7794703538786, "grad_norm": 2.380310535430908, "learning_rate": 2.905663335383941e-06, "loss": 0.02009568, "memory(GiB)": 13.7, "step": 69935, "train_speed(iter/s)": 1.532912 }, { "acc": 0.98500004, "epoch": 32.781813920787435, "grad_norm": 5.628510475158691, "learning_rate": 2.9049595733797887e-06, "loss": 0.06415412, "memory(GiB)": 13.7, "step": 69940, "train_speed(iter/s)": 1.532916 }, { "acc": 0.98008928, "epoch": 32.78415748769628, "grad_norm": 1.958574652671814, "learning_rate": 2.9042558617455425e-06, "loss": 0.0424341, "memory(GiB)": 13.7, "step": 69945, "train_speed(iter/s)": 1.53292 }, { "acc": 0.99359951, "epoch": 32.78650105460511, "grad_norm": 3.942715644836426, "learning_rate": 2.9035522004981183e-06, "loss": 0.02852978, "memory(GiB)": 13.7, "step": 69950, "train_speed(iter/s)": 1.532931 }, { "acc": 0.98010416, "epoch": 32.788844621513945, "grad_norm": 5.3259382247924805, "learning_rate": 2.9028485896544324e-06, "loss": 0.07364913, "memory(GiB)": 13.7, "step": 69955, "train_speed(iter/s)": 1.53294 }, { "acc": 0.95437498, "epoch": 32.79118818842278, "grad_norm": 2.119138479232788, "learning_rate": 2.9021450292313928e-06, "loss": 0.09905061, "memory(GiB)": 13.7, "step": 69960, "train_speed(iter/s)": 1.532942 }, { "acc": 0.975, "epoch": 32.793531755331614, "grad_norm": 3.8012733459472656, "learning_rate": 2.9014415192459134e-06, "loss": 0.07736093, "memory(GiB)": 13.7, "step": 69965, "train_speed(iter/s)": 1.53295 }, { "acc": 0.98979168, "epoch": 32.79587532224045, "grad_norm": 2.161102294921875, "learning_rate": 2.9007380597149053e-06, "loss": 0.04508168, "memory(GiB)": 13.7, "step": 69970, "train_speed(iter/s)": 1.532957 }, { "acc": 0.99291668, "epoch": 32.79821888914928, "grad_norm": 1.4528776407241821, "learning_rate": 2.900034650655275e-06, "loss": 0.03608656, "memory(GiB)": 13.7, "step": 69975, "train_speed(iter/s)": 1.532969 }, { "acc": 0.98098965, "epoch": 32.800562456058124, "grad_norm": 3.266726016998291, "learning_rate": 2.8993312920839324e-06, "loss": 0.04630658, "memory(GiB)": 13.7, "step": 69980, "train_speed(iter/s)": 1.532974 }, { "acc": 0.98104172, "epoch": 32.80290602296696, "grad_norm": 5.06449031829834, "learning_rate": 2.8986279840177804e-06, "loss": 0.04371764, "memory(GiB)": 13.7, "step": 69985, "train_speed(iter/s)": 1.532979 }, { "acc": 0.98500004, "epoch": 32.80524958987579, "grad_norm": 4.831459999084473, "learning_rate": 2.8979247264737277e-06, "loss": 0.05196108, "memory(GiB)": 13.7, "step": 69990, "train_speed(iter/s)": 1.532983 }, { "acc": 0.97488098, "epoch": 32.80759315678463, "grad_norm": 3.2025601863861084, "learning_rate": 2.897221519468678e-06, "loss": 0.04238677, "memory(GiB)": 13.7, "step": 69995, "train_speed(iter/s)": 1.532986 }, { "acc": 0.98736115, "epoch": 32.80993672369346, "grad_norm": 0.009045878425240517, "learning_rate": 2.8965183630195317e-06, "loss": 0.03806722, "memory(GiB)": 13.7, "step": 70000, "train_speed(iter/s)": 1.532988 }, { "epoch": 32.80993672369346, "eval_acc": 0.7785121737371012, "eval_loss": 1.2162010669708252, "eval_runtime": 144.1583, "eval_samples_per_second": 55.966, "eval_steps_per_second": 6.999, "step": 70000 }, { "acc": 0.9822916, "epoch": 32.812280290602295, "grad_norm": 1.107996940612793, "learning_rate": 2.895815257143192e-06, "loss": 0.03738624, "memory(GiB)": 13.7, "step": 70005, "train_speed(iter/s)": 1.527154 }, { "acc": 0.9807291, "epoch": 32.81462385751113, "grad_norm": 1.4403423070907593, "learning_rate": 2.8951122018565597e-06, "loss": 0.05611072, "memory(GiB)": 13.7, "step": 70010, "train_speed(iter/s)": 1.527156 }, { "acc": 0.96800594, "epoch": 32.816967424419964, "grad_norm": 6.230090618133545, "learning_rate": 2.894409197176533e-06, "loss": 0.08261417, "memory(GiB)": 13.7, "step": 70015, "train_speed(iter/s)": 1.527157 }, { "acc": 0.98625002, "epoch": 32.819310991328805, "grad_norm": 0.008628561161458492, "learning_rate": 2.8937062431200112e-06, "loss": 0.03352882, "memory(GiB)": 13.7, "step": 70020, "train_speed(iter/s)": 1.527161 }, { "acc": 0.98862181, "epoch": 32.82165455823764, "grad_norm": 4.530770778656006, "learning_rate": 2.8930033397038886e-06, "loss": 0.01701648, "memory(GiB)": 13.7, "step": 70025, "train_speed(iter/s)": 1.527172 }, { "acc": 0.996875, "epoch": 32.823998125146474, "grad_norm": 0.007363700307905674, "learning_rate": 2.892300486945064e-06, "loss": 0.00958154, "memory(GiB)": 13.7, "step": 70030, "train_speed(iter/s)": 1.52718 }, { "acc": 0.98090277, "epoch": 32.82634169205531, "grad_norm": 3.657053232192993, "learning_rate": 2.891597684860427e-06, "loss": 0.04694746, "memory(GiB)": 13.7, "step": 70035, "train_speed(iter/s)": 1.527185 }, { "acc": 0.98913689, "epoch": 32.82868525896414, "grad_norm": 2.503700017929077, "learning_rate": 2.890894933466874e-06, "loss": 0.03023821, "memory(GiB)": 13.7, "step": 70040, "train_speed(iter/s)": 1.527194 }, { "acc": 0.98550053, "epoch": 32.83102882587298, "grad_norm": 3.7808475494384766, "learning_rate": 2.890192232781298e-06, "loss": 0.07861245, "memory(GiB)": 13.7, "step": 70045, "train_speed(iter/s)": 1.527199 }, { "acc": 0.98458328, "epoch": 32.83337239278181, "grad_norm": 0.0013084466336295009, "learning_rate": 2.8894895828205853e-06, "loss": 0.05158681, "memory(GiB)": 13.7, "step": 70050, "train_speed(iter/s)": 1.527199 }, { "acc": 0.9859375, "epoch": 32.835715959690646, "grad_norm": 2.267103433609009, "learning_rate": 2.8887869836016285e-06, "loss": 0.03840304, "memory(GiB)": 13.7, "step": 70055, "train_speed(iter/s)": 1.527203 }, { "acc": 0.97350082, "epoch": 32.83805952659949, "grad_norm": 5.313534259796143, "learning_rate": 2.8880844351413168e-06, "loss": 0.07756646, "memory(GiB)": 13.7, "step": 70060, "train_speed(iter/s)": 1.527209 }, { "acc": 0.97406254, "epoch": 32.84040309350832, "grad_norm": 5.302783489227295, "learning_rate": 2.887381937456534e-06, "loss": 0.08651261, "memory(GiB)": 13.7, "step": 70065, "train_speed(iter/s)": 1.52721 }, { "acc": 0.99080353, "epoch": 32.842746660417156, "grad_norm": 1.9429429769515991, "learning_rate": 2.8866794905641694e-06, "loss": 0.06454443, "memory(GiB)": 13.7, "step": 70070, "train_speed(iter/s)": 1.527213 }, { "acc": 0.99085321, "epoch": 32.84509022732599, "grad_norm": 5.476876258850098, "learning_rate": 2.8859770944811025e-06, "loss": 0.04558831, "memory(GiB)": 13.7, "step": 70075, "train_speed(iter/s)": 1.527218 }, { "acc": 0.98069439, "epoch": 32.847433794234824, "grad_norm": 4.222438812255859, "learning_rate": 2.88527474922422e-06, "loss": 0.04361127, "memory(GiB)": 13.7, "step": 70080, "train_speed(iter/s)": 1.527218 }, { "acc": 0.99451923, "epoch": 32.84977736114366, "grad_norm": 1.1480196714401245, "learning_rate": 2.8845724548104052e-06, "loss": 0.00950197, "memory(GiB)": 13.7, "step": 70085, "train_speed(iter/s)": 1.527223 }, { "acc": 0.99148064, "epoch": 32.85212092805249, "grad_norm": 4.733564853668213, "learning_rate": 2.8838702112565354e-06, "loss": 0.03242074, "memory(GiB)": 13.7, "step": 70090, "train_speed(iter/s)": 1.527228 }, { "acc": 0.98974361, "epoch": 32.854464494961334, "grad_norm": 0.1758381426334381, "learning_rate": 2.883168018579492e-06, "loss": 0.03388875, "memory(GiB)": 13.7, "step": 70095, "train_speed(iter/s)": 1.52723 }, { "acc": 0.9895834, "epoch": 32.85680806187017, "grad_norm": 1.6063334941864014, "learning_rate": 2.882465876796156e-06, "loss": 0.04623734, "memory(GiB)": 13.7, "step": 70100, "train_speed(iter/s)": 1.52724 }, { "acc": 0.98312492, "epoch": 32.859151628779, "grad_norm": 5.700941562652588, "learning_rate": 2.8817637859234018e-06, "loss": 0.03988154, "memory(GiB)": 13.7, "step": 70105, "train_speed(iter/s)": 1.527243 }, { "acc": 0.98611107, "epoch": 32.86149519568784, "grad_norm": 1.7963614463806152, "learning_rate": 2.881061745978103e-06, "loss": 0.02496516, "memory(GiB)": 13.7, "step": 70110, "train_speed(iter/s)": 1.527251 }, { "acc": 0.9854167, "epoch": 32.86383876259667, "grad_norm": 5.395052433013916, "learning_rate": 2.8803597569771373e-06, "loss": 0.05725751, "memory(GiB)": 13.7, "step": 70115, "train_speed(iter/s)": 1.52725 }, { "acc": 0.98458328, "epoch": 32.866182329505506, "grad_norm": 2.5624282360076904, "learning_rate": 2.87965781893738e-06, "loss": 0.06541467, "memory(GiB)": 13.7, "step": 70120, "train_speed(iter/s)": 1.527255 }, { "acc": 0.98468752, "epoch": 32.86852589641434, "grad_norm": 5.544587135314941, "learning_rate": 2.8789559318756993e-06, "loss": 0.07578279, "memory(GiB)": 13.7, "step": 70125, "train_speed(iter/s)": 1.527255 }, { "acc": 0.98916664, "epoch": 32.870869463323174, "grad_norm": 3.5263524055480957, "learning_rate": 2.8782540958089678e-06, "loss": 0.05551362, "memory(GiB)": 13.7, "step": 70130, "train_speed(iter/s)": 1.527257 }, { "acc": 0.96364594, "epoch": 32.873213030232016, "grad_norm": 8.535609245300293, "learning_rate": 2.877552310754056e-06, "loss": 0.080293, "memory(GiB)": 13.7, "step": 70135, "train_speed(iter/s)": 1.527257 }, { "acc": 0.99281254, "epoch": 32.87555659714085, "grad_norm": 2.162625789642334, "learning_rate": 2.8768505767278345e-06, "loss": 0.03156784, "memory(GiB)": 13.7, "step": 70140, "train_speed(iter/s)": 1.527263 }, { "acc": 0.97635422, "epoch": 32.877900164049684, "grad_norm": 3.6525793075561523, "learning_rate": 2.876148893747168e-06, "loss": 0.03103072, "memory(GiB)": 13.7, "step": 70145, "train_speed(iter/s)": 1.527267 }, { "acc": 0.99348211, "epoch": 32.88024373095852, "grad_norm": 2.116255283355713, "learning_rate": 2.8754472618289207e-06, "loss": 0.04176691, "memory(GiB)": 13.7, "step": 70150, "train_speed(iter/s)": 1.527275 }, { "acc": 0.9916667, "epoch": 32.88258729786735, "grad_norm": 5.571310997009277, "learning_rate": 2.87474568098996e-06, "loss": 0.03551534, "memory(GiB)": 13.7, "step": 70155, "train_speed(iter/s)": 1.527277 }, { "acc": 0.9885417, "epoch": 32.88493086477619, "grad_norm": 1.5960726737976074, "learning_rate": 2.8740441512471525e-06, "loss": 0.05571018, "memory(GiB)": 13.7, "step": 70160, "train_speed(iter/s)": 1.527287 }, { "acc": 0.98321428, "epoch": 32.88727443168502, "grad_norm": 8.285293579101562, "learning_rate": 2.873342672617354e-06, "loss": 0.08773357, "memory(GiB)": 13.7, "step": 70165, "train_speed(iter/s)": 1.527292 }, { "acc": 1.0, "epoch": 32.88961799859386, "grad_norm": 1.440242886543274, "learning_rate": 2.8726412451174308e-06, "loss": 0.01164679, "memory(GiB)": 13.7, "step": 70170, "train_speed(iter/s)": 1.527295 }, { "acc": 0.97349205, "epoch": 32.8919615655027, "grad_norm": 2.9436819553375244, "learning_rate": 2.871939868764243e-06, "loss": 0.0619725, "memory(GiB)": 13.7, "step": 70175, "train_speed(iter/s)": 1.527295 }, { "acc": 0.99666672, "epoch": 32.89430513241153, "grad_norm": 2.0315654277801514, "learning_rate": 2.871238543574646e-06, "loss": 0.0125515, "memory(GiB)": 13.7, "step": 70180, "train_speed(iter/s)": 1.527299 }, { "acc": 0.98988972, "epoch": 32.896648699320366, "grad_norm": 2.657766819000244, "learning_rate": 2.8705372695654994e-06, "loss": 0.04158337, "memory(GiB)": 13.7, "step": 70185, "train_speed(iter/s)": 1.527305 }, { "acc": 0.96384802, "epoch": 32.8989922662292, "grad_norm": 2.223905086517334, "learning_rate": 2.869836046753661e-06, "loss": 0.079613, "memory(GiB)": 13.7, "step": 70190, "train_speed(iter/s)": 1.52731 }, { "acc": 0.98187504, "epoch": 32.901335833138035, "grad_norm": 2.4842276573181152, "learning_rate": 2.869134875155982e-06, "loss": 0.03228571, "memory(GiB)": 13.7, "step": 70195, "train_speed(iter/s)": 1.527312 }, { "acc": 0.9822917, "epoch": 32.90367940004687, "grad_norm": 9.704938888549805, "learning_rate": 2.868433754789321e-06, "loss": 0.06072559, "memory(GiB)": 13.7, "step": 70200, "train_speed(iter/s)": 1.527315 }, { "acc": 0.9810606, "epoch": 32.9060229669557, "grad_norm": 4.003514766693115, "learning_rate": 2.8677326856705256e-06, "loss": 0.08395726, "memory(GiB)": 13.7, "step": 70205, "train_speed(iter/s)": 1.527309 }, { "acc": 0.98125, "epoch": 32.908366533864545, "grad_norm": 1.1441435813903809, "learning_rate": 2.8670316678164513e-06, "loss": 0.04073809, "memory(GiB)": 13.7, "step": 70210, "train_speed(iter/s)": 1.527304 }, { "acc": 0.97749996, "epoch": 32.91071010077338, "grad_norm": 5.345898151397705, "learning_rate": 2.8663307012439466e-06, "loss": 0.06568267, "memory(GiB)": 13.7, "step": 70215, "train_speed(iter/s)": 1.527307 }, { "acc": 0.9802083, "epoch": 32.91305366768221, "grad_norm": 3.496140241622925, "learning_rate": 2.86562978596986e-06, "loss": 0.08576725, "memory(GiB)": 13.7, "step": 70220, "train_speed(iter/s)": 1.527306 }, { "acc": 0.96916122, "epoch": 32.91539723459105, "grad_norm": 9.116331100463867, "learning_rate": 2.8649289220110394e-06, "loss": 0.07114248, "memory(GiB)": 13.7, "step": 70225, "train_speed(iter/s)": 1.527302 }, { "acc": 0.9885417, "epoch": 32.91774080149988, "grad_norm": 0.03325332701206207, "learning_rate": 2.8642281093843334e-06, "loss": 0.03805773, "memory(GiB)": 13.7, "step": 70230, "train_speed(iter/s)": 1.527308 }, { "acc": 0.98718748, "epoch": 32.920084368408716, "grad_norm": 3.793588161468506, "learning_rate": 2.8635273481065863e-06, "loss": 0.02933357, "memory(GiB)": 13.7, "step": 70235, "train_speed(iter/s)": 1.527302 }, { "acc": 0.97654762, "epoch": 32.92242793531755, "grad_norm": 0.17147736251354218, "learning_rate": 2.862826638194638e-06, "loss": 0.0567574, "memory(GiB)": 13.7, "step": 70240, "train_speed(iter/s)": 1.527301 }, { "acc": 0.99520836, "epoch": 32.92477150222639, "grad_norm": 2.4475831985473633, "learning_rate": 2.862125979665335e-06, "loss": 0.02104464, "memory(GiB)": 13.7, "step": 70245, "train_speed(iter/s)": 1.527308 }, { "acc": 0.99112186, "epoch": 32.927115069135226, "grad_norm": 1.6698617935180664, "learning_rate": 2.861425372535521e-06, "loss": 0.02714947, "memory(GiB)": 13.7, "step": 70250, "train_speed(iter/s)": 1.527313 }, { "acc": 0.96203375, "epoch": 32.92945863604406, "grad_norm": 5.590994358062744, "learning_rate": 2.860724816822031e-06, "loss": 0.15638744, "memory(GiB)": 13.7, "step": 70255, "train_speed(iter/s)": 1.527317 }, { "acc": 0.98440475, "epoch": 32.931802202952895, "grad_norm": 4.0223069190979, "learning_rate": 2.8600243125417066e-06, "loss": 0.0330536, "memory(GiB)": 13.7, "step": 70260, "train_speed(iter/s)": 1.527321 }, { "acc": 0.97322922, "epoch": 32.93414576986173, "grad_norm": 5.8105902671813965, "learning_rate": 2.8593238597113875e-06, "loss": 0.07845121, "memory(GiB)": 13.7, "step": 70265, "train_speed(iter/s)": 1.527325 }, { "acc": 0.98458328, "epoch": 32.93648933677056, "grad_norm": 0.002497471636161208, "learning_rate": 2.858623458347907e-06, "loss": 0.04408708, "memory(GiB)": 13.7, "step": 70270, "train_speed(iter/s)": 1.52733 }, { "acc": 0.98777781, "epoch": 32.9388329036794, "grad_norm": 2.6380765438079834, "learning_rate": 2.857923108468104e-06, "loss": 0.05448354, "memory(GiB)": 13.7, "step": 70275, "train_speed(iter/s)": 1.527332 }, { "acc": 0.99131947, "epoch": 32.94117647058823, "grad_norm": 4.120615005493164, "learning_rate": 2.857222810088808e-06, "loss": 0.0437775, "memory(GiB)": 13.7, "step": 70280, "train_speed(iter/s)": 1.527333 }, { "acc": 0.9854167, "epoch": 32.94352003749707, "grad_norm": 1.6335108280181885, "learning_rate": 2.8565225632268544e-06, "loss": 0.05474429, "memory(GiB)": 13.7, "step": 70285, "train_speed(iter/s)": 1.527342 }, { "acc": 0.98940973, "epoch": 32.94586360440591, "grad_norm": 1.6331621408462524, "learning_rate": 2.8558223678990782e-06, "loss": 0.03479054, "memory(GiB)": 13.7, "step": 70290, "train_speed(iter/s)": 1.527342 }, { "acc": 0.98291664, "epoch": 32.94820717131474, "grad_norm": 3.02524995803833, "learning_rate": 2.8551222241223032e-06, "loss": 0.03824354, "memory(GiB)": 13.7, "step": 70295, "train_speed(iter/s)": 1.527351 }, { "acc": 0.99118595, "epoch": 32.950550738223576, "grad_norm": 3.7377266883850098, "learning_rate": 2.854422131913362e-06, "loss": 0.07132234, "memory(GiB)": 13.7, "step": 70300, "train_speed(iter/s)": 1.527349 }, { "acc": 0.95977678, "epoch": 32.95289430513241, "grad_norm": 5.1625075340271, "learning_rate": 2.8537220912890844e-06, "loss": 0.13840505, "memory(GiB)": 13.7, "step": 70305, "train_speed(iter/s)": 1.527351 }, { "acc": 0.99333334, "epoch": 32.955237872041245, "grad_norm": 0.8424485325813293, "learning_rate": 2.853022102266293e-06, "loss": 0.01687378, "memory(GiB)": 13.7, "step": 70310, "train_speed(iter/s)": 1.527352 }, { "acc": 0.98166666, "epoch": 32.95758143895008, "grad_norm": 7.034421443939209, "learning_rate": 2.8523221648618176e-06, "loss": 0.04659565, "memory(GiB)": 13.7, "step": 70315, "train_speed(iter/s)": 1.527358 }, { "acc": 0.98625002, "epoch": 32.95992500585892, "grad_norm": 2.7108259201049805, "learning_rate": 2.851622279092478e-06, "loss": 0.05128562, "memory(GiB)": 13.7, "step": 70320, "train_speed(iter/s)": 1.52736 }, { "acc": 0.96937504, "epoch": 32.962268572767755, "grad_norm": 6.50102424621582, "learning_rate": 2.850922444975101e-06, "loss": 0.06769873, "memory(GiB)": 13.7, "step": 70325, "train_speed(iter/s)": 1.527361 }, { "acc": 0.98556814, "epoch": 32.96461213967659, "grad_norm": 3.0893802642822266, "learning_rate": 2.850222662526504e-06, "loss": 0.08403707, "memory(GiB)": 13.7, "step": 70330, "train_speed(iter/s)": 1.527366 }, { "acc": 0.98000002, "epoch": 32.96695570658542, "grad_norm": 5.790128231048584, "learning_rate": 2.849522931763511e-06, "loss": 0.04899251, "memory(GiB)": 13.7, "step": 70335, "train_speed(iter/s)": 1.527362 }, { "acc": 0.9791666, "epoch": 32.96929927349426, "grad_norm": 4.626092433929443, "learning_rate": 2.8488232527029397e-06, "loss": 0.04585751, "memory(GiB)": 13.7, "step": 70340, "train_speed(iter/s)": 1.527368 }, { "acc": 0.98416672, "epoch": 32.97164284040309, "grad_norm": 4.331629276275635, "learning_rate": 2.8481236253616106e-06, "loss": 0.04202175, "memory(GiB)": 13.7, "step": 70345, "train_speed(iter/s)": 1.527368 }, { "acc": 0.9807292, "epoch": 32.973986407311926, "grad_norm": 7.210825443267822, "learning_rate": 2.8474240497563364e-06, "loss": 0.06016998, "memory(GiB)": 13.7, "step": 70350, "train_speed(iter/s)": 1.527371 }, { "acc": 0.98812504, "epoch": 32.97632997422076, "grad_norm": 0.9523271918296814, "learning_rate": 2.846724525903937e-06, "loss": 0.03224939, "memory(GiB)": 13.7, "step": 70355, "train_speed(iter/s)": 1.527371 }, { "acc": 0.98562498, "epoch": 32.9786735411296, "grad_norm": 2.892120361328125, "learning_rate": 2.8460250538212214e-06, "loss": 0.05801517, "memory(GiB)": 13.7, "step": 70360, "train_speed(iter/s)": 1.527378 }, { "acc": 0.98291674, "epoch": 32.981017108038436, "grad_norm": 3.90012526512146, "learning_rate": 2.845325633525009e-06, "loss": 0.03249404, "memory(GiB)": 13.7, "step": 70365, "train_speed(iter/s)": 1.527385 }, { "acc": 0.984375, "epoch": 32.98336067494727, "grad_norm": 3.6023752689361572, "learning_rate": 2.844626265032105e-06, "loss": 0.0751855, "memory(GiB)": 13.7, "step": 70370, "train_speed(iter/s)": 1.52738 }, { "acc": 0.98312502, "epoch": 32.985704241856105, "grad_norm": 1.1544193029403687, "learning_rate": 2.8439269483593223e-06, "loss": 0.03085156, "memory(GiB)": 13.7, "step": 70375, "train_speed(iter/s)": 1.527378 }, { "acc": 0.9770834, "epoch": 32.98804780876494, "grad_norm": 0.0016061317874118686, "learning_rate": 2.8432276835234747e-06, "loss": 0.04817042, "memory(GiB)": 13.7, "step": 70380, "train_speed(iter/s)": 1.527383 }, { "acc": 0.98675594, "epoch": 32.990391375673774, "grad_norm": 3.790365695953369, "learning_rate": 2.842528470541363e-06, "loss": 0.03624761, "memory(GiB)": 13.7, "step": 70385, "train_speed(iter/s)": 1.52738 }, { "acc": 0.98321428, "epoch": 32.99273494258261, "grad_norm": 3.8578526973724365, "learning_rate": 2.8418293094297976e-06, "loss": 0.04350621, "memory(GiB)": 13.7, "step": 70390, "train_speed(iter/s)": 1.527384 }, { "acc": 0.99434528, "epoch": 32.99507850949145, "grad_norm": 0.022472821176052094, "learning_rate": 2.841130200205587e-06, "loss": 0.01220822, "memory(GiB)": 13.7, "step": 70395, "train_speed(iter/s)": 1.52739 }, { "acc": 0.98416662, "epoch": 32.997422076400284, "grad_norm": 4.301823616027832, "learning_rate": 2.8404311428855306e-06, "loss": 0.03271437, "memory(GiB)": 13.7, "step": 70400, "train_speed(iter/s)": 1.527398 }, { "acc": 0.97947311, "epoch": 32.99976564330912, "grad_norm": 3.046386241912842, "learning_rate": 2.8397321374864316e-06, "loss": 0.0446804, "memory(GiB)": 13.7, "step": 70405, "train_speed(iter/s)": 1.527405 }, { "acc": 0.99258928, "epoch": 33.00210921021795, "grad_norm": 4.03696870803833, "learning_rate": 2.8390331840250946e-06, "loss": 0.02837658, "memory(GiB)": 13.7, "step": 70410, "train_speed(iter/s)": 1.527382 }, { "acc": 0.98812504, "epoch": 33.00445277712679, "grad_norm": 3.5105390548706055, "learning_rate": 2.8383342825183173e-06, "loss": 0.02184097, "memory(GiB)": 13.7, "step": 70415, "train_speed(iter/s)": 1.527382 }, { "acc": 0.98391781, "epoch": 33.00679634403562, "grad_norm": 4.3017425537109375, "learning_rate": 2.8376354329829037e-06, "loss": 0.05439183, "memory(GiB)": 13.7, "step": 70420, "train_speed(iter/s)": 1.527385 }, { "acc": 0.98125, "epoch": 33.009139910944455, "grad_norm": 3.5680735111236572, "learning_rate": 2.8369366354356455e-06, "loss": 0.04611027, "memory(GiB)": 13.7, "step": 70425, "train_speed(iter/s)": 1.52739 }, { "acc": 0.9895834, "epoch": 33.01148347785329, "grad_norm": 6.4468841552734375, "learning_rate": 2.8362378898933442e-06, "loss": 0.06355736, "memory(GiB)": 13.7, "step": 70430, "train_speed(iter/s)": 1.527391 }, { "acc": 0.98194447, "epoch": 33.01382704476213, "grad_norm": 2.569354772567749, "learning_rate": 2.8355391963727954e-06, "loss": 0.07246313, "memory(GiB)": 13.7, "step": 70435, "train_speed(iter/s)": 1.527389 }, { "acc": 0.98703365, "epoch": 33.016170611670965, "grad_norm": 1.833968997001648, "learning_rate": 2.834840554890791e-06, "loss": 0.03588615, "memory(GiB)": 13.7, "step": 70440, "train_speed(iter/s)": 1.527392 }, { "acc": 0.9802084, "epoch": 33.0185141785798, "grad_norm": 4.0037736892700195, "learning_rate": 2.8341419654641234e-06, "loss": 0.04217256, "memory(GiB)": 13.7, "step": 70445, "train_speed(iter/s)": 1.527396 }, { "acc": 0.98604164, "epoch": 33.020857745488634, "grad_norm": 4.608185768127441, "learning_rate": 2.8334434281095864e-06, "loss": 0.04222721, "memory(GiB)": 13.7, "step": 70450, "train_speed(iter/s)": 1.527396 }, { "acc": 0.9921875, "epoch": 33.02320131239747, "grad_norm": 2.1208813190460205, "learning_rate": 2.832744942843971e-06, "loss": 0.02334917, "memory(GiB)": 13.7, "step": 70455, "train_speed(iter/s)": 1.527396 }, { "acc": 0.975947, "epoch": 33.0255448793063, "grad_norm": 1.6978158950805664, "learning_rate": 2.8320465096840644e-06, "loss": 0.07309896, "memory(GiB)": 13.7, "step": 70460, "train_speed(iter/s)": 1.527401 }, { "acc": 0.9979166, "epoch": 33.02788844621514, "grad_norm": 2.324159860610962, "learning_rate": 2.831348128646656e-06, "loss": 0.03625149, "memory(GiB)": 13.7, "step": 70465, "train_speed(iter/s)": 1.527405 }, { "acc": 0.99196434, "epoch": 33.03023201312398, "grad_norm": 2.1515629291534424, "learning_rate": 2.8306497997485344e-06, "loss": 0.02787372, "memory(GiB)": 13.7, "step": 70470, "train_speed(iter/s)": 1.527407 }, { "acc": 0.99511366, "epoch": 33.03257558003281, "grad_norm": 1.506142020225525, "learning_rate": 2.8299515230064807e-06, "loss": 0.02923906, "memory(GiB)": 13.7, "step": 70475, "train_speed(iter/s)": 1.527409 }, { "acc": 0.97541666, "epoch": 33.03491914694165, "grad_norm": 3.5232298374176025, "learning_rate": 2.8292532984372833e-06, "loss": 0.05616238, "memory(GiB)": 13.7, "step": 70480, "train_speed(iter/s)": 1.527407 }, { "acc": 0.98343754, "epoch": 33.03726271385048, "grad_norm": 1.9774489402770996, "learning_rate": 2.8285551260577215e-06, "loss": 0.03315999, "memory(GiB)": 13.7, "step": 70485, "train_speed(iter/s)": 1.527409 }, { "acc": 0.97830362, "epoch": 33.039606280759315, "grad_norm": 2.100980281829834, "learning_rate": 2.8278570058845796e-06, "loss": 0.04999403, "memory(GiB)": 13.7, "step": 70490, "train_speed(iter/s)": 1.527414 }, { "acc": 0.99363098, "epoch": 33.04194984766815, "grad_norm": 2.026369333267212, "learning_rate": 2.8271589379346393e-06, "loss": 0.0244948, "memory(GiB)": 13.7, "step": 70495, "train_speed(iter/s)": 1.52742 }, { "acc": 0.990625, "epoch": 33.044293414576984, "grad_norm": 4.935471534729004, "learning_rate": 2.8264609222246758e-06, "loss": 0.02699277, "memory(GiB)": 13.7, "step": 70500, "train_speed(iter/s)": 1.527418 }, { "acc": 0.99613094, "epoch": 33.04663698148582, "grad_norm": 2.8939318656921387, "learning_rate": 2.82576295877147e-06, "loss": 0.01976411, "memory(GiB)": 13.7, "step": 70505, "train_speed(iter/s)": 1.527421 }, { "acc": 0.99020834, "epoch": 33.04898054839466, "grad_norm": 3.063204050064087, "learning_rate": 2.8250650475918e-06, "loss": 0.03702412, "memory(GiB)": 13.7, "step": 70510, "train_speed(iter/s)": 1.527426 }, { "acc": 0.99111118, "epoch": 33.051324115303494, "grad_norm": 3.8962793350219727, "learning_rate": 2.8243671887024376e-06, "loss": 0.05433527, "memory(GiB)": 13.7, "step": 70515, "train_speed(iter/s)": 1.527426 }, { "acc": 0.98862181, "epoch": 33.05366768221233, "grad_norm": 0.001359306275844574, "learning_rate": 2.8236693821201583e-06, "loss": 0.02961518, "memory(GiB)": 13.7, "step": 70520, "train_speed(iter/s)": 1.527431 }, { "acc": 0.9854167, "epoch": 33.05601124912116, "grad_norm": 2.623131036758423, "learning_rate": 2.8229716278617387e-06, "loss": 0.05732391, "memory(GiB)": 13.7, "step": 70525, "train_speed(iter/s)": 1.527438 }, { "acc": 0.97578526, "epoch": 33.05835481603, "grad_norm": 1.9121835231781006, "learning_rate": 2.8222739259439465e-06, "loss": 0.05724772, "memory(GiB)": 13.7, "step": 70530, "train_speed(iter/s)": 1.527446 }, { "acc": 0.98843136, "epoch": 33.06069838293883, "grad_norm": 0.714763879776001, "learning_rate": 2.8215762763835506e-06, "loss": 0.03992466, "memory(GiB)": 13.7, "step": 70535, "train_speed(iter/s)": 1.527448 }, { "acc": 0.9791667, "epoch": 33.063041949847666, "grad_norm": 0.05846192687749863, "learning_rate": 2.8208786791973238e-06, "loss": 0.04977645, "memory(GiB)": 13.7, "step": 70540, "train_speed(iter/s)": 1.527457 }, { "acc": 0.98447914, "epoch": 33.06538551675651, "grad_norm": 3.826362133026123, "learning_rate": 2.820181134402034e-06, "loss": 0.06385882, "memory(GiB)": 13.7, "step": 70545, "train_speed(iter/s)": 1.527459 }, { "acc": 0.98154755, "epoch": 33.06772908366534, "grad_norm": 4.448110103607178, "learning_rate": 2.8194836420144457e-06, "loss": 0.05704588, "memory(GiB)": 13.7, "step": 70550, "train_speed(iter/s)": 1.527462 }, { "acc": 0.97979174, "epoch": 33.070072650574176, "grad_norm": 4.6307477951049805, "learning_rate": 2.8187862020513253e-06, "loss": 0.04625061, "memory(GiB)": 13.7, "step": 70555, "train_speed(iter/s)": 1.527469 }, { "acc": 0.98125, "epoch": 33.07241621748301, "grad_norm": 0.007057465147227049, "learning_rate": 2.8180888145294367e-06, "loss": 0.04178436, "memory(GiB)": 13.7, "step": 70560, "train_speed(iter/s)": 1.527479 }, { "acc": 0.98883924, "epoch": 33.074759784391844, "grad_norm": 1.6091843843460083, "learning_rate": 2.8173914794655454e-06, "loss": 0.01935023, "memory(GiB)": 13.7, "step": 70565, "train_speed(iter/s)": 1.527486 }, { "acc": 0.98621521, "epoch": 33.07710335130068, "grad_norm": 2.090947151184082, "learning_rate": 2.8166941968764104e-06, "loss": 0.05022471, "memory(GiB)": 13.7, "step": 70570, "train_speed(iter/s)": 1.527491 }, { "acc": 0.97833338, "epoch": 33.07944691820951, "grad_norm": 3.560253858566284, "learning_rate": 2.8159969667787913e-06, "loss": 0.04731086, "memory(GiB)": 13.7, "step": 70575, "train_speed(iter/s)": 1.52749 }, { "acc": 0.97809477, "epoch": 33.08179048511835, "grad_norm": 2.072134494781494, "learning_rate": 2.8152997891894486e-06, "loss": 0.06002903, "memory(GiB)": 13.7, "step": 70580, "train_speed(iter/s)": 1.527491 }, { "acc": 0.98708334, "epoch": 33.08413405202719, "grad_norm": 2.9414262771606445, "learning_rate": 2.814602664125141e-06, "loss": 0.04879588, "memory(GiB)": 13.7, "step": 70585, "train_speed(iter/s)": 1.527498 }, { "acc": 0.990625, "epoch": 33.08647761893602, "grad_norm": 2.316094398498535, "learning_rate": 2.813905591602623e-06, "loss": 0.03467004, "memory(GiB)": 13.7, "step": 70590, "train_speed(iter/s)": 1.527495 }, { "acc": 0.99196434, "epoch": 33.08882118584486, "grad_norm": 0.01267706137150526, "learning_rate": 2.813208571638651e-06, "loss": 0.02180086, "memory(GiB)": 13.7, "step": 70595, "train_speed(iter/s)": 1.527502 }, { "acc": 0.9958334, "epoch": 33.09116475275369, "grad_norm": 0.024456379935145378, "learning_rate": 2.8125116042499805e-06, "loss": 0.0152823, "memory(GiB)": 13.7, "step": 70600, "train_speed(iter/s)": 1.527503 }, { "acc": 0.98062506, "epoch": 33.093508319662526, "grad_norm": 5.227491855621338, "learning_rate": 2.811814689453364e-06, "loss": 0.0514708, "memory(GiB)": 13.7, "step": 70605, "train_speed(iter/s)": 1.527508 }, { "acc": 0.98474197, "epoch": 33.09585188657136, "grad_norm": 3.708468198776245, "learning_rate": 2.8111178272655487e-06, "loss": 0.05654295, "memory(GiB)": 13.7, "step": 70610, "train_speed(iter/s)": 1.527513 }, { "acc": 0.99875002, "epoch": 33.098195453480194, "grad_norm": 1.6076158285140991, "learning_rate": 2.810421017703289e-06, "loss": 0.04388995, "memory(GiB)": 13.7, "step": 70615, "train_speed(iter/s)": 1.527514 }, { "acc": 0.98410091, "epoch": 33.10053902038903, "grad_norm": 0.0028723841533064842, "learning_rate": 2.809724260783333e-06, "loss": 0.04972622, "memory(GiB)": 13.7, "step": 70620, "train_speed(iter/s)": 1.527515 }, { "acc": 0.99050598, "epoch": 33.10288258729787, "grad_norm": 2.7962234020233154, "learning_rate": 2.8090275565224306e-06, "loss": 0.0417427, "memory(GiB)": 13.7, "step": 70625, "train_speed(iter/s)": 1.527516 }, { "acc": 0.97770834, "epoch": 33.105226154206704, "grad_norm": 3.618330955505371, "learning_rate": 2.8083309049373234e-06, "loss": 0.04715043, "memory(GiB)": 13.7, "step": 70630, "train_speed(iter/s)": 1.527518 }, { "acc": 0.9927083, "epoch": 33.10756972111554, "grad_norm": 3.8124115467071533, "learning_rate": 2.8076343060447603e-06, "loss": 0.02100558, "memory(GiB)": 13.7, "step": 70635, "train_speed(iter/s)": 1.527522 }, { "acc": 0.9802084, "epoch": 33.10991328802437, "grad_norm": 4.545658111572266, "learning_rate": 2.8069377598614855e-06, "loss": 0.05668111, "memory(GiB)": 13.7, "step": 70640, "train_speed(iter/s)": 1.527523 }, { "acc": 0.99821434, "epoch": 33.11225685493321, "grad_norm": 1.9170275926589966, "learning_rate": 2.806241266404239e-06, "loss": 0.00850991, "memory(GiB)": 13.7, "step": 70645, "train_speed(iter/s)": 1.527528 }, { "acc": 0.9822916, "epoch": 33.11460042184204, "grad_norm": 6.402393341064453, "learning_rate": 2.805544825689766e-06, "loss": 0.06491504, "memory(GiB)": 13.7, "step": 70650, "train_speed(iter/s)": 1.527528 }, { "acc": 0.99541664, "epoch": 33.116943988750876, "grad_norm": 0.6483427286148071, "learning_rate": 2.804848437734803e-06, "loss": 0.0389798, "memory(GiB)": 13.7, "step": 70655, "train_speed(iter/s)": 1.527526 }, { "acc": 0.98249998, "epoch": 33.11928755565972, "grad_norm": 4.43277645111084, "learning_rate": 2.804152102556091e-06, "loss": 0.0283585, "memory(GiB)": 13.7, "step": 70660, "train_speed(iter/s)": 1.527532 }, { "acc": 0.9942235, "epoch": 33.12163112256855, "grad_norm": 1.5362149477005005, "learning_rate": 2.8034558201703648e-06, "loss": 0.02240968, "memory(GiB)": 13.7, "step": 70665, "train_speed(iter/s)": 1.527537 }, { "acc": 0.97755642, "epoch": 33.123974689477386, "grad_norm": 6.028317451477051, "learning_rate": 2.8027595905943645e-06, "loss": 0.05219218, "memory(GiB)": 13.7, "step": 70670, "train_speed(iter/s)": 1.527541 }, { "acc": 0.97999992, "epoch": 33.12631825638622, "grad_norm": 3.2335004806518555, "learning_rate": 2.8020634138448243e-06, "loss": 0.0443532, "memory(GiB)": 13.7, "step": 70675, "train_speed(iter/s)": 1.52754 }, { "acc": 0.99080353, "epoch": 33.128661823295054, "grad_norm": 0.9130675792694092, "learning_rate": 2.8013672899384755e-06, "loss": 0.02677137, "memory(GiB)": 13.7, "step": 70680, "train_speed(iter/s)": 1.527551 }, { "acc": 0.9739584, "epoch": 33.13100539020389, "grad_norm": 3.5471889972686768, "learning_rate": 2.800671218892052e-06, "loss": 0.04458024, "memory(GiB)": 13.7, "step": 70685, "train_speed(iter/s)": 1.527555 }, { "acc": 0.98341351, "epoch": 33.13334895711272, "grad_norm": 2.6747071743011475, "learning_rate": 2.7999752007222876e-06, "loss": 0.04331488, "memory(GiB)": 13.7, "step": 70690, "train_speed(iter/s)": 1.527557 }, { "acc": 0.98500004, "epoch": 33.13569252402156, "grad_norm": 1.6631771326065063, "learning_rate": 2.799279235445908e-06, "loss": 0.03381428, "memory(GiB)": 13.7, "step": 70695, "train_speed(iter/s)": 1.527565 }, { "acc": 0.97416668, "epoch": 33.1380360909304, "grad_norm": 3.648651123046875, "learning_rate": 2.798583323079646e-06, "loss": 0.06328521, "memory(GiB)": 13.7, "step": 70700, "train_speed(iter/s)": 1.527569 }, { "acc": 0.97579584, "epoch": 33.14037965783923, "grad_norm": 4.788072109222412, "learning_rate": 2.7978874636402255e-06, "loss": 0.09577762, "memory(GiB)": 13.7, "step": 70705, "train_speed(iter/s)": 1.527568 }, { "acc": 0.97979164, "epoch": 33.14272322474807, "grad_norm": 3.8454906940460205, "learning_rate": 2.797191657144374e-06, "loss": 0.05591195, "memory(GiB)": 13.7, "step": 70710, "train_speed(iter/s)": 1.527571 }, { "acc": 0.9864584, "epoch": 33.1450667916569, "grad_norm": 5.931278228759766, "learning_rate": 2.796495903608818e-06, "loss": 0.05644001, "memory(GiB)": 13.7, "step": 70715, "train_speed(iter/s)": 1.527569 }, { "acc": 0.98552074, "epoch": 33.147410358565736, "grad_norm": 3.258391857147217, "learning_rate": 2.7958002030502786e-06, "loss": 0.04410467, "memory(GiB)": 13.7, "step": 70720, "train_speed(iter/s)": 1.527567 }, { "acc": 0.99291668, "epoch": 33.14975392547457, "grad_norm": 4.0745978355407715, "learning_rate": 2.7951045554854785e-06, "loss": 0.04489759, "memory(GiB)": 13.7, "step": 70725, "train_speed(iter/s)": 1.527572 }, { "acc": 0.98504467, "epoch": 33.152097492383405, "grad_norm": 5.489627361297607, "learning_rate": 2.794408960931142e-06, "loss": 0.05492014, "memory(GiB)": 13.7, "step": 70730, "train_speed(iter/s)": 1.527579 }, { "acc": 0.9979167, "epoch": 33.154441059292246, "grad_norm": 1.7860311269760132, "learning_rate": 2.7937134194039865e-06, "loss": 0.04483582, "memory(GiB)": 13.7, "step": 70735, "train_speed(iter/s)": 1.527579 }, { "acc": 0.98601761, "epoch": 33.15678462620108, "grad_norm": 4.412435054779053, "learning_rate": 2.7930179309207293e-06, "loss": 0.05904716, "memory(GiB)": 13.7, "step": 70740, "train_speed(iter/s)": 1.527581 }, { "acc": 0.98708334, "epoch": 33.159128193109915, "grad_norm": 1.6593780517578125, "learning_rate": 2.7923224954980883e-06, "loss": 0.03988162, "memory(GiB)": 13.7, "step": 70745, "train_speed(iter/s)": 1.52758 }, { "acc": 0.97666664, "epoch": 33.16147176001875, "grad_norm": 3.2896924018859863, "learning_rate": 2.7916271131527824e-06, "loss": 0.05457105, "memory(GiB)": 13.7, "step": 70750, "train_speed(iter/s)": 1.527583 }, { "acc": 0.9958334, "epoch": 33.16381532692758, "grad_norm": 4.409972667694092, "learning_rate": 2.790931783901521e-06, "loss": 0.03208538, "memory(GiB)": 13.7, "step": 70755, "train_speed(iter/s)": 1.527593 }, { "acc": 0.98832798, "epoch": 33.16615889383642, "grad_norm": 0.0026918489020317793, "learning_rate": 2.790236507761021e-06, "loss": 0.03236049, "memory(GiB)": 13.7, "step": 70760, "train_speed(iter/s)": 1.527595 }, { "acc": 0.98916664, "epoch": 33.16850246074525, "grad_norm": 2.1989283561706543, "learning_rate": 2.789541284747994e-06, "loss": 0.02202429, "memory(GiB)": 13.7, "step": 70765, "train_speed(iter/s)": 1.527593 }, { "acc": 0.97529764, "epoch": 33.170846027654086, "grad_norm": 1.5947693586349487, "learning_rate": 2.7888461148791534e-06, "loss": 0.05697387, "memory(GiB)": 13.7, "step": 70770, "train_speed(iter/s)": 1.527597 }, { "acc": 0.99048615, "epoch": 33.17318959456293, "grad_norm": 4.623304843902588, "learning_rate": 2.7881509981712063e-06, "loss": 0.02601613, "memory(GiB)": 13.7, "step": 70775, "train_speed(iter/s)": 1.527603 }, { "acc": 0.98379955, "epoch": 33.17553316147176, "grad_norm": 2.3995285034179688, "learning_rate": 2.787455934640859e-06, "loss": 0.05468395, "memory(GiB)": 13.7, "step": 70780, "train_speed(iter/s)": 1.527605 }, { "acc": 0.99187498, "epoch": 33.177876728380596, "grad_norm": 1.838666558265686, "learning_rate": 2.78676092430482e-06, "loss": 0.01425225, "memory(GiB)": 13.7, "step": 70785, "train_speed(iter/s)": 1.527604 }, { "acc": 0.98343639, "epoch": 33.18022029528943, "grad_norm": 2.6632699966430664, "learning_rate": 2.7860659671797986e-06, "loss": 0.05592003, "memory(GiB)": 13.7, "step": 70790, "train_speed(iter/s)": 1.527607 }, { "acc": 0.97446423, "epoch": 33.182563862198265, "grad_norm": 6.7259955406188965, "learning_rate": 2.7853710632824938e-06, "loss": 0.0589832, "memory(GiB)": 13.7, "step": 70795, "train_speed(iter/s)": 1.52761 }, { "acc": 0.97375002, "epoch": 33.1849074291071, "grad_norm": 3.4513094425201416, "learning_rate": 2.7846762126296118e-06, "loss": 0.03998289, "memory(GiB)": 13.7, "step": 70800, "train_speed(iter/s)": 1.527615 }, { "acc": 0.9739583, "epoch": 33.18725099601593, "grad_norm": 4.092371463775635, "learning_rate": 2.7839814152378553e-06, "loss": 0.04771282, "memory(GiB)": 13.7, "step": 70805, "train_speed(iter/s)": 1.527614 }, { "acc": 0.9640625, "epoch": 33.189594562924775, "grad_norm": 8.03563117980957, "learning_rate": 2.7832866711239228e-06, "loss": 0.09365991, "memory(GiB)": 13.7, "step": 70810, "train_speed(iter/s)": 1.527619 }, { "acc": 0.97654762, "epoch": 33.19193812983361, "grad_norm": 2.9898016452789307, "learning_rate": 2.7825919803045155e-06, "loss": 0.04596934, "memory(GiB)": 13.7, "step": 70815, "train_speed(iter/s)": 1.52762 }, { "acc": 0.99323864, "epoch": 33.19428169674244, "grad_norm": 2.422506332397461, "learning_rate": 2.781897342796329e-06, "loss": 0.02354321, "memory(GiB)": 13.7, "step": 70820, "train_speed(iter/s)": 1.527631 }, { "acc": 0.990625, "epoch": 33.19662526365128, "grad_norm": 0.7266841530799866, "learning_rate": 2.7812027586160646e-06, "loss": 0.02369518, "memory(GiB)": 13.7, "step": 70825, "train_speed(iter/s)": 1.527635 }, { "acc": 0.9963542, "epoch": 33.19896883056011, "grad_norm": 0.01727004162967205, "learning_rate": 2.780508227780412e-06, "loss": 0.01478093, "memory(GiB)": 13.7, "step": 70830, "train_speed(iter/s)": 1.527638 }, { "acc": 0.99020834, "epoch": 33.201312397468946, "grad_norm": 0.0008645412162877619, "learning_rate": 2.779813750306069e-06, "loss": 0.02174338, "memory(GiB)": 13.7, "step": 70835, "train_speed(iter/s)": 1.527643 }, { "acc": 0.99040184, "epoch": 33.20365596437778, "grad_norm": 3.285600423812866, "learning_rate": 2.7791193262097273e-06, "loss": 0.02446121, "memory(GiB)": 13.7, "step": 70840, "train_speed(iter/s)": 1.527652 }, { "acc": 0.9927084, "epoch": 33.205999531286615, "grad_norm": 3.855501413345337, "learning_rate": 2.7784249555080812e-06, "loss": 0.01533183, "memory(GiB)": 13.7, "step": 70845, "train_speed(iter/s)": 1.527658 }, { "acc": 0.99144344, "epoch": 33.208343098195456, "grad_norm": 1.238808512687683, "learning_rate": 2.7777306382178176e-06, "loss": 0.03460671, "memory(GiB)": 13.7, "step": 70850, "train_speed(iter/s)": 1.527664 }, { "acc": 0.99333334, "epoch": 33.21068666510429, "grad_norm": 0.8355150818824768, "learning_rate": 2.7770363743556266e-06, "loss": 0.02903586, "memory(GiB)": 13.7, "step": 70855, "train_speed(iter/s)": 1.527669 }, { "acc": 0.98779755, "epoch": 33.213030232013125, "grad_norm": 3.2466816902160645, "learning_rate": 2.776342163938199e-06, "loss": 0.03753288, "memory(GiB)": 13.7, "step": 70860, "train_speed(iter/s)": 1.527675 }, { "acc": 0.99333324, "epoch": 33.21537379892196, "grad_norm": 2.022937536239624, "learning_rate": 2.775648006982218e-06, "loss": 0.03852803, "memory(GiB)": 13.7, "step": 70865, "train_speed(iter/s)": 1.527682 }, { "acc": 0.99910717, "epoch": 33.217717365830794, "grad_norm": 0.004197141155600548, "learning_rate": 2.7749539035043686e-06, "loss": 0.01648217, "memory(GiB)": 13.7, "step": 70870, "train_speed(iter/s)": 1.527684 }, { "acc": 0.98500004, "epoch": 33.22006093273963, "grad_norm": 4.16435432434082, "learning_rate": 2.7742598535213354e-06, "loss": 0.05084222, "memory(GiB)": 13.7, "step": 70875, "train_speed(iter/s)": 1.527695 }, { "acc": 0.97822914, "epoch": 33.22240449964846, "grad_norm": 3.229555606842041, "learning_rate": 2.7735658570498036e-06, "loss": 0.05992743, "memory(GiB)": 13.7, "step": 70880, "train_speed(iter/s)": 1.527702 }, { "acc": 0.9817709, "epoch": 33.224748066557304, "grad_norm": 4.41361665725708, "learning_rate": 2.77287191410645e-06, "loss": 0.03609052, "memory(GiB)": 13.7, "step": 70885, "train_speed(iter/s)": 1.52771 }, { "acc": 0.97868423, "epoch": 33.22709163346614, "grad_norm": 2.221015214920044, "learning_rate": 2.772178024707959e-06, "loss": 0.07316688, "memory(GiB)": 13.7, "step": 70890, "train_speed(iter/s)": 1.527713 }, { "acc": 0.98571434, "epoch": 33.22943520037497, "grad_norm": 4.0448079109191895, "learning_rate": 2.771484188871008e-06, "loss": 0.06569779, "memory(GiB)": 13.7, "step": 70895, "train_speed(iter/s)": 1.52771 }, { "acc": 0.98321428, "epoch": 33.23177876728381, "grad_norm": 3.1506035327911377, "learning_rate": 2.7707904066122756e-06, "loss": 0.06364143, "memory(GiB)": 13.7, "step": 70900, "train_speed(iter/s)": 1.527714 }, { "acc": 0.97465277, "epoch": 33.23412233419264, "grad_norm": 3.4203195571899414, "learning_rate": 2.770096677948434e-06, "loss": 0.08582644, "memory(GiB)": 13.7, "step": 70905, "train_speed(iter/s)": 1.527719 }, { "acc": 0.97349215, "epoch": 33.236465901101475, "grad_norm": 0.05819211155176163, "learning_rate": 2.76940300289616e-06, "loss": 0.05281525, "memory(GiB)": 13.7, "step": 70910, "train_speed(iter/s)": 1.527723 }, { "acc": 0.99325848, "epoch": 33.23880946801031, "grad_norm": 2.689044713973999, "learning_rate": 2.768709381472129e-06, "loss": 0.02243733, "memory(GiB)": 13.7, "step": 70915, "train_speed(iter/s)": 1.527729 }, { "acc": 0.97446423, "epoch": 33.241153034919144, "grad_norm": 3.1654083728790283, "learning_rate": 2.7680158136930136e-06, "loss": 0.05831991, "memory(GiB)": 13.7, "step": 70920, "train_speed(iter/s)": 1.527732 }, { "acc": 0.98467264, "epoch": 33.243496601827985, "grad_norm": 5.215171813964844, "learning_rate": 2.7673222995754834e-06, "loss": 0.06135659, "memory(GiB)": 13.7, "step": 70925, "train_speed(iter/s)": 1.527737 }, { "acc": 0.9895834, "epoch": 33.24584016873682, "grad_norm": 2.6817586421966553, "learning_rate": 2.766628839136207e-06, "loss": 0.03561703, "memory(GiB)": 13.7, "step": 70930, "train_speed(iter/s)": 1.527734 }, { "acc": 0.97967262, "epoch": 33.248183735645654, "grad_norm": 4.808220863342285, "learning_rate": 2.7659354323918568e-06, "loss": 0.06853376, "memory(GiB)": 13.7, "step": 70935, "train_speed(iter/s)": 1.527741 }, { "acc": 0.98562508, "epoch": 33.25052730255449, "grad_norm": 1.6534309387207031, "learning_rate": 2.7652420793590963e-06, "loss": 0.02410877, "memory(GiB)": 13.7, "step": 70940, "train_speed(iter/s)": 1.527741 }, { "acc": 0.96739769, "epoch": 33.25287086946332, "grad_norm": 6.016488075256348, "learning_rate": 2.764548780054594e-06, "loss": 0.06509818, "memory(GiB)": 13.7, "step": 70945, "train_speed(iter/s)": 1.527744 }, { "acc": 0.996875, "epoch": 33.25521443637216, "grad_norm": 4.025484085083008, "learning_rate": 2.7638555344950126e-06, "loss": 0.0264297, "memory(GiB)": 13.7, "step": 70950, "train_speed(iter/s)": 1.527743 }, { "acc": 0.996875, "epoch": 33.25755800328099, "grad_norm": 0.003733242629095912, "learning_rate": 2.7631623426970167e-06, "loss": 0.02687337, "memory(GiB)": 13.7, "step": 70955, "train_speed(iter/s)": 1.527748 }, { "acc": 0.97797346, "epoch": 33.25990157018983, "grad_norm": 1.0595674514770508, "learning_rate": 2.7624692046772667e-06, "loss": 0.06170003, "memory(GiB)": 13.7, "step": 70960, "train_speed(iter/s)": 1.527749 }, { "acc": 0.97520838, "epoch": 33.26224513709867, "grad_norm": 3.7413246631622314, "learning_rate": 2.761776120452424e-06, "loss": 0.0584856, "memory(GiB)": 13.7, "step": 70965, "train_speed(iter/s)": 1.527747 }, { "acc": 0.9854166, "epoch": 33.2645887040075, "grad_norm": 0.004228383302688599, "learning_rate": 2.7610830900391515e-06, "loss": 0.03357501, "memory(GiB)": 13.7, "step": 70970, "train_speed(iter/s)": 1.527748 }, { "acc": 0.97166672, "epoch": 33.266932270916335, "grad_norm": 6.711165904998779, "learning_rate": 2.760390113454102e-06, "loss": 0.06407118, "memory(GiB)": 13.7, "step": 70975, "train_speed(iter/s)": 1.527749 }, { "acc": 0.98779764, "epoch": 33.26927583782517, "grad_norm": 3.8438162803649902, "learning_rate": 2.7596971907139346e-06, "loss": 0.04518045, "memory(GiB)": 13.7, "step": 70980, "train_speed(iter/s)": 1.527753 }, { "acc": 0.984375, "epoch": 33.271619404734004, "grad_norm": 4.622232437133789, "learning_rate": 2.7590043218353067e-06, "loss": 0.06095346, "memory(GiB)": 13.7, "step": 70985, "train_speed(iter/s)": 1.527754 }, { "acc": 0.98944025, "epoch": 33.27396297164284, "grad_norm": 0.28548458218574524, "learning_rate": 2.7583115068348703e-06, "loss": 0.0329617, "memory(GiB)": 13.7, "step": 70990, "train_speed(iter/s)": 1.527757 }, { "acc": 0.9833333, "epoch": 33.27630653855167, "grad_norm": 0.1461772471666336, "learning_rate": 2.7576187457292803e-06, "loss": 0.03586313, "memory(GiB)": 13.7, "step": 70995, "train_speed(iter/s)": 1.527763 }, { "acc": 0.98604164, "epoch": 33.278650105460514, "grad_norm": 0.001553483889438212, "learning_rate": 2.7569260385351855e-06, "loss": 0.04839432, "memory(GiB)": 13.7, "step": 71000, "train_speed(iter/s)": 1.527769 }, { "acc": 0.98520832, "epoch": 33.28099367236935, "grad_norm": 0.8690246343612671, "learning_rate": 2.7562333852692373e-06, "loss": 0.04436068, "memory(GiB)": 13.7, "step": 71005, "train_speed(iter/s)": 1.52777 }, { "acc": 0.98458328, "epoch": 33.28333723927818, "grad_norm": 3.932140588760376, "learning_rate": 2.755540785948088e-06, "loss": 0.05619144, "memory(GiB)": 13.7, "step": 71010, "train_speed(iter/s)": 1.527772 }, { "acc": 0.98594704, "epoch": 33.28568080618702, "grad_norm": 4.121140003204346, "learning_rate": 2.754848240588382e-06, "loss": 0.06072898, "memory(GiB)": 13.7, "step": 71015, "train_speed(iter/s)": 1.527774 }, { "acc": 0.98891945, "epoch": 33.28802437309585, "grad_norm": 2.4225523471832275, "learning_rate": 2.754155749206765e-06, "loss": 0.03918389, "memory(GiB)": 13.7, "step": 71020, "train_speed(iter/s)": 1.527776 }, { "acc": 0.99333334, "epoch": 33.290367940004685, "grad_norm": 1.02950119972229, "learning_rate": 2.7534633118198872e-06, "loss": 0.00888023, "memory(GiB)": 13.7, "step": 71025, "train_speed(iter/s)": 1.527773 }, { "acc": 0.9863636, "epoch": 33.29271150691352, "grad_norm": 5.3218994140625, "learning_rate": 2.7527709284443888e-06, "loss": 0.06683869, "memory(GiB)": 13.7, "step": 71030, "train_speed(iter/s)": 1.527772 }, { "acc": 0.98113976, "epoch": 33.295055073822354, "grad_norm": 1.554223656654358, "learning_rate": 2.7520785990969106e-06, "loss": 0.04059059, "memory(GiB)": 13.7, "step": 71035, "train_speed(iter/s)": 1.527776 }, { "acc": 0.99611111, "epoch": 33.297398640731195, "grad_norm": 1.764798641204834, "learning_rate": 2.751386323794097e-06, "loss": 0.00809341, "memory(GiB)": 13.7, "step": 71040, "train_speed(iter/s)": 1.527781 }, { "acc": 0.97458334, "epoch": 33.29974220764003, "grad_norm": 1.256211280822754, "learning_rate": 2.7506941025525867e-06, "loss": 0.03241885, "memory(GiB)": 13.7, "step": 71045, "train_speed(iter/s)": 1.527778 }, { "acc": 0.9848485, "epoch": 33.302085774548864, "grad_norm": 0.9080880284309387, "learning_rate": 2.7500019353890218e-06, "loss": 0.05409728, "memory(GiB)": 13.7, "step": 71050, "train_speed(iter/s)": 1.527781 }, { "acc": 0.98291664, "epoch": 33.3044293414577, "grad_norm": 2.4428961277008057, "learning_rate": 2.7493098223200335e-06, "loss": 0.03035207, "memory(GiB)": 13.7, "step": 71055, "train_speed(iter/s)": 1.527784 }, { "acc": 0.98686008, "epoch": 33.30677290836653, "grad_norm": 0.017976881936192513, "learning_rate": 2.7486177633622624e-06, "loss": 0.04448727, "memory(GiB)": 13.7, "step": 71060, "train_speed(iter/s)": 1.52779 }, { "acc": 0.99011364, "epoch": 33.30911647527537, "grad_norm": 4.4708404541015625, "learning_rate": 2.7479257585323437e-06, "loss": 0.0308806, "memory(GiB)": 13.7, "step": 71065, "train_speed(iter/s)": 1.527793 }, { "acc": 0.97416668, "epoch": 33.3114600421842, "grad_norm": 2.9933054447174072, "learning_rate": 2.74723380784691e-06, "loss": 0.03664379, "memory(GiB)": 13.7, "step": 71070, "train_speed(iter/s)": 1.527794 }, { "acc": 0.97994041, "epoch": 33.31380360909304, "grad_norm": 4.6128926277160645, "learning_rate": 2.746541911322591e-06, "loss": 0.05838567, "memory(GiB)": 13.7, "step": 71075, "train_speed(iter/s)": 1.527803 }, { "acc": 0.971875, "epoch": 33.31614717600188, "grad_norm": 7.601564407348633, "learning_rate": 2.74585006897602e-06, "loss": 0.07950012, "memory(GiB)": 13.7, "step": 71080, "train_speed(iter/s)": 1.527805 }, { "acc": 0.99250002, "epoch": 33.31849074291071, "grad_norm": 3.0578129291534424, "learning_rate": 2.745158280823827e-06, "loss": 0.02244531, "memory(GiB)": 13.7, "step": 71085, "train_speed(iter/s)": 1.527814 }, { "acc": 0.97946434, "epoch": 33.320834309819546, "grad_norm": 6.448491096496582, "learning_rate": 2.744466546882638e-06, "loss": 0.05224746, "memory(GiB)": 13.7, "step": 71090, "train_speed(iter/s)": 1.527815 }, { "acc": 0.9895834, "epoch": 33.32317787672838, "grad_norm": 5.8059611320495605, "learning_rate": 2.7437748671690816e-06, "loss": 0.04325356, "memory(GiB)": 13.7, "step": 71095, "train_speed(iter/s)": 1.527816 }, { "acc": 0.97625008, "epoch": 33.325521443637214, "grad_norm": 5.230194568634033, "learning_rate": 2.743083241699786e-06, "loss": 0.06040039, "memory(GiB)": 13.7, "step": 71100, "train_speed(iter/s)": 1.52781 }, { "acc": 0.99161711, "epoch": 33.32786501054605, "grad_norm": 2.1235501766204834, "learning_rate": 2.7423916704913704e-06, "loss": 0.04294511, "memory(GiB)": 13.7, "step": 71105, "train_speed(iter/s)": 1.52781 }, { "acc": 0.9760416, "epoch": 33.33020857745488, "grad_norm": 2.805710792541504, "learning_rate": 2.741700153560463e-06, "loss": 0.06433691, "memory(GiB)": 13.7, "step": 71110, "train_speed(iter/s)": 1.527814 }, { "acc": 0.99348955, "epoch": 33.332552144363724, "grad_norm": 1.5517069101333618, "learning_rate": 2.7410086909236815e-06, "loss": 0.04398057, "memory(GiB)": 13.7, "step": 71115, "train_speed(iter/s)": 1.527809 }, { "acc": 0.97842274, "epoch": 33.33489571127256, "grad_norm": 6.148375034332275, "learning_rate": 2.740317282597647e-06, "loss": 0.05615405, "memory(GiB)": 13.7, "step": 71120, "train_speed(iter/s)": 1.527806 }, { "acc": 0.98723221, "epoch": 33.33723927818139, "grad_norm": 3.0213463306427, "learning_rate": 2.7396259285989828e-06, "loss": 0.05180528, "memory(GiB)": 13.7, "step": 71125, "train_speed(iter/s)": 1.527806 }, { "acc": 0.98654766, "epoch": 33.33958284509023, "grad_norm": 4.355924129486084, "learning_rate": 2.7389346289443015e-06, "loss": 0.04818467, "memory(GiB)": 13.7, "step": 71130, "train_speed(iter/s)": 1.52781 }, { "acc": 0.98291664, "epoch": 33.34192641199906, "grad_norm": 5.09477424621582, "learning_rate": 2.7382433836502213e-06, "loss": 0.03888597, "memory(GiB)": 13.7, "step": 71135, "train_speed(iter/s)": 1.527818 }, { "acc": 0.978125, "epoch": 33.344269978907896, "grad_norm": 0.004430168308317661, "learning_rate": 2.7375521927333608e-06, "loss": 0.04491498, "memory(GiB)": 13.7, "step": 71140, "train_speed(iter/s)": 1.527826 }, { "acc": 0.9712698, "epoch": 33.34661354581673, "grad_norm": 3.575289726257324, "learning_rate": 2.7368610562103295e-06, "loss": 0.07175387, "memory(GiB)": 13.7, "step": 71145, "train_speed(iter/s)": 1.527838 }, { "acc": 0.98067713, "epoch": 33.34895711272557, "grad_norm": 6.138729095458984, "learning_rate": 2.736169974097743e-06, "loss": 0.04375992, "memory(GiB)": 13.7, "step": 71150, "train_speed(iter/s)": 1.527842 }, { "acc": 0.96686954, "epoch": 33.351300679634406, "grad_norm": 2.895859718322754, "learning_rate": 2.73547894641221e-06, "loss": 0.07455519, "memory(GiB)": 13.7, "step": 71155, "train_speed(iter/s)": 1.52785 }, { "acc": 0.99028845, "epoch": 33.35364424654324, "grad_norm": 0.9241886138916016, "learning_rate": 2.734787973170343e-06, "loss": 0.04682772, "memory(GiB)": 13.7, "step": 71160, "train_speed(iter/s)": 1.527857 }, { "acc": 0.9666666, "epoch": 33.355987813452074, "grad_norm": 3.1264829635620117, "learning_rate": 2.734097054388748e-06, "loss": 0.05783818, "memory(GiB)": 13.7, "step": 71165, "train_speed(iter/s)": 1.52786 }, { "acc": 0.9830019, "epoch": 33.35833138036091, "grad_norm": 4.91582727432251, "learning_rate": 2.7334061900840347e-06, "loss": 0.0839576, "memory(GiB)": 13.7, "step": 71170, "train_speed(iter/s)": 1.52786 }, { "acc": 0.9958334, "epoch": 33.36067494726974, "grad_norm": 0.9058834314346313, "learning_rate": 2.7327153802728103e-06, "loss": 0.01821024, "memory(GiB)": 13.7, "step": 71175, "train_speed(iter/s)": 1.52786 }, { "acc": 0.97770834, "epoch": 33.36301851417858, "grad_norm": 4.42132568359375, "learning_rate": 2.7320246249716758e-06, "loss": 0.07839648, "memory(GiB)": 13.7, "step": 71180, "train_speed(iter/s)": 1.527868 }, { "acc": 0.99472218, "epoch": 33.36536208108741, "grad_norm": 2.695863723754883, "learning_rate": 2.7313339241972365e-06, "loss": 0.03697041, "memory(GiB)": 13.7, "step": 71185, "train_speed(iter/s)": 1.527872 }, { "acc": 0.97613106, "epoch": 33.36770564799625, "grad_norm": 4.759210586547852, "learning_rate": 2.730643277966095e-06, "loss": 0.10751072, "memory(GiB)": 13.7, "step": 71190, "train_speed(iter/s)": 1.527875 }, { "acc": 0.98576393, "epoch": 33.37004921490509, "grad_norm": 2.507316827774048, "learning_rate": 2.729952686294855e-06, "loss": 0.03312121, "memory(GiB)": 13.7, "step": 71195, "train_speed(iter/s)": 1.527872 }, { "acc": 0.97821426, "epoch": 33.37239278181392, "grad_norm": 5.44956636428833, "learning_rate": 2.729262149200112e-06, "loss": 0.0530483, "memory(GiB)": 13.7, "step": 71200, "train_speed(iter/s)": 1.527875 }, { "acc": 0.97145834, "epoch": 33.374736348722756, "grad_norm": 0.0006398375844582915, "learning_rate": 2.728571666698464e-06, "loss": 0.06580522, "memory(GiB)": 13.7, "step": 71205, "train_speed(iter/s)": 1.527879 }, { "acc": 0.98312502, "epoch": 33.37707991563159, "grad_norm": 4.117238521575928, "learning_rate": 2.727881238806509e-06, "loss": 0.04415472, "memory(GiB)": 13.7, "step": 71210, "train_speed(iter/s)": 1.527882 }, { "acc": 0.9838541, "epoch": 33.379423482540425, "grad_norm": 5.428488731384277, "learning_rate": 2.7271908655408446e-06, "loss": 0.04680042, "memory(GiB)": 13.7, "step": 71215, "train_speed(iter/s)": 1.527883 }, { "acc": 0.98083334, "epoch": 33.38176704944926, "grad_norm": 2.0285699367523193, "learning_rate": 2.7265005469180623e-06, "loss": 0.06193073, "memory(GiB)": 13.7, "step": 71220, "train_speed(iter/s)": 1.527884 }, { "acc": 0.98869047, "epoch": 33.3841106163581, "grad_norm": 2.3666467666625977, "learning_rate": 2.725810282954755e-06, "loss": 0.02017165, "memory(GiB)": 13.7, "step": 71225, "train_speed(iter/s)": 1.527882 }, { "acc": 0.99136362, "epoch": 33.386454183266935, "grad_norm": 1.082737684249878, "learning_rate": 2.725120073667518e-06, "loss": 0.02327852, "memory(GiB)": 13.7, "step": 71230, "train_speed(iter/s)": 1.527886 }, { "acc": 0.9882143, "epoch": 33.38879775017577, "grad_norm": 2.7839395999908447, "learning_rate": 2.7244299190729396e-06, "loss": 0.04506686, "memory(GiB)": 13.7, "step": 71235, "train_speed(iter/s)": 1.527889 }, { "acc": 0.9707386, "epoch": 33.3911413170846, "grad_norm": 6.470211029052734, "learning_rate": 2.7237398191876064e-06, "loss": 0.06951271, "memory(GiB)": 13.7, "step": 71240, "train_speed(iter/s)": 1.527894 }, { "acc": 0.98187504, "epoch": 33.39348488399344, "grad_norm": 2.7994165420532227, "learning_rate": 2.7230497740281083e-06, "loss": 0.04029752, "memory(GiB)": 13.7, "step": 71245, "train_speed(iter/s)": 1.5279 }, { "acc": 0.97571335, "epoch": 33.39582845090227, "grad_norm": 0.21476486325263977, "learning_rate": 2.7223597836110334e-06, "loss": 0.05506452, "memory(GiB)": 13.7, "step": 71250, "train_speed(iter/s)": 1.527906 }, { "acc": 0.99020834, "epoch": 33.398172017811106, "grad_norm": 0.27159151434898376, "learning_rate": 2.7216698479529626e-06, "loss": 0.0452372, "memory(GiB)": 13.7, "step": 71255, "train_speed(iter/s)": 1.527907 }, { "acc": 0.9916667, "epoch": 33.40051558471994, "grad_norm": 0.45146870613098145, "learning_rate": 2.720979967070482e-06, "loss": 0.02375626, "memory(GiB)": 13.7, "step": 71260, "train_speed(iter/s)": 1.527905 }, { "acc": 0.98425598, "epoch": 33.40285915162878, "grad_norm": 3.1792924404144287, "learning_rate": 2.7202901409801745e-06, "loss": 0.0426258, "memory(GiB)": 13.7, "step": 71265, "train_speed(iter/s)": 1.527906 }, { "acc": 0.98411865, "epoch": 33.405202718537616, "grad_norm": 3.980159282684326, "learning_rate": 2.719600369698624e-06, "loss": 0.06700305, "memory(GiB)": 13.7, "step": 71270, "train_speed(iter/s)": 1.527906 }, { "acc": 0.98832722, "epoch": 33.40754628544645, "grad_norm": 1.3658095598220825, "learning_rate": 2.7189106532424042e-06, "loss": 0.07208213, "memory(GiB)": 13.7, "step": 71275, "train_speed(iter/s)": 1.527909 }, { "acc": 0.98895836, "epoch": 33.409889852355285, "grad_norm": 0.21476662158966064, "learning_rate": 2.7182209916280987e-06, "loss": 0.03011033, "memory(GiB)": 13.7, "step": 71280, "train_speed(iter/s)": 1.527908 }, { "acc": 0.98708324, "epoch": 33.41223341926412, "grad_norm": 3.654222249984741, "learning_rate": 2.717531384872281e-06, "loss": 0.04620071, "memory(GiB)": 13.7, "step": 71285, "train_speed(iter/s)": 1.527906 }, { "acc": 0.99125004, "epoch": 33.41457698617295, "grad_norm": 4.922201156616211, "learning_rate": 2.716841832991531e-06, "loss": 0.01891214, "memory(GiB)": 13.7, "step": 71290, "train_speed(iter/s)": 1.527905 }, { "acc": 0.97881947, "epoch": 33.41692055308179, "grad_norm": 3.1987318992614746, "learning_rate": 2.7161523360024185e-06, "loss": 0.04808655, "memory(GiB)": 13.7, "step": 71295, "train_speed(iter/s)": 1.527908 }, { "acc": 0.98133926, "epoch": 33.41926411999063, "grad_norm": 3.4880006313323975, "learning_rate": 2.715462893921519e-06, "loss": 0.05719228, "memory(GiB)": 13.7, "step": 71300, "train_speed(iter/s)": 1.527912 }, { "acc": 0.98187504, "epoch": 33.42160768689946, "grad_norm": 4.857186794281006, "learning_rate": 2.714773506765407e-06, "loss": 0.03997973, "memory(GiB)": 13.7, "step": 71305, "train_speed(iter/s)": 1.527919 }, { "acc": 0.984375, "epoch": 33.4239512538083, "grad_norm": 0.10592013597488403, "learning_rate": 2.714084174550648e-06, "loss": 0.04670515, "memory(GiB)": 13.7, "step": 71310, "train_speed(iter/s)": 1.527923 }, { "acc": 0.97559433, "epoch": 33.42629482071713, "grad_norm": 3.2834529876708984, "learning_rate": 2.7133948972938147e-06, "loss": 0.06377032, "memory(GiB)": 13.7, "step": 71315, "train_speed(iter/s)": 1.527929 }, { "acc": 0.99125004, "epoch": 33.428638387625966, "grad_norm": 0.8848364353179932, "learning_rate": 2.7127056750114763e-06, "loss": 0.02593184, "memory(GiB)": 13.7, "step": 71320, "train_speed(iter/s)": 1.52794 }, { "acc": 0.98506947, "epoch": 33.4309819545348, "grad_norm": 4.572206974029541, "learning_rate": 2.7120165077201977e-06, "loss": 0.03308493, "memory(GiB)": 13.7, "step": 71325, "train_speed(iter/s)": 1.527941 }, { "acc": 0.99380684, "epoch": 33.433325521443635, "grad_norm": 1.8165326118469238, "learning_rate": 2.7113273954365415e-06, "loss": 0.0305961, "memory(GiB)": 13.7, "step": 71330, "train_speed(iter/s)": 1.52794 }, { "acc": 0.95791664, "epoch": 33.43566908835247, "grad_norm": 5.3993940353393555, "learning_rate": 2.710638338177074e-06, "loss": 0.11638677, "memory(GiB)": 13.7, "step": 71335, "train_speed(iter/s)": 1.52794 }, { "acc": 0.98604164, "epoch": 33.43801265526131, "grad_norm": 0.7947439551353455, "learning_rate": 2.7099493359583577e-06, "loss": 0.03149374, "memory(GiB)": 13.7, "step": 71340, "train_speed(iter/s)": 1.527946 }, { "acc": 0.98773804, "epoch": 33.440356222170145, "grad_norm": 1.915742039680481, "learning_rate": 2.7092603887969576e-06, "loss": 0.05875206, "memory(GiB)": 13.7, "step": 71345, "train_speed(iter/s)": 1.527944 }, { "acc": 0.97424679, "epoch": 33.44269978907898, "grad_norm": 3.2112503051757812, "learning_rate": 2.708571496709428e-06, "loss": 0.08367645, "memory(GiB)": 13.7, "step": 71350, "train_speed(iter/s)": 1.527947 }, { "acc": 0.98296709, "epoch": 33.44504335598781, "grad_norm": 1.4789280891418457, "learning_rate": 2.70788265971233e-06, "loss": 0.06400161, "memory(GiB)": 13.7, "step": 71355, "train_speed(iter/s)": 1.52795 }, { "acc": 0.99571428, "epoch": 33.44738692289665, "grad_norm": 2.6700685024261475, "learning_rate": 2.7071938778222223e-06, "loss": 0.02013285, "memory(GiB)": 13.7, "step": 71360, "train_speed(iter/s)": 1.527946 }, { "acc": 0.98008928, "epoch": 33.44973048980548, "grad_norm": 2.5422723293304443, "learning_rate": 2.7065051510556606e-06, "loss": 0.04622937, "memory(GiB)": 13.7, "step": 71365, "train_speed(iter/s)": 1.527947 }, { "acc": 0.9958333, "epoch": 33.452074056714316, "grad_norm": 2.2247414588928223, "learning_rate": 2.705816479429197e-06, "loss": 0.04389422, "memory(GiB)": 13.7, "step": 71370, "train_speed(iter/s)": 1.527953 }, { "acc": 0.97529764, "epoch": 33.45441762362316, "grad_norm": 3.1270201206207275, "learning_rate": 2.7051278629593857e-06, "loss": 0.06113186, "memory(GiB)": 13.7, "step": 71375, "train_speed(iter/s)": 1.52796 }, { "acc": 0.98175592, "epoch": 33.45676119053199, "grad_norm": 4.192458629608154, "learning_rate": 2.7044393016627817e-06, "loss": 0.07992175, "memory(GiB)": 13.7, "step": 71380, "train_speed(iter/s)": 1.527962 }, { "acc": 0.9801136, "epoch": 33.45910475744083, "grad_norm": 7.041220188140869, "learning_rate": 2.703750795555932e-06, "loss": 0.08207813, "memory(GiB)": 13.7, "step": 71385, "train_speed(iter/s)": 1.527968 }, { "acc": 0.9895833, "epoch": 33.46144832434966, "grad_norm": 2.212383985519409, "learning_rate": 2.7030623446553877e-06, "loss": 0.02996141, "memory(GiB)": 13.7, "step": 71390, "train_speed(iter/s)": 1.527969 }, { "acc": 0.98249454, "epoch": 33.463791891258495, "grad_norm": 4.740431308746338, "learning_rate": 2.702373948977699e-06, "loss": 0.04295487, "memory(GiB)": 13.7, "step": 71395, "train_speed(iter/s)": 1.527971 }, { "acc": 0.98842258, "epoch": 33.46613545816733, "grad_norm": 2.9040470123291016, "learning_rate": 2.701685608539409e-06, "loss": 0.03662013, "memory(GiB)": 13.7, "step": 71400, "train_speed(iter/s)": 1.527974 }, { "acc": 0.9854167, "epoch": 33.468479025076164, "grad_norm": 3.336925983428955, "learning_rate": 2.700997323357067e-06, "loss": 0.02902672, "memory(GiB)": 13.7, "step": 71405, "train_speed(iter/s)": 1.527977 }, { "acc": 0.97554922, "epoch": 33.470822591985, "grad_norm": 4.656731605529785, "learning_rate": 2.700309093447212e-06, "loss": 0.06614184, "memory(GiB)": 13.7, "step": 71410, "train_speed(iter/s)": 1.527979 }, { "acc": 0.9802084, "epoch": 33.47316615889384, "grad_norm": 5.2514872550964355, "learning_rate": 2.69962091882639e-06, "loss": 0.03469114, "memory(GiB)": 13.7, "step": 71415, "train_speed(iter/s)": 1.527984 }, { "acc": 0.98213539, "epoch": 33.475509725802674, "grad_norm": 7.508632659912109, "learning_rate": 2.6989327995111443e-06, "loss": 0.04951336, "memory(GiB)": 13.7, "step": 71420, "train_speed(iter/s)": 1.527987 }, { "acc": 0.9875, "epoch": 33.47785329271151, "grad_norm": 0.060418784618377686, "learning_rate": 2.6982447355180115e-06, "loss": 0.04676665, "memory(GiB)": 13.7, "step": 71425, "train_speed(iter/s)": 1.527991 }, { "acc": 0.99333324, "epoch": 33.48019685962034, "grad_norm": 0.010819478891789913, "learning_rate": 2.6975567268635303e-06, "loss": 0.02470457, "memory(GiB)": 13.7, "step": 71430, "train_speed(iter/s)": 1.527993 }, { "acc": 0.97362175, "epoch": 33.48254042652918, "grad_norm": 3.8291192054748535, "learning_rate": 2.696868773564242e-06, "loss": 0.05343524, "memory(GiB)": 13.7, "step": 71435, "train_speed(iter/s)": 1.527995 }, { "acc": 0.9894886, "epoch": 33.48488399343801, "grad_norm": 6.090353012084961, "learning_rate": 2.696180875636679e-06, "loss": 0.054339, "memory(GiB)": 13.7, "step": 71440, "train_speed(iter/s)": 1.528004 }, { "acc": 0.98383932, "epoch": 33.487227560346845, "grad_norm": 3.9166862964630127, "learning_rate": 2.6954930330973795e-06, "loss": 0.0335827, "memory(GiB)": 13.7, "step": 71445, "train_speed(iter/s)": 1.528007 }, { "acc": 0.9776042, "epoch": 33.48957112725569, "grad_norm": 3.635862350463867, "learning_rate": 2.694805245962873e-06, "loss": 0.06714267, "memory(GiB)": 13.7, "step": 71450, "train_speed(iter/s)": 1.52801 }, { "acc": 0.9802083, "epoch": 33.49191469416452, "grad_norm": 3.1919758319854736, "learning_rate": 2.6941175142496943e-06, "loss": 0.06302608, "memory(GiB)": 13.7, "step": 71455, "train_speed(iter/s)": 1.528012 }, { "acc": 0.97986107, "epoch": 33.494258261073355, "grad_norm": 2.8610620498657227, "learning_rate": 2.693429837974373e-06, "loss": 0.0386402, "memory(GiB)": 13.7, "step": 71460, "train_speed(iter/s)": 1.528011 }, { "acc": 0.98298607, "epoch": 33.49660182798219, "grad_norm": 2.6738998889923096, "learning_rate": 2.692742217153439e-06, "loss": 0.0357469, "memory(GiB)": 13.7, "step": 71465, "train_speed(iter/s)": 1.528015 }, { "acc": 0.97639961, "epoch": 33.498945394891024, "grad_norm": 2.9898064136505127, "learning_rate": 2.69205465180342e-06, "loss": 0.06908343, "memory(GiB)": 13.7, "step": 71470, "train_speed(iter/s)": 1.528021 }, { "acc": 0.9794445, "epoch": 33.50128896179986, "grad_norm": 0.9239953756332397, "learning_rate": 2.6913671419408455e-06, "loss": 0.04577105, "memory(GiB)": 13.7, "step": 71475, "train_speed(iter/s)": 1.528024 }, { "acc": 0.98708334, "epoch": 33.50363252870869, "grad_norm": 0.00015780805551912636, "learning_rate": 2.6906796875822365e-06, "loss": 0.02887498, "memory(GiB)": 13.7, "step": 71480, "train_speed(iter/s)": 1.528026 }, { "acc": 0.97104168, "epoch": 33.50597609561753, "grad_norm": 3.941505193710327, "learning_rate": 2.6899922887441193e-06, "loss": 0.06286649, "memory(GiB)": 13.7, "step": 71485, "train_speed(iter/s)": 1.52803 }, { "acc": 0.98518734, "epoch": 33.50831966252637, "grad_norm": 4.996614456176758, "learning_rate": 2.6893049454430197e-06, "loss": 0.05135561, "memory(GiB)": 13.7, "step": 71490, "train_speed(iter/s)": 1.528035 }, { "acc": 0.97822914, "epoch": 33.5106632294352, "grad_norm": 2.8420915603637695, "learning_rate": 2.688617657695456e-06, "loss": 0.06975175, "memory(GiB)": 13.7, "step": 71495, "train_speed(iter/s)": 1.528049 }, { "acc": 0.96416664, "epoch": 33.51300679634404, "grad_norm": 5.20876932144165, "learning_rate": 2.687930425517946e-06, "loss": 0.08968643, "memory(GiB)": 13.7, "step": 71500, "train_speed(iter/s)": 1.528055 }, { "acc": 0.97562504, "epoch": 33.51535036325287, "grad_norm": 5.591251373291016, "learning_rate": 2.6872432489270124e-06, "loss": 0.09905342, "memory(GiB)": 13.7, "step": 71505, "train_speed(iter/s)": 1.528054 }, { "acc": 0.97870541, "epoch": 33.517693930161705, "grad_norm": 3.4657769203186035, "learning_rate": 2.6865561279391732e-06, "loss": 0.08823435, "memory(GiB)": 13.7, "step": 71510, "train_speed(iter/s)": 1.528051 }, { "acc": 0.98592262, "epoch": 33.52003749707054, "grad_norm": 3.040087938308716, "learning_rate": 2.68586906257094e-06, "loss": 0.02754959, "memory(GiB)": 13.7, "step": 71515, "train_speed(iter/s)": 1.528054 }, { "acc": 0.97770834, "epoch": 33.522381063979374, "grad_norm": 4.192566871643066, "learning_rate": 2.685182052838831e-06, "loss": 0.05047921, "memory(GiB)": 13.7, "step": 71520, "train_speed(iter/s)": 1.528052 }, { "acc": 0.97208328, "epoch": 33.524724630888215, "grad_norm": 7.868078231811523, "learning_rate": 2.684495098759361e-06, "loss": 0.05145111, "memory(GiB)": 13.7, "step": 71525, "train_speed(iter/s)": 1.528056 }, { "acc": 0.97703381, "epoch": 33.52706819779705, "grad_norm": 1.9357585906982422, "learning_rate": 2.68380820034904e-06, "loss": 0.0527184, "memory(GiB)": 13.7, "step": 71530, "train_speed(iter/s)": 1.528059 }, { "acc": 0.98395834, "epoch": 33.529411764705884, "grad_norm": 2.1020801067352295, "learning_rate": 2.683121357624377e-06, "loss": 0.03153661, "memory(GiB)": 13.7, "step": 71535, "train_speed(iter/s)": 1.528059 }, { "acc": 0.98133926, "epoch": 33.53175533161472, "grad_norm": 2.1036152839660645, "learning_rate": 2.6824345706018836e-06, "loss": 0.04075053, "memory(GiB)": 13.7, "step": 71540, "train_speed(iter/s)": 1.528061 }, { "acc": 0.9666666, "epoch": 33.53409889852355, "grad_norm": 5.303613185882568, "learning_rate": 2.681747839298067e-06, "loss": 0.07724214, "memory(GiB)": 13.7, "step": 71545, "train_speed(iter/s)": 1.528065 }, { "acc": 0.98916664, "epoch": 33.53644246543239, "grad_norm": 0.9650856852531433, "learning_rate": 2.681061163729437e-06, "loss": 0.02422405, "memory(GiB)": 13.7, "step": 71550, "train_speed(iter/s)": 1.528068 }, { "acc": 0.979072, "epoch": 33.53878603234122, "grad_norm": 1.6714762449264526, "learning_rate": 2.680374543912495e-06, "loss": 0.05098604, "memory(GiB)": 13.7, "step": 71555, "train_speed(iter/s)": 1.528072 }, { "acc": 0.99125004, "epoch": 33.541129599250056, "grad_norm": 3.2988739013671875, "learning_rate": 2.6796879798637467e-06, "loss": 0.06810758, "memory(GiB)": 13.7, "step": 71560, "train_speed(iter/s)": 1.528079 }, { "acc": 0.99613094, "epoch": 33.5434731661589, "grad_norm": 3.044025421142578, "learning_rate": 2.6790014715996964e-06, "loss": 0.02365467, "memory(GiB)": 13.7, "step": 71565, "train_speed(iter/s)": 1.528083 }, { "acc": 0.98011017, "epoch": 33.54581673306773, "grad_norm": 4.2523298263549805, "learning_rate": 2.678315019136844e-06, "loss": 0.04754431, "memory(GiB)": 13.7, "step": 71570, "train_speed(iter/s)": 1.528087 }, { "acc": 0.98187504, "epoch": 33.548160299976566, "grad_norm": 4.9520955085754395, "learning_rate": 2.6776286224916885e-06, "loss": 0.06319036, "memory(GiB)": 13.7, "step": 71575, "train_speed(iter/s)": 1.52809 }, { "acc": 0.97976646, "epoch": 33.5505038668854, "grad_norm": 3.9666736125946045, "learning_rate": 2.6769422816807296e-06, "loss": 0.06952127, "memory(GiB)": 13.7, "step": 71580, "train_speed(iter/s)": 1.528096 }, { "acc": 0.98490534, "epoch": 33.552847433794234, "grad_norm": 0.02036120742559433, "learning_rate": 2.6762559967204656e-06, "loss": 0.03508118, "memory(GiB)": 13.7, "step": 71585, "train_speed(iter/s)": 1.528097 }, { "acc": 0.97895832, "epoch": 33.55519100070307, "grad_norm": 2.31087064743042, "learning_rate": 2.67556976762739e-06, "loss": 0.0444702, "memory(GiB)": 13.7, "step": 71590, "train_speed(iter/s)": 1.528099 }, { "acc": 0.98050594, "epoch": 33.5575345676119, "grad_norm": 6.549178123474121, "learning_rate": 2.6748835944179997e-06, "loss": 0.03217378, "memory(GiB)": 13.7, "step": 71595, "train_speed(iter/s)": 1.528099 }, { "acc": 0.97416668, "epoch": 33.55987813452074, "grad_norm": 6.059483528137207, "learning_rate": 2.6741974771087897e-06, "loss": 0.05572281, "memory(GiB)": 13.7, "step": 71600, "train_speed(iter/s)": 1.528103 }, { "acc": 0.98580046, "epoch": 33.56222170142958, "grad_norm": 3.7426064014434814, "learning_rate": 2.673511415716247e-06, "loss": 0.05083404, "memory(GiB)": 13.7, "step": 71605, "train_speed(iter/s)": 1.528109 }, { "acc": 0.990625, "epoch": 33.56456526833841, "grad_norm": 5.250463008880615, "learning_rate": 2.672825410256866e-06, "loss": 0.0297335, "memory(GiB)": 13.7, "step": 71610, "train_speed(iter/s)": 1.528116 }, { "acc": 0.97020836, "epoch": 33.56690883524725, "grad_norm": 10.65575122833252, "learning_rate": 2.6721394607471375e-06, "loss": 0.0499643, "memory(GiB)": 13.7, "step": 71615, "train_speed(iter/s)": 1.528117 }, { "acc": 0.97001982, "epoch": 33.56925240215608, "grad_norm": 4.2359490394592285, "learning_rate": 2.671453567203544e-06, "loss": 0.05902771, "memory(GiB)": 13.7, "step": 71620, "train_speed(iter/s)": 1.52812 }, { "acc": 0.98230667, "epoch": 33.571595969064916, "grad_norm": 2.724675416946411, "learning_rate": 2.670767729642578e-06, "loss": 0.04103292, "memory(GiB)": 13.7, "step": 71625, "train_speed(iter/s)": 1.528126 }, { "acc": 0.98270836, "epoch": 33.57393953597375, "grad_norm": 1.2406185865402222, "learning_rate": 2.6700819480807195e-06, "loss": 0.06013852, "memory(GiB)": 13.7, "step": 71630, "train_speed(iter/s)": 1.528132 }, { "acc": 0.984375, "epoch": 33.576283102882584, "grad_norm": 5.30467414855957, "learning_rate": 2.6693962225344548e-06, "loss": 0.0425505, "memory(GiB)": 13.7, "step": 71635, "train_speed(iter/s)": 1.528136 }, { "acc": 0.98187504, "epoch": 33.578626669791426, "grad_norm": 3.1783061027526855, "learning_rate": 2.6687105530202695e-06, "loss": 0.13014977, "memory(GiB)": 13.7, "step": 71640, "train_speed(iter/s)": 1.528137 }, { "acc": 0.99599361, "epoch": 33.58097023670026, "grad_norm": 2.9997642040252686, "learning_rate": 2.6680249395546385e-06, "loss": 0.03749492, "memory(GiB)": 13.7, "step": 71645, "train_speed(iter/s)": 1.528137 }, { "acc": 0.97654762, "epoch": 33.583313803609094, "grad_norm": 5.375589370727539, "learning_rate": 2.667339382154047e-06, "loss": 0.0387857, "memory(GiB)": 13.7, "step": 71650, "train_speed(iter/s)": 1.528139 }, { "acc": 0.97979164, "epoch": 33.58565737051793, "grad_norm": 2.727489471435547, "learning_rate": 2.6666538808349723e-06, "loss": 0.05579417, "memory(GiB)": 13.7, "step": 71655, "train_speed(iter/s)": 1.528145 }, { "acc": 0.99125004, "epoch": 33.58800093742676, "grad_norm": 0.01399192400276661, "learning_rate": 2.665968435613892e-06, "loss": 0.04695321, "memory(GiB)": 13.7, "step": 71660, "train_speed(iter/s)": 1.528151 }, { "acc": 0.97770834, "epoch": 33.5903445043356, "grad_norm": 5.783019542694092, "learning_rate": 2.665283046507278e-06, "loss": 0.05348086, "memory(GiB)": 13.7, "step": 71665, "train_speed(iter/s)": 1.528156 }, { "acc": 0.99125004, "epoch": 33.59268807124443, "grad_norm": 2.006054639816284, "learning_rate": 2.6645977135316092e-06, "loss": 0.02311961, "memory(GiB)": 13.7, "step": 71670, "train_speed(iter/s)": 1.528158 }, { "acc": 0.9880209, "epoch": 33.595031638153266, "grad_norm": 0.21593432128429413, "learning_rate": 2.663912436703358e-06, "loss": 0.02662179, "memory(GiB)": 13.7, "step": 71675, "train_speed(iter/s)": 1.528156 }, { "acc": 0.99144344, "epoch": 33.59737520506211, "grad_norm": 3.683281660079956, "learning_rate": 2.6632272160389942e-06, "loss": 0.02414854, "memory(GiB)": 13.7, "step": 71680, "train_speed(iter/s)": 1.528163 }, { "acc": 0.98589287, "epoch": 33.59971877197094, "grad_norm": 1.9567885398864746, "learning_rate": 2.6625420515549892e-06, "loss": 0.06079037, "memory(GiB)": 13.7, "step": 71685, "train_speed(iter/s)": 1.528167 }, { "acc": 0.9791667, "epoch": 33.602062338879776, "grad_norm": 4.473321437835693, "learning_rate": 2.661856943267813e-06, "loss": 0.06440628, "memory(GiB)": 13.7, "step": 71690, "train_speed(iter/s)": 1.52817 }, { "acc": 0.98083334, "epoch": 33.60440590578861, "grad_norm": 4.603550434112549, "learning_rate": 2.6611718911939344e-06, "loss": 0.05045672, "memory(GiB)": 13.7, "step": 71695, "train_speed(iter/s)": 1.528169 }, { "acc": 0.9791667, "epoch": 33.606749472697445, "grad_norm": 3.1172401905059814, "learning_rate": 2.6604868953498184e-06, "loss": 0.0435155, "memory(GiB)": 13.7, "step": 71700, "train_speed(iter/s)": 1.528179 }, { "acc": 0.9854167, "epoch": 33.60909303960628, "grad_norm": 0.19565674662590027, "learning_rate": 2.6598019557519276e-06, "loss": 0.02763813, "memory(GiB)": 13.7, "step": 71705, "train_speed(iter/s)": 1.528176 }, { "acc": 0.9854166, "epoch": 33.61143660651511, "grad_norm": 3.645045042037964, "learning_rate": 2.6591170724167285e-06, "loss": 0.0291033, "memory(GiB)": 13.7, "step": 71710, "train_speed(iter/s)": 1.528179 }, { "acc": 0.98999996, "epoch": 33.613780173423955, "grad_norm": 0.08201592415571213, "learning_rate": 2.6584322453606846e-06, "loss": 0.03072656, "memory(GiB)": 13.7, "step": 71715, "train_speed(iter/s)": 1.528182 }, { "acc": 0.9958334, "epoch": 33.61612374033279, "grad_norm": 0.0012802346609532833, "learning_rate": 2.6577474746002533e-06, "loss": 0.02777639, "memory(GiB)": 13.7, "step": 71720, "train_speed(iter/s)": 1.528187 }, { "acc": 0.9802083, "epoch": 33.61846730724162, "grad_norm": 5.45465087890625, "learning_rate": 2.6570627601518963e-06, "loss": 0.04930009, "memory(GiB)": 13.7, "step": 71725, "train_speed(iter/s)": 1.528189 }, { "acc": 0.9864584, "epoch": 33.62081087415046, "grad_norm": 3.6063361167907715, "learning_rate": 2.656378102032074e-06, "loss": 0.03428352, "memory(GiB)": 13.7, "step": 71730, "train_speed(iter/s)": 1.528194 }, { "acc": 0.98715286, "epoch": 33.62315444105929, "grad_norm": 2.1153440475463867, "learning_rate": 2.655693500257239e-06, "loss": 0.06303367, "memory(GiB)": 13.7, "step": 71735, "train_speed(iter/s)": 1.528197 }, { "acc": 0.99359379, "epoch": 33.625498007968126, "grad_norm": 0.7936880588531494, "learning_rate": 2.6550089548438507e-06, "loss": 0.01665459, "memory(GiB)": 13.7, "step": 71740, "train_speed(iter/s)": 1.5282 }, { "acc": 0.9760417, "epoch": 33.62784157487696, "grad_norm": 3.636843681335449, "learning_rate": 2.6543244658083594e-06, "loss": 0.04460027, "memory(GiB)": 13.7, "step": 71745, "train_speed(iter/s)": 1.528206 }, { "acc": 0.9786129, "epoch": 33.630185141785795, "grad_norm": 0.4074357748031616, "learning_rate": 2.653640033167223e-06, "loss": 0.05447153, "memory(GiB)": 13.7, "step": 71750, "train_speed(iter/s)": 1.528213 }, { "acc": 0.99750004, "epoch": 33.632528708694636, "grad_norm": 1.1321336030960083, "learning_rate": 2.652955656936887e-06, "loss": 0.01235913, "memory(GiB)": 13.7, "step": 71755, "train_speed(iter/s)": 1.528216 }, { "acc": 0.97979164, "epoch": 33.63487227560347, "grad_norm": 2.45757794380188, "learning_rate": 2.652271337133805e-06, "loss": 0.05637863, "memory(GiB)": 13.7, "step": 71760, "train_speed(iter/s)": 1.528218 }, { "acc": 0.98083334, "epoch": 33.637215842512305, "grad_norm": 0.053085580468177795, "learning_rate": 2.651587073774426e-06, "loss": 0.06267976, "memory(GiB)": 13.7, "step": 71765, "train_speed(iter/s)": 1.528224 }, { "acc": 0.99008923, "epoch": 33.63955940942114, "grad_norm": 0.9238554835319519, "learning_rate": 2.6509028668751992e-06, "loss": 0.0248096, "memory(GiB)": 13.7, "step": 71770, "train_speed(iter/s)": 1.528227 }, { "acc": 0.98916664, "epoch": 33.64190297632997, "grad_norm": 3.2405483722686768, "learning_rate": 2.6502187164525657e-06, "loss": 0.02425784, "memory(GiB)": 13.7, "step": 71775, "train_speed(iter/s)": 1.528233 }, { "acc": 0.99048615, "epoch": 33.64424654323881, "grad_norm": 3.4781227111816406, "learning_rate": 2.6495346225229753e-06, "loss": 0.03925005, "memory(GiB)": 13.7, "step": 71780, "train_speed(iter/s)": 1.52824 }, { "acc": 0.98141022, "epoch": 33.64659011014764, "grad_norm": 1.2576215267181396, "learning_rate": 2.6488505851028666e-06, "loss": 0.0469242, "memory(GiB)": 13.7, "step": 71785, "train_speed(iter/s)": 1.528244 }, { "acc": 0.9879261, "epoch": 33.64893367705648, "grad_norm": 0.0016107133124023676, "learning_rate": 2.6481666042086868e-06, "loss": 0.04758436, "memory(GiB)": 13.7, "step": 71790, "train_speed(iter/s)": 1.528249 }, { "acc": 0.98625002, "epoch": 33.65127724396532, "grad_norm": 0.8296487331390381, "learning_rate": 2.6474826798568713e-06, "loss": 0.0438383, "memory(GiB)": 13.7, "step": 71795, "train_speed(iter/s)": 1.52825 }, { "acc": 0.9822917, "epoch": 33.65362081087415, "grad_norm": 4.629707336425781, "learning_rate": 2.6467988120638626e-06, "loss": 0.02823884, "memory(GiB)": 13.7, "step": 71800, "train_speed(iter/s)": 1.528256 }, { "acc": 0.975, "epoch": 33.655964377782986, "grad_norm": 0.005344029050320387, "learning_rate": 2.646115000846099e-06, "loss": 0.07235891, "memory(GiB)": 13.7, "step": 71805, "train_speed(iter/s)": 1.528257 }, { "acc": 0.98843746, "epoch": 33.65830794469182, "grad_norm": 2.4355103969573975, "learning_rate": 2.645431246220015e-06, "loss": 0.06036007, "memory(GiB)": 13.7, "step": 71810, "train_speed(iter/s)": 1.528258 }, { "acc": 0.98592262, "epoch": 33.660651511600655, "grad_norm": 4.548756122589111, "learning_rate": 2.644747548202046e-06, "loss": 0.04953899, "memory(GiB)": 13.7, "step": 71815, "train_speed(iter/s)": 1.528261 }, { "acc": 0.96833334, "epoch": 33.66299507850949, "grad_norm": 6.477904319763184, "learning_rate": 2.6440639068086293e-06, "loss": 0.06168441, "memory(GiB)": 13.7, "step": 71820, "train_speed(iter/s)": 1.528273 }, { "acc": 0.98084278, "epoch": 33.66533864541832, "grad_norm": 4.232789516448975, "learning_rate": 2.6433803220561925e-06, "loss": 0.05557539, "memory(GiB)": 13.7, "step": 71825, "train_speed(iter/s)": 1.528278 }, { "acc": 0.996875, "epoch": 33.667682212327165, "grad_norm": 0.0009483852772973478, "learning_rate": 2.6426967939611713e-06, "loss": 0.0206067, "memory(GiB)": 13.7, "step": 71830, "train_speed(iter/s)": 1.528272 }, { "acc": 0.97833328, "epoch": 33.670025779236, "grad_norm": 3.6189358234405518, "learning_rate": 2.642013322539991e-06, "loss": 0.06477221, "memory(GiB)": 13.7, "step": 71835, "train_speed(iter/s)": 1.528277 }, { "acc": 0.98729162, "epoch": 33.67236934614483, "grad_norm": 0.026305295526981354, "learning_rate": 2.6413299078090823e-06, "loss": 0.0198215, "memory(GiB)": 13.7, "step": 71840, "train_speed(iter/s)": 1.528281 }, { "acc": 0.97758932, "epoch": 33.67471291305367, "grad_norm": 2.3041768074035645, "learning_rate": 2.6406465497848742e-06, "loss": 0.06240515, "memory(GiB)": 13.7, "step": 71845, "train_speed(iter/s)": 1.528287 }, { "acc": 0.99004726, "epoch": 33.6770564799625, "grad_norm": 0.5665509104728699, "learning_rate": 2.6399632484837873e-06, "loss": 0.02720879, "memory(GiB)": 13.7, "step": 71850, "train_speed(iter/s)": 1.528293 }, { "acc": 0.98041668, "epoch": 33.679400046871336, "grad_norm": 6.429932117462158, "learning_rate": 2.6392800039222506e-06, "loss": 0.06243694, "memory(GiB)": 13.7, "step": 71855, "train_speed(iter/s)": 1.528294 }, { "acc": 0.9826952, "epoch": 33.68174361378017, "grad_norm": 4.640219688415527, "learning_rate": 2.638596816116686e-06, "loss": 0.03955385, "memory(GiB)": 13.7, "step": 71860, "train_speed(iter/s)": 1.528298 }, { "acc": 0.98359375, "epoch": 33.68408718068901, "grad_norm": 6.698074817657471, "learning_rate": 2.637913685083515e-06, "loss": 0.04144292, "memory(GiB)": 13.7, "step": 71865, "train_speed(iter/s)": 1.528307 }, { "acc": 0.98675594, "epoch": 33.686430747597846, "grad_norm": 0.003632752224802971, "learning_rate": 2.6372306108391553e-06, "loss": 0.02426222, "memory(GiB)": 13.7, "step": 71870, "train_speed(iter/s)": 1.528309 }, { "acc": 0.996875, "epoch": 33.68877431450668, "grad_norm": 3.5582239627838135, "learning_rate": 2.6365475934000277e-06, "loss": 0.04978524, "memory(GiB)": 13.7, "step": 71875, "train_speed(iter/s)": 1.528314 }, { "acc": 0.990625, "epoch": 33.691117881415515, "grad_norm": 2.261874198913574, "learning_rate": 2.6358646327825526e-06, "loss": 0.01961856, "memory(GiB)": 13.7, "step": 71880, "train_speed(iter/s)": 1.528318 }, { "acc": 0.96875, "epoch": 33.69346144832435, "grad_norm": 3.7072925567626953, "learning_rate": 2.6351817290031403e-06, "loss": 0.05586796, "memory(GiB)": 13.7, "step": 71885, "train_speed(iter/s)": 1.528315 }, { "acc": 0.99258928, "epoch": 33.695805015233184, "grad_norm": 1.8986479043960571, "learning_rate": 2.634498882078209e-06, "loss": 0.03080578, "memory(GiB)": 13.7, "step": 71890, "train_speed(iter/s)": 1.528316 }, { "acc": 0.99020834, "epoch": 33.69814858214202, "grad_norm": 0.00400373712182045, "learning_rate": 2.6338160920241735e-06, "loss": 0.02516271, "memory(GiB)": 13.7, "step": 71895, "train_speed(iter/s)": 1.528324 }, { "acc": 0.98738098, "epoch": 33.70049214905085, "grad_norm": 0.00305158575065434, "learning_rate": 2.633133358857442e-06, "loss": 0.02338137, "memory(GiB)": 13.7, "step": 71900, "train_speed(iter/s)": 1.528327 }, { "acc": 0.97758923, "epoch": 33.702835715959694, "grad_norm": 2.35209321975708, "learning_rate": 2.632450682594429e-06, "loss": 0.05212216, "memory(GiB)": 13.7, "step": 71905, "train_speed(iter/s)": 1.528335 }, { "acc": 0.98354168, "epoch": 33.70517928286853, "grad_norm": 1.8944673538208008, "learning_rate": 2.6317680632515406e-06, "loss": 0.07124945, "memory(GiB)": 13.7, "step": 71910, "train_speed(iter/s)": 1.528338 }, { "acc": 0.98732948, "epoch": 33.70752284977736, "grad_norm": 5.050865650177002, "learning_rate": 2.6310855008451856e-06, "loss": 0.03102377, "memory(GiB)": 13.7, "step": 71915, "train_speed(iter/s)": 1.528339 }, { "acc": 0.98135414, "epoch": 33.7098664166862, "grad_norm": 0.8600426316261292, "learning_rate": 2.6304029953917724e-06, "loss": 0.06053579, "memory(GiB)": 13.7, "step": 71920, "train_speed(iter/s)": 1.528339 }, { "acc": 0.9927084, "epoch": 33.71220998359503, "grad_norm": 2.5568413734436035, "learning_rate": 2.6297205469077036e-06, "loss": 0.0298274, "memory(GiB)": 13.7, "step": 71925, "train_speed(iter/s)": 1.528341 }, { "acc": 0.98666668, "epoch": 33.714553550503865, "grad_norm": 0.10657543689012527, "learning_rate": 2.6290381554093848e-06, "loss": 0.03719895, "memory(GiB)": 13.7, "step": 71930, "train_speed(iter/s)": 1.528343 }, { "acc": 0.99750004, "epoch": 33.7168971174127, "grad_norm": 0.06357450038194656, "learning_rate": 2.6283558209132188e-06, "loss": 0.00954677, "memory(GiB)": 13.7, "step": 71935, "train_speed(iter/s)": 1.528343 }, { "acc": 0.99045677, "epoch": 33.71924068432154, "grad_norm": 4.196784019470215, "learning_rate": 2.6276735434356038e-06, "loss": 0.05128308, "memory(GiB)": 13.7, "step": 71940, "train_speed(iter/s)": 1.528346 }, { "acc": 0.99020834, "epoch": 33.721584251230375, "grad_norm": 6.249744415283203, "learning_rate": 2.6269913229929416e-06, "loss": 0.01782744, "memory(GiB)": 13.7, "step": 71945, "train_speed(iter/s)": 1.528347 }, { "acc": 0.9895834, "epoch": 33.72392781813921, "grad_norm": 3.4030349254608154, "learning_rate": 2.6263091596016327e-06, "loss": 0.02379148, "memory(GiB)": 13.7, "step": 71950, "train_speed(iter/s)": 1.528348 }, { "acc": 0.96944447, "epoch": 33.726271385048044, "grad_norm": 3.540720224380493, "learning_rate": 2.625627053278072e-06, "loss": 0.0792289, "memory(GiB)": 13.7, "step": 71955, "train_speed(iter/s)": 1.528348 }, { "acc": 0.97999458, "epoch": 33.72861495195688, "grad_norm": 3.818622589111328, "learning_rate": 2.6249450040386533e-06, "loss": 0.05806668, "memory(GiB)": 13.7, "step": 71960, "train_speed(iter/s)": 1.52835 }, { "acc": 0.97972221, "epoch": 33.73095851886571, "grad_norm": 0.872295081615448, "learning_rate": 2.6242630118997707e-06, "loss": 0.06873918, "memory(GiB)": 13.7, "step": 71965, "train_speed(iter/s)": 1.528355 }, { "acc": 0.98393307, "epoch": 33.73330208577455, "grad_norm": 0.0038126001600176096, "learning_rate": 2.6235810768778198e-06, "loss": 0.04835053, "memory(GiB)": 13.7, "step": 71970, "train_speed(iter/s)": 1.528358 }, { "acc": 0.98633928, "epoch": 33.73564565268338, "grad_norm": 3.6531171798706055, "learning_rate": 2.622899198989193e-06, "loss": 0.05082594, "memory(GiB)": 13.7, "step": 71975, "train_speed(iter/s)": 1.528367 }, { "acc": 0.98708334, "epoch": 33.73798921959222, "grad_norm": 3.6410629749298096, "learning_rate": 2.6222173782502764e-06, "loss": 0.03156281, "memory(GiB)": 13.7, "step": 71980, "train_speed(iter/s)": 1.52837 }, { "acc": 0.98085766, "epoch": 33.74033278650106, "grad_norm": 6.59494686126709, "learning_rate": 2.6215356146774612e-06, "loss": 0.04692458, "memory(GiB)": 13.7, "step": 71985, "train_speed(iter/s)": 1.528375 }, { "acc": 0.98500004, "epoch": 33.74267635340989, "grad_norm": 2.820251941680908, "learning_rate": 2.620853908287136e-06, "loss": 0.03236953, "memory(GiB)": 13.7, "step": 71990, "train_speed(iter/s)": 1.528381 }, { "acc": 0.99541664, "epoch": 33.745019920318725, "grad_norm": 1.8005459308624268, "learning_rate": 2.620172259095684e-06, "loss": 0.04099657, "memory(GiB)": 13.7, "step": 71995, "train_speed(iter/s)": 1.52838 }, { "acc": 0.98979168, "epoch": 33.74736348722756, "grad_norm": 2.867866039276123, "learning_rate": 2.6194906671194907e-06, "loss": 0.02626033, "memory(GiB)": 13.7, "step": 72000, "train_speed(iter/s)": 1.528379 }, { "acc": 0.97425594, "epoch": 33.749707054136394, "grad_norm": 6.552136421203613, "learning_rate": 2.618809132374938e-06, "loss": 0.08920645, "memory(GiB)": 13.7, "step": 72005, "train_speed(iter/s)": 1.528382 }, { "acc": 0.97729168, "epoch": 33.75205062104523, "grad_norm": 5.277868747711182, "learning_rate": 2.6181276548784123e-06, "loss": 0.05560031, "memory(GiB)": 13.7, "step": 72010, "train_speed(iter/s)": 1.528378 }, { "acc": 0.99444447, "epoch": 33.75439418795406, "grad_norm": 2.117288589477539, "learning_rate": 2.617446234646289e-06, "loss": 0.02287142, "memory(GiB)": 13.7, "step": 72015, "train_speed(iter/s)": 1.528382 }, { "acc": 0.9822917, "epoch": 33.756737754862904, "grad_norm": 1.9731253385543823, "learning_rate": 2.616764871694949e-06, "loss": 0.03669207, "memory(GiB)": 13.7, "step": 72020, "train_speed(iter/s)": 1.528382 }, { "acc": 0.98321428, "epoch": 33.75908132177174, "grad_norm": 0.20509175956249237, "learning_rate": 2.6160835660407725e-06, "loss": 0.07320172, "memory(GiB)": 13.7, "step": 72025, "train_speed(iter/s)": 1.528388 }, { "acc": 0.9833333, "epoch": 33.76142488868057, "grad_norm": 2.950535774230957, "learning_rate": 2.6154023177001314e-06, "loss": 0.03771546, "memory(GiB)": 13.7, "step": 72030, "train_speed(iter/s)": 1.528395 }, { "acc": 0.9854167, "epoch": 33.76376845558941, "grad_norm": 1.993784785270691, "learning_rate": 2.614721126689405e-06, "loss": 0.04275791, "memory(GiB)": 13.7, "step": 72035, "train_speed(iter/s)": 1.528392 }, { "acc": 0.98833332, "epoch": 33.76611202249824, "grad_norm": 3.907447338104248, "learning_rate": 2.6140399930249633e-06, "loss": 0.03631794, "memory(GiB)": 13.7, "step": 72040, "train_speed(iter/s)": 1.528396 }, { "acc": 0.98688097, "epoch": 33.768455589407075, "grad_norm": 2.2620582580566406, "learning_rate": 2.613358916723179e-06, "loss": 0.04427685, "memory(GiB)": 13.7, "step": 72045, "train_speed(iter/s)": 1.528399 }, { "acc": 0.98187504, "epoch": 33.77079915631591, "grad_norm": 2.3226470947265625, "learning_rate": 2.612677897800427e-06, "loss": 0.04204841, "memory(GiB)": 13.7, "step": 72050, "train_speed(iter/s)": 1.528398 }, { "acc": 0.98619118, "epoch": 33.77314272322475, "grad_norm": 0.10577355325222015, "learning_rate": 2.611996936273071e-06, "loss": 0.05257561, "memory(GiB)": 13.7, "step": 72055, "train_speed(iter/s)": 1.528401 }, { "acc": 0.97833328, "epoch": 33.775486290133585, "grad_norm": 5.538036823272705, "learning_rate": 2.611316032157482e-06, "loss": 0.04550692, "memory(GiB)": 13.7, "step": 72060, "train_speed(iter/s)": 1.5284 }, { "acc": 0.98973217, "epoch": 33.77782985704242, "grad_norm": 2.4335570335388184, "learning_rate": 2.6106351854700284e-06, "loss": 0.0294898, "memory(GiB)": 13.7, "step": 72065, "train_speed(iter/s)": 1.528402 }, { "acc": 0.9802083, "epoch": 33.780173423951254, "grad_norm": 5.003070831298828, "learning_rate": 2.6099543962270728e-06, "loss": 0.03281835, "memory(GiB)": 13.7, "step": 72070, "train_speed(iter/s)": 1.528405 }, { "acc": 0.98175602, "epoch": 33.78251699086009, "grad_norm": 0.059973422437906265, "learning_rate": 2.6092736644449806e-06, "loss": 0.03028562, "memory(GiB)": 13.7, "step": 72075, "train_speed(iter/s)": 1.528415 }, { "acc": 0.98389568, "epoch": 33.78486055776892, "grad_norm": 0.9086629152297974, "learning_rate": 2.608592990140112e-06, "loss": 0.0457128, "memory(GiB)": 13.7, "step": 72080, "train_speed(iter/s)": 1.528416 }, { "acc": 0.9875, "epoch": 33.78720412467776, "grad_norm": 3.0913469791412354, "learning_rate": 2.6079123733288325e-06, "loss": 0.03963596, "memory(GiB)": 13.7, "step": 72085, "train_speed(iter/s)": 1.528422 }, { "acc": 0.98833332, "epoch": 33.78954769158659, "grad_norm": 3.9602246284484863, "learning_rate": 2.6072318140274966e-06, "loss": 0.06007792, "memory(GiB)": 13.7, "step": 72090, "train_speed(iter/s)": 1.52843 }, { "acc": 0.98777781, "epoch": 33.79189125849543, "grad_norm": 0.9297956228256226, "learning_rate": 2.6065513122524655e-06, "loss": 0.03342974, "memory(GiB)": 13.7, "step": 72095, "train_speed(iter/s)": 1.528438 }, { "acc": 0.98350868, "epoch": 33.79423482540427, "grad_norm": 3.8097496032714844, "learning_rate": 2.605870868020099e-06, "loss": 0.03318505, "memory(GiB)": 13.7, "step": 72100, "train_speed(iter/s)": 1.528441 }, { "acc": 0.97413692, "epoch": 33.7965783923131, "grad_norm": 4.8874969482421875, "learning_rate": 2.6051904813467462e-06, "loss": 0.03986557, "memory(GiB)": 13.7, "step": 72105, "train_speed(iter/s)": 1.528443 }, { "acc": 0.99437504, "epoch": 33.798921959221936, "grad_norm": 4.130817890167236, "learning_rate": 2.6045101522487666e-06, "loss": 0.02284132, "memory(GiB)": 13.7, "step": 72110, "train_speed(iter/s)": 1.528442 }, { "acc": 0.98391943, "epoch": 33.80126552613077, "grad_norm": 0.0012013514060527086, "learning_rate": 2.6038298807425124e-06, "loss": 0.03460447, "memory(GiB)": 13.7, "step": 72115, "train_speed(iter/s)": 1.528439 }, { "acc": 0.98998508, "epoch": 33.803609093039604, "grad_norm": 1.4101825952529907, "learning_rate": 2.6031496668443323e-06, "loss": 0.03750034, "memory(GiB)": 13.7, "step": 72120, "train_speed(iter/s)": 1.528443 }, { "acc": 0.9897049, "epoch": 33.80595265994844, "grad_norm": 1.5749996900558472, "learning_rate": 2.602469510570581e-06, "loss": 0.06064808, "memory(GiB)": 13.7, "step": 72125, "train_speed(iter/s)": 1.52845 }, { "acc": 0.97562504, "epoch": 33.80829622685728, "grad_norm": 3.201674222946167, "learning_rate": 2.6017894119376024e-06, "loss": 0.04467852, "memory(GiB)": 13.7, "step": 72130, "train_speed(iter/s)": 1.528449 }, { "acc": 0.97166672, "epoch": 33.810639793766114, "grad_norm": 2.287928581237793, "learning_rate": 2.6011093709617456e-06, "loss": 0.06733441, "memory(GiB)": 13.7, "step": 72135, "train_speed(iter/s)": 1.528452 }, { "acc": 0.98797121, "epoch": 33.81298336067495, "grad_norm": 0.3516112267971039, "learning_rate": 2.600429387659359e-06, "loss": 0.06185289, "memory(GiB)": 13.7, "step": 72140, "train_speed(iter/s)": 1.528453 }, { "acc": 0.99125004, "epoch": 33.81532692758378, "grad_norm": 1.5454801321029663, "learning_rate": 2.5997494620467832e-06, "loss": 0.01753472, "memory(GiB)": 13.7, "step": 72145, "train_speed(iter/s)": 1.528457 }, { "acc": 0.97217255, "epoch": 33.81767049449262, "grad_norm": 5.779909610748291, "learning_rate": 2.5990695941403634e-06, "loss": 0.05168774, "memory(GiB)": 13.7, "step": 72150, "train_speed(iter/s)": 1.528458 }, { "acc": 0.96925602, "epoch": 33.82001406140145, "grad_norm": 2.286466598510742, "learning_rate": 2.5983897839564428e-06, "loss": 0.0536557, "memory(GiB)": 13.7, "step": 72155, "train_speed(iter/s)": 1.528461 }, { "acc": 0.98152771, "epoch": 33.822357628310286, "grad_norm": 2.3937273025512695, "learning_rate": 2.5977100315113603e-06, "loss": 0.06717597, "memory(GiB)": 13.7, "step": 72160, "train_speed(iter/s)": 1.528468 }, { "acc": 0.9802084, "epoch": 33.82470119521912, "grad_norm": 4.05157995223999, "learning_rate": 2.597030336821453e-06, "loss": 0.05643177, "memory(GiB)": 13.7, "step": 72165, "train_speed(iter/s)": 1.528478 }, { "acc": 0.98833923, "epoch": 33.82704476212796, "grad_norm": 2.986814498901367, "learning_rate": 2.59635069990306e-06, "loss": 0.03382505, "memory(GiB)": 13.7, "step": 72170, "train_speed(iter/s)": 1.528475 }, { "acc": 0.98416672, "epoch": 33.829388329036796, "grad_norm": 4.1986870765686035, "learning_rate": 2.5956711207725206e-06, "loss": 0.03665667, "memory(GiB)": 13.7, "step": 72175, "train_speed(iter/s)": 1.52848 }, { "acc": 0.99444447, "epoch": 33.83173189594563, "grad_norm": 1.7694493532180786, "learning_rate": 2.594991599446165e-06, "loss": 0.02100632, "memory(GiB)": 13.7, "step": 72180, "train_speed(iter/s)": 1.528485 }, { "acc": 0.97196426, "epoch": 33.834075462854464, "grad_norm": 3.9477806091308594, "learning_rate": 2.5943121359403285e-06, "loss": 0.06129245, "memory(GiB)": 13.7, "step": 72185, "train_speed(iter/s)": 1.528489 }, { "acc": 0.99092264, "epoch": 33.8364190297633, "grad_norm": 0.002634792821481824, "learning_rate": 2.5936327302713436e-06, "loss": 0.04451765, "memory(GiB)": 13.7, "step": 72190, "train_speed(iter/s)": 1.52849 }, { "acc": 0.98425598, "epoch": 33.83876259667213, "grad_norm": 3.466387987136841, "learning_rate": 2.592953382455542e-06, "loss": 0.03955238, "memory(GiB)": 13.7, "step": 72195, "train_speed(iter/s)": 1.528489 }, { "acc": 0.9875, "epoch": 33.84110616358097, "grad_norm": 3.630479574203491, "learning_rate": 2.5922740925092516e-06, "loss": 0.03653474, "memory(GiB)": 13.7, "step": 72200, "train_speed(iter/s)": 1.528492 }, { "acc": 0.97559528, "epoch": 33.84344973048981, "grad_norm": 2.9371497631073, "learning_rate": 2.591594860448799e-06, "loss": 0.11120776, "memory(GiB)": 13.7, "step": 72205, "train_speed(iter/s)": 1.528502 }, { "acc": 0.99125004, "epoch": 33.84579329739864, "grad_norm": 0.15255014598369598, "learning_rate": 2.590915686290512e-06, "loss": 0.01909047, "memory(GiB)": 13.7, "step": 72210, "train_speed(iter/s)": 1.528506 }, { "acc": 0.9895833, "epoch": 33.84813686430748, "grad_norm": 0.0011917927768081427, "learning_rate": 2.5902365700507175e-06, "loss": 0.03188286, "memory(GiB)": 13.7, "step": 72215, "train_speed(iter/s)": 1.52851 }, { "acc": 0.98395824, "epoch": 33.85048043121631, "grad_norm": 2.0474143028259277, "learning_rate": 2.5895575117457357e-06, "loss": 0.03596864, "memory(GiB)": 13.7, "step": 72220, "train_speed(iter/s)": 1.528512 }, { "acc": 0.9890625, "epoch": 33.852823998125146, "grad_norm": 2.2547695636749268, "learning_rate": 2.5888785113918904e-06, "loss": 0.03700134, "memory(GiB)": 13.7, "step": 72225, "train_speed(iter/s)": 1.528513 }, { "acc": 0.97956848, "epoch": 33.85516756503398, "grad_norm": 1.616715669631958, "learning_rate": 2.588199569005506e-06, "loss": 0.0451894, "memory(GiB)": 13.7, "step": 72230, "train_speed(iter/s)": 1.52852 }, { "acc": 0.98911705, "epoch": 33.857511131942815, "grad_norm": 1.7995232343673706, "learning_rate": 2.5875206846028955e-06, "loss": 0.04889604, "memory(GiB)": 13.7, "step": 72235, "train_speed(iter/s)": 1.528524 }, { "acc": 0.98708324, "epoch": 33.85985469885165, "grad_norm": 2.481034278869629, "learning_rate": 2.586841858200383e-06, "loss": 0.03034411, "memory(GiB)": 13.7, "step": 72240, "train_speed(iter/s)": 1.528529 }, { "acc": 0.99278851, "epoch": 33.86219826576049, "grad_norm": 0.8635045886039734, "learning_rate": 2.586163089814281e-06, "loss": 0.03795803, "memory(GiB)": 13.7, "step": 72245, "train_speed(iter/s)": 1.528528 }, { "acc": 0.99565973, "epoch": 33.864541832669325, "grad_norm": 1.640824317932129, "learning_rate": 2.585484379460907e-06, "loss": 0.02955085, "memory(GiB)": 13.7, "step": 72250, "train_speed(iter/s)": 1.528526 }, { "acc": 0.98208332, "epoch": 33.86688539957816, "grad_norm": 3.7486424446105957, "learning_rate": 2.5848057271565752e-06, "loss": 0.03766576, "memory(GiB)": 13.7, "step": 72255, "train_speed(iter/s)": 1.528532 }, { "acc": 0.98062496, "epoch": 33.86922896648699, "grad_norm": 0.04317387193441391, "learning_rate": 2.5841271329175964e-06, "loss": 0.05640444, "memory(GiB)": 13.7, "step": 72260, "train_speed(iter/s)": 1.528532 }, { "acc": 0.9890625, "epoch": 33.87157253339583, "grad_norm": 6.984123706817627, "learning_rate": 2.583448596760283e-06, "loss": 0.04794597, "memory(GiB)": 13.7, "step": 72265, "train_speed(iter/s)": 1.528538 }, { "acc": 0.97807541, "epoch": 33.87391610030466, "grad_norm": 3.22885799407959, "learning_rate": 2.5827701187009468e-06, "loss": 0.05681359, "memory(GiB)": 13.7, "step": 72270, "train_speed(iter/s)": 1.52854 }, { "acc": 0.99598217, "epoch": 33.876259667213496, "grad_norm": 0.005635404027998447, "learning_rate": 2.5820916987558918e-06, "loss": 0.04580256, "memory(GiB)": 13.7, "step": 72275, "train_speed(iter/s)": 1.528541 }, { "acc": 0.98986111, "epoch": 33.87860323412234, "grad_norm": 4.477783203125, "learning_rate": 2.581413336941428e-06, "loss": 0.03004534, "memory(GiB)": 13.7, "step": 72280, "train_speed(iter/s)": 1.528541 }, { "acc": 0.95924816, "epoch": 33.88094680103117, "grad_norm": 4.353105545043945, "learning_rate": 2.580735033273862e-06, "loss": 0.08828507, "memory(GiB)": 13.7, "step": 72285, "train_speed(iter/s)": 1.528542 }, { "acc": 0.984375, "epoch": 33.883290367940006, "grad_norm": 5.095606327056885, "learning_rate": 2.5800567877694953e-06, "loss": 0.0405266, "memory(GiB)": 13.7, "step": 72290, "train_speed(iter/s)": 1.528548 }, { "acc": 0.99285717, "epoch": 33.88563393484884, "grad_norm": 0.003472632495686412, "learning_rate": 2.579378600444631e-06, "loss": 0.03445948, "memory(GiB)": 13.7, "step": 72295, "train_speed(iter/s)": 1.528547 }, { "acc": 0.99020834, "epoch": 33.887977501757675, "grad_norm": 0.967365026473999, "learning_rate": 2.57870047131557e-06, "loss": 0.02429389, "memory(GiB)": 13.7, "step": 72300, "train_speed(iter/s)": 1.528547 }, { "acc": 0.98041668, "epoch": 33.89032106866651, "grad_norm": 1.224034309387207, "learning_rate": 2.5780224003986155e-06, "loss": 0.04940216, "memory(GiB)": 13.7, "step": 72305, "train_speed(iter/s)": 1.528548 }, { "acc": 0.97770824, "epoch": 33.89266463557534, "grad_norm": 0.017060536891222, "learning_rate": 2.5773443877100627e-06, "loss": 0.03676589, "memory(GiB)": 13.7, "step": 72310, "train_speed(iter/s)": 1.528557 }, { "acc": 0.98187504, "epoch": 33.89500820248418, "grad_norm": 4.618107795715332, "learning_rate": 2.57666643326621e-06, "loss": 0.04523392, "memory(GiB)": 13.7, "step": 72315, "train_speed(iter/s)": 1.528559 }, { "acc": 0.98601189, "epoch": 33.89735176939302, "grad_norm": 5.383455753326416, "learning_rate": 2.575988537083355e-06, "loss": 0.0574627, "memory(GiB)": 13.7, "step": 72320, "train_speed(iter/s)": 1.528558 }, { "acc": 0.9802084, "epoch": 33.89969533630185, "grad_norm": 1.8488786220550537, "learning_rate": 2.575310699177788e-06, "loss": 0.05830014, "memory(GiB)": 13.7, "step": 72325, "train_speed(iter/s)": 1.528562 }, { "acc": 0.99363976, "epoch": 33.90203890321069, "grad_norm": 2.6801583766937256, "learning_rate": 2.574632919565808e-06, "loss": 0.02032665, "memory(GiB)": 13.7, "step": 72330, "train_speed(iter/s)": 1.528563 }, { "acc": 0.98542614, "epoch": 33.90438247011952, "grad_norm": 4.203510761260986, "learning_rate": 2.5739551982636997e-06, "loss": 0.05628743, "memory(GiB)": 13.7, "step": 72335, "train_speed(iter/s)": 1.528562 }, { "acc": 0.9832386, "epoch": 33.906726037028356, "grad_norm": 2.1763598918914795, "learning_rate": 2.5732775352877578e-06, "loss": 0.04630393, "memory(GiB)": 13.7, "step": 72340, "train_speed(iter/s)": 1.528563 }, { "acc": 0.98095341, "epoch": 33.90906960393719, "grad_norm": 3.137206792831421, "learning_rate": 2.57259993065427e-06, "loss": 0.08388885, "memory(GiB)": 13.7, "step": 72345, "train_speed(iter/s)": 1.528566 }, { "acc": 0.9796875, "epoch": 33.911413170846025, "grad_norm": 4.3185133934021, "learning_rate": 2.5719223843795226e-06, "loss": 0.06019541, "memory(GiB)": 13.7, "step": 72350, "train_speed(iter/s)": 1.528567 }, { "acc": 0.9845623, "epoch": 33.913756737754866, "grad_norm": 5.264463424682617, "learning_rate": 2.571244896479803e-06, "loss": 0.04998493, "memory(GiB)": 13.7, "step": 72355, "train_speed(iter/s)": 1.52857 }, { "acc": 0.98827381, "epoch": 33.9161003046637, "grad_norm": 0.8853397369384766, "learning_rate": 2.5705674669713972e-06, "loss": 0.05010409, "memory(GiB)": 13.7, "step": 72360, "train_speed(iter/s)": 1.528576 }, { "acc": 0.98193188, "epoch": 33.918443871572535, "grad_norm": 3.7864348888397217, "learning_rate": 2.569890095870584e-06, "loss": 0.05268797, "memory(GiB)": 13.7, "step": 72365, "train_speed(iter/s)": 1.528576 }, { "acc": 0.97041664, "epoch": 33.92078743848137, "grad_norm": 4.326454162597656, "learning_rate": 2.56921278319365e-06, "loss": 0.05302477, "memory(GiB)": 13.7, "step": 72370, "train_speed(iter/s)": 1.528579 }, { "acc": 0.97624998, "epoch": 33.9231310053902, "grad_norm": 2.7695298194885254, "learning_rate": 2.5685355289568713e-06, "loss": 0.05299507, "memory(GiB)": 13.7, "step": 72375, "train_speed(iter/s)": 1.528585 }, { "acc": 0.9888195, "epoch": 33.92547457229904, "grad_norm": 0.6167279481887817, "learning_rate": 2.567858333176531e-06, "loss": 0.04424489, "memory(GiB)": 13.7, "step": 72380, "train_speed(iter/s)": 1.528587 }, { "acc": 0.9713542, "epoch": 33.92781813920787, "grad_norm": 0.9302729368209839, "learning_rate": 2.5671811958689023e-06, "loss": 0.0744092, "memory(GiB)": 13.7, "step": 72385, "train_speed(iter/s)": 1.52859 }, { "acc": 0.97027779, "epoch": 33.93016170611671, "grad_norm": 4.179884910583496, "learning_rate": 2.5665041170502645e-06, "loss": 0.0666976, "memory(GiB)": 13.7, "step": 72390, "train_speed(iter/s)": 1.528593 }, { "acc": 0.9794445, "epoch": 33.93250527302555, "grad_norm": 7.8914384841918945, "learning_rate": 2.5658270967368904e-06, "loss": 0.04083038, "memory(GiB)": 13.7, "step": 72395, "train_speed(iter/s)": 1.528597 }, { "acc": 0.98729172, "epoch": 33.93484883993438, "grad_norm": 3.8574299812316895, "learning_rate": 2.565150134945057e-06, "loss": 0.0245969, "memory(GiB)": 13.7, "step": 72400, "train_speed(iter/s)": 1.5286 }, { "acc": 0.95791664, "epoch": 33.93719240684322, "grad_norm": 3.601224660873413, "learning_rate": 2.564473231691031e-06, "loss": 0.07768553, "memory(GiB)": 13.7, "step": 72405, "train_speed(iter/s)": 1.528604 }, { "acc": 0.9697916, "epoch": 33.93953597375205, "grad_norm": 3.349005699157715, "learning_rate": 2.5637963869910876e-06, "loss": 0.06242314, "memory(GiB)": 13.7, "step": 72410, "train_speed(iter/s)": 1.528611 }, { "acc": 0.978125, "epoch": 33.941879540660885, "grad_norm": 2.959826946258545, "learning_rate": 2.5631196008614922e-06, "loss": 0.0630728, "memory(GiB)": 13.7, "step": 72415, "train_speed(iter/s)": 1.528617 }, { "acc": 0.98611107, "epoch": 33.94422310756972, "grad_norm": 2.432307720184326, "learning_rate": 2.562442873318516e-06, "loss": 0.05698994, "memory(GiB)": 13.7, "step": 72420, "train_speed(iter/s)": 1.52862 }, { "acc": 0.97791672, "epoch": 33.946566674478554, "grad_norm": 3.6354105472564697, "learning_rate": 2.5617662043784213e-06, "loss": 0.07432972, "memory(GiB)": 13.7, "step": 72425, "train_speed(iter/s)": 1.528622 }, { "acc": 0.98270836, "epoch": 33.948910241387395, "grad_norm": 2.2370457649230957, "learning_rate": 2.5610895940574755e-06, "loss": 0.03543626, "memory(GiB)": 13.7, "step": 72430, "train_speed(iter/s)": 1.528622 }, { "acc": 0.99375, "epoch": 33.95125380829623, "grad_norm": 3.9639697074890137, "learning_rate": 2.560413042371944e-06, "loss": 0.043673, "memory(GiB)": 13.7, "step": 72435, "train_speed(iter/s)": 1.528631 }, { "acc": 0.97645836, "epoch": 33.953597375205064, "grad_norm": 4.044863224029541, "learning_rate": 2.5597365493380845e-06, "loss": 0.05878323, "memory(GiB)": 13.7, "step": 72440, "train_speed(iter/s)": 1.528633 }, { "acc": 0.98279762, "epoch": 33.9559409421139, "grad_norm": 2.7199432849884033, "learning_rate": 2.5590601149721593e-06, "loss": 0.03224826, "memory(GiB)": 13.7, "step": 72445, "train_speed(iter/s)": 1.528636 }, { "acc": 0.98562498, "epoch": 33.95828450902273, "grad_norm": 0.9430720806121826, "learning_rate": 2.558383739290431e-06, "loss": 0.032762, "memory(GiB)": 13.7, "step": 72450, "train_speed(iter/s)": 1.528642 }, { "acc": 0.96968746, "epoch": 33.96062807593157, "grad_norm": 3.3116261959075928, "learning_rate": 2.557707422309154e-06, "loss": 0.05825211, "memory(GiB)": 13.7, "step": 72455, "train_speed(iter/s)": 1.528645 }, { "acc": 0.99229164, "epoch": 33.9629716428404, "grad_norm": 0.8910897970199585, "learning_rate": 2.557031164044584e-06, "loss": 0.02192443, "memory(GiB)": 13.7, "step": 72460, "train_speed(iter/s)": 1.528646 }, { "acc": 0.97437496, "epoch": 33.965315209749235, "grad_norm": 0.8394544124603271, "learning_rate": 2.5563549645129758e-06, "loss": 0.029503, "memory(GiB)": 13.7, "step": 72465, "train_speed(iter/s)": 1.528645 }, { "acc": 0.95375004, "epoch": 33.96765877665808, "grad_norm": 8.881246566772461, "learning_rate": 2.555678823730587e-06, "loss": 0.09543607, "memory(GiB)": 13.7, "step": 72470, "train_speed(iter/s)": 1.528648 }, { "acc": 0.96883926, "epoch": 33.97000234356691, "grad_norm": 4.550248622894287, "learning_rate": 2.5550027417136674e-06, "loss": 0.06843578, "memory(GiB)": 13.7, "step": 72475, "train_speed(iter/s)": 1.528649 }, { "acc": 0.98039141, "epoch": 33.972345910475745, "grad_norm": 8.396175384521484, "learning_rate": 2.5543267184784664e-06, "loss": 0.04445176, "memory(GiB)": 13.7, "step": 72480, "train_speed(iter/s)": 1.528657 }, { "acc": 0.99591351, "epoch": 33.97468947738458, "grad_norm": 0.9755571484565735, "learning_rate": 2.5536507540412353e-06, "loss": 0.01376497, "memory(GiB)": 13.7, "step": 72485, "train_speed(iter/s)": 1.528662 }, { "acc": 0.97354164, "epoch": 33.977033044293414, "grad_norm": 2.572868585586548, "learning_rate": 2.552974848418223e-06, "loss": 0.05023931, "memory(GiB)": 13.7, "step": 72490, "train_speed(iter/s)": 1.528667 }, { "acc": 0.98729172, "epoch": 33.97937661120225, "grad_norm": 6.728004455566406, "learning_rate": 2.552299001625675e-06, "loss": 0.02627487, "memory(GiB)": 13.7, "step": 72495, "train_speed(iter/s)": 1.528674 }, { "acc": 0.96583328, "epoch": 33.98172017811108, "grad_norm": 12.067231178283691, "learning_rate": 2.5516232136798334e-06, "loss": 0.1219223, "memory(GiB)": 13.7, "step": 72500, "train_speed(iter/s)": 1.528681 }, { "acc": 0.98631525, "epoch": 33.984063745019924, "grad_norm": 1.7642626762390137, "learning_rate": 2.5509474845969455e-06, "loss": 0.06431908, "memory(GiB)": 13.7, "step": 72505, "train_speed(iter/s)": 1.528686 }, { "acc": 0.9739583, "epoch": 33.98640731192876, "grad_norm": 6.186357021331787, "learning_rate": 2.550271814393254e-06, "loss": 0.10510304, "memory(GiB)": 13.7, "step": 72510, "train_speed(iter/s)": 1.528695 }, { "acc": 0.985322, "epoch": 33.98875087883759, "grad_norm": 2.906261444091797, "learning_rate": 2.5495962030849976e-06, "loss": 0.04284208, "memory(GiB)": 13.7, "step": 72515, "train_speed(iter/s)": 1.528689 }, { "acc": 0.99020834, "epoch": 33.99109444574643, "grad_norm": 1.664934515953064, "learning_rate": 2.548920650688415e-06, "loss": 0.01545474, "memory(GiB)": 13.7, "step": 72520, "train_speed(iter/s)": 1.528694 }, { "acc": 0.98708334, "epoch": 33.99343801265526, "grad_norm": 2.8585903644561768, "learning_rate": 2.5482451572197485e-06, "loss": 0.06202619, "memory(GiB)": 13.7, "step": 72525, "train_speed(iter/s)": 1.528698 }, { "acc": 0.98640881, "epoch": 33.995781579564095, "grad_norm": 3.1898088455200195, "learning_rate": 2.54756972269523e-06, "loss": 0.04814092, "memory(GiB)": 13.7, "step": 72530, "train_speed(iter/s)": 1.528705 }, { "acc": 0.97595243, "epoch": 33.99812514647293, "grad_norm": 5.739660739898682, "learning_rate": 2.546894347131099e-06, "loss": 0.06486819, "memory(GiB)": 13.7, "step": 72535, "train_speed(iter/s)": 1.528708 }, { "acc": 0.984375, "epoch": 34.000468713381764, "grad_norm": 0.043830130249261856, "learning_rate": 2.5462190305435856e-06, "loss": 0.0250575, "memory(GiB)": 13.7, "step": 72540, "train_speed(iter/s)": 1.528695 }, { "acc": 0.99080353, "epoch": 34.002812280290605, "grad_norm": 3.040386438369751, "learning_rate": 2.5455437729489234e-06, "loss": 0.03297908, "memory(GiB)": 13.7, "step": 72545, "train_speed(iter/s)": 1.528699 }, { "acc": 0.98104172, "epoch": 34.00515584719944, "grad_norm": 5.383057594299316, "learning_rate": 2.5448685743633455e-06, "loss": 0.04608984, "memory(GiB)": 13.7, "step": 72550, "train_speed(iter/s)": 1.528703 }, { "acc": 0.98083334, "epoch": 34.007499414108274, "grad_norm": 0.025904029607772827, "learning_rate": 2.5441934348030785e-06, "loss": 0.03124717, "memory(GiB)": 13.7, "step": 72555, "train_speed(iter/s)": 1.528703 }, { "acc": 0.99039764, "epoch": 34.00984298101711, "grad_norm": 0.12756212055683136, "learning_rate": 2.5435183542843512e-06, "loss": 0.03991887, "memory(GiB)": 13.7, "step": 72560, "train_speed(iter/s)": 1.528705 }, { "acc": 0.98041668, "epoch": 34.01218654792594, "grad_norm": 2.7174623012542725, "learning_rate": 2.5428433328233928e-06, "loss": 0.04867374, "memory(GiB)": 13.7, "step": 72565, "train_speed(iter/s)": 1.528711 }, { "acc": 0.97758932, "epoch": 34.01453011483478, "grad_norm": 4.132480144500732, "learning_rate": 2.5421683704364246e-06, "loss": 0.04861657, "memory(GiB)": 13.7, "step": 72570, "train_speed(iter/s)": 1.528716 }, { "acc": 0.97937508, "epoch": 34.01687368174361, "grad_norm": 0.940569281578064, "learning_rate": 2.541493467139672e-06, "loss": 0.03047524, "memory(GiB)": 13.7, "step": 72575, "train_speed(iter/s)": 1.528721 }, { "acc": 0.97854156, "epoch": 34.019217248652446, "grad_norm": 2.9866487979888916, "learning_rate": 2.5408186229493608e-06, "loss": 0.06862056, "memory(GiB)": 13.7, "step": 72580, "train_speed(iter/s)": 1.528724 }, { "acc": 0.98536701, "epoch": 34.02156081556129, "grad_norm": 0.6962735056877136, "learning_rate": 2.54014383788171e-06, "loss": 0.03985449, "memory(GiB)": 13.7, "step": 72585, "train_speed(iter/s)": 1.528732 }, { "acc": 0.98812504, "epoch": 34.02390438247012, "grad_norm": 3.0169432163238525, "learning_rate": 2.5394691119529352e-06, "loss": 0.03778119, "memory(GiB)": 13.7, "step": 72590, "train_speed(iter/s)": 1.528732 }, { "acc": 0.98571434, "epoch": 34.026247949378956, "grad_norm": 3.1235311031341553, "learning_rate": 2.538794445179258e-06, "loss": 0.05321786, "memory(GiB)": 13.7, "step": 72595, "train_speed(iter/s)": 1.528741 }, { "acc": 0.9774703, "epoch": 34.02859151628779, "grad_norm": 2.715195894241333, "learning_rate": 2.5381198375768974e-06, "loss": 0.05421405, "memory(GiB)": 13.7, "step": 72600, "train_speed(iter/s)": 1.52875 }, { "acc": 0.97229166, "epoch": 34.030935083196624, "grad_norm": 0.0035628851037472486, "learning_rate": 2.537445289162063e-06, "loss": 0.05450671, "memory(GiB)": 13.7, "step": 72605, "train_speed(iter/s)": 1.52876 }, { "acc": 0.98467264, "epoch": 34.03327865010546, "grad_norm": 0.06484095752239227, "learning_rate": 2.5367707999509745e-06, "loss": 0.03696485, "memory(GiB)": 13.7, "step": 72610, "train_speed(iter/s)": 1.528757 }, { "acc": 0.99375, "epoch": 34.03562221701429, "grad_norm": 6.149155616760254, "learning_rate": 2.536096369959841e-06, "loss": 0.04507855, "memory(GiB)": 13.7, "step": 72615, "train_speed(iter/s)": 1.528756 }, { "acc": 0.990625, "epoch": 34.037965783923134, "grad_norm": 0.5646355152130127, "learning_rate": 2.535421999204875e-06, "loss": 0.0259333, "memory(GiB)": 13.7, "step": 72620, "train_speed(iter/s)": 1.528765 }, { "acc": 0.9791666, "epoch": 34.04030935083197, "grad_norm": 4.720732688903809, "learning_rate": 2.5347476877022877e-06, "loss": 0.04012476, "memory(GiB)": 13.7, "step": 72625, "train_speed(iter/s)": 1.528766 }, { "acc": 0.9890625, "epoch": 34.0426529177408, "grad_norm": 2.93780779838562, "learning_rate": 2.534073435468282e-06, "loss": 0.0387364, "memory(GiB)": 13.7, "step": 72630, "train_speed(iter/s)": 1.528771 }, { "acc": 0.98125954, "epoch": 34.04499648464964, "grad_norm": 2.210449695587158, "learning_rate": 2.533399242519069e-06, "loss": 0.05050269, "memory(GiB)": 13.7, "step": 72635, "train_speed(iter/s)": 1.528776 }, { "acc": 0.98874998, "epoch": 34.04734005155847, "grad_norm": 4.953934192657471, "learning_rate": 2.532725108870855e-06, "loss": 0.02344708, "memory(GiB)": 13.7, "step": 72640, "train_speed(iter/s)": 1.528784 }, { "acc": 0.97490082, "epoch": 34.049683618467306, "grad_norm": 8.133049011230469, "learning_rate": 2.53205103453984e-06, "loss": 0.05523496, "memory(GiB)": 13.7, "step": 72645, "train_speed(iter/s)": 1.52879 }, { "acc": 0.9833334, "epoch": 34.05202718537614, "grad_norm": 5.154856204986572, "learning_rate": 2.5313770195422288e-06, "loss": 0.02536016, "memory(GiB)": 13.7, "step": 72650, "train_speed(iter/s)": 1.528795 }, { "acc": 0.97912102, "epoch": 34.054370752284974, "grad_norm": 10.02190113067627, "learning_rate": 2.5307030638942237e-06, "loss": 0.05982757, "memory(GiB)": 13.7, "step": 72655, "train_speed(iter/s)": 1.5288 }, { "acc": 0.98729172, "epoch": 34.056714319193816, "grad_norm": 3.213212490081787, "learning_rate": 2.5300291676120236e-06, "loss": 0.02549115, "memory(GiB)": 13.7, "step": 72660, "train_speed(iter/s)": 1.528802 }, { "acc": 0.9854166, "epoch": 34.05905788610265, "grad_norm": 5.2907843589782715, "learning_rate": 2.529355330711824e-06, "loss": 0.03129893, "memory(GiB)": 13.7, "step": 72665, "train_speed(iter/s)": 1.5288 }, { "acc": 0.9860363, "epoch": 34.061401453011484, "grad_norm": 0.015637438744306564, "learning_rate": 2.5286815532098235e-06, "loss": 0.05099633, "memory(GiB)": 13.7, "step": 72670, "train_speed(iter/s)": 1.528791 }, { "acc": 0.98402348, "epoch": 34.06374501992032, "grad_norm": 2.646085500717163, "learning_rate": 2.5280078351222187e-06, "loss": 0.05092361, "memory(GiB)": 13.7, "step": 72675, "train_speed(iter/s)": 1.528794 }, { "acc": 0.98800058, "epoch": 34.06608858682915, "grad_norm": 0.012140999548137188, "learning_rate": 2.5273341764652047e-06, "loss": 0.04250246, "memory(GiB)": 13.7, "step": 72680, "train_speed(iter/s)": 1.528794 }, { "acc": 0.97995195, "epoch": 34.06843215373799, "grad_norm": 2.7798213958740234, "learning_rate": 2.52666057725497e-06, "loss": 0.06633651, "memory(GiB)": 13.7, "step": 72685, "train_speed(iter/s)": 1.528801 }, { "acc": 0.9833334, "epoch": 34.07077572064682, "grad_norm": 5.571235656738281, "learning_rate": 2.5259870375077077e-06, "loss": 0.03401577, "memory(GiB)": 13.7, "step": 72690, "train_speed(iter/s)": 1.528805 }, { "acc": 0.98163691, "epoch": 34.07311928755566, "grad_norm": 4.730032444000244, "learning_rate": 2.5253135572396097e-06, "loss": 0.04470604, "memory(GiB)": 13.7, "step": 72695, "train_speed(iter/s)": 1.52881 }, { "acc": 0.99208336, "epoch": 34.0754628544645, "grad_norm": 2.726329803466797, "learning_rate": 2.5246401364668603e-06, "loss": 0.021664, "memory(GiB)": 13.7, "step": 72700, "train_speed(iter/s)": 1.528821 }, { "acc": 0.9927083, "epoch": 34.07780642137333, "grad_norm": 0.0020498973317444324, "learning_rate": 2.5239667752056496e-06, "loss": 0.01186789, "memory(GiB)": 13.7, "step": 72705, "train_speed(iter/s)": 1.52882 }, { "acc": 0.99136906, "epoch": 34.080149988282166, "grad_norm": 0.8109520077705383, "learning_rate": 2.5232934734721603e-06, "loss": 0.02305079, "memory(GiB)": 13.7, "step": 72710, "train_speed(iter/s)": 1.528819 }, { "acc": 0.99404764, "epoch": 34.082493555191, "grad_norm": 2.1695034503936768, "learning_rate": 2.5226202312825787e-06, "loss": 0.03820233, "memory(GiB)": 13.7, "step": 72715, "train_speed(iter/s)": 1.528823 }, { "acc": 0.99145298, "epoch": 34.084837122099835, "grad_norm": 0.012713066302239895, "learning_rate": 2.521947048653085e-06, "loss": 0.02975949, "memory(GiB)": 13.7, "step": 72720, "train_speed(iter/s)": 1.528822 }, { "acc": 0.98767853, "epoch": 34.08718068900867, "grad_norm": 4.661299228668213, "learning_rate": 2.5212739255998625e-06, "loss": 0.05558175, "memory(GiB)": 13.7, "step": 72725, "train_speed(iter/s)": 1.528819 }, { "acc": 0.98719692, "epoch": 34.0895242559175, "grad_norm": 0.002909906441345811, "learning_rate": 2.5206008621390904e-06, "loss": 0.03088772, "memory(GiB)": 13.7, "step": 72730, "train_speed(iter/s)": 1.52882 }, { "acc": 0.98415184, "epoch": 34.091867822826345, "grad_norm": 2.4279215335845947, "learning_rate": 2.5199278582869445e-06, "loss": 0.05160269, "memory(GiB)": 13.7, "step": 72735, "train_speed(iter/s)": 1.528825 }, { "acc": 0.984375, "epoch": 34.09421138973518, "grad_norm": 0.29189079999923706, "learning_rate": 2.5192549140596048e-06, "loss": 0.04997699, "memory(GiB)": 13.7, "step": 72740, "train_speed(iter/s)": 1.528828 }, { "acc": 0.98999996, "epoch": 34.09655495664401, "grad_norm": 3.8470776081085205, "learning_rate": 2.5185820294732465e-06, "loss": 0.02160304, "memory(GiB)": 13.7, "step": 72745, "train_speed(iter/s)": 1.528827 }, { "acc": 0.9864583, "epoch": 34.09889852355285, "grad_norm": 4.6995038986206055, "learning_rate": 2.517909204544041e-06, "loss": 0.03447003, "memory(GiB)": 13.7, "step": 72750, "train_speed(iter/s)": 1.528826 }, { "acc": 0.98208332, "epoch": 34.10124209046168, "grad_norm": 3.4960532188415527, "learning_rate": 2.517236439288164e-06, "loss": 0.03877169, "memory(GiB)": 13.7, "step": 72755, "train_speed(iter/s)": 1.528828 }, { "acc": 0.98302078, "epoch": 34.103585657370516, "grad_norm": 5.136164665222168, "learning_rate": 2.5165637337217824e-06, "loss": 0.04639227, "memory(GiB)": 13.7, "step": 72760, "train_speed(iter/s)": 1.528834 }, { "acc": 0.9739583, "epoch": 34.10592922427935, "grad_norm": 9.362122535705566, "learning_rate": 2.5158910878610686e-06, "loss": 0.08243008, "memory(GiB)": 13.7, "step": 72765, "train_speed(iter/s)": 1.528832 }, { "acc": 0.99020834, "epoch": 34.10827279118819, "grad_norm": 2.632051467895508, "learning_rate": 2.5152185017221923e-06, "loss": 0.02686568, "memory(GiB)": 13.7, "step": 72770, "train_speed(iter/s)": 1.528838 }, { "acc": 0.98760414, "epoch": 34.110616358097026, "grad_norm": 1.9396170377731323, "learning_rate": 2.514545975321317e-06, "loss": 0.04309292, "memory(GiB)": 13.7, "step": 72775, "train_speed(iter/s)": 1.528838 }, { "acc": 0.97782593, "epoch": 34.11295992500586, "grad_norm": 4.682993412017822, "learning_rate": 2.513873508674609e-06, "loss": 0.07582906, "memory(GiB)": 13.7, "step": 72780, "train_speed(iter/s)": 1.528843 }, { "acc": 0.98291664, "epoch": 34.115303491914695, "grad_norm": 4.34452486038208, "learning_rate": 2.513201101798235e-06, "loss": 0.03683206, "memory(GiB)": 13.7, "step": 72785, "train_speed(iter/s)": 1.528852 }, { "acc": 0.99330359, "epoch": 34.11764705882353, "grad_norm": 3.0726776123046875, "learning_rate": 2.512528754708354e-06, "loss": 0.04409994, "memory(GiB)": 13.7, "step": 72790, "train_speed(iter/s)": 1.528857 }, { "acc": 0.99613094, "epoch": 34.11999062573236, "grad_norm": 0.7937313914299011, "learning_rate": 2.5118564674211276e-06, "loss": 0.01620228, "memory(GiB)": 13.7, "step": 72795, "train_speed(iter/s)": 1.528858 }, { "acc": 0.9833333, "epoch": 34.1223341926412, "grad_norm": 3.444016456604004, "learning_rate": 2.5111842399527154e-06, "loss": 0.03063645, "memory(GiB)": 13.7, "step": 72800, "train_speed(iter/s)": 1.528862 }, { "acc": 0.99333334, "epoch": 34.12467775955003, "grad_norm": 6.58677864074707, "learning_rate": 2.5105120723192777e-06, "loss": 0.06026518, "memory(GiB)": 13.7, "step": 72805, "train_speed(iter/s)": 1.528866 }, { "acc": 0.98083344, "epoch": 34.12702132645887, "grad_norm": 6.848362445831299, "learning_rate": 2.5098399645369674e-06, "loss": 0.04597142, "memory(GiB)": 13.7, "step": 72810, "train_speed(iter/s)": 1.528871 }, { "acc": 0.97833328, "epoch": 34.12936489336771, "grad_norm": 6.80588436126709, "learning_rate": 2.509167916621943e-06, "loss": 0.05523204, "memory(GiB)": 13.7, "step": 72815, "train_speed(iter/s)": 1.528869 }, { "acc": 0.98041668, "epoch": 34.13170846027654, "grad_norm": 1.2826237678527832, "learning_rate": 2.5084959285903564e-06, "loss": 0.0344917, "memory(GiB)": 13.7, "step": 72820, "train_speed(iter/s)": 1.52887 }, { "acc": 0.99437504, "epoch": 34.134052027185376, "grad_norm": 0.541463315486908, "learning_rate": 2.5078240004583627e-06, "loss": 0.01366361, "memory(GiB)": 13.7, "step": 72825, "train_speed(iter/s)": 1.528875 }, { "acc": 0.9791667, "epoch": 34.13639559409421, "grad_norm": 6.494131088256836, "learning_rate": 2.5071521322421116e-06, "loss": 0.03481691, "memory(GiB)": 13.7, "step": 72830, "train_speed(iter/s)": 1.528881 }, { "acc": 0.98467264, "epoch": 34.138739161003045, "grad_norm": 2.140734910964966, "learning_rate": 2.5064803239577494e-06, "loss": 0.06074609, "memory(GiB)": 13.7, "step": 72835, "train_speed(iter/s)": 1.528889 }, { "acc": 0.9746726, "epoch": 34.14108272791188, "grad_norm": 4.490730285644531, "learning_rate": 2.5058085756214262e-06, "loss": 0.08364804, "memory(GiB)": 13.7, "step": 72840, "train_speed(iter/s)": 1.528891 }, { "acc": 0.990625, "epoch": 34.14342629482072, "grad_norm": 0.008493941277265549, "learning_rate": 2.5051368872492924e-06, "loss": 0.05222041, "memory(GiB)": 13.7, "step": 72845, "train_speed(iter/s)": 1.528891 }, { "acc": 0.97059031, "epoch": 34.145769861729555, "grad_norm": 3.827404737472534, "learning_rate": 2.504465258857487e-06, "loss": 0.0705265, "memory(GiB)": 13.7, "step": 72850, "train_speed(iter/s)": 1.528897 }, { "acc": 0.99020824, "epoch": 34.14811342863839, "grad_norm": 1.1718047857284546, "learning_rate": 2.5037936904621575e-06, "loss": 0.04405158, "memory(GiB)": 13.7, "step": 72855, "train_speed(iter/s)": 1.528896 }, { "acc": 0.96967258, "epoch": 34.15045699554722, "grad_norm": 2.334068536758423, "learning_rate": 2.5031221820794466e-06, "loss": 0.0926658, "memory(GiB)": 13.7, "step": 72860, "train_speed(iter/s)": 1.528899 }, { "acc": 0.97761364, "epoch": 34.15280056245606, "grad_norm": 4.9943528175354, "learning_rate": 2.5024507337254914e-06, "loss": 0.04651653, "memory(GiB)": 13.7, "step": 72865, "train_speed(iter/s)": 1.528903 }, { "acc": 0.99125004, "epoch": 34.15514412936489, "grad_norm": 0.0016256214585155249, "learning_rate": 2.5017793454164372e-06, "loss": 0.02626812, "memory(GiB)": 13.7, "step": 72870, "train_speed(iter/s)": 1.528906 }, { "acc": 0.98672466, "epoch": 34.157487696273726, "grad_norm": 3.834258556365967, "learning_rate": 2.501108017168416e-06, "loss": 0.05491325, "memory(GiB)": 13.7, "step": 72875, "train_speed(iter/s)": 1.52891 }, { "acc": 0.9864583, "epoch": 34.15983126318256, "grad_norm": 3.3191959857940674, "learning_rate": 2.5004367489975695e-06, "loss": 0.04450384, "memory(GiB)": 13.7, "step": 72880, "train_speed(iter/s)": 1.528915 }, { "acc": 0.99196434, "epoch": 34.1621748300914, "grad_norm": 3.1042325496673584, "learning_rate": 2.4997655409200274e-06, "loss": 0.02561972, "memory(GiB)": 13.7, "step": 72885, "train_speed(iter/s)": 1.528915 }, { "acc": 0.97062502, "epoch": 34.164518397000236, "grad_norm": 2.2000749111175537, "learning_rate": 2.499094392951926e-06, "loss": 0.08679199, "memory(GiB)": 13.7, "step": 72890, "train_speed(iter/s)": 1.528916 }, { "acc": 0.9890625, "epoch": 34.16686196390907, "grad_norm": 0.9597015976905823, "learning_rate": 2.498423305109398e-06, "loss": 0.06815684, "memory(GiB)": 13.7, "step": 72895, "train_speed(iter/s)": 1.528915 }, { "acc": 0.99750004, "epoch": 34.169205530817905, "grad_norm": 2.876812219619751, "learning_rate": 2.4977522774085765e-06, "loss": 0.01866423, "memory(GiB)": 13.7, "step": 72900, "train_speed(iter/s)": 1.528917 }, { "acc": 0.98726196, "epoch": 34.17154909772674, "grad_norm": 1.3573716878890991, "learning_rate": 2.4970813098655847e-06, "loss": 0.04498605, "memory(GiB)": 13.7, "step": 72905, "train_speed(iter/s)": 1.528916 }, { "acc": 0.98864088, "epoch": 34.173892664635574, "grad_norm": 3.850813388824463, "learning_rate": 2.4964104024965546e-06, "loss": 0.05918596, "memory(GiB)": 13.7, "step": 72910, "train_speed(iter/s)": 1.528924 }, { "acc": 0.996875, "epoch": 34.17623623154441, "grad_norm": 1.8342498540878296, "learning_rate": 2.495739555317614e-06, "loss": 0.02082096, "memory(GiB)": 13.7, "step": 72915, "train_speed(iter/s)": 1.528922 }, { "acc": 0.98249998, "epoch": 34.17857979845325, "grad_norm": 3.0972795486450195, "learning_rate": 2.4950687683448854e-06, "loss": 0.03736862, "memory(GiB)": 13.7, "step": 72920, "train_speed(iter/s)": 1.52892 }, { "acc": 0.97680807, "epoch": 34.180923365362084, "grad_norm": 4.73239278793335, "learning_rate": 2.494398041594491e-06, "loss": 0.05598423, "memory(GiB)": 13.7, "step": 72925, "train_speed(iter/s)": 1.528923 }, { "acc": 0.98166676, "epoch": 34.18326693227092, "grad_norm": 0.016514809802174568, "learning_rate": 2.4937273750825546e-06, "loss": 0.03443857, "memory(GiB)": 13.7, "step": 72930, "train_speed(iter/s)": 1.528926 }, { "acc": 0.9791666, "epoch": 34.18561049917975, "grad_norm": 4.050563335418701, "learning_rate": 2.493056768825198e-06, "loss": 0.06658396, "memory(GiB)": 13.7, "step": 72935, "train_speed(iter/s)": 1.52892 }, { "acc": 0.98187504, "epoch": 34.18795406608859, "grad_norm": 2.3838043212890625, "learning_rate": 2.4923862228385375e-06, "loss": 0.06815125, "memory(GiB)": 13.7, "step": 72940, "train_speed(iter/s)": 1.528923 }, { "acc": 0.97937498, "epoch": 34.19029763299742, "grad_norm": 1.857927918434143, "learning_rate": 2.491715737138692e-06, "loss": 0.0461848, "memory(GiB)": 13.7, "step": 72945, "train_speed(iter/s)": 1.528926 }, { "acc": 0.96963539, "epoch": 34.192641199906255, "grad_norm": 2.4279186725616455, "learning_rate": 2.4910453117417807e-06, "loss": 0.06630052, "memory(GiB)": 13.7, "step": 72950, "train_speed(iter/s)": 1.528925 }, { "acc": 0.996875, "epoch": 34.19498476681509, "grad_norm": 0.0015493771061301231, "learning_rate": 2.4903749466639157e-06, "loss": 0.04142853, "memory(GiB)": 13.7, "step": 72955, "train_speed(iter/s)": 1.528925 }, { "acc": 0.9864584, "epoch": 34.19732833372393, "grad_norm": 3.3786230087280273, "learning_rate": 2.4897046419212087e-06, "loss": 0.04983039, "memory(GiB)": 13.7, "step": 72960, "train_speed(iter/s)": 1.528926 }, { "acc": 0.98104172, "epoch": 34.199671900632765, "grad_norm": 5.023159503936768, "learning_rate": 2.4890343975297733e-06, "loss": 0.06236989, "memory(GiB)": 13.7, "step": 72965, "train_speed(iter/s)": 1.52893 }, { "acc": 0.99508934, "epoch": 34.2020154675416, "grad_norm": 1.8378771543502808, "learning_rate": 2.4883642135057205e-06, "loss": 0.03240721, "memory(GiB)": 13.7, "step": 72970, "train_speed(iter/s)": 1.528928 }, { "acc": 0.99011593, "epoch": 34.204359034450434, "grad_norm": 2.9165217876434326, "learning_rate": 2.4876940898651607e-06, "loss": 0.04692773, "memory(GiB)": 13.7, "step": 72975, "train_speed(iter/s)": 1.528935 }, { "acc": 0.99375, "epoch": 34.20670260135927, "grad_norm": 1.7600857019424438, "learning_rate": 2.487024026624198e-06, "loss": 0.01522508, "memory(GiB)": 13.7, "step": 72980, "train_speed(iter/s)": 1.528936 }, { "acc": 0.96803026, "epoch": 34.2090461682681, "grad_norm": 2.991539716720581, "learning_rate": 2.4863540237989403e-06, "loss": 0.09528062, "memory(GiB)": 13.7, "step": 72985, "train_speed(iter/s)": 1.52894 }, { "acc": 0.99125004, "epoch": 34.21138973517694, "grad_norm": 4.3507609367370605, "learning_rate": 2.485684081405494e-06, "loss": 0.0226012, "memory(GiB)": 13.7, "step": 72990, "train_speed(iter/s)": 1.528944 }, { "acc": 0.98731833, "epoch": 34.21373330208577, "grad_norm": 3.2959461212158203, "learning_rate": 2.4850141994599604e-06, "loss": 0.05589559, "memory(GiB)": 13.7, "step": 72995, "train_speed(iter/s)": 1.528947 }, { "acc": 0.98946428, "epoch": 34.21607686899461, "grad_norm": 3.1200053691864014, "learning_rate": 2.48434437797844e-06, "loss": 0.03749486, "memory(GiB)": 13.7, "step": 73000, "train_speed(iter/s)": 1.528944 }, { "acc": 0.98653851, "epoch": 34.21842043590345, "grad_norm": 4.954666614532471, "learning_rate": 2.4836746169770346e-06, "loss": 0.03584459, "memory(GiB)": 13.7, "step": 73005, "train_speed(iter/s)": 1.528943 }, { "acc": 0.97307539, "epoch": 34.22076400281228, "grad_norm": 2.871152639389038, "learning_rate": 2.483004916471844e-06, "loss": 0.0575197, "memory(GiB)": 13.7, "step": 73010, "train_speed(iter/s)": 1.528944 }, { "acc": 0.98652782, "epoch": 34.223107569721115, "grad_norm": 2.0251541137695312, "learning_rate": 2.4823352764789637e-06, "loss": 0.0631687, "memory(GiB)": 13.7, "step": 73015, "train_speed(iter/s)": 1.528943 }, { "acc": 0.9958334, "epoch": 34.22545113662995, "grad_norm": 5.947258472442627, "learning_rate": 2.4816656970144896e-06, "loss": 0.05062319, "memory(GiB)": 13.7, "step": 73020, "train_speed(iter/s)": 1.528946 }, { "acc": 0.98604164, "epoch": 34.227794703538784, "grad_norm": 6.594733238220215, "learning_rate": 2.48099617809452e-06, "loss": 0.04482971, "memory(GiB)": 13.7, "step": 73025, "train_speed(iter/s)": 1.528949 }, { "acc": 0.99375, "epoch": 34.23013827044762, "grad_norm": 3.560917377471924, "learning_rate": 2.4803267197351423e-06, "loss": 0.01547693, "memory(GiB)": 13.7, "step": 73030, "train_speed(iter/s)": 1.528954 }, { "acc": 0.97770824, "epoch": 34.23248183735646, "grad_norm": 0.0008005138952285051, "learning_rate": 2.4796573219524515e-06, "loss": 0.06659095, "memory(GiB)": 13.7, "step": 73035, "train_speed(iter/s)": 1.528948 }, { "acc": 0.98708334, "epoch": 34.234825404265294, "grad_norm": 4.395784854888916, "learning_rate": 2.478987984762539e-06, "loss": 0.03507063, "memory(GiB)": 13.7, "step": 73040, "train_speed(iter/s)": 1.528951 }, { "acc": 0.98611116, "epoch": 34.23716897117413, "grad_norm": 4.966031551361084, "learning_rate": 2.4783187081814892e-06, "loss": 0.03608296, "memory(GiB)": 13.7, "step": 73045, "train_speed(iter/s)": 1.528954 }, { "acc": 0.97032738, "epoch": 34.23951253808296, "grad_norm": 3.006295919418335, "learning_rate": 2.477649492225394e-06, "loss": 0.04982033, "memory(GiB)": 13.7, "step": 73050, "train_speed(iter/s)": 1.528963 }, { "acc": 0.97741566, "epoch": 34.2418561049918, "grad_norm": 4.530633449554443, "learning_rate": 2.4769803369103347e-06, "loss": 0.05805652, "memory(GiB)": 13.7, "step": 73055, "train_speed(iter/s)": 1.528961 }, { "acc": 0.99278851, "epoch": 34.24419967190063, "grad_norm": 5.372644424438477, "learning_rate": 2.476311242252398e-06, "loss": 0.02930709, "memory(GiB)": 13.7, "step": 73060, "train_speed(iter/s)": 1.528965 }, { "acc": 0.98666668, "epoch": 34.246543238809465, "grad_norm": 1.0068403482437134, "learning_rate": 2.4756422082676678e-06, "loss": 0.05025315, "memory(GiB)": 13.7, "step": 73065, "train_speed(iter/s)": 1.528969 }, { "acc": 0.98758926, "epoch": 34.2488868057183, "grad_norm": 4.023874282836914, "learning_rate": 2.474973234972222e-06, "loss": 0.03302709, "memory(GiB)": 13.7, "step": 73070, "train_speed(iter/s)": 1.528973 }, { "acc": 0.97989578, "epoch": 34.25123037262714, "grad_norm": 2.8589863777160645, "learning_rate": 2.4743043223821427e-06, "loss": 0.03995992, "memory(GiB)": 13.7, "step": 73075, "train_speed(iter/s)": 1.528979 }, { "acc": 0.99386368, "epoch": 34.253573939535976, "grad_norm": 0.0014743588399142027, "learning_rate": 2.473635470513511e-06, "loss": 0.00948493, "memory(GiB)": 13.7, "step": 73080, "train_speed(iter/s)": 1.528979 }, { "acc": 0.98237171, "epoch": 34.25591750644481, "grad_norm": 4.274518013000488, "learning_rate": 2.4729666793824005e-06, "loss": 0.05461936, "memory(GiB)": 13.7, "step": 73085, "train_speed(iter/s)": 1.528986 }, { "acc": 0.97945518, "epoch": 34.258261073353644, "grad_norm": 0.12016848474740982, "learning_rate": 2.4722979490048854e-06, "loss": 0.03682169, "memory(GiB)": 13.7, "step": 73090, "train_speed(iter/s)": 1.528987 }, { "acc": 0.98729172, "epoch": 34.26060464026248, "grad_norm": 0.01391443982720375, "learning_rate": 2.4716292793970427e-06, "loss": 0.02260822, "memory(GiB)": 13.7, "step": 73095, "train_speed(iter/s)": 1.528991 }, { "acc": 0.99177074, "epoch": 34.26294820717131, "grad_norm": 2.9120757579803467, "learning_rate": 2.470960670574945e-06, "loss": 0.04461647, "memory(GiB)": 13.7, "step": 73100, "train_speed(iter/s)": 1.528988 }, { "acc": 0.98395834, "epoch": 34.26529177408015, "grad_norm": 5.96763277053833, "learning_rate": 2.470292122554661e-06, "loss": 0.03978867, "memory(GiB)": 13.7, "step": 73105, "train_speed(iter/s)": 1.52899 }, { "acc": 0.98766575, "epoch": 34.26763534098899, "grad_norm": 1.6699177026748657, "learning_rate": 2.4696236353522627e-06, "loss": 0.03675577, "memory(GiB)": 13.7, "step": 73110, "train_speed(iter/s)": 1.528995 }, { "acc": 0.97842264, "epoch": 34.26997890789782, "grad_norm": 1.7010670900344849, "learning_rate": 2.468955208983817e-06, "loss": 0.04856881, "memory(GiB)": 13.7, "step": 73115, "train_speed(iter/s)": 1.529004 }, { "acc": 0.99236107, "epoch": 34.27232247480666, "grad_norm": 3.2260799407958984, "learning_rate": 2.468286843465393e-06, "loss": 0.02476451, "memory(GiB)": 13.7, "step": 73120, "train_speed(iter/s)": 1.529005 }, { "acc": 0.97828369, "epoch": 34.27466604171549, "grad_norm": 3.604149580001831, "learning_rate": 2.467618538813055e-06, "loss": 0.03858004, "memory(GiB)": 13.7, "step": 73125, "train_speed(iter/s)": 1.52901 }, { "acc": 0.97613087, "epoch": 34.277009608624326, "grad_norm": 1.0147773027420044, "learning_rate": 2.4669502950428637e-06, "loss": 0.06195596, "memory(GiB)": 13.7, "step": 73130, "train_speed(iter/s)": 1.529003 }, { "acc": 0.97696428, "epoch": 34.27935317553316, "grad_norm": 4.859468936920166, "learning_rate": 2.4662821121708846e-06, "loss": 0.0518468, "memory(GiB)": 13.7, "step": 73135, "train_speed(iter/s)": 1.529005 }, { "acc": 0.98729172, "epoch": 34.281696742441994, "grad_norm": 0.012707590125501156, "learning_rate": 2.465613990213179e-06, "loss": 0.05095459, "memory(GiB)": 13.7, "step": 73140, "train_speed(iter/s)": 1.529014 }, { "acc": 0.99154758, "epoch": 34.28404030935083, "grad_norm": 1.0873650312423706, "learning_rate": 2.4649459291858043e-06, "loss": 0.05332539, "memory(GiB)": 13.7, "step": 73145, "train_speed(iter/s)": 1.529017 }, { "acc": 0.98354168, "epoch": 34.28638387625967, "grad_norm": 2.32473087310791, "learning_rate": 2.464277929104819e-06, "loss": 0.03303905, "memory(GiB)": 13.7, "step": 73150, "train_speed(iter/s)": 1.52902 }, { "acc": 0.98416672, "epoch": 34.288727443168504, "grad_norm": 5.576160430908203, "learning_rate": 2.463609989986282e-06, "loss": 0.05532687, "memory(GiB)": 13.7, "step": 73155, "train_speed(iter/s)": 1.529023 }, { "acc": 0.97842255, "epoch": 34.29107101007734, "grad_norm": 4.723426342010498, "learning_rate": 2.4629421118462445e-06, "loss": 0.05428612, "memory(GiB)": 13.7, "step": 73160, "train_speed(iter/s)": 1.529026 }, { "acc": 0.99022579, "epoch": 34.29341457698617, "grad_norm": 0.7002173066139221, "learning_rate": 2.4622742947007646e-06, "loss": 0.02974609, "memory(GiB)": 13.7, "step": 73165, "train_speed(iter/s)": 1.529033 }, { "acc": 0.98953533, "epoch": 34.29575814389501, "grad_norm": 2.244353771209717, "learning_rate": 2.4616065385658897e-06, "loss": 0.05830677, "memory(GiB)": 13.7, "step": 73170, "train_speed(iter/s)": 1.529039 }, { "acc": 0.9875, "epoch": 34.29810171080384, "grad_norm": 1.8795028924942017, "learning_rate": 2.4609388434576727e-06, "loss": 0.04207183, "memory(GiB)": 13.7, "step": 73175, "train_speed(iter/s)": 1.529043 }, { "acc": 0.9864584, "epoch": 34.300445277712676, "grad_norm": 4.797652244567871, "learning_rate": 2.460271209392165e-06, "loss": 0.06332113, "memory(GiB)": 13.7, "step": 73180, "train_speed(iter/s)": 1.529044 }, { "acc": 0.9947916, "epoch": 34.30278884462152, "grad_norm": 1.5944911241531372, "learning_rate": 2.45960363638541e-06, "loss": 0.0223583, "memory(GiB)": 13.7, "step": 73185, "train_speed(iter/s)": 1.529044 }, { "acc": 0.98458328, "epoch": 34.30513241153035, "grad_norm": 3.2748496532440186, "learning_rate": 2.4589361244534557e-06, "loss": 0.03946416, "memory(GiB)": 13.7, "step": 73190, "train_speed(iter/s)": 1.529045 }, { "acc": 0.9791667, "epoch": 34.307475978439186, "grad_norm": 5.471600532531738, "learning_rate": 2.4582686736123493e-06, "loss": 0.05691352, "memory(GiB)": 13.7, "step": 73195, "train_speed(iter/s)": 1.529044 }, { "acc": 0.99323864, "epoch": 34.30981954534802, "grad_norm": 2.342087745666504, "learning_rate": 2.4576012838781306e-06, "loss": 0.02582437, "memory(GiB)": 13.7, "step": 73200, "train_speed(iter/s)": 1.529048 }, { "acc": 0.98639889, "epoch": 34.312163112256854, "grad_norm": 3.701937675476074, "learning_rate": 2.4569339552668448e-06, "loss": 0.05926613, "memory(GiB)": 13.7, "step": 73205, "train_speed(iter/s)": 1.529052 }, { "acc": 0.98319445, "epoch": 34.31450667916569, "grad_norm": 1.7488224506378174, "learning_rate": 2.456266687794528e-06, "loss": 0.07146019, "memory(GiB)": 13.7, "step": 73210, "train_speed(iter/s)": 1.52906 }, { "acc": 0.95747032, "epoch": 34.31685024607452, "grad_norm": 9.396308898925781, "learning_rate": 2.4555994814772243e-06, "loss": 0.08623707, "memory(GiB)": 13.7, "step": 73215, "train_speed(iter/s)": 1.529063 }, { "acc": 0.97895832, "epoch": 34.31919381298336, "grad_norm": 3.382174015045166, "learning_rate": 2.454932336330966e-06, "loss": 0.03464695, "memory(GiB)": 13.7, "step": 73220, "train_speed(iter/s)": 1.529065 }, { "acc": 0.98729172, "epoch": 34.3215373798922, "grad_norm": 3.574648857116699, "learning_rate": 2.4542652523717917e-06, "loss": 0.03658238, "memory(GiB)": 13.7, "step": 73225, "train_speed(iter/s)": 1.529068 }, { "acc": 0.99565477, "epoch": 34.32388094680103, "grad_norm": 0.12077105790376663, "learning_rate": 2.4535982296157374e-06, "loss": 0.04821061, "memory(GiB)": 13.7, "step": 73230, "train_speed(iter/s)": 1.529072 }, { "acc": 0.99291668, "epoch": 34.32622451370987, "grad_norm": 1.8043841123580933, "learning_rate": 2.4529312680788327e-06, "loss": 0.04514193, "memory(GiB)": 13.7, "step": 73235, "train_speed(iter/s)": 1.529078 }, { "acc": 0.97458334, "epoch": 34.3285680806187, "grad_norm": 3.282097816467285, "learning_rate": 2.4522643677771106e-06, "loss": 0.09584866, "memory(GiB)": 13.7, "step": 73240, "train_speed(iter/s)": 1.529084 }, { "acc": 0.97951393, "epoch": 34.330911647527536, "grad_norm": 1.6981480121612549, "learning_rate": 2.4515975287266013e-06, "loss": 0.05903342, "memory(GiB)": 13.7, "step": 73245, "train_speed(iter/s)": 1.529089 }, { "acc": 0.98767853, "epoch": 34.33325521443637, "grad_norm": 1.8186320066452026, "learning_rate": 2.450930750943336e-06, "loss": 0.03004892, "memory(GiB)": 13.7, "step": 73250, "train_speed(iter/s)": 1.529089 }, { "acc": 0.98687496, "epoch": 34.335598781345205, "grad_norm": 3.242964744567871, "learning_rate": 2.45026403444334e-06, "loss": 0.02731374, "memory(GiB)": 13.7, "step": 73255, "train_speed(iter/s)": 1.529089 }, { "acc": 0.98500004, "epoch": 34.337942348254046, "grad_norm": 3.499807596206665, "learning_rate": 2.4495973792426357e-06, "loss": 0.03094569, "memory(GiB)": 13.7, "step": 73260, "train_speed(iter/s)": 1.529093 }, { "acc": 0.978125, "epoch": 34.34028591516288, "grad_norm": 3.357071876525879, "learning_rate": 2.4489307853572507e-06, "loss": 0.0508891, "memory(GiB)": 13.7, "step": 73265, "train_speed(iter/s)": 1.529097 }, { "acc": 0.99027777, "epoch": 34.342629482071715, "grad_norm": 3.162132978439331, "learning_rate": 2.448264252803208e-06, "loss": 0.05325804, "memory(GiB)": 13.7, "step": 73270, "train_speed(iter/s)": 1.529101 }, { "acc": 0.98363094, "epoch": 34.34497304898055, "grad_norm": 4.21467399597168, "learning_rate": 2.4475977815965267e-06, "loss": 0.03813522, "memory(GiB)": 13.7, "step": 73275, "train_speed(iter/s)": 1.529103 }, { "acc": 0.9874053, "epoch": 34.34731661588938, "grad_norm": 4.049099922180176, "learning_rate": 2.4469313717532277e-06, "loss": 0.05349114, "memory(GiB)": 13.7, "step": 73280, "train_speed(iter/s)": 1.529105 }, { "acc": 0.98395834, "epoch": 34.34966018279822, "grad_norm": 5.840653896331787, "learning_rate": 2.4462650232893313e-06, "loss": 0.03239101, "memory(GiB)": 13.7, "step": 73285, "train_speed(iter/s)": 1.529109 }, { "acc": 0.98395844, "epoch": 34.35200374970705, "grad_norm": 1.8758200407028198, "learning_rate": 2.4455987362208524e-06, "loss": 0.04253473, "memory(GiB)": 13.7, "step": 73290, "train_speed(iter/s)": 1.529116 }, { "acc": 0.98874998, "epoch": 34.354347316615886, "grad_norm": 3.6955759525299072, "learning_rate": 2.4449325105638043e-06, "loss": 0.02527236, "memory(GiB)": 13.7, "step": 73295, "train_speed(iter/s)": 1.529114 }, { "acc": 0.97539139, "epoch": 34.35669088352473, "grad_norm": 2.65374755859375, "learning_rate": 2.444266346334203e-06, "loss": 0.0492326, "memory(GiB)": 13.7, "step": 73300, "train_speed(iter/s)": 1.529117 }, { "acc": 0.98080807, "epoch": 34.35903445043356, "grad_norm": 2.103445291519165, "learning_rate": 2.4436002435480633e-06, "loss": 0.09031659, "memory(GiB)": 13.7, "step": 73305, "train_speed(iter/s)": 1.529122 }, { "acc": 0.97437506, "epoch": 34.361378017342396, "grad_norm": 2.989922523498535, "learning_rate": 2.4429342022213912e-06, "loss": 0.05470504, "memory(GiB)": 13.7, "step": 73310, "train_speed(iter/s)": 1.529124 }, { "acc": 0.97331562, "epoch": 34.36372158425123, "grad_norm": 3.9067602157592773, "learning_rate": 2.4422682223701987e-06, "loss": 0.06762545, "memory(GiB)": 13.7, "step": 73315, "train_speed(iter/s)": 1.529121 }, { "acc": 0.98395834, "epoch": 34.366065151160065, "grad_norm": 8.390849113464355, "learning_rate": 2.441602304010494e-06, "loss": 0.03534975, "memory(GiB)": 13.7, "step": 73320, "train_speed(iter/s)": 1.529127 }, { "acc": 0.97416668, "epoch": 34.3684087180689, "grad_norm": 2.181164264678955, "learning_rate": 2.440936447158285e-06, "loss": 0.06196938, "memory(GiB)": 13.7, "step": 73325, "train_speed(iter/s)": 1.529128 }, { "acc": 0.9833334, "epoch": 34.37075228497773, "grad_norm": 5.53234338760376, "learning_rate": 2.4402706518295753e-06, "loss": 0.02998256, "memory(GiB)": 13.7, "step": 73330, "train_speed(iter/s)": 1.529126 }, { "acc": 0.97383928, "epoch": 34.373095851886575, "grad_norm": 2.1036436557769775, "learning_rate": 2.439604918040365e-06, "loss": 0.05287844, "memory(GiB)": 13.7, "step": 73335, "train_speed(iter/s)": 1.529134 }, { "acc": 0.97327385, "epoch": 34.37543941879541, "grad_norm": 7.044703006744385, "learning_rate": 2.438939245806661e-06, "loss": 0.04757224, "memory(GiB)": 13.7, "step": 73340, "train_speed(iter/s)": 1.52914 }, { "acc": 0.98562508, "epoch": 34.37778298570424, "grad_norm": 4.567506313323975, "learning_rate": 2.438273635144463e-06, "loss": 0.04913701, "memory(GiB)": 13.7, "step": 73345, "train_speed(iter/s)": 1.529146 }, { "acc": 0.98666668, "epoch": 34.38012655261308, "grad_norm": 0.0037307823076844215, "learning_rate": 2.4376080860697672e-06, "loss": 0.04261262, "memory(GiB)": 13.7, "step": 73350, "train_speed(iter/s)": 1.529149 }, { "acc": 0.97677078, "epoch": 34.38247011952191, "grad_norm": 3.5071001052856445, "learning_rate": 2.436942598598574e-06, "loss": 0.05211478, "memory(GiB)": 13.7, "step": 73355, "train_speed(iter/s)": 1.529147 }, { "acc": 0.98395834, "epoch": 34.384813686430746, "grad_norm": 3.2886903285980225, "learning_rate": 2.43627717274688e-06, "loss": 0.05445055, "memory(GiB)": 13.7, "step": 73360, "train_speed(iter/s)": 1.529147 }, { "acc": 0.98562498, "epoch": 34.38715725333958, "grad_norm": 3.538750648498535, "learning_rate": 2.4356118085306772e-06, "loss": 0.02571515, "memory(GiB)": 13.7, "step": 73365, "train_speed(iter/s)": 1.529151 }, { "acc": 0.99068184, "epoch": 34.389500820248415, "grad_norm": 3.5005040168762207, "learning_rate": 2.4349465059659604e-06, "loss": 0.02666959, "memory(GiB)": 13.7, "step": 73370, "train_speed(iter/s)": 1.529157 }, { "acc": 0.9854166, "epoch": 34.391844387157256, "grad_norm": 2.66044282913208, "learning_rate": 2.4342812650687226e-06, "loss": 0.03028269, "memory(GiB)": 13.7, "step": 73375, "train_speed(iter/s)": 1.529165 }, { "acc": 0.99125004, "epoch": 34.39418795406609, "grad_norm": 3.578537940979004, "learning_rate": 2.433616085854952e-06, "loss": 0.02908885, "memory(GiB)": 13.7, "step": 73380, "train_speed(iter/s)": 1.529165 }, { "acc": 0.98339863, "epoch": 34.396531520974925, "grad_norm": 3.9064109325408936, "learning_rate": 2.4329509683406363e-06, "loss": 0.05389068, "memory(GiB)": 13.7, "step": 73385, "train_speed(iter/s)": 1.52917 }, { "acc": 0.97101192, "epoch": 34.39887508788376, "grad_norm": 2.1202800273895264, "learning_rate": 2.432285912541765e-06, "loss": 0.07608019, "memory(GiB)": 13.7, "step": 73390, "train_speed(iter/s)": 1.529175 }, { "acc": 0.99613972, "epoch": 34.401218654792594, "grad_norm": 2.461421251296997, "learning_rate": 2.4316209184743224e-06, "loss": 0.01807657, "memory(GiB)": 13.7, "step": 73395, "train_speed(iter/s)": 1.529179 }, { "acc": 0.99375, "epoch": 34.40356222170143, "grad_norm": 0.041146520525217056, "learning_rate": 2.430955986154296e-06, "loss": 0.01167488, "memory(GiB)": 13.7, "step": 73400, "train_speed(iter/s)": 1.52918 }, { "acc": 0.97416668, "epoch": 34.40590578861026, "grad_norm": 4.461732864379883, "learning_rate": 2.4302911155976634e-06, "loss": 0.04713434, "memory(GiB)": 13.7, "step": 73405, "train_speed(iter/s)": 1.52918 }, { "acc": 0.98973217, "epoch": 34.408249355519104, "grad_norm": 2.390584707260132, "learning_rate": 2.429626306820409e-06, "loss": 0.03325091, "memory(GiB)": 13.7, "step": 73410, "train_speed(iter/s)": 1.529185 }, { "acc": 0.98432541, "epoch": 34.41059292242794, "grad_norm": 4.9791998863220215, "learning_rate": 2.428961559838514e-06, "loss": 0.04844109, "memory(GiB)": 13.7, "step": 73415, "train_speed(iter/s)": 1.529192 }, { "acc": 0.98162775, "epoch": 34.41293648933677, "grad_norm": 4.981422424316406, "learning_rate": 2.4282968746679545e-06, "loss": 0.04505423, "memory(GiB)": 13.7, "step": 73420, "train_speed(iter/s)": 1.529197 }, { "acc": 0.97633934, "epoch": 34.41528005624561, "grad_norm": 1.716984748840332, "learning_rate": 2.4276322513247067e-06, "loss": 0.08867269, "memory(GiB)": 13.7, "step": 73425, "train_speed(iter/s)": 1.529205 }, { "acc": 0.97552662, "epoch": 34.41762362315444, "grad_norm": 4.348898887634277, "learning_rate": 2.4269676898247473e-06, "loss": 0.06254029, "memory(GiB)": 13.7, "step": 73430, "train_speed(iter/s)": 1.529208 }, { "acc": 0.98567543, "epoch": 34.419967190063275, "grad_norm": 2.3393607139587402, "learning_rate": 2.426303190184051e-06, "loss": 0.05150536, "memory(GiB)": 13.7, "step": 73435, "train_speed(iter/s)": 1.52921 }, { "acc": 0.98125, "epoch": 34.42231075697211, "grad_norm": 4.119168758392334, "learning_rate": 2.425638752418587e-06, "loss": 0.05594614, "memory(GiB)": 13.7, "step": 73440, "train_speed(iter/s)": 1.529211 }, { "acc": 0.9973011, "epoch": 34.424654323880944, "grad_norm": 0.008074495010077953, "learning_rate": 2.424974376544329e-06, "loss": 0.01446234, "memory(GiB)": 13.7, "step": 73445, "train_speed(iter/s)": 1.529211 }, { "acc": 0.97416668, "epoch": 34.426997890789785, "grad_norm": 6.388306617736816, "learning_rate": 2.4243100625772475e-06, "loss": 0.06615872, "memory(GiB)": 13.7, "step": 73450, "train_speed(iter/s)": 1.529216 }, { "acc": 0.9854167, "epoch": 34.42934145769862, "grad_norm": 7.120789527893066, "learning_rate": 2.4236458105333067e-06, "loss": 0.03511357, "memory(GiB)": 13.7, "step": 73455, "train_speed(iter/s)": 1.529222 }, { "acc": 0.9786459, "epoch": 34.431685024607454, "grad_norm": 5.682432174682617, "learning_rate": 2.422981620428477e-06, "loss": 0.07205003, "memory(GiB)": 13.7, "step": 73460, "train_speed(iter/s)": 1.529223 }, { "acc": 0.98770294, "epoch": 34.43402859151629, "grad_norm": 2.2308106422424316, "learning_rate": 2.422317492278719e-06, "loss": 0.02867689, "memory(GiB)": 13.7, "step": 73465, "train_speed(iter/s)": 1.529225 }, { "acc": 0.98767548, "epoch": 34.43637215842512, "grad_norm": 1.797696590423584, "learning_rate": 2.421653426099999e-06, "loss": 0.0306399, "memory(GiB)": 13.7, "step": 73470, "train_speed(iter/s)": 1.529224 }, { "acc": 0.98287964, "epoch": 34.43871572533396, "grad_norm": 7.454219818115234, "learning_rate": 2.4209894219082805e-06, "loss": 0.0761269, "memory(GiB)": 13.7, "step": 73475, "train_speed(iter/s)": 1.529223 }, { "acc": 1.0, "epoch": 34.44105929224279, "grad_norm": 0.26659485697746277, "learning_rate": 2.42032547971952e-06, "loss": 0.01636968, "memory(GiB)": 13.7, "step": 73480, "train_speed(iter/s)": 1.529228 }, { "acc": 0.97692709, "epoch": 34.44340285915163, "grad_norm": 3.2033753395080566, "learning_rate": 2.4196615995496774e-06, "loss": 0.06008846, "memory(GiB)": 13.7, "step": 73485, "train_speed(iter/s)": 1.52923 }, { "acc": 0.98128719, "epoch": 34.44574642606047, "grad_norm": 4.045220375061035, "learning_rate": 2.418997781414714e-06, "loss": 0.07230175, "memory(GiB)": 13.7, "step": 73490, "train_speed(iter/s)": 1.529233 }, { "acc": 0.9885417, "epoch": 34.4480899929693, "grad_norm": 0.8646719455718994, "learning_rate": 2.4183340253305807e-06, "loss": 0.01867507, "memory(GiB)": 13.7, "step": 73495, "train_speed(iter/s)": 1.529237 }, { "acc": 0.9927084, "epoch": 34.450433559878135, "grad_norm": 4.758116722106934, "learning_rate": 2.417670331313237e-06, "loss": 0.02952756, "memory(GiB)": 13.7, "step": 73500, "train_speed(iter/s)": 1.529241 }, { "acc": 0.98041668, "epoch": 34.45277712678697, "grad_norm": 3.540728807449341, "learning_rate": 2.417006699378631e-06, "loss": 0.03915498, "memory(GiB)": 13.7, "step": 73505, "train_speed(iter/s)": 1.529243 }, { "acc": 0.98604164, "epoch": 34.455120693695804, "grad_norm": 4.174478054046631, "learning_rate": 2.4163431295427183e-06, "loss": 0.04997608, "memory(GiB)": 13.7, "step": 73510, "train_speed(iter/s)": 1.529244 }, { "acc": 0.9802084, "epoch": 34.45746426060464, "grad_norm": 7.653597354888916, "learning_rate": 2.4156796218214454e-06, "loss": 0.0907744, "memory(GiB)": 13.7, "step": 73515, "train_speed(iter/s)": 1.529251 }, { "acc": 1.0, "epoch": 34.45980782751347, "grad_norm": 4.545604228973389, "learning_rate": 2.4150161762307627e-06, "loss": 0.04670014, "memory(GiB)": 13.7, "step": 73520, "train_speed(iter/s)": 1.529251 }, { "acc": 0.98187504, "epoch": 34.462151394422314, "grad_norm": 2.3243906497955322, "learning_rate": 2.4143527927866187e-06, "loss": 0.05714383, "memory(GiB)": 13.7, "step": 73525, "train_speed(iter/s)": 1.52925 }, { "acc": 0.9859046, "epoch": 34.46449496133115, "grad_norm": 3.4746475219726562, "learning_rate": 2.413689471504956e-06, "loss": 0.03892515, "memory(GiB)": 13.7, "step": 73530, "train_speed(iter/s)": 1.529254 }, { "acc": 0.98529758, "epoch": 34.46683852823998, "grad_norm": 4.180680274963379, "learning_rate": 2.4130262124017196e-06, "loss": 0.03152267, "memory(GiB)": 13.7, "step": 73535, "train_speed(iter/s)": 1.529257 }, { "acc": 0.98979168, "epoch": 34.46918209514882, "grad_norm": 0.09770729392766953, "learning_rate": 2.4123630154928548e-06, "loss": 0.02435892, "memory(GiB)": 13.7, "step": 73540, "train_speed(iter/s)": 1.529261 }, { "acc": 0.98571434, "epoch": 34.47152566205765, "grad_norm": 2.7210850715637207, "learning_rate": 2.411699880794298e-06, "loss": 0.03260769, "memory(GiB)": 13.7, "step": 73545, "train_speed(iter/s)": 1.529259 }, { "acc": 0.99508934, "epoch": 34.473869228966485, "grad_norm": 0.0034617262426763773, "learning_rate": 2.411036808321994e-06, "loss": 0.03779834, "memory(GiB)": 13.7, "step": 73550, "train_speed(iter/s)": 1.529263 }, { "acc": 0.9770833, "epoch": 34.47621279587532, "grad_norm": 5.099614143371582, "learning_rate": 2.410373798091875e-06, "loss": 0.07240838, "memory(GiB)": 13.7, "step": 73555, "train_speed(iter/s)": 1.529268 }, { "acc": 0.9791667, "epoch": 34.478556362784154, "grad_norm": 1.697122573852539, "learning_rate": 2.4097108501198816e-06, "loss": 0.03400088, "memory(GiB)": 13.7, "step": 73560, "train_speed(iter/s)": 1.529274 }, { "acc": 0.98277779, "epoch": 34.480899929692995, "grad_norm": 0.9279530048370361, "learning_rate": 2.4090479644219495e-06, "loss": 0.04780536, "memory(GiB)": 13.7, "step": 73565, "train_speed(iter/s)": 1.529272 }, { "acc": 0.96878719, "epoch": 34.48324349660183, "grad_norm": 6.064663410186768, "learning_rate": 2.408385141014009e-06, "loss": 0.07597635, "memory(GiB)": 13.7, "step": 73570, "train_speed(iter/s)": 1.529267 }, { "acc": 0.97002983, "epoch": 34.485587063510664, "grad_norm": 3.26424241065979, "learning_rate": 2.4077223799119943e-06, "loss": 0.06426547, "memory(GiB)": 13.7, "step": 73575, "train_speed(iter/s)": 1.529267 }, { "acc": 0.98187504, "epoch": 34.4879306304195, "grad_norm": 5.595219612121582, "learning_rate": 2.4070596811318364e-06, "loss": 0.02970031, "memory(GiB)": 13.7, "step": 73580, "train_speed(iter/s)": 1.529268 }, { "acc": 0.990625, "epoch": 34.49027419732833, "grad_norm": 0.30956971645355225, "learning_rate": 2.4063970446894647e-06, "loss": 0.01680999, "memory(GiB)": 13.7, "step": 73585, "train_speed(iter/s)": 1.529272 }, { "acc": 0.97383928, "epoch": 34.49261776423717, "grad_norm": 5.753934860229492, "learning_rate": 2.405734470600803e-06, "loss": 0.05870553, "memory(GiB)": 13.7, "step": 73590, "train_speed(iter/s)": 1.52927 }, { "acc": 0.98784723, "epoch": 34.494961331146, "grad_norm": 2.6535072326660156, "learning_rate": 2.405071958881781e-06, "loss": 0.03911826, "memory(GiB)": 13.7, "step": 73595, "train_speed(iter/s)": 1.52927 }, { "acc": 0.99541664, "epoch": 34.49730489805484, "grad_norm": 2.208371877670288, "learning_rate": 2.4044095095483224e-06, "loss": 0.03962179, "memory(GiB)": 13.7, "step": 73600, "train_speed(iter/s)": 1.529278 }, { "acc": 0.98779764, "epoch": 34.49964846496368, "grad_norm": 3.0907654762268066, "learning_rate": 2.4037471226163524e-06, "loss": 0.02449889, "memory(GiB)": 13.7, "step": 73605, "train_speed(iter/s)": 1.529277 }, { "acc": 0.96779766, "epoch": 34.50199203187251, "grad_norm": 5.715493679046631, "learning_rate": 2.403084798101788e-06, "loss": 0.06640135, "memory(GiB)": 13.7, "step": 73610, "train_speed(iter/s)": 1.529284 }, { "acc": 0.98627968, "epoch": 34.504335598781346, "grad_norm": 2.678311347961426, "learning_rate": 2.402422536020552e-06, "loss": 0.04396482, "memory(GiB)": 13.7, "step": 73615, "train_speed(iter/s)": 1.529291 }, { "acc": 0.97101192, "epoch": 34.50667916569018, "grad_norm": 0.5179216861724854, "learning_rate": 2.4017603363885646e-06, "loss": 0.07410645, "memory(GiB)": 13.7, "step": 73620, "train_speed(iter/s)": 1.529294 }, { "acc": 0.98806086, "epoch": 34.509022732599014, "grad_norm": 0.2921389043331146, "learning_rate": 2.401098199221742e-06, "loss": 0.05238984, "memory(GiB)": 13.7, "step": 73625, "train_speed(iter/s)": 1.529295 }, { "acc": 0.97133932, "epoch": 34.51136629950785, "grad_norm": 5.434792995452881, "learning_rate": 2.400436124535996e-06, "loss": 0.07391965, "memory(GiB)": 13.7, "step": 73630, "train_speed(iter/s)": 1.529293 }, { "acc": 0.9838542, "epoch": 34.51370986641668, "grad_norm": 0.9557108879089355, "learning_rate": 2.399774112347245e-06, "loss": 0.0469478, "memory(GiB)": 13.7, "step": 73635, "train_speed(iter/s)": 1.529294 }, { "acc": 0.98520832, "epoch": 34.516053433325524, "grad_norm": 2.3204071521759033, "learning_rate": 2.399112162671402e-06, "loss": 0.02495978, "memory(GiB)": 13.7, "step": 73640, "train_speed(iter/s)": 1.529295 }, { "acc": 0.9979167, "epoch": 34.51839700023436, "grad_norm": 0.049227308481931686, "learning_rate": 2.3984502755243737e-06, "loss": 0.02764272, "memory(GiB)": 13.7, "step": 73645, "train_speed(iter/s)": 1.529295 }, { "acc": 0.99125004, "epoch": 34.52074056714319, "grad_norm": 5.856452941894531, "learning_rate": 2.3977884509220724e-06, "loss": 0.04371547, "memory(GiB)": 13.7, "step": 73650, "train_speed(iter/s)": 1.529298 }, { "acc": 0.9895834, "epoch": 34.52308413405203, "grad_norm": 2.5808658599853516, "learning_rate": 2.3971266888804083e-06, "loss": 0.03495665, "memory(GiB)": 13.7, "step": 73655, "train_speed(iter/s)": 1.529299 }, { "acc": 0.99125004, "epoch": 34.52542770096086, "grad_norm": 0.5585330128669739, "learning_rate": 2.3964649894152844e-06, "loss": 0.02812665, "memory(GiB)": 13.7, "step": 73660, "train_speed(iter/s)": 1.529302 }, { "acc": 0.9875, "epoch": 34.527771267869696, "grad_norm": 8.171533584594727, "learning_rate": 2.3958033525426066e-06, "loss": 0.02148899, "memory(GiB)": 13.7, "step": 73665, "train_speed(iter/s)": 1.529304 }, { "acc": 0.9822917, "epoch": 34.53011483477853, "grad_norm": 4.120570182800293, "learning_rate": 2.395141778278281e-06, "loss": 0.05239668, "memory(GiB)": 13.7, "step": 73670, "train_speed(iter/s)": 1.529305 }, { "acc": 0.98999996, "epoch": 34.53245840168737, "grad_norm": 3.3068296909332275, "learning_rate": 2.394480266638206e-06, "loss": 0.01917266, "memory(GiB)": 13.7, "step": 73675, "train_speed(iter/s)": 1.529312 }, { "acc": 0.98139877, "epoch": 34.534801968596206, "grad_norm": 0.002452444052323699, "learning_rate": 2.3938188176382866e-06, "loss": 0.05828921, "memory(GiB)": 13.7, "step": 73680, "train_speed(iter/s)": 1.529314 }, { "acc": 0.98208332, "epoch": 34.53714553550504, "grad_norm": 2.5029640197753906, "learning_rate": 2.3931574312944164e-06, "loss": 0.03174429, "memory(GiB)": 13.7, "step": 73685, "train_speed(iter/s)": 1.529318 }, { "acc": 0.97562504, "epoch": 34.539489102413874, "grad_norm": 5.336082458496094, "learning_rate": 2.3924961076224963e-06, "loss": 0.07563742, "memory(GiB)": 13.7, "step": 73690, "train_speed(iter/s)": 1.529325 }, { "acc": 0.98681545, "epoch": 34.54183266932271, "grad_norm": 1.6844227313995361, "learning_rate": 2.3918348466384232e-06, "loss": 0.06046559, "memory(GiB)": 13.7, "step": 73695, "train_speed(iter/s)": 1.529323 }, { "acc": 0.96488094, "epoch": 34.54417623623154, "grad_norm": 8.593949317932129, "learning_rate": 2.3911736483580893e-06, "loss": 0.09496282, "memory(GiB)": 13.7, "step": 73700, "train_speed(iter/s)": 1.529321 }, { "acc": 0.9822917, "epoch": 34.54651980314038, "grad_norm": 4.359985828399658, "learning_rate": 2.390512512797388e-06, "loss": 0.0636335, "memory(GiB)": 13.7, "step": 73705, "train_speed(iter/s)": 1.52932 }, { "acc": 0.98916664, "epoch": 34.54886337004921, "grad_norm": 3.9666616916656494, "learning_rate": 2.389851439972213e-06, "loss": 0.02343203, "memory(GiB)": 13.7, "step": 73710, "train_speed(iter/s)": 1.529326 }, { "acc": 0.97956848, "epoch": 34.55120693695805, "grad_norm": 1.8120976686477661, "learning_rate": 2.389190429898453e-06, "loss": 0.04895211, "memory(GiB)": 13.7, "step": 73715, "train_speed(iter/s)": 1.529325 }, { "acc": 1.0, "epoch": 34.55355050386689, "grad_norm": 1.1503503322601318, "learning_rate": 2.3885294825919944e-06, "loss": 0.01775925, "memory(GiB)": 13.7, "step": 73720, "train_speed(iter/s)": 1.529328 }, { "acc": 0.98145828, "epoch": 34.55589407077572, "grad_norm": 3.8045740127563477, "learning_rate": 2.387868598068726e-06, "loss": 0.0687448, "memory(GiB)": 13.7, "step": 73725, "train_speed(iter/s)": 1.529331 }, { "acc": 0.9875, "epoch": 34.558237637684556, "grad_norm": 5.140192031860352, "learning_rate": 2.387207776344535e-06, "loss": 0.03511753, "memory(GiB)": 13.7, "step": 73730, "train_speed(iter/s)": 1.529334 }, { "acc": 0.97633934, "epoch": 34.56058120459339, "grad_norm": 4.521142482757568, "learning_rate": 2.386547017435302e-06, "loss": 0.0617124, "memory(GiB)": 13.7, "step": 73735, "train_speed(iter/s)": 1.529328 }, { "acc": 0.9875, "epoch": 34.562924771502225, "grad_norm": 1.8515087366104126, "learning_rate": 2.3858863213569114e-06, "loss": 0.05173123, "memory(GiB)": 13.7, "step": 73740, "train_speed(iter/s)": 1.529328 }, { "acc": 0.98552074, "epoch": 34.56526833841106, "grad_norm": 3.76623797416687, "learning_rate": 2.385225688125244e-06, "loss": 0.0471958, "memory(GiB)": 13.7, "step": 73745, "train_speed(iter/s)": 1.529325 }, { "acc": 0.98883934, "epoch": 34.5676119053199, "grad_norm": 1.8480533361434937, "learning_rate": 2.384565117756181e-06, "loss": 0.0302224, "memory(GiB)": 13.7, "step": 73750, "train_speed(iter/s)": 1.529328 }, { "acc": 0.97770824, "epoch": 34.569955472228735, "grad_norm": 2.1655948162078857, "learning_rate": 2.3839046102655984e-06, "loss": 0.0507045, "memory(GiB)": 13.7, "step": 73755, "train_speed(iter/s)": 1.529326 }, { "acc": 0.978125, "epoch": 34.57229903913757, "grad_norm": 1.2366087436676025, "learning_rate": 2.383244165669371e-06, "loss": 0.05251327, "memory(GiB)": 13.7, "step": 73760, "train_speed(iter/s)": 1.529328 }, { "acc": 0.98779755, "epoch": 34.5746426060464, "grad_norm": 4.92374849319458, "learning_rate": 2.3825837839833756e-06, "loss": 0.03578462, "memory(GiB)": 13.7, "step": 73765, "train_speed(iter/s)": 1.529334 }, { "acc": 0.98008919, "epoch": 34.57698617295524, "grad_norm": 3.0966339111328125, "learning_rate": 2.381923465223488e-06, "loss": 0.08298302, "memory(GiB)": 13.7, "step": 73770, "train_speed(iter/s)": 1.529335 }, { "acc": 0.97156248, "epoch": 34.57932973986407, "grad_norm": 2.2945549488067627, "learning_rate": 2.3812632094055744e-06, "loss": 0.05111411, "memory(GiB)": 13.7, "step": 73775, "train_speed(iter/s)": 1.529339 }, { "acc": 0.98187504, "epoch": 34.581673306772906, "grad_norm": 1.7769213914871216, "learning_rate": 2.3806030165455088e-06, "loss": 0.05159774, "memory(GiB)": 13.7, "step": 73780, "train_speed(iter/s)": 1.529346 }, { "acc": 0.98511362, "epoch": 34.58401687368174, "grad_norm": 3.772948741912842, "learning_rate": 2.3799428866591617e-06, "loss": 0.03093675, "memory(GiB)": 13.7, "step": 73785, "train_speed(iter/s)": 1.529347 }, { "acc": 0.98590279, "epoch": 34.58636044059058, "grad_norm": 5.487252235412598, "learning_rate": 2.3792828197623973e-06, "loss": 0.05627219, "memory(GiB)": 13.7, "step": 73790, "train_speed(iter/s)": 1.529353 }, { "acc": 0.97479172, "epoch": 34.588704007499416, "grad_norm": 4.898382186889648, "learning_rate": 2.3786228158710826e-06, "loss": 0.06235635, "memory(GiB)": 13.7, "step": 73795, "train_speed(iter/s)": 1.529355 }, { "acc": 0.97994041, "epoch": 34.59104757440825, "grad_norm": 2.3832778930664062, "learning_rate": 2.3779628750010813e-06, "loss": 0.03968863, "memory(GiB)": 13.7, "step": 73800, "train_speed(iter/s)": 1.529359 }, { "acc": 0.990625, "epoch": 34.593391141317085, "grad_norm": 2.952324151992798, "learning_rate": 2.3773029971682587e-06, "loss": 0.02821697, "memory(GiB)": 13.7, "step": 73805, "train_speed(iter/s)": 1.529358 }, { "acc": 0.98139133, "epoch": 34.59573470822592, "grad_norm": 5.3683342933654785, "learning_rate": 2.376643182388472e-06, "loss": 0.04215927, "memory(GiB)": 13.7, "step": 73810, "train_speed(iter/s)": 1.529363 }, { "acc": 0.98779764, "epoch": 34.59807827513475, "grad_norm": 0.3897150754928589, "learning_rate": 2.3759834306775836e-06, "loss": 0.04095024, "memory(GiB)": 13.7, "step": 73815, "train_speed(iter/s)": 1.529363 }, { "acc": 0.97297621, "epoch": 34.60042184204359, "grad_norm": 3.2645742893218994, "learning_rate": 2.3753237420514508e-06, "loss": 0.06125754, "memory(GiB)": 13.7, "step": 73820, "train_speed(iter/s)": 1.529367 }, { "acc": 0.9944643, "epoch": 34.60276540895243, "grad_norm": 1.432132601737976, "learning_rate": 2.3746641165259322e-06, "loss": 0.01242841, "memory(GiB)": 13.7, "step": 73825, "train_speed(iter/s)": 1.529369 }, { "acc": 0.97742424, "epoch": 34.60510897586126, "grad_norm": 2.914071798324585, "learning_rate": 2.3740045541168815e-06, "loss": 0.06296329, "memory(GiB)": 13.7, "step": 73830, "train_speed(iter/s)": 1.529372 }, { "acc": 0.97562504, "epoch": 34.6074525427701, "grad_norm": 3.7007968425750732, "learning_rate": 2.3733450548401533e-06, "loss": 0.05989675, "memory(GiB)": 13.7, "step": 73835, "train_speed(iter/s)": 1.529368 }, { "acc": 0.9838542, "epoch": 34.60979610967893, "grad_norm": 3.016700267791748, "learning_rate": 2.3726856187115973e-06, "loss": 0.04673904, "memory(GiB)": 13.7, "step": 73840, "train_speed(iter/s)": 1.52937 }, { "acc": 0.9791666, "epoch": 34.612139676587766, "grad_norm": 2.677579641342163, "learning_rate": 2.372026245747068e-06, "loss": 0.04760638, "memory(GiB)": 13.7, "step": 73845, "train_speed(iter/s)": 1.529375 }, { "acc": 0.9880209, "epoch": 34.6144832434966, "grad_norm": 8.455474853515625, "learning_rate": 2.3713669359624106e-06, "loss": 0.06306251, "memory(GiB)": 13.7, "step": 73850, "train_speed(iter/s)": 1.529381 }, { "acc": 0.98187504, "epoch": 34.616826810405435, "grad_norm": 3.134101390838623, "learning_rate": 2.3707076893734745e-06, "loss": 0.05193571, "memory(GiB)": 13.7, "step": 73855, "train_speed(iter/s)": 1.529382 }, { "acc": 0.99437504, "epoch": 34.61917037731427, "grad_norm": 0.004959722049534321, "learning_rate": 2.370048505996108e-06, "loss": 0.02848452, "memory(GiB)": 13.7, "step": 73860, "train_speed(iter/s)": 1.529378 }, { "acc": 0.99354172, "epoch": 34.62151394422311, "grad_norm": 1.079123616218567, "learning_rate": 2.369389385846152e-06, "loss": 0.036129, "memory(GiB)": 13.7, "step": 73865, "train_speed(iter/s)": 1.529382 }, { "acc": 0.97895832, "epoch": 34.623857511131945, "grad_norm": 7.522196292877197, "learning_rate": 2.3687303289394516e-06, "loss": 0.04916967, "memory(GiB)": 13.7, "step": 73870, "train_speed(iter/s)": 1.529389 }, { "acc": 0.97387352, "epoch": 34.62620107804078, "grad_norm": 4.517843246459961, "learning_rate": 2.3680713352918496e-06, "loss": 0.04680913, "memory(GiB)": 13.7, "step": 73875, "train_speed(iter/s)": 1.529395 }, { "acc": 0.98965816, "epoch": 34.62854464494961, "grad_norm": 4.357455253601074, "learning_rate": 2.3674124049191825e-06, "loss": 0.02369226, "memory(GiB)": 13.7, "step": 73880, "train_speed(iter/s)": 1.529397 }, { "acc": 0.98729172, "epoch": 34.63088821185845, "grad_norm": 0.007382168900221586, "learning_rate": 2.3667535378372928e-06, "loss": 0.0438468, "memory(GiB)": 13.7, "step": 73885, "train_speed(iter/s)": 1.529395 }, { "acc": 0.9875, "epoch": 34.63323177876728, "grad_norm": 0.005873126443475485, "learning_rate": 2.3660947340620137e-06, "loss": 0.02714559, "memory(GiB)": 13.7, "step": 73890, "train_speed(iter/s)": 1.529398 }, { "acc": 0.97587128, "epoch": 34.635575345676116, "grad_norm": 2.8547046184539795, "learning_rate": 2.365435993609182e-06, "loss": 0.0846841, "memory(GiB)": 13.7, "step": 73895, "train_speed(iter/s)": 1.529401 }, { "acc": 0.98206844, "epoch": 34.63791891258495, "grad_norm": 0.0030470441561192274, "learning_rate": 2.364777316494635e-06, "loss": 0.04686454, "memory(GiB)": 13.7, "step": 73900, "train_speed(iter/s)": 1.529404 }, { "acc": 0.97979164, "epoch": 34.64026247949379, "grad_norm": 0.1233322024345398, "learning_rate": 2.3641187027342e-06, "loss": 0.02800244, "memory(GiB)": 13.7, "step": 73905, "train_speed(iter/s)": 1.529411 }, { "acc": 0.97837162, "epoch": 34.642606046402626, "grad_norm": 2.3147311210632324, "learning_rate": 2.3634601523437096e-06, "loss": 0.07060413, "memory(GiB)": 13.7, "step": 73910, "train_speed(iter/s)": 1.529411 }, { "acc": 0.984375, "epoch": 34.64494961331146, "grad_norm": 6.188353061676025, "learning_rate": 2.3628016653389966e-06, "loss": 0.04019442, "memory(GiB)": 13.7, "step": 73915, "train_speed(iter/s)": 1.529415 }, { "acc": 0.99707794, "epoch": 34.647293180220295, "grad_norm": 0.003386431373655796, "learning_rate": 2.362143241735886e-06, "loss": 0.0134777, "memory(GiB)": 13.7, "step": 73920, "train_speed(iter/s)": 1.529416 }, { "acc": 0.99020824, "epoch": 34.64963674712913, "grad_norm": 0.011952691711485386, "learning_rate": 2.3614848815502015e-06, "loss": 0.0302354, "memory(GiB)": 13.7, "step": 73925, "train_speed(iter/s)": 1.529415 }, { "acc": 0.98458328, "epoch": 34.651980314037964, "grad_norm": 3.2129526138305664, "learning_rate": 2.3608265847977714e-06, "loss": 0.03294351, "memory(GiB)": 13.7, "step": 73930, "train_speed(iter/s)": 1.529418 }, { "acc": 0.97952385, "epoch": 34.6543238809468, "grad_norm": 3.0142555236816406, "learning_rate": 2.360168351494419e-06, "loss": 0.04657384, "memory(GiB)": 13.7, "step": 73935, "train_speed(iter/s)": 1.529418 }, { "acc": 0.98726635, "epoch": 34.65666744785564, "grad_norm": 3.3855974674224854, "learning_rate": 2.3595101816559643e-06, "loss": 0.04355858, "memory(GiB)": 13.7, "step": 73940, "train_speed(iter/s)": 1.529419 }, { "acc": 0.98583336, "epoch": 34.659011014764474, "grad_norm": 3.5470423698425293, "learning_rate": 2.3588520752982287e-06, "loss": 0.04321721, "memory(GiB)": 13.7, "step": 73945, "train_speed(iter/s)": 1.529421 }, { "acc": 0.98351192, "epoch": 34.66135458167331, "grad_norm": 0.0010831989347934723, "learning_rate": 2.358194032437032e-06, "loss": 0.03450477, "memory(GiB)": 13.7, "step": 73950, "train_speed(iter/s)": 1.529426 }, { "acc": 0.98986111, "epoch": 34.66369814858214, "grad_norm": 3.699899673461914, "learning_rate": 2.3575360530881885e-06, "loss": 0.04287509, "memory(GiB)": 13.7, "step": 73955, "train_speed(iter/s)": 1.529428 }, { "acc": 0.9817709, "epoch": 34.66604171549098, "grad_norm": 3.241647243499756, "learning_rate": 2.3568781372675176e-06, "loss": 0.08442606, "memory(GiB)": 13.7, "step": 73960, "train_speed(iter/s)": 1.529429 }, { "acc": 0.99031658, "epoch": 34.66838528239981, "grad_norm": 2.2023096084594727, "learning_rate": 2.3562202849908285e-06, "loss": 0.04876627, "memory(GiB)": 13.7, "step": 73965, "train_speed(iter/s)": 1.52943 }, { "acc": 0.99092264, "epoch": 34.670728849308645, "grad_norm": 0.11675248295068741, "learning_rate": 2.355562496273938e-06, "loss": 0.04240755, "memory(GiB)": 13.7, "step": 73970, "train_speed(iter/s)": 1.529432 }, { "acc": 0.97489586, "epoch": 34.67307241621748, "grad_norm": 0.22892992198467255, "learning_rate": 2.3549047711326565e-06, "loss": 0.04822804, "memory(GiB)": 13.7, "step": 73975, "train_speed(iter/s)": 1.529433 }, { "acc": 0.9791666, "epoch": 34.67541598312632, "grad_norm": 4.415374755859375, "learning_rate": 2.3542471095827912e-06, "loss": 0.06568747, "memory(GiB)": 13.7, "step": 73980, "train_speed(iter/s)": 1.529437 }, { "acc": 0.97687492, "epoch": 34.677759550035155, "grad_norm": 1.6895660161972046, "learning_rate": 2.3535895116401523e-06, "loss": 0.0436079, "memory(GiB)": 13.7, "step": 73985, "train_speed(iter/s)": 1.529443 }, { "acc": 0.9822916, "epoch": 34.68010311694399, "grad_norm": 3.211625337600708, "learning_rate": 2.3529319773205476e-06, "loss": 0.0386336, "memory(GiB)": 13.7, "step": 73990, "train_speed(iter/s)": 1.529446 }, { "acc": 0.97784967, "epoch": 34.682446683852824, "grad_norm": 5.144615650177002, "learning_rate": 2.352274506639778e-06, "loss": 0.05239713, "memory(GiB)": 13.7, "step": 73995, "train_speed(iter/s)": 1.529448 }, { "acc": 0.98913689, "epoch": 34.68479025076166, "grad_norm": 0.0065231784246861935, "learning_rate": 2.351617099613649e-06, "loss": 0.03087496, "memory(GiB)": 13.7, "step": 74000, "train_speed(iter/s)": 1.529451 }, { "acc": 0.98849201, "epoch": 34.68713381767049, "grad_norm": 1.4525431394577026, "learning_rate": 2.350959756257965e-06, "loss": 0.03069063, "memory(GiB)": 13.7, "step": 74005, "train_speed(iter/s)": 1.529453 }, { "acc": 0.98166046, "epoch": 34.68947738457933, "grad_norm": 2.317861557006836, "learning_rate": 2.350302476588524e-06, "loss": 0.04349088, "memory(GiB)": 13.7, "step": 74010, "train_speed(iter/s)": 1.529456 }, { "acc": 0.98562498, "epoch": 34.69182095148817, "grad_norm": 1.6172865629196167, "learning_rate": 2.3496452606211227e-06, "loss": 0.02926275, "memory(GiB)": 13.7, "step": 74015, "train_speed(iter/s)": 1.529459 }, { "acc": 0.98500004, "epoch": 34.694164518397, "grad_norm": 1.3146792650222778, "learning_rate": 2.3489881083715603e-06, "loss": 0.03348498, "memory(GiB)": 13.7, "step": 74020, "train_speed(iter/s)": 1.529461 }, { "acc": 0.97344704, "epoch": 34.69650808530584, "grad_norm": 2.2604575157165527, "learning_rate": 2.3483310198556337e-06, "loss": 0.05151265, "memory(GiB)": 13.7, "step": 74025, "train_speed(iter/s)": 1.529464 }, { "acc": 0.98083334, "epoch": 34.69885165221467, "grad_norm": 4.286846160888672, "learning_rate": 2.3476739950891377e-06, "loss": 0.04370464, "memory(GiB)": 13.7, "step": 74030, "train_speed(iter/s)": 1.529463 }, { "acc": 0.98869047, "epoch": 34.701195219123505, "grad_norm": 1.368024468421936, "learning_rate": 2.347017034087862e-06, "loss": 0.04186896, "memory(GiB)": 13.7, "step": 74035, "train_speed(iter/s)": 1.529466 }, { "acc": 0.9828125, "epoch": 34.70353878603234, "grad_norm": 4.405857563018799, "learning_rate": 2.3463601368675997e-06, "loss": 0.04380164, "memory(GiB)": 13.7, "step": 74040, "train_speed(iter/s)": 1.529471 }, { "acc": 0.99375, "epoch": 34.705882352941174, "grad_norm": 0.014138532802462578, "learning_rate": 2.345703303444141e-06, "loss": 0.01287096, "memory(GiB)": 13.7, "step": 74045, "train_speed(iter/s)": 1.529471 }, { "acc": 0.979072, "epoch": 34.70822591985001, "grad_norm": 2.106996774673462, "learning_rate": 2.345046533833274e-06, "loss": 0.06428276, "memory(GiB)": 13.7, "step": 74050, "train_speed(iter/s)": 1.529472 }, { "acc": 0.97687492, "epoch": 34.71056948675885, "grad_norm": 5.814931869506836, "learning_rate": 2.3443898280507825e-06, "loss": 0.03906679, "memory(GiB)": 13.7, "step": 74055, "train_speed(iter/s)": 1.529475 }, { "acc": 0.98916664, "epoch": 34.712913053667684, "grad_norm": 4.419069766998291, "learning_rate": 2.3437331861124542e-06, "loss": 0.01885355, "memory(GiB)": 13.7, "step": 74060, "train_speed(iter/s)": 1.529481 }, { "acc": 0.98467264, "epoch": 34.71525662057652, "grad_norm": 5.0176167488098145, "learning_rate": 2.3430766080340727e-06, "loss": 0.04461791, "memory(GiB)": 13.7, "step": 74065, "train_speed(iter/s)": 1.529484 }, { "acc": 0.98741322, "epoch": 34.71760018748535, "grad_norm": 8.073559761047363, "learning_rate": 2.342420093831418e-06, "loss": 0.04740018, "memory(GiB)": 13.7, "step": 74070, "train_speed(iter/s)": 1.529488 }, { "acc": 0.98833332, "epoch": 34.71994375439419, "grad_norm": 4.022894382476807, "learning_rate": 2.341763643520272e-06, "loss": 0.03859098, "memory(GiB)": 13.7, "step": 74075, "train_speed(iter/s)": 1.529493 }, { "acc": 0.97354164, "epoch": 34.72228732130302, "grad_norm": 5.469015121459961, "learning_rate": 2.3411072571164144e-06, "loss": 0.0461119, "memory(GiB)": 13.7, "step": 74080, "train_speed(iter/s)": 1.529496 }, { "acc": 0.9869792, "epoch": 34.724630888211856, "grad_norm": 0.5451601147651672, "learning_rate": 2.340450934635622e-06, "loss": 0.02729602, "memory(GiB)": 13.7, "step": 74085, "train_speed(iter/s)": 1.529501 }, { "acc": 0.98570518, "epoch": 34.7269744551207, "grad_norm": 3.0622823238372803, "learning_rate": 2.3397946760936685e-06, "loss": 0.06268796, "memory(GiB)": 13.7, "step": 74090, "train_speed(iter/s)": 1.529508 }, { "acc": 0.9854166, "epoch": 34.72931802202953, "grad_norm": 1.541458249092102, "learning_rate": 2.3391384815063295e-06, "loss": 0.04157707, "memory(GiB)": 13.7, "step": 74095, "train_speed(iter/s)": 1.529507 }, { "acc": 0.98198872, "epoch": 34.731661588938366, "grad_norm": 0.9427868723869324, "learning_rate": 2.3384823508893785e-06, "loss": 0.04228347, "memory(GiB)": 13.7, "step": 74100, "train_speed(iter/s)": 1.529515 }, { "acc": 0.98885422, "epoch": 34.7340051558472, "grad_norm": 6.613715171813965, "learning_rate": 2.3378262842585885e-06, "loss": 0.02625743, "memory(GiB)": 13.7, "step": 74105, "train_speed(iter/s)": 1.529519 }, { "acc": 0.96714287, "epoch": 34.736348722756034, "grad_norm": 3.4393832683563232, "learning_rate": 2.3371702816297244e-06, "loss": 0.06394003, "memory(GiB)": 13.7, "step": 74110, "train_speed(iter/s)": 1.529522 }, { "acc": 0.98770828, "epoch": 34.73869228966487, "grad_norm": 2.7442002296447754, "learning_rate": 2.3365143430185577e-06, "loss": 0.04064689, "memory(GiB)": 13.7, "step": 74115, "train_speed(iter/s)": 1.529526 }, { "acc": 0.984375, "epoch": 34.7410358565737, "grad_norm": 3.209960460662842, "learning_rate": 2.3358584684408557e-06, "loss": 0.03260283, "memory(GiB)": 13.7, "step": 74120, "train_speed(iter/s)": 1.529529 }, { "acc": 0.9819643, "epoch": 34.74337942348254, "grad_norm": 3.213357448577881, "learning_rate": 2.33520265791238e-06, "loss": 0.04690009, "memory(GiB)": 13.7, "step": 74125, "train_speed(iter/s)": 1.529528 }, { "acc": 0.97727776, "epoch": 34.74572299039138, "grad_norm": 6.411430835723877, "learning_rate": 2.334546911448899e-06, "loss": 0.05160078, "memory(GiB)": 13.7, "step": 74130, "train_speed(iter/s)": 1.529533 }, { "acc": 0.9833334, "epoch": 34.74806655730021, "grad_norm": 6.195568561553955, "learning_rate": 2.33389122906617e-06, "loss": 0.04637693, "memory(GiB)": 13.7, "step": 74135, "train_speed(iter/s)": 1.529537 }, { "acc": 0.98842258, "epoch": 34.75041012420905, "grad_norm": 2.722860336303711, "learning_rate": 2.333235610779956e-06, "loss": 0.04004129, "memory(GiB)": 13.7, "step": 74140, "train_speed(iter/s)": 1.529542 }, { "acc": 0.98612175, "epoch": 34.75275369111788, "grad_norm": 4.375929355621338, "learning_rate": 2.3325800566060144e-06, "loss": 0.04034156, "memory(GiB)": 13.7, "step": 74145, "train_speed(iter/s)": 1.529547 }, { "acc": 0.9770834, "epoch": 34.755097258026716, "grad_norm": 0.00413130410015583, "learning_rate": 2.331924566560103e-06, "loss": 0.0488829, "memory(GiB)": 13.7, "step": 74150, "train_speed(iter/s)": 1.529552 }, { "acc": 0.98979168, "epoch": 34.75744082493555, "grad_norm": 2.950899362564087, "learning_rate": 2.3312691406579806e-06, "loss": 0.03156596, "memory(GiB)": 13.7, "step": 74155, "train_speed(iter/s)": 1.529558 }, { "acc": 0.98371983, "epoch": 34.759784391844384, "grad_norm": 3.187709093093872, "learning_rate": 2.330613778915397e-06, "loss": 0.0369307, "memory(GiB)": 13.7, "step": 74160, "train_speed(iter/s)": 1.52956 }, { "acc": 0.97875004, "epoch": 34.762127958753226, "grad_norm": 5.816977500915527, "learning_rate": 2.329958481348106e-06, "loss": 0.07574145, "memory(GiB)": 13.7, "step": 74165, "train_speed(iter/s)": 1.529565 }, { "acc": 0.97485123, "epoch": 34.76447152566206, "grad_norm": 5.01556921005249, "learning_rate": 2.3293032479718626e-06, "loss": 0.06215342, "memory(GiB)": 13.7, "step": 74170, "train_speed(iter/s)": 1.529569 }, { "acc": 0.99102688, "epoch": 34.766815092570894, "grad_norm": 1.6687593460083008, "learning_rate": 2.328648078802411e-06, "loss": 0.02956084, "memory(GiB)": 13.7, "step": 74175, "train_speed(iter/s)": 1.529566 }, { "acc": 0.98604164, "epoch": 34.76915865947973, "grad_norm": 0.40848320722579956, "learning_rate": 2.327992973855504e-06, "loss": 0.05093744, "memory(GiB)": 13.7, "step": 74180, "train_speed(iter/s)": 1.529572 }, { "acc": 0.98604164, "epoch": 34.77150222638856, "grad_norm": 2.9651620388031006, "learning_rate": 2.3273379331468844e-06, "loss": 0.02041397, "memory(GiB)": 13.7, "step": 74185, "train_speed(iter/s)": 1.529579 }, { "acc": 0.99660721, "epoch": 34.7738457932974, "grad_norm": 2.020573377609253, "learning_rate": 2.3266829566922988e-06, "loss": 0.01846941, "memory(GiB)": 13.7, "step": 74190, "train_speed(iter/s)": 1.529585 }, { "acc": 0.98154764, "epoch": 34.77618936020623, "grad_norm": 2.9585776329040527, "learning_rate": 2.326028044507492e-06, "loss": 0.0756923, "memory(GiB)": 13.7, "step": 74195, "train_speed(iter/s)": 1.529588 }, { "acc": 0.98291664, "epoch": 34.778532927115066, "grad_norm": 1.2066307067871094, "learning_rate": 2.325373196608203e-06, "loss": 0.05929184, "memory(GiB)": 13.7, "step": 74200, "train_speed(iter/s)": 1.52959 }, { "acc": 0.97749996, "epoch": 34.78087649402391, "grad_norm": 3.447512626647949, "learning_rate": 2.3247184130101747e-06, "loss": 0.05387501, "memory(GiB)": 13.7, "step": 74205, "train_speed(iter/s)": 1.529594 }, { "acc": 0.98500004, "epoch": 34.78322006093274, "grad_norm": 1.146687626838684, "learning_rate": 2.324063693729146e-06, "loss": 0.02457706, "memory(GiB)": 13.7, "step": 74210, "train_speed(iter/s)": 1.529603 }, { "acc": 0.9828125, "epoch": 34.785563627841576, "grad_norm": 1.6274687051773071, "learning_rate": 2.3234090387808543e-06, "loss": 0.03447244, "memory(GiB)": 13.7, "step": 74215, "train_speed(iter/s)": 1.529609 }, { "acc": 0.9859375, "epoch": 34.78790719475041, "grad_norm": 5.950809478759766, "learning_rate": 2.3227544481810316e-06, "loss": 0.05210971, "memory(GiB)": 13.7, "step": 74220, "train_speed(iter/s)": 1.529612 }, { "acc": 0.97638893, "epoch": 34.790250761659244, "grad_norm": 0.002348152920603752, "learning_rate": 2.3220999219454156e-06, "loss": 0.05400217, "memory(GiB)": 13.7, "step": 74225, "train_speed(iter/s)": 1.529615 }, { "acc": 0.9838542, "epoch": 34.79259432856808, "grad_norm": 3.518036365509033, "learning_rate": 2.32144546008974e-06, "loss": 0.02596643, "memory(GiB)": 13.7, "step": 74230, "train_speed(iter/s)": 1.529615 }, { "acc": 0.97312508, "epoch": 34.79493789547691, "grad_norm": 2.002511501312256, "learning_rate": 2.3207910626297316e-06, "loss": 0.04347419, "memory(GiB)": 13.7, "step": 74235, "train_speed(iter/s)": 1.529624 }, { "acc": 0.97863102, "epoch": 34.797281462385754, "grad_norm": 1.8043406009674072, "learning_rate": 2.3201367295811232e-06, "loss": 0.05001718, "memory(GiB)": 13.7, "step": 74240, "train_speed(iter/s)": 1.529618 }, { "acc": 0.98770838, "epoch": 34.79962502929459, "grad_norm": 0.007977359928190708, "learning_rate": 2.319482460959642e-06, "loss": 0.02806407, "memory(GiB)": 13.7, "step": 74245, "train_speed(iter/s)": 1.529624 }, { "acc": 0.98633928, "epoch": 34.80196859620342, "grad_norm": 3.8773419857025146, "learning_rate": 2.318828256781016e-06, "loss": 0.02641428, "memory(GiB)": 13.7, "step": 74250, "train_speed(iter/s)": 1.529629 }, { "acc": 0.9927084, "epoch": 34.80431216311226, "grad_norm": 3.9969539642333984, "learning_rate": 2.318174117060969e-06, "loss": 0.00841107, "memory(GiB)": 13.7, "step": 74255, "train_speed(iter/s)": 1.529636 }, { "acc": 0.98102684, "epoch": 34.80665573002109, "grad_norm": 3.3514046669006348, "learning_rate": 2.3175200418152235e-06, "loss": 0.08256498, "memory(GiB)": 13.7, "step": 74260, "train_speed(iter/s)": 1.529639 }, { "acc": 0.99561005, "epoch": 34.808999296929926, "grad_norm": 3.051466941833496, "learning_rate": 2.3168660310595e-06, "loss": 0.04233664, "memory(GiB)": 13.7, "step": 74265, "train_speed(iter/s)": 1.529642 }, { "acc": 0.984375, "epoch": 34.81134286383876, "grad_norm": 2.629473924636841, "learning_rate": 2.3162120848095235e-06, "loss": 0.04182629, "memory(GiB)": 13.7, "step": 74270, "train_speed(iter/s)": 1.529647 }, { "acc": 0.9958334, "epoch": 34.813686430747595, "grad_norm": 5.843111038208008, "learning_rate": 2.3155582030810085e-06, "loss": 0.03428687, "memory(GiB)": 13.7, "step": 74275, "train_speed(iter/s)": 1.529647 }, { "acc": 0.9958333, "epoch": 34.816029997656436, "grad_norm": 0.006574882660061121, "learning_rate": 2.3149043858896724e-06, "loss": 0.00885239, "memory(GiB)": 13.7, "step": 74280, "train_speed(iter/s)": 1.529652 }, { "acc": 0.9729166, "epoch": 34.81837356456527, "grad_norm": 7.4141621589660645, "learning_rate": 2.3142506332512346e-06, "loss": 0.07312589, "memory(GiB)": 13.7, "step": 74285, "train_speed(iter/s)": 1.529657 }, { "acc": 0.98050594, "epoch": 34.820717131474105, "grad_norm": 4.952637672424316, "learning_rate": 2.313596945181405e-06, "loss": 0.04970215, "memory(GiB)": 13.7, "step": 74290, "train_speed(iter/s)": 1.52966 }, { "acc": 0.97327385, "epoch": 34.82306069838294, "grad_norm": 3.6531081199645996, "learning_rate": 2.3129433216958992e-06, "loss": 0.05086092, "memory(GiB)": 13.7, "step": 74295, "train_speed(iter/s)": 1.529661 }, { "acc": 0.99219704, "epoch": 34.82540426529177, "grad_norm": 1.5437544584274292, "learning_rate": 2.3122897628104244e-06, "loss": 0.03223096, "memory(GiB)": 13.7, "step": 74300, "train_speed(iter/s)": 1.529662 }, { "acc": 0.99750004, "epoch": 34.82774783220061, "grad_norm": 3.7100791931152344, "learning_rate": 2.311636268540694e-06, "loss": 0.04634916, "memory(GiB)": 13.7, "step": 74305, "train_speed(iter/s)": 1.529669 }, { "acc": 0.98529758, "epoch": 34.83009139910944, "grad_norm": 4.034443378448486, "learning_rate": 2.3109828389024126e-06, "loss": 0.04016435, "memory(GiB)": 13.7, "step": 74310, "train_speed(iter/s)": 1.529675 }, { "acc": 0.98657198, "epoch": 34.83243496601828, "grad_norm": 6.851846218109131, "learning_rate": 2.3103294739112887e-06, "loss": 0.03922973, "memory(GiB)": 13.7, "step": 74315, "train_speed(iter/s)": 1.529677 }, { "acc": 0.97264881, "epoch": 34.83477853292712, "grad_norm": 0.19287443161010742, "learning_rate": 2.3096761735830254e-06, "loss": 0.04131376, "memory(GiB)": 13.7, "step": 74320, "train_speed(iter/s)": 1.529684 }, { "acc": 0.98217258, "epoch": 34.83712209983595, "grad_norm": 2.1434683799743652, "learning_rate": 2.3090229379333286e-06, "loss": 0.05677634, "memory(GiB)": 13.7, "step": 74325, "train_speed(iter/s)": 1.529686 }, { "acc": 0.97875004, "epoch": 34.839465666744786, "grad_norm": 4.358101844787598, "learning_rate": 2.308369766977897e-06, "loss": 0.03411456, "memory(GiB)": 13.7, "step": 74330, "train_speed(iter/s)": 1.529687 }, { "acc": 0.996875, "epoch": 34.84180923365362, "grad_norm": 0.013456206768751144, "learning_rate": 2.3077166607324313e-06, "loss": 0.02770727, "memory(GiB)": 13.7, "step": 74335, "train_speed(iter/s)": 1.529682 }, { "acc": 0.97493057, "epoch": 34.844152800562455, "grad_norm": 2.8652796745300293, "learning_rate": 2.307063619212633e-06, "loss": 0.06572599, "memory(GiB)": 13.7, "step": 74340, "train_speed(iter/s)": 1.529685 }, { "acc": 0.98383923, "epoch": 34.84649636747129, "grad_norm": 4.19293212890625, "learning_rate": 2.3064106424341966e-06, "loss": 0.03559676, "memory(GiB)": 13.7, "step": 74345, "train_speed(iter/s)": 1.529689 }, { "acc": 0.97894344, "epoch": 34.84883993438012, "grad_norm": 2.2666707038879395, "learning_rate": 2.3057577304128153e-06, "loss": 0.04329299, "memory(GiB)": 13.7, "step": 74350, "train_speed(iter/s)": 1.52969 }, { "acc": 0.99125004, "epoch": 34.851183501288965, "grad_norm": 2.493929624557495, "learning_rate": 2.3051048831641857e-06, "loss": 0.03720754, "memory(GiB)": 13.7, "step": 74355, "train_speed(iter/s)": 1.529696 }, { "acc": 0.98674679, "epoch": 34.8535270681978, "grad_norm": 1.1387379169464111, "learning_rate": 2.304452100704001e-06, "loss": 0.02413743, "memory(GiB)": 13.7, "step": 74360, "train_speed(iter/s)": 1.529693 }, { "acc": 0.98104172, "epoch": 34.85587063510663, "grad_norm": 3.307464361190796, "learning_rate": 2.303799383047949e-06, "loss": 0.07501284, "memory(GiB)": 13.7, "step": 74365, "train_speed(iter/s)": 1.529694 }, { "acc": 0.98638887, "epoch": 34.85821420201547, "grad_norm": 4.039882659912109, "learning_rate": 2.303146730211721e-06, "loss": 0.04396293, "memory(GiB)": 13.7, "step": 74370, "train_speed(iter/s)": 1.5297 }, { "acc": 0.9871726, "epoch": 34.8605577689243, "grad_norm": 0.617898166179657, "learning_rate": 2.302494142211005e-06, "loss": 0.0505354, "memory(GiB)": 13.7, "step": 74375, "train_speed(iter/s)": 1.529697 }, { "acc": 0.98788691, "epoch": 34.862901335833136, "grad_norm": 1.603403091430664, "learning_rate": 2.301841619061485e-06, "loss": 0.04434074, "memory(GiB)": 13.7, "step": 74380, "train_speed(iter/s)": 1.529703 }, { "acc": 0.98625002, "epoch": 34.86524490274197, "grad_norm": 3.56630539894104, "learning_rate": 2.3011891607788482e-06, "loss": 0.03265803, "memory(GiB)": 13.7, "step": 74385, "train_speed(iter/s)": 1.529708 }, { "acc": 0.98090286, "epoch": 34.86758846965081, "grad_norm": 4.569229602813721, "learning_rate": 2.300536767378775e-06, "loss": 0.05441337, "memory(GiB)": 13.7, "step": 74390, "train_speed(iter/s)": 1.529713 }, { "acc": 1.0, "epoch": 34.869932036559646, "grad_norm": 0.005627335514873266, "learning_rate": 2.2998844388769466e-06, "loss": 0.00154966, "memory(GiB)": 13.7, "step": 74395, "train_speed(iter/s)": 1.529721 }, { "acc": 0.9911397, "epoch": 34.87227560346848, "grad_norm": 0.8917535543441772, "learning_rate": 2.2992321752890465e-06, "loss": 0.04466154, "memory(GiB)": 13.7, "step": 74400, "train_speed(iter/s)": 1.529717 }, { "acc": 0.99187498, "epoch": 34.874619170377315, "grad_norm": 3.1541905403137207, "learning_rate": 2.2985799766307487e-06, "loss": 0.01737065, "memory(GiB)": 13.7, "step": 74405, "train_speed(iter/s)": 1.529717 }, { "acc": 0.9885416, "epoch": 34.87696273728615, "grad_norm": 1.8104586601257324, "learning_rate": 2.297927842917732e-06, "loss": 0.03796133, "memory(GiB)": 13.7, "step": 74410, "train_speed(iter/s)": 1.529715 }, { "acc": 0.97196426, "epoch": 34.879306304194984, "grad_norm": 3.4058327674865723, "learning_rate": 2.2972757741656725e-06, "loss": 0.06178509, "memory(GiB)": 13.7, "step": 74415, "train_speed(iter/s)": 1.529718 }, { "acc": 0.98125, "epoch": 34.88164987110382, "grad_norm": 7.258237838745117, "learning_rate": 2.2966237703902435e-06, "loss": 0.0894303, "memory(GiB)": 13.7, "step": 74420, "train_speed(iter/s)": 1.529723 }, { "acc": 0.978125, "epoch": 34.88399343801265, "grad_norm": 8.358689308166504, "learning_rate": 2.295971831607114e-06, "loss": 0.06278133, "memory(GiB)": 13.7, "step": 74425, "train_speed(iter/s)": 1.529722 }, { "acc": 0.98258934, "epoch": 34.886337004921494, "grad_norm": 2.725794553756714, "learning_rate": 2.2953199578319563e-06, "loss": 0.08500401, "memory(GiB)": 13.7, "step": 74430, "train_speed(iter/s)": 1.529726 }, { "acc": 0.9895834, "epoch": 34.88868057183033, "grad_norm": 5.117053508758545, "learning_rate": 2.2946681490804417e-06, "loss": 0.03203778, "memory(GiB)": 13.7, "step": 74435, "train_speed(iter/s)": 1.529731 }, { "acc": 0.9927084, "epoch": 34.89102413873916, "grad_norm": 5.357486248016357, "learning_rate": 2.2940164053682333e-06, "loss": 0.01944849, "memory(GiB)": 13.7, "step": 74440, "train_speed(iter/s)": 1.529736 }, { "acc": 0.96937494, "epoch": 34.893367705648, "grad_norm": 3.428877830505371, "learning_rate": 2.293364726711e-06, "loss": 0.06768604, "memory(GiB)": 13.7, "step": 74445, "train_speed(iter/s)": 1.529734 }, { "acc": 0.9916666, "epoch": 34.89571127255683, "grad_norm": 2.2818922996520996, "learning_rate": 2.292713113124404e-06, "loss": 0.03362465, "memory(GiB)": 13.7, "step": 74450, "train_speed(iter/s)": 1.529738 }, { "acc": 0.9697917, "epoch": 34.898054839465665, "grad_norm": 3.9517300128936768, "learning_rate": 2.2920615646241108e-06, "loss": 0.04994583, "memory(GiB)": 13.7, "step": 74455, "train_speed(iter/s)": 1.52974 }, { "acc": 0.98113098, "epoch": 34.9003984063745, "grad_norm": 6.315621852874756, "learning_rate": 2.2914100812257786e-06, "loss": 0.0373324, "memory(GiB)": 13.7, "step": 74460, "train_speed(iter/s)": 1.529741 }, { "acc": 0.9882143, "epoch": 34.90274197328334, "grad_norm": 2.615201711654663, "learning_rate": 2.2907586629450693e-06, "loss": 0.04732569, "memory(GiB)": 13.7, "step": 74465, "train_speed(iter/s)": 1.529741 }, { "acc": 0.990625, "epoch": 34.905085540192175, "grad_norm": 2.207779884338379, "learning_rate": 2.290107309797638e-06, "loss": 0.03449285, "memory(GiB)": 13.7, "step": 74470, "train_speed(iter/s)": 1.529743 }, { "acc": 0.99187498, "epoch": 34.90742910710101, "grad_norm": 1.7808549404144287, "learning_rate": 2.289456021799144e-06, "loss": 0.01829933, "memory(GiB)": 13.7, "step": 74475, "train_speed(iter/s)": 1.529745 }, { "acc": 0.99125004, "epoch": 34.909772674009844, "grad_norm": 4.076516628265381, "learning_rate": 2.288804798965239e-06, "loss": 0.02220456, "memory(GiB)": 13.7, "step": 74480, "train_speed(iter/s)": 1.529746 }, { "acc": 0.9875, "epoch": 34.91211624091868, "grad_norm": 3.7338953018188477, "learning_rate": 2.2881536413115788e-06, "loss": 0.04018884, "memory(GiB)": 13.7, "step": 74485, "train_speed(iter/s)": 1.529752 }, { "acc": 0.9947917, "epoch": 34.91445980782751, "grad_norm": 0.0004522619128692895, "learning_rate": 2.287502548853815e-06, "loss": 0.01891139, "memory(GiB)": 13.7, "step": 74490, "train_speed(iter/s)": 1.529757 }, { "acc": 0.98458328, "epoch": 34.91680337473635, "grad_norm": 1.7320027351379395, "learning_rate": 2.286851521607596e-06, "loss": 0.03287317, "memory(GiB)": 13.7, "step": 74495, "train_speed(iter/s)": 1.529759 }, { "acc": 0.98945513, "epoch": 34.91914694164518, "grad_norm": 3.5501184463500977, "learning_rate": 2.2862005595885713e-06, "loss": 0.04300872, "memory(GiB)": 13.7, "step": 74500, "train_speed(iter/s)": 1.529765 }, { "acc": 0.98500004, "epoch": 34.92149050855402, "grad_norm": 0.6552686095237732, "learning_rate": 2.2855496628123896e-06, "loss": 0.03946609, "memory(GiB)": 13.7, "step": 74505, "train_speed(iter/s)": 1.529764 }, { "acc": 0.97854166, "epoch": 34.92383407546286, "grad_norm": 1.1926339864730835, "learning_rate": 2.284898831294695e-06, "loss": 0.06879796, "memory(GiB)": 13.7, "step": 74510, "train_speed(iter/s)": 1.529763 }, { "acc": 0.98423615, "epoch": 34.92617764237169, "grad_norm": 0.14191478490829468, "learning_rate": 2.284248065051129e-06, "loss": 0.03332341, "memory(GiB)": 13.7, "step": 74515, "train_speed(iter/s)": 1.52976 }, { "acc": 0.98497906, "epoch": 34.928521209280525, "grad_norm": 3.75636625289917, "learning_rate": 2.2835973640973353e-06, "loss": 0.04081182, "memory(GiB)": 13.7, "step": 74520, "train_speed(iter/s)": 1.529765 }, { "acc": 0.98520832, "epoch": 34.93086477618936, "grad_norm": 0.03503771871328354, "learning_rate": 2.2829467284489566e-06, "loss": 0.02094413, "memory(GiB)": 13.7, "step": 74525, "train_speed(iter/s)": 1.529771 }, { "acc": 0.9864583, "epoch": 34.933208343098194, "grad_norm": 4.374512195587158, "learning_rate": 2.2822961581216314e-06, "loss": 0.05369933, "memory(GiB)": 13.7, "step": 74530, "train_speed(iter/s)": 1.529776 }, { "acc": 0.9744791, "epoch": 34.93555191000703, "grad_norm": 3.952678918838501, "learning_rate": 2.281645653130996e-06, "loss": 0.07925485, "memory(GiB)": 13.7, "step": 74535, "train_speed(iter/s)": 1.529777 }, { "acc": 0.98687496, "epoch": 34.93789547691586, "grad_norm": 0.44910451769828796, "learning_rate": 2.2809952134926858e-06, "loss": 0.04004632, "memory(GiB)": 13.7, "step": 74540, "train_speed(iter/s)": 1.529779 }, { "acc": 0.9947917, "epoch": 34.940239043824704, "grad_norm": 0.002440914511680603, "learning_rate": 2.2803448392223387e-06, "loss": 0.01121536, "memory(GiB)": 13.7, "step": 74545, "train_speed(iter/s)": 1.529785 }, { "acc": 0.98187504, "epoch": 34.94258261073354, "grad_norm": 6.249340057373047, "learning_rate": 2.279694530335585e-06, "loss": 0.02915099, "memory(GiB)": 13.7, "step": 74550, "train_speed(iter/s)": 1.529788 }, { "acc": 0.98684216, "epoch": 34.94492617764237, "grad_norm": 4.456034183502197, "learning_rate": 2.2790442868480545e-06, "loss": 0.03717396, "memory(GiB)": 13.7, "step": 74555, "train_speed(iter/s)": 1.529795 }, { "acc": 0.99229164, "epoch": 34.94726974455121, "grad_norm": 3.5145504474639893, "learning_rate": 2.2783941087753785e-06, "loss": 0.03328907, "memory(GiB)": 13.7, "step": 74560, "train_speed(iter/s)": 1.529793 }, { "acc": 0.9755209, "epoch": 34.94961331146004, "grad_norm": 1.4770458936691284, "learning_rate": 2.277743996133187e-06, "loss": 0.04434514, "memory(GiB)": 13.7, "step": 74565, "train_speed(iter/s)": 1.529801 }, { "acc": 0.9839489, "epoch": 34.951956878368875, "grad_norm": 1.5233839750289917, "learning_rate": 2.277093948937104e-06, "loss": 0.05305552, "memory(GiB)": 13.7, "step": 74570, "train_speed(iter/s)": 1.529806 }, { "acc": 0.98937492, "epoch": 34.95430044527771, "grad_norm": 1.1735953092575073, "learning_rate": 2.2764439672027546e-06, "loss": 0.03139082, "memory(GiB)": 13.7, "step": 74575, "train_speed(iter/s)": 1.529807 }, { "acc": 0.9746726, "epoch": 34.95664401218655, "grad_norm": 4.903630256652832, "learning_rate": 2.2757940509457653e-06, "loss": 0.04310668, "memory(GiB)": 13.7, "step": 74580, "train_speed(iter/s)": 1.529814 }, { "acc": 0.99020834, "epoch": 34.958987579095385, "grad_norm": 1.733252763748169, "learning_rate": 2.2751442001817536e-06, "loss": 0.02827401, "memory(GiB)": 13.7, "step": 74585, "train_speed(iter/s)": 1.529824 }, { "acc": 0.98217258, "epoch": 34.96133114600422, "grad_norm": 2.5536205768585205, "learning_rate": 2.274494414926344e-06, "loss": 0.07295884, "memory(GiB)": 13.7, "step": 74590, "train_speed(iter/s)": 1.529829 }, { "acc": 0.97099209, "epoch": 34.963674712913054, "grad_norm": 2.3143553733825684, "learning_rate": 2.2738446951951502e-06, "loss": 0.06944883, "memory(GiB)": 13.7, "step": 74595, "train_speed(iter/s)": 1.529835 }, { "acc": 0.98770828, "epoch": 34.96601827982189, "grad_norm": 0.01061464287340641, "learning_rate": 2.2731950410037932e-06, "loss": 0.02307893, "memory(GiB)": 13.7, "step": 74600, "train_speed(iter/s)": 1.529839 }, { "acc": 0.9894886, "epoch": 34.96836184673072, "grad_norm": 0.0029907950665801764, "learning_rate": 2.2725454523678892e-06, "loss": 0.04180292, "memory(GiB)": 13.7, "step": 74605, "train_speed(iter/s)": 1.529839 }, { "acc": 0.9864584, "epoch": 34.97070541363956, "grad_norm": 6.2269792556762695, "learning_rate": 2.2718959293030482e-06, "loss": 0.03931746, "memory(GiB)": 13.7, "step": 74610, "train_speed(iter/s)": 1.529839 }, { "acc": 0.98529758, "epoch": 34.97304898054839, "grad_norm": 5.820158004760742, "learning_rate": 2.2712464718248852e-06, "loss": 0.02696996, "memory(GiB)": 13.7, "step": 74615, "train_speed(iter/s)": 1.529837 }, { "acc": 0.98432541, "epoch": 34.97539254745723, "grad_norm": 2.1419339179992676, "learning_rate": 2.2705970799490128e-06, "loss": 0.04966917, "memory(GiB)": 13.7, "step": 74620, "train_speed(iter/s)": 1.52984 }, { "acc": 0.97448864, "epoch": 34.97773611436607, "grad_norm": 3.147407293319702, "learning_rate": 2.2699477536910357e-06, "loss": 0.05672886, "memory(GiB)": 13.7, "step": 74625, "train_speed(iter/s)": 1.529845 }, { "acc": 0.99056549, "epoch": 34.9800796812749, "grad_norm": 4.00111198425293, "learning_rate": 2.269298493066566e-06, "loss": 0.03523985, "memory(GiB)": 13.7, "step": 74630, "train_speed(iter/s)": 1.529842 }, { "acc": 0.97559528, "epoch": 34.982423248183736, "grad_norm": 5.009132385253906, "learning_rate": 2.268649298091207e-06, "loss": 0.09476833, "memory(GiB)": 13.7, "step": 74635, "train_speed(iter/s)": 1.529843 }, { "acc": 0.9791667, "epoch": 34.98476681509257, "grad_norm": 3.4695987701416016, "learning_rate": 2.2680001687805645e-06, "loss": 0.03450191, "memory(GiB)": 13.7, "step": 74640, "train_speed(iter/s)": 1.529846 }, { "acc": 0.98245039, "epoch": 34.987110382001404, "grad_norm": 2.9285757541656494, "learning_rate": 2.26735110515024e-06, "loss": 0.06285322, "memory(GiB)": 13.7, "step": 74645, "train_speed(iter/s)": 1.529843 }, { "acc": 0.9802084, "epoch": 34.98945394891024, "grad_norm": 2.2146072387695312, "learning_rate": 2.266702107215836e-06, "loss": 0.05483613, "memory(GiB)": 13.7, "step": 74650, "train_speed(iter/s)": 1.52984 }, { "acc": 0.99125004, "epoch": 34.99179751581908, "grad_norm": 3.5889532566070557, "learning_rate": 2.2660531749929534e-06, "loss": 0.03560774, "memory(GiB)": 13.7, "step": 74655, "train_speed(iter/s)": 1.529843 }, { "acc": 0.97020836, "epoch": 34.994141082727914, "grad_norm": 5.3101983070373535, "learning_rate": 2.2654043084971875e-06, "loss": 0.11891788, "memory(GiB)": 13.7, "step": 74660, "train_speed(iter/s)": 1.529851 }, { "acc": 0.984375, "epoch": 34.99648464963675, "grad_norm": 0.8378571271896362, "learning_rate": 2.264755507744136e-06, "loss": 0.03089389, "memory(GiB)": 13.7, "step": 74665, "train_speed(iter/s)": 1.529856 }, { "acc": 0.98091717, "epoch": 34.99882821654558, "grad_norm": 1.5306251049041748, "learning_rate": 2.264106772749395e-06, "loss": 0.0484454, "memory(GiB)": 13.7, "step": 74670, "train_speed(iter/s)": 1.529857 }, { "acc": 0.98321428, "epoch": 35.00117178345442, "grad_norm": 0.8979211449623108, "learning_rate": 2.2634581035285593e-06, "loss": 0.05303615, "memory(GiB)": 13.7, "step": 74675, "train_speed(iter/s)": 1.529837 }, { "acc": 0.97840281, "epoch": 35.00351535036325, "grad_norm": 4.628819465637207, "learning_rate": 2.2628095000972185e-06, "loss": 0.06260253, "memory(GiB)": 13.7, "step": 74680, "train_speed(iter/s)": 1.529841 }, { "acc": 0.98479176, "epoch": 35.005858917272086, "grad_norm": 0.012001430615782738, "learning_rate": 2.262160962470962e-06, "loss": 0.03591084, "memory(GiB)": 13.7, "step": 74685, "train_speed(iter/s)": 1.529846 }, { "acc": 0.98443184, "epoch": 35.00820248418092, "grad_norm": 5.604336738586426, "learning_rate": 2.26151249066538e-06, "loss": 0.0397235, "memory(GiB)": 13.7, "step": 74690, "train_speed(iter/s)": 1.52985 }, { "acc": 0.9802083, "epoch": 35.01054605108976, "grad_norm": 3.238840103149414, "learning_rate": 2.2608640846960607e-06, "loss": 0.05378186, "memory(GiB)": 13.7, "step": 74695, "train_speed(iter/s)": 1.529853 }, { "acc": 0.98032198, "epoch": 35.012889617998596, "grad_norm": 4.349214553833008, "learning_rate": 2.2602157445785865e-06, "loss": 0.06166015, "memory(GiB)": 13.7, "step": 74700, "train_speed(iter/s)": 1.52986 }, { "acc": 0.97302074, "epoch": 35.01523318490743, "grad_norm": 0.9179095029830933, "learning_rate": 2.2595674703285436e-06, "loss": 0.05758429, "memory(GiB)": 13.7, "step": 74705, "train_speed(iter/s)": 1.52986 }, { "acc": 0.9802083, "epoch": 35.017576751816264, "grad_norm": 3.1834187507629395, "learning_rate": 2.258919261961516e-06, "loss": 0.03814376, "memory(GiB)": 13.7, "step": 74710, "train_speed(iter/s)": 1.529872 }, { "acc": 0.97952461, "epoch": 35.0199203187251, "grad_norm": 2.561899423599243, "learning_rate": 2.2582711194930824e-06, "loss": 0.03364795, "memory(GiB)": 13.7, "step": 74715, "train_speed(iter/s)": 1.529877 }, { "acc": 0.99666672, "epoch": 35.02226388563393, "grad_norm": 1.8111485242843628, "learning_rate": 2.25762304293882e-06, "loss": 0.02841839, "memory(GiB)": 13.7, "step": 74720, "train_speed(iter/s)": 1.529875 }, { "acc": 0.99092264, "epoch": 35.02460745254277, "grad_norm": 2.547469139099121, "learning_rate": 2.256975032314309e-06, "loss": 0.02430843, "memory(GiB)": 13.7, "step": 74725, "train_speed(iter/s)": 1.529873 }, { "acc": 0.98258934, "epoch": 35.02695101945161, "grad_norm": 3.766857385635376, "learning_rate": 2.2563270876351273e-06, "loss": 0.05597723, "memory(GiB)": 13.7, "step": 74730, "train_speed(iter/s)": 1.529877 }, { "acc": 0.97279758, "epoch": 35.02929458636044, "grad_norm": 4.1954827308654785, "learning_rate": 2.255679208916845e-06, "loss": 0.0576036, "memory(GiB)": 13.7, "step": 74735, "train_speed(iter/s)": 1.52988 }, { "acc": 0.98609848, "epoch": 35.03163815326928, "grad_norm": 2.5095620155334473, "learning_rate": 2.255031396175036e-06, "loss": 0.02806877, "memory(GiB)": 13.7, "step": 74740, "train_speed(iter/s)": 1.529884 }, { "acc": 0.98739586, "epoch": 35.03398172017811, "grad_norm": 3.084750175476074, "learning_rate": 2.254383649425274e-06, "loss": 0.0335903, "memory(GiB)": 13.7, "step": 74745, "train_speed(iter/s)": 1.529888 }, { "acc": 0.9916667, "epoch": 35.036325287086946, "grad_norm": 1.484751582145691, "learning_rate": 2.2537359686831284e-06, "loss": 0.05669591, "memory(GiB)": 13.7, "step": 74750, "train_speed(iter/s)": 1.529893 }, { "acc": 0.975, "epoch": 35.03866885399578, "grad_norm": 8.414881706237793, "learning_rate": 2.253088353964165e-06, "loss": 0.03473842, "memory(GiB)": 13.7, "step": 74755, "train_speed(iter/s)": 1.529899 }, { "acc": 0.99211807, "epoch": 35.041012420904615, "grad_norm": 0.010690203867852688, "learning_rate": 2.252440805283953e-06, "loss": 0.02995762, "memory(GiB)": 13.7, "step": 74760, "train_speed(iter/s)": 1.529905 }, { "acc": 0.9885417, "epoch": 35.04335598781345, "grad_norm": 5.932562351226807, "learning_rate": 2.2517933226580543e-06, "loss": 0.03580412, "memory(GiB)": 13.7, "step": 74765, "train_speed(iter/s)": 1.529902 }, { "acc": 0.9895833, "epoch": 35.04569955472229, "grad_norm": 2.50040602684021, "learning_rate": 2.2511459061020364e-06, "loss": 0.01555406, "memory(GiB)": 13.7, "step": 74770, "train_speed(iter/s)": 1.529905 }, { "acc": 0.99125004, "epoch": 35.048043121631125, "grad_norm": 2.343088150024414, "learning_rate": 2.250498555631456e-06, "loss": 0.06220372, "memory(GiB)": 13.7, "step": 74775, "train_speed(iter/s)": 1.529904 }, { "acc": 0.99392357, "epoch": 35.05038668853996, "grad_norm": 2.4481611251831055, "learning_rate": 2.2498512712618757e-06, "loss": 0.02929001, "memory(GiB)": 13.7, "step": 74780, "train_speed(iter/s)": 1.529907 }, { "acc": 0.99937496, "epoch": 35.05273025544879, "grad_norm": 0.3212103843688965, "learning_rate": 2.2492040530088568e-06, "loss": 0.0177009, "memory(GiB)": 13.7, "step": 74785, "train_speed(iter/s)": 1.529914 }, { "acc": 0.98291664, "epoch": 35.05507382235763, "grad_norm": 0.8076342344284058, "learning_rate": 2.2485569008879517e-06, "loss": 0.03467826, "memory(GiB)": 13.7, "step": 74790, "train_speed(iter/s)": 1.529918 }, { "acc": 0.98562498, "epoch": 35.05741738926646, "grad_norm": 3.700259208679199, "learning_rate": 2.2479098149147176e-06, "loss": 0.02776833, "memory(GiB)": 13.7, "step": 74795, "train_speed(iter/s)": 1.529922 }, { "acc": 0.97299671, "epoch": 35.059760956175296, "grad_norm": 3.891148090362549, "learning_rate": 2.2472627951047105e-06, "loss": 0.0653038, "memory(GiB)": 13.7, "step": 74800, "train_speed(iter/s)": 1.529925 }, { "acc": 0.99017859, "epoch": 35.06210452308414, "grad_norm": 4.75426721572876, "learning_rate": 2.2466158414734787e-06, "loss": 0.02334988, "memory(GiB)": 13.7, "step": 74805, "train_speed(iter/s)": 1.529927 }, { "acc": 0.9895834, "epoch": 35.06444808999297, "grad_norm": 2.212864398956299, "learning_rate": 2.2459689540365774e-06, "loss": 0.01666344, "memory(GiB)": 13.7, "step": 74810, "train_speed(iter/s)": 1.529928 }, { "acc": 0.99930553, "epoch": 35.066791656901806, "grad_norm": 2.069230318069458, "learning_rate": 2.2453221328095506e-06, "loss": 0.01355357, "memory(GiB)": 13.7, "step": 74815, "train_speed(iter/s)": 1.529932 }, { "acc": 0.98125954, "epoch": 35.06913522381064, "grad_norm": 4.566405773162842, "learning_rate": 2.244675377807949e-06, "loss": 0.05349723, "memory(GiB)": 13.7, "step": 74820, "train_speed(iter/s)": 1.52993 }, { "acc": 0.99750004, "epoch": 35.071478790719475, "grad_norm": 1.083427906036377, "learning_rate": 2.244028689047319e-06, "loss": 0.01516679, "memory(GiB)": 13.7, "step": 74825, "train_speed(iter/s)": 1.529931 }, { "acc": 0.98135414, "epoch": 35.07382235762831, "grad_norm": 5.403359889984131, "learning_rate": 2.2433820665432013e-06, "loss": 0.05607179, "memory(GiB)": 13.7, "step": 74830, "train_speed(iter/s)": 1.529938 }, { "acc": 0.99363098, "epoch": 35.07616592453714, "grad_norm": 3.352674961090088, "learning_rate": 2.242735510311142e-06, "loss": 0.03823051, "memory(GiB)": 13.7, "step": 74835, "train_speed(iter/s)": 1.52994 }, { "acc": 0.9822916, "epoch": 35.07850949144598, "grad_norm": 5.284857749938965, "learning_rate": 2.242089020366682e-06, "loss": 0.04834355, "memory(GiB)": 13.7, "step": 74840, "train_speed(iter/s)": 1.52994 }, { "acc": 0.97979164, "epoch": 35.08085305835482, "grad_norm": 4.632091045379639, "learning_rate": 2.2414425967253602e-06, "loss": 0.07557632, "memory(GiB)": 13.7, "step": 74845, "train_speed(iter/s)": 1.529941 }, { "acc": 0.97520838, "epoch": 35.08319662526365, "grad_norm": 2.2705986499786377, "learning_rate": 2.2407962394027123e-06, "loss": 0.05866462, "memory(GiB)": 13.7, "step": 74850, "train_speed(iter/s)": 1.529945 }, { "acc": 0.98475876, "epoch": 35.08554019217249, "grad_norm": 9.531005859375, "learning_rate": 2.2401499484142765e-06, "loss": 0.09833871, "memory(GiB)": 13.7, "step": 74855, "train_speed(iter/s)": 1.529947 }, { "acc": 0.99125004, "epoch": 35.08788375908132, "grad_norm": 0.03791089355945587, "learning_rate": 2.239503723775589e-06, "loss": 0.02466626, "memory(GiB)": 13.7, "step": 74860, "train_speed(iter/s)": 1.529947 }, { "acc": 0.98625002, "epoch": 35.090227325990156, "grad_norm": 2.235945463180542, "learning_rate": 2.23885756550218e-06, "loss": 0.03537661, "memory(GiB)": 13.7, "step": 74865, "train_speed(iter/s)": 1.529952 }, { "acc": 0.98125, "epoch": 35.09257089289899, "grad_norm": 4.376235008239746, "learning_rate": 2.2382114736095825e-06, "loss": 0.02854881, "memory(GiB)": 13.7, "step": 74870, "train_speed(iter/s)": 1.529953 }, { "acc": 0.97946434, "epoch": 35.094914459807825, "grad_norm": 5.1866984367370605, "learning_rate": 2.237565448113328e-06, "loss": 0.03686916, "memory(GiB)": 13.7, "step": 74875, "train_speed(iter/s)": 1.529954 }, { "acc": 0.98125, "epoch": 35.097258026716666, "grad_norm": 0.9300395250320435, "learning_rate": 2.236919489028941e-06, "loss": 0.03297971, "memory(GiB)": 13.7, "step": 74880, "train_speed(iter/s)": 1.529962 }, { "acc": 0.9734127, "epoch": 35.0996015936255, "grad_norm": 3.01790189743042, "learning_rate": 2.236273596371953e-06, "loss": 0.07902988, "memory(GiB)": 13.7, "step": 74885, "train_speed(iter/s)": 1.529969 }, { "acc": 0.9916667, "epoch": 35.101945160534335, "grad_norm": 3.786306619644165, "learning_rate": 2.235627770157884e-06, "loss": 0.02414688, "memory(GiB)": 13.7, "step": 74890, "train_speed(iter/s)": 1.529974 }, { "acc": 0.97998505, "epoch": 35.10428872744317, "grad_norm": 4.974720478057861, "learning_rate": 2.2349820104022606e-06, "loss": 0.06883913, "memory(GiB)": 13.7, "step": 74895, "train_speed(iter/s)": 1.529977 }, { "acc": 0.990625, "epoch": 35.106632294352, "grad_norm": 0.004914848133921623, "learning_rate": 2.2343363171206055e-06, "loss": 0.02949499, "memory(GiB)": 13.7, "step": 74900, "train_speed(iter/s)": 1.529981 }, { "acc": 0.9927084, "epoch": 35.10897586126084, "grad_norm": 1.877107858657837, "learning_rate": 2.233690690328437e-06, "loss": 0.02782816, "memory(GiB)": 13.7, "step": 74905, "train_speed(iter/s)": 1.52998 }, { "acc": 0.9848011, "epoch": 35.11131942816967, "grad_norm": 4.776147842407227, "learning_rate": 2.233045130041274e-06, "loss": 0.0498785, "memory(GiB)": 13.7, "step": 74910, "train_speed(iter/s)": 1.52998 }, { "acc": 0.98812504, "epoch": 35.113662995078506, "grad_norm": 4.2451066970825195, "learning_rate": 2.232399636274636e-06, "loss": 0.04580418, "memory(GiB)": 13.7, "step": 74915, "train_speed(iter/s)": 1.529988 }, { "acc": 0.97416668, "epoch": 35.11600656198735, "grad_norm": 0.9636116623878479, "learning_rate": 2.231754209044035e-06, "loss": 0.08847464, "memory(GiB)": 13.7, "step": 74920, "train_speed(iter/s)": 1.529986 }, { "acc": 0.98488274, "epoch": 35.11835012889618, "grad_norm": 3.232236862182617, "learning_rate": 2.2311088483649882e-06, "loss": 0.06032979, "memory(GiB)": 13.7, "step": 74925, "train_speed(iter/s)": 1.529991 }, { "acc": 0.9890625, "epoch": 35.120693695805016, "grad_norm": 2.0709123611450195, "learning_rate": 2.2304635542530056e-06, "loss": 0.03164345, "memory(GiB)": 13.7, "step": 74930, "train_speed(iter/s)": 1.529998 }, { "acc": 0.98708334, "epoch": 35.12303726271385, "grad_norm": 4.147565841674805, "learning_rate": 2.2298183267236e-06, "loss": 0.03324478, "memory(GiB)": 13.7, "step": 74935, "train_speed(iter/s)": 1.530002 }, { "acc": 0.98482141, "epoch": 35.125380829622685, "grad_norm": 1.8326082229614258, "learning_rate": 2.229173165792278e-06, "loss": 0.05419874, "memory(GiB)": 13.7, "step": 74940, "train_speed(iter/s)": 1.530007 }, { "acc": 0.98056545, "epoch": 35.12772439653152, "grad_norm": 3.5480473041534424, "learning_rate": 2.2285280714745487e-06, "loss": 0.04949775, "memory(GiB)": 13.7, "step": 74945, "train_speed(iter/s)": 1.530011 }, { "acc": 0.9885417, "epoch": 35.130067963440354, "grad_norm": 1.4476085901260376, "learning_rate": 2.2278830437859177e-06, "loss": 0.02510939, "memory(GiB)": 13.7, "step": 74950, "train_speed(iter/s)": 1.530016 }, { "acc": 0.98553028, "epoch": 35.13241153034919, "grad_norm": 2.500744581222534, "learning_rate": 2.227238082741891e-06, "loss": 0.06897165, "memory(GiB)": 13.7, "step": 74955, "train_speed(iter/s)": 1.530014 }, { "acc": 0.98668652, "epoch": 35.13475509725803, "grad_norm": 3.5310351848602295, "learning_rate": 2.226593188357969e-06, "loss": 0.07196735, "memory(GiB)": 13.7, "step": 74960, "train_speed(iter/s)": 1.530021 }, { "acc": 0.98918648, "epoch": 35.137098664166864, "grad_norm": 1.645896553993225, "learning_rate": 2.2259483606496533e-06, "loss": 0.0477197, "memory(GiB)": 13.7, "step": 74965, "train_speed(iter/s)": 1.530024 }, { "acc": 0.97927084, "epoch": 35.1394422310757, "grad_norm": 4.454467296600342, "learning_rate": 2.2253035996324453e-06, "loss": 0.03850893, "memory(GiB)": 13.7, "step": 74970, "train_speed(iter/s)": 1.530027 }, { "acc": 0.97312508, "epoch": 35.14178579798453, "grad_norm": 4.862399101257324, "learning_rate": 2.224658905321842e-06, "loss": 0.04344419, "memory(GiB)": 13.7, "step": 74975, "train_speed(iter/s)": 1.530032 }, { "acc": 0.98874998, "epoch": 35.14412936489337, "grad_norm": 0.0015509281074628234, "learning_rate": 2.2240142777333376e-06, "loss": 0.02997994, "memory(GiB)": 13.7, "step": 74980, "train_speed(iter/s)": 1.530033 }, { "acc": 0.990625, "epoch": 35.1464729318022, "grad_norm": 0.006764412857592106, "learning_rate": 2.223369716882428e-06, "loss": 0.01943877, "memory(GiB)": 13.7, "step": 74985, "train_speed(iter/s)": 1.530033 }, { "acc": 0.97145834, "epoch": 35.148816498711035, "grad_norm": 6.478753566741943, "learning_rate": 2.222725222784609e-06, "loss": 0.04295662, "memory(GiB)": 13.7, "step": 74990, "train_speed(iter/s)": 1.530035 }, { "acc": 0.97666664, "epoch": 35.15116006561988, "grad_norm": 2.802558183670044, "learning_rate": 2.222080795455368e-06, "loss": 0.0707146, "memory(GiB)": 13.7, "step": 74995, "train_speed(iter/s)": 1.530037 }, { "acc": 0.97562504, "epoch": 35.15350363252871, "grad_norm": 2.413238763809204, "learning_rate": 2.2214364349101973e-06, "loss": 0.04511445, "memory(GiB)": 13.7, "step": 75000, "train_speed(iter/s)": 1.530041 }, { "acc": 0.98187504, "epoch": 35.155847199437545, "grad_norm": 5.353327751159668, "learning_rate": 2.2207921411645854e-06, "loss": 0.0510084, "memory(GiB)": 13.7, "step": 75005, "train_speed(iter/s)": 1.53005 }, { "acc": 0.98472586, "epoch": 35.15819076634638, "grad_norm": 3.765205144882202, "learning_rate": 2.220147914234019e-06, "loss": 0.03059868, "memory(GiB)": 13.7, "step": 75010, "train_speed(iter/s)": 1.530046 }, { "acc": 0.98737183, "epoch": 35.160534333255214, "grad_norm": 1.3067138195037842, "learning_rate": 2.2195037541339815e-06, "loss": 0.03674939, "memory(GiB)": 13.7, "step": 75015, "train_speed(iter/s)": 1.530049 }, { "acc": 0.98312502, "epoch": 35.16287790016405, "grad_norm": 5.073030948638916, "learning_rate": 2.218859660879957e-06, "loss": 0.03908026, "memory(GiB)": 13.7, "step": 75020, "train_speed(iter/s)": 1.53005 }, { "acc": 0.9958334, "epoch": 35.16522146707288, "grad_norm": 1.0678564310073853, "learning_rate": 2.2182156344874276e-06, "loss": 0.01757664, "memory(GiB)": 13.7, "step": 75025, "train_speed(iter/s)": 1.530049 }, { "acc": 0.98195515, "epoch": 35.16756503398172, "grad_norm": 2.5373284816741943, "learning_rate": 2.217571674971876e-06, "loss": 0.02714232, "memory(GiB)": 13.7, "step": 75030, "train_speed(iter/s)": 1.530048 }, { "acc": 0.97738094, "epoch": 35.16990860089056, "grad_norm": 3.9660282135009766, "learning_rate": 2.2169277823487765e-06, "loss": 0.05992113, "memory(GiB)": 13.7, "step": 75035, "train_speed(iter/s)": 1.530049 }, { "acc": 0.98979168, "epoch": 35.17225216779939, "grad_norm": 3.5904340744018555, "learning_rate": 2.2162839566336087e-06, "loss": 0.03120524, "memory(GiB)": 13.7, "step": 75040, "train_speed(iter/s)": 1.53005 }, { "acc": 0.96999998, "epoch": 35.17459573470823, "grad_norm": 2.1986982822418213, "learning_rate": 2.215640197841849e-06, "loss": 0.05237941, "memory(GiB)": 13.7, "step": 75045, "train_speed(iter/s)": 1.530054 }, { "acc": 0.99508934, "epoch": 35.17693930161706, "grad_norm": 2.189168691635132, "learning_rate": 2.2149965059889706e-06, "loss": 0.02733425, "memory(GiB)": 13.7, "step": 75050, "train_speed(iter/s)": 1.530053 }, { "acc": 0.97320509, "epoch": 35.179282868525895, "grad_norm": 5.903890132904053, "learning_rate": 2.214352881090443e-06, "loss": 0.07011282, "memory(GiB)": 13.7, "step": 75055, "train_speed(iter/s)": 1.530053 }, { "acc": 0.97461309, "epoch": 35.18162643543473, "grad_norm": 4.641384124755859, "learning_rate": 2.213709323161739e-06, "loss": 0.05556, "memory(GiB)": 13.7, "step": 75060, "train_speed(iter/s)": 1.530056 }, { "acc": 0.96729164, "epoch": 35.183970002343564, "grad_norm": 6.682961940765381, "learning_rate": 2.2130658322183293e-06, "loss": 0.05661613, "memory(GiB)": 13.7, "step": 75065, "train_speed(iter/s)": 1.530058 }, { "acc": 0.9755209, "epoch": 35.186313569252405, "grad_norm": 3.029130458831787, "learning_rate": 2.212422408275678e-06, "loss": 0.06085427, "memory(GiB)": 13.7, "step": 75070, "train_speed(iter/s)": 1.530063 }, { "acc": 0.98666668, "epoch": 35.18865713616124, "grad_norm": 2.8175129890441895, "learning_rate": 2.211779051349252e-06, "loss": 0.02802632, "memory(GiB)": 13.7, "step": 75075, "train_speed(iter/s)": 1.530061 }, { "acc": 0.97758923, "epoch": 35.191000703070074, "grad_norm": 3.1416444778442383, "learning_rate": 2.211135761454518e-06, "loss": 0.06499621, "memory(GiB)": 13.7, "step": 75080, "train_speed(iter/s)": 1.53006 }, { "acc": 0.98363094, "epoch": 35.19334426997891, "grad_norm": 3.997394323348999, "learning_rate": 2.210492538606935e-06, "loss": 0.02718503, "memory(GiB)": 13.7, "step": 75085, "train_speed(iter/s)": 1.53006 }, { "acc": 0.9791666, "epoch": 35.19568783688774, "grad_norm": 2.297175407409668, "learning_rate": 2.2098493828219648e-06, "loss": 0.06094186, "memory(GiB)": 13.7, "step": 75090, "train_speed(iter/s)": 1.530065 }, { "acc": 0.98583336, "epoch": 35.19803140379658, "grad_norm": 0.08899639546871185, "learning_rate": 2.20920629411507e-06, "loss": 0.02694219, "memory(GiB)": 13.7, "step": 75095, "train_speed(iter/s)": 1.530068 }, { "acc": 1.0, "epoch": 35.20037497070541, "grad_norm": 0.8975445032119751, "learning_rate": 2.2085632725017036e-06, "loss": 0.02538846, "memory(GiB)": 13.7, "step": 75100, "train_speed(iter/s)": 1.530066 }, { "acc": 0.98165188, "epoch": 35.202718537614246, "grad_norm": 3.959817409515381, "learning_rate": 2.2079203179973258e-06, "loss": 0.06566591, "memory(GiB)": 13.7, "step": 75105, "train_speed(iter/s)": 1.530064 }, { "acc": 0.9864584, "epoch": 35.20506210452309, "grad_norm": 4.8240065574646, "learning_rate": 2.2072774306173876e-06, "loss": 0.04135993, "memory(GiB)": 13.7, "step": 75110, "train_speed(iter/s)": 1.530063 }, { "acc": 0.97106323, "epoch": 35.20740567143192, "grad_norm": 6.753756046295166, "learning_rate": 2.2066346103773436e-06, "loss": 0.08581409, "memory(GiB)": 13.7, "step": 75115, "train_speed(iter/s)": 1.530063 }, { "acc": 0.9953125, "epoch": 35.209749238340756, "grad_norm": 2.0785090923309326, "learning_rate": 2.205991857292646e-06, "loss": 0.01878919, "memory(GiB)": 13.7, "step": 75120, "train_speed(iter/s)": 1.530069 }, { "acc": 0.97758932, "epoch": 35.21209280524959, "grad_norm": 8.720425605773926, "learning_rate": 2.2053491713787424e-06, "loss": 0.06834525, "memory(GiB)": 13.7, "step": 75125, "train_speed(iter/s)": 1.530079 }, { "acc": 0.97654762, "epoch": 35.214436372158424, "grad_norm": 6.7236833572387695, "learning_rate": 2.2047065526510815e-06, "loss": 0.05770599, "memory(GiB)": 13.7, "step": 75130, "train_speed(iter/s)": 1.530074 }, { "acc": 0.99291668, "epoch": 35.21677993906726, "grad_norm": 1.102698802947998, "learning_rate": 2.2040640011251122e-06, "loss": 0.02362258, "memory(GiB)": 13.7, "step": 75135, "train_speed(iter/s)": 1.530073 }, { "acc": 0.98819447, "epoch": 35.21912350597609, "grad_norm": 4.459134101867676, "learning_rate": 2.2034215168162772e-06, "loss": 0.04610445, "memory(GiB)": 13.7, "step": 75140, "train_speed(iter/s)": 1.530072 }, { "acc": 0.97297125, "epoch": 35.221467072884934, "grad_norm": 4.083747863769531, "learning_rate": 2.202779099740019e-06, "loss": 0.10688539, "memory(GiB)": 13.7, "step": 75145, "train_speed(iter/s)": 1.530072 }, { "acc": 0.98291664, "epoch": 35.22381063979377, "grad_norm": 3.3480074405670166, "learning_rate": 2.2021367499117783e-06, "loss": 0.06732165, "memory(GiB)": 13.7, "step": 75150, "train_speed(iter/s)": 1.530073 }, { "acc": 0.98363094, "epoch": 35.2261542067026, "grad_norm": 0.009998851455748081, "learning_rate": 2.2014944673469997e-06, "loss": 0.04012614, "memory(GiB)": 13.7, "step": 75155, "train_speed(iter/s)": 1.530081 }, { "acc": 0.97964745, "epoch": 35.22849777361144, "grad_norm": 5.372348308563232, "learning_rate": 2.200852252061117e-06, "loss": 0.05118008, "memory(GiB)": 13.7, "step": 75160, "train_speed(iter/s)": 1.530082 }, { "acc": 0.97520828, "epoch": 35.23084134052027, "grad_norm": 4.968754768371582, "learning_rate": 2.200210104069568e-06, "loss": 0.07733029, "memory(GiB)": 13.7, "step": 75165, "train_speed(iter/s)": 1.530083 }, { "acc": 0.990625, "epoch": 35.233184907429106, "grad_norm": 3.205674409866333, "learning_rate": 2.1995680233877884e-06, "loss": 0.04474045, "memory(GiB)": 13.7, "step": 75170, "train_speed(iter/s)": 1.530084 }, { "acc": 0.97458324, "epoch": 35.23552847433794, "grad_norm": 2.847830295562744, "learning_rate": 2.1989260100312135e-06, "loss": 0.04475112, "memory(GiB)": 13.7, "step": 75175, "train_speed(iter/s)": 1.53009 }, { "acc": 0.97811956, "epoch": 35.237872041246774, "grad_norm": 1.56538724899292, "learning_rate": 2.1982840640152737e-06, "loss": 0.05112, "memory(GiB)": 13.7, "step": 75180, "train_speed(iter/s)": 1.530105 }, { "acc": 0.98708334, "epoch": 35.240215608155616, "grad_norm": 5.526215553283691, "learning_rate": 2.1976421853553976e-06, "loss": 0.08991616, "memory(GiB)": 13.7, "step": 75185, "train_speed(iter/s)": 1.530102 }, { "acc": 0.9854167, "epoch": 35.24255917506445, "grad_norm": 0.6012847423553467, "learning_rate": 2.1970003740670145e-06, "loss": 0.03040002, "memory(GiB)": 13.7, "step": 75190, "train_speed(iter/s)": 1.530102 }, { "acc": 0.9864583, "epoch": 35.244902741973284, "grad_norm": 1.9467928409576416, "learning_rate": 2.196358630165555e-06, "loss": 0.04232761, "memory(GiB)": 13.7, "step": 75195, "train_speed(iter/s)": 1.530103 }, { "acc": 0.98623514, "epoch": 35.24724630888212, "grad_norm": 5.9168829917907715, "learning_rate": 2.1957169536664387e-06, "loss": 0.04417325, "memory(GiB)": 13.7, "step": 75200, "train_speed(iter/s)": 1.530106 }, { "acc": 0.98184528, "epoch": 35.24958987579095, "grad_norm": 1.6582692861557007, "learning_rate": 2.1950753445850934e-06, "loss": 0.03424149, "memory(GiB)": 13.7, "step": 75205, "train_speed(iter/s)": 1.530103 }, { "acc": 0.97354164, "epoch": 35.25193344269979, "grad_norm": 3.9932861328125, "learning_rate": 2.194433802936942e-06, "loss": 0.06100429, "memory(GiB)": 13.7, "step": 75210, "train_speed(iter/s)": 1.53011 }, { "acc": 0.9874053, "epoch": 35.25427700960862, "grad_norm": 3.3666884899139404, "learning_rate": 2.1937923287374012e-06, "loss": 0.03387269, "memory(GiB)": 13.7, "step": 75215, "train_speed(iter/s)": 1.530109 }, { "acc": 0.9833333, "epoch": 35.25662057651746, "grad_norm": 3.6812617778778076, "learning_rate": 2.1931509220018936e-06, "loss": 0.02986992, "memory(GiB)": 13.7, "step": 75220, "train_speed(iter/s)": 1.530114 }, { "acc": 0.99006729, "epoch": 35.2589641434263, "grad_norm": 4.976161479949951, "learning_rate": 2.1925095827458333e-06, "loss": 0.03315206, "memory(GiB)": 13.7, "step": 75225, "train_speed(iter/s)": 1.530118 }, { "acc": 0.978125, "epoch": 35.26130771033513, "grad_norm": 2.650520086288452, "learning_rate": 2.1918683109846383e-06, "loss": 0.04472387, "memory(GiB)": 13.7, "step": 75230, "train_speed(iter/s)": 1.530124 }, { "acc": 0.97588539, "epoch": 35.263651277243966, "grad_norm": 4.7811174392700195, "learning_rate": 2.191227106733723e-06, "loss": 0.0369126, "memory(GiB)": 13.7, "step": 75235, "train_speed(iter/s)": 1.530125 }, { "acc": 0.97709284, "epoch": 35.2659948441528, "grad_norm": 0.9528996348381042, "learning_rate": 2.1905859700084974e-06, "loss": 0.05004244, "memory(GiB)": 13.7, "step": 75240, "train_speed(iter/s)": 1.53013 }, { "acc": 0.98395834, "epoch": 35.268338411061634, "grad_norm": 4.163708209991455, "learning_rate": 2.1899449008243738e-06, "loss": 0.05987349, "memory(GiB)": 13.7, "step": 75245, "train_speed(iter/s)": 1.530134 }, { "acc": 0.990625, "epoch": 35.27068197797047, "grad_norm": 3.995224714279175, "learning_rate": 2.189303899196763e-06, "loss": 0.03270601, "memory(GiB)": 13.7, "step": 75250, "train_speed(iter/s)": 1.530139 }, { "acc": 0.984375, "epoch": 35.2730255448793, "grad_norm": 2.2985281944274902, "learning_rate": 2.1886629651410697e-06, "loss": 0.03895008, "memory(GiB)": 13.7, "step": 75255, "train_speed(iter/s)": 1.530137 }, { "acc": 0.98562508, "epoch": 35.275369111788144, "grad_norm": 0.2722225487232208, "learning_rate": 2.1880220986727025e-06, "loss": 0.0511108, "memory(GiB)": 13.7, "step": 75260, "train_speed(iter/s)": 1.530145 }, { "acc": 0.98923607, "epoch": 35.27771267869698, "grad_norm": 3.0087554454803467, "learning_rate": 2.1873812998070628e-06, "loss": 0.02521552, "memory(GiB)": 13.7, "step": 75265, "train_speed(iter/s)": 1.53015 }, { "acc": 0.97647705, "epoch": 35.28005624560581, "grad_norm": 1.7662383317947388, "learning_rate": 2.1867405685595567e-06, "loss": 0.04475323, "memory(GiB)": 13.7, "step": 75270, "train_speed(iter/s)": 1.530152 }, { "acc": 0.99732141, "epoch": 35.28239981251465, "grad_norm": 0.7380565404891968, "learning_rate": 2.1860999049455808e-06, "loss": 0.01236333, "memory(GiB)": 13.7, "step": 75275, "train_speed(iter/s)": 1.530159 }, { "acc": 0.9932292, "epoch": 35.28474337942348, "grad_norm": 2.1459014415740967, "learning_rate": 2.185459308980538e-06, "loss": 0.02499203, "memory(GiB)": 13.7, "step": 75280, "train_speed(iter/s)": 1.530165 }, { "acc": 0.98923607, "epoch": 35.287086946332316, "grad_norm": 6.3075852394104, "learning_rate": 2.1848187806798267e-06, "loss": 0.04533152, "memory(GiB)": 13.7, "step": 75285, "train_speed(iter/s)": 1.530169 }, { "acc": 0.98705359, "epoch": 35.28943051324115, "grad_norm": 0.22092802822589874, "learning_rate": 2.1841783200588403e-06, "loss": 0.05413461, "memory(GiB)": 13.7, "step": 75290, "train_speed(iter/s)": 1.530173 }, { "acc": 0.98361607, "epoch": 35.29177408014999, "grad_norm": 5.015791416168213, "learning_rate": 2.1835379271329744e-06, "loss": 0.03595371, "memory(GiB)": 13.7, "step": 75295, "train_speed(iter/s)": 1.530177 }, { "acc": 0.97029762, "epoch": 35.294117647058826, "grad_norm": 3.484518527984619, "learning_rate": 2.1828976019176246e-06, "loss": 0.05864155, "memory(GiB)": 13.7, "step": 75300, "train_speed(iter/s)": 1.530187 }, { "acc": 0.9927083, "epoch": 35.29646121396766, "grad_norm": 2.318718194961548, "learning_rate": 2.1822573444281774e-06, "loss": 0.02202611, "memory(GiB)": 13.7, "step": 75305, "train_speed(iter/s)": 1.530194 }, { "acc": 0.99208336, "epoch": 35.298804780876495, "grad_norm": 3.6799330711364746, "learning_rate": 2.1816171546800275e-06, "loss": 0.03555517, "memory(GiB)": 13.7, "step": 75310, "train_speed(iter/s)": 1.530197 }, { "acc": 0.98312492, "epoch": 35.30114834778533, "grad_norm": 3.032799243927002, "learning_rate": 2.180977032688558e-06, "loss": 0.03800505, "memory(GiB)": 13.7, "step": 75315, "train_speed(iter/s)": 1.530207 }, { "acc": 0.98458328, "epoch": 35.30349191469416, "grad_norm": 3.6084342002868652, "learning_rate": 2.180336978469158e-06, "loss": 0.03295968, "memory(GiB)": 13.7, "step": 75320, "train_speed(iter/s)": 1.53021 }, { "acc": 0.97763882, "epoch": 35.305835481603, "grad_norm": 3.6785290241241455, "learning_rate": 2.1796969920372137e-06, "loss": 0.08079046, "memory(GiB)": 13.7, "step": 75325, "train_speed(iter/s)": 1.53021 }, { "acc": 0.98895836, "epoch": 35.30817904851183, "grad_norm": 0.0093448581174016, "learning_rate": 2.179057073408105e-06, "loss": 0.03115823, "memory(GiB)": 13.7, "step": 75330, "train_speed(iter/s)": 1.530217 }, { "acc": 0.99375, "epoch": 35.31052261542067, "grad_norm": 0.5686988234519958, "learning_rate": 2.1784172225972156e-06, "loss": 0.0235598, "memory(GiB)": 13.7, "step": 75335, "train_speed(iter/s)": 1.530217 }, { "acc": 0.97997026, "epoch": 35.31286618232951, "grad_norm": 4.998884677886963, "learning_rate": 2.177777439619926e-06, "loss": 0.07054582, "memory(GiB)": 13.7, "step": 75340, "train_speed(iter/s)": 1.530224 }, { "acc": 0.9875, "epoch": 35.31520974923834, "grad_norm": 2.2701399326324463, "learning_rate": 2.1771377244916136e-06, "loss": 0.07059666, "memory(GiB)": 13.7, "step": 75345, "train_speed(iter/s)": 1.530227 }, { "acc": 0.98696423, "epoch": 35.317553316147176, "grad_norm": 2.9228429794311523, "learning_rate": 2.176498077227653e-06, "loss": 0.03102391, "memory(GiB)": 13.7, "step": 75350, "train_speed(iter/s)": 1.530231 }, { "acc": 0.99375, "epoch": 35.31989688305601, "grad_norm": 0.30400893092155457, "learning_rate": 2.175858497843421e-06, "loss": 0.01147261, "memory(GiB)": 13.7, "step": 75355, "train_speed(iter/s)": 1.530233 }, { "acc": 0.97859545, "epoch": 35.322240449964845, "grad_norm": 3.7621397972106934, "learning_rate": 2.175218986354293e-06, "loss": 0.06193637, "memory(GiB)": 13.7, "step": 75360, "train_speed(iter/s)": 1.530239 }, { "acc": 0.9854167, "epoch": 35.32458401687368, "grad_norm": 2.6225645542144775, "learning_rate": 2.1745795427756374e-06, "loss": 0.04872465, "memory(GiB)": 13.7, "step": 75365, "train_speed(iter/s)": 1.530239 }, { "acc": 0.98698864, "epoch": 35.32692758378252, "grad_norm": 3.736278533935547, "learning_rate": 2.173940167122825e-06, "loss": 0.02423583, "memory(GiB)": 13.7, "step": 75370, "train_speed(iter/s)": 1.530238 }, { "acc": 0.99375, "epoch": 35.329271150691355, "grad_norm": 2.824483633041382, "learning_rate": 2.1733008594112254e-06, "loss": 0.02808182, "memory(GiB)": 13.7, "step": 75375, "train_speed(iter/s)": 1.530238 }, { "acc": 0.9875, "epoch": 35.33161471760019, "grad_norm": 1.4983646869659424, "learning_rate": 2.1726616196562068e-06, "loss": 0.02139616, "memory(GiB)": 13.7, "step": 75380, "train_speed(iter/s)": 1.530233 }, { "acc": 0.990625, "epoch": 35.33395828450902, "grad_norm": 2.3214163780212402, "learning_rate": 2.172022447873133e-06, "loss": 0.03788965, "memory(GiB)": 13.7, "step": 75385, "train_speed(iter/s)": 1.530229 }, { "acc": 0.98239584, "epoch": 35.33630185141786, "grad_norm": 6.4122185707092285, "learning_rate": 2.1713833440773652e-06, "loss": 0.04184982, "memory(GiB)": 13.7, "step": 75390, "train_speed(iter/s)": 1.530233 }, { "acc": 0.9755209, "epoch": 35.33864541832669, "grad_norm": 5.90094518661499, "learning_rate": 2.1707443082842673e-06, "loss": 0.03690842, "memory(GiB)": 13.7, "step": 75395, "train_speed(iter/s)": 1.530235 }, { "acc": 0.98777781, "epoch": 35.340988985235526, "grad_norm": 0.6692397594451904, "learning_rate": 2.170105340509202e-06, "loss": 0.02978971, "memory(GiB)": 13.7, "step": 75400, "train_speed(iter/s)": 1.530241 }, { "acc": 0.9989584, "epoch": 35.34333255214436, "grad_norm": 0.9843375086784363, "learning_rate": 2.169466440767523e-06, "loss": 0.01301575, "memory(GiB)": 13.7, "step": 75405, "train_speed(iter/s)": 1.530244 }, { "acc": 0.98217258, "epoch": 35.3456761190532, "grad_norm": 2.2729053497314453, "learning_rate": 2.1688276090745905e-06, "loss": 0.04770386, "memory(GiB)": 13.7, "step": 75410, "train_speed(iter/s)": 1.530252 }, { "acc": 0.97354164, "epoch": 35.348019685962036, "grad_norm": 3.2836172580718994, "learning_rate": 2.168188845445762e-06, "loss": 0.06821187, "memory(GiB)": 13.7, "step": 75415, "train_speed(iter/s)": 1.530255 }, { "acc": 0.99571428, "epoch": 35.35036325287087, "grad_norm": 2.1704485416412354, "learning_rate": 2.1675501498963863e-06, "loss": 0.01438122, "memory(GiB)": 13.7, "step": 75420, "train_speed(iter/s)": 1.530256 }, { "acc": 0.990625, "epoch": 35.352706819779705, "grad_norm": 0.002286681206896901, "learning_rate": 2.1669115224418188e-06, "loss": 0.04713986, "memory(GiB)": 13.7, "step": 75425, "train_speed(iter/s)": 1.530257 }, { "acc": 0.96517859, "epoch": 35.35505038668854, "grad_norm": 3.6673667430877686, "learning_rate": 2.1662729630974103e-06, "loss": 0.15224057, "memory(GiB)": 13.7, "step": 75430, "train_speed(iter/s)": 1.530261 }, { "acc": 0.98125, "epoch": 35.357393953597374, "grad_norm": 0.488355815410614, "learning_rate": 2.1656344718785085e-06, "loss": 0.0343188, "memory(GiB)": 13.7, "step": 75435, "train_speed(iter/s)": 1.530263 }, { "acc": 0.9916667, "epoch": 35.35973752050621, "grad_norm": 2.806849479675293, "learning_rate": 2.1649960488004594e-06, "loss": 0.03628134, "memory(GiB)": 13.7, "step": 75440, "train_speed(iter/s)": 1.530265 }, { "acc": 0.9869791, "epoch": 35.36208108741504, "grad_norm": 2.2299647331237793, "learning_rate": 2.1643576938786096e-06, "loss": 0.06749546, "memory(GiB)": 13.7, "step": 75445, "train_speed(iter/s)": 1.530271 }, { "acc": 0.99750004, "epoch": 35.364424654323884, "grad_norm": 0.6329981684684753, "learning_rate": 2.163719407128304e-06, "loss": 0.01642022, "memory(GiB)": 13.7, "step": 75450, "train_speed(iter/s)": 1.530271 }, { "acc": 0.9947916, "epoch": 35.36676822123272, "grad_norm": 3.2226455211639404, "learning_rate": 2.1630811885648855e-06, "loss": 0.02076342, "memory(GiB)": 13.7, "step": 75455, "train_speed(iter/s)": 1.530273 }, { "acc": 0.98708334, "epoch": 35.36911178814155, "grad_norm": 5.283666133880615, "learning_rate": 2.1624430382036914e-06, "loss": 0.03554868, "memory(GiB)": 13.7, "step": 75460, "train_speed(iter/s)": 1.530276 }, { "acc": 0.97562504, "epoch": 35.37145535505039, "grad_norm": 7.858455181121826, "learning_rate": 2.161804956060064e-06, "loss": 0.05521671, "memory(GiB)": 13.7, "step": 75465, "train_speed(iter/s)": 1.530276 }, { "acc": 0.9875, "epoch": 35.37379892195922, "grad_norm": 0.0034424341283738613, "learning_rate": 2.1611669421493407e-06, "loss": 0.03063794, "memory(GiB)": 13.7, "step": 75470, "train_speed(iter/s)": 1.530281 }, { "acc": 0.9931345, "epoch": 35.376142488868055, "grad_norm": 3.485772132873535, "learning_rate": 2.160528996486857e-06, "loss": 0.03367287, "memory(GiB)": 13.7, "step": 75475, "train_speed(iter/s)": 1.530278 }, { "acc": 0.98354168, "epoch": 35.37848605577689, "grad_norm": 1.164145588874817, "learning_rate": 2.1598911190879435e-06, "loss": 0.03835056, "memory(GiB)": 13.7, "step": 75480, "train_speed(iter/s)": 1.530274 }, { "acc": 0.97880039, "epoch": 35.38082962268573, "grad_norm": 4.689613342285156, "learning_rate": 2.1592533099679356e-06, "loss": 0.05219474, "memory(GiB)": 13.7, "step": 75485, "train_speed(iter/s)": 1.530276 }, { "acc": 0.98822546, "epoch": 35.383173189594565, "grad_norm": 3.4998812675476074, "learning_rate": 2.1586155691421656e-06, "loss": 0.0234169, "memory(GiB)": 13.7, "step": 75490, "train_speed(iter/s)": 1.530276 }, { "acc": 0.9916667, "epoch": 35.3855167565034, "grad_norm": 7.117589950561523, "learning_rate": 2.1579778966259597e-06, "loss": 0.04293348, "memory(GiB)": 13.7, "step": 75495, "train_speed(iter/s)": 1.530276 }, { "acc": 0.98812504, "epoch": 35.387860323412234, "grad_norm": 4.0853352546691895, "learning_rate": 2.1573402924346458e-06, "loss": 0.03453229, "memory(GiB)": 13.7, "step": 75500, "train_speed(iter/s)": 1.530278 }, { "acc": 0.98812504, "epoch": 35.39020389032107, "grad_norm": 0.2395707219839096, "learning_rate": 2.1567027565835535e-06, "loss": 0.03373619, "memory(GiB)": 13.7, "step": 75505, "train_speed(iter/s)": 1.530283 }, { "acc": 1.0, "epoch": 35.3925474572299, "grad_norm": 0.13712447881698608, "learning_rate": 2.1560652890880022e-06, "loss": 0.00752177, "memory(GiB)": 13.7, "step": 75510, "train_speed(iter/s)": 1.530289 }, { "acc": 0.98500004, "epoch": 35.39489102413874, "grad_norm": 0.8159499764442444, "learning_rate": 2.1554278899633183e-06, "loss": 0.03851261, "memory(GiB)": 13.7, "step": 75515, "train_speed(iter/s)": 1.530289 }, { "acc": 0.99230118, "epoch": 35.39723459104757, "grad_norm": 1.7551891803741455, "learning_rate": 2.1547905592248198e-06, "loss": 0.03180959, "memory(GiB)": 13.7, "step": 75520, "train_speed(iter/s)": 1.530289 }, { "acc": 0.98909969, "epoch": 35.39957815795641, "grad_norm": 2.9930715560913086, "learning_rate": 2.154153296887827e-06, "loss": 0.03127948, "memory(GiB)": 13.7, "step": 75525, "train_speed(iter/s)": 1.530287 }, { "acc": 0.98738098, "epoch": 35.40192172486525, "grad_norm": 0.9536993503570557, "learning_rate": 2.1535161029676597e-06, "loss": 0.03987944, "memory(GiB)": 13.7, "step": 75530, "train_speed(iter/s)": 1.530288 }, { "acc": 0.97274799, "epoch": 35.40426529177408, "grad_norm": 7.4295244216918945, "learning_rate": 2.152878977479631e-06, "loss": 0.08522812, "memory(GiB)": 13.7, "step": 75535, "train_speed(iter/s)": 1.530294 }, { "acc": 0.98258934, "epoch": 35.406608858682915, "grad_norm": 3.6610002517700195, "learning_rate": 2.152241920439056e-06, "loss": 0.04438207, "memory(GiB)": 13.7, "step": 75540, "train_speed(iter/s)": 1.530301 }, { "acc": 0.98425598, "epoch": 35.40895242559175, "grad_norm": 3.0929698944091797, "learning_rate": 2.1516049318612496e-06, "loss": 0.03926928, "memory(GiB)": 13.7, "step": 75545, "train_speed(iter/s)": 1.530303 }, { "acc": 0.97666664, "epoch": 35.411295992500584, "grad_norm": 0.013270532712340355, "learning_rate": 2.1509680117615202e-06, "loss": 0.04380866, "memory(GiB)": 13.7, "step": 75550, "train_speed(iter/s)": 1.530302 }, { "acc": 0.98302078, "epoch": 35.41363955940942, "grad_norm": 3.0026068687438965, "learning_rate": 2.1503311601551798e-06, "loss": 0.03844619, "memory(GiB)": 13.7, "step": 75555, "train_speed(iter/s)": 1.530299 }, { "acc": 0.99125004, "epoch": 35.41598312631826, "grad_norm": 0.9690437316894531, "learning_rate": 2.1496943770575324e-06, "loss": 0.02141855, "memory(GiB)": 13.7, "step": 75560, "train_speed(iter/s)": 1.530302 }, { "acc": 0.98633928, "epoch": 35.418326693227094, "grad_norm": 0.05006545037031174, "learning_rate": 2.149057662483889e-06, "loss": 0.04277842, "memory(GiB)": 13.7, "step": 75565, "train_speed(iter/s)": 1.530313 }, { "acc": 0.99124994, "epoch": 35.42067026013593, "grad_norm": 1.3095356225967407, "learning_rate": 2.1484210164495497e-06, "loss": 0.02001619, "memory(GiB)": 13.7, "step": 75570, "train_speed(iter/s)": 1.530315 }, { "acc": 0.98738098, "epoch": 35.42301382704476, "grad_norm": 0.7792422771453857, "learning_rate": 2.1477844389698198e-06, "loss": 0.04263101, "memory(GiB)": 13.7, "step": 75575, "train_speed(iter/s)": 1.530319 }, { "acc": 0.98145828, "epoch": 35.4253573939536, "grad_norm": 0.9809358716011047, "learning_rate": 2.147147930060001e-06, "loss": 0.04357796, "memory(GiB)": 13.7, "step": 75580, "train_speed(iter/s)": 1.53032 }, { "acc": 0.96672125, "epoch": 35.42770096086243, "grad_norm": 4.223807334899902, "learning_rate": 2.1465114897353904e-06, "loss": 0.0963354, "memory(GiB)": 13.7, "step": 75585, "train_speed(iter/s)": 1.530321 }, { "acc": 0.98562498, "epoch": 35.430044527771265, "grad_norm": 0.019824976101517677, "learning_rate": 2.1458751180112882e-06, "loss": 0.03088482, "memory(GiB)": 13.7, "step": 75590, "train_speed(iter/s)": 1.530321 }, { "acc": 0.98604164, "epoch": 35.4323880946801, "grad_norm": 0.12920191884040833, "learning_rate": 2.145238814902991e-06, "loss": 0.03370688, "memory(GiB)": 13.7, "step": 75595, "train_speed(iter/s)": 1.530333 }, { "acc": 0.9895834, "epoch": 35.43473166158894, "grad_norm": 1.8031460046768188, "learning_rate": 2.144602580425791e-06, "loss": 0.03280954, "memory(GiB)": 13.7, "step": 75600, "train_speed(iter/s)": 1.530339 }, { "acc": 0.97361107, "epoch": 35.437075228497775, "grad_norm": 7.544613361358643, "learning_rate": 2.143966414594985e-06, "loss": 0.08813094, "memory(GiB)": 13.7, "step": 75605, "train_speed(iter/s)": 1.530338 }, { "acc": 0.990625, "epoch": 35.43941879540661, "grad_norm": 3.4036972522735596, "learning_rate": 2.1433303174258594e-06, "loss": 0.02583468, "memory(GiB)": 13.7, "step": 75610, "train_speed(iter/s)": 1.530343 }, { "acc": 0.97937498, "epoch": 35.441762362315444, "grad_norm": 6.097670555114746, "learning_rate": 2.1426942889337067e-06, "loss": 0.04188437, "memory(GiB)": 13.7, "step": 75615, "train_speed(iter/s)": 1.530346 }, { "acc": 0.98604164, "epoch": 35.44410592922428, "grad_norm": 3.1569955348968506, "learning_rate": 2.142058329133816e-06, "loss": 0.03407735, "memory(GiB)": 13.7, "step": 75620, "train_speed(iter/s)": 1.530354 }, { "acc": 0.96552086, "epoch": 35.44644949613311, "grad_norm": 6.278639316558838, "learning_rate": 2.141422438041471e-06, "loss": 0.04898188, "memory(GiB)": 13.7, "step": 75625, "train_speed(iter/s)": 1.530357 }, { "acc": 0.97633772, "epoch": 35.44879306304195, "grad_norm": 3.448613166809082, "learning_rate": 2.1407866156719577e-06, "loss": 0.07973604, "memory(GiB)": 13.7, "step": 75630, "train_speed(iter/s)": 1.530361 }, { "acc": 0.98812504, "epoch": 35.45113662995079, "grad_norm": 5.86044454574585, "learning_rate": 2.1401508620405607e-06, "loss": 0.03335306, "memory(GiB)": 13.7, "step": 75635, "train_speed(iter/s)": 1.530363 }, { "acc": 0.97241888, "epoch": 35.45348019685962, "grad_norm": 3.74501895904541, "learning_rate": 2.139515177162561e-06, "loss": 0.07998635, "memory(GiB)": 13.7, "step": 75640, "train_speed(iter/s)": 1.530366 }, { "acc": 0.97119045, "epoch": 35.45582376376846, "grad_norm": 3.8306007385253906, "learning_rate": 2.138879561053235e-06, "loss": 0.06961349, "memory(GiB)": 13.7, "step": 75645, "train_speed(iter/s)": 1.530368 }, { "acc": 0.97696495, "epoch": 35.45816733067729, "grad_norm": 5.366565227508545, "learning_rate": 2.1382440137278626e-06, "loss": 0.10120823, "memory(GiB)": 13.7, "step": 75650, "train_speed(iter/s)": 1.530375 }, { "acc": 0.97979164, "epoch": 35.460510897586126, "grad_norm": 4.4032721519470215, "learning_rate": 2.1376085352017216e-06, "loss": 0.06338809, "memory(GiB)": 13.7, "step": 75655, "train_speed(iter/s)": 1.530377 }, { "acc": 0.99750004, "epoch": 35.46285446449496, "grad_norm": 0.2405456304550171, "learning_rate": 2.136973125490088e-06, "loss": 0.02249294, "memory(GiB)": 13.7, "step": 75660, "train_speed(iter/s)": 1.530378 }, { "acc": 0.9958334, "epoch": 35.465198031403794, "grad_norm": 0.002457117196172476, "learning_rate": 2.1363377846082307e-06, "loss": 0.02442249, "memory(GiB)": 13.7, "step": 75665, "train_speed(iter/s)": 1.530381 }, { "acc": 0.990625, "epoch": 35.46754159831263, "grad_norm": 1.3051332235336304, "learning_rate": 2.135702512571424e-06, "loss": 0.03998954, "memory(GiB)": 13.7, "step": 75670, "train_speed(iter/s)": 1.530382 }, { "acc": 0.99305553, "epoch": 35.46988516522147, "grad_norm": 3.4917373657226562, "learning_rate": 2.135067309394939e-06, "loss": 0.02385016, "memory(GiB)": 13.7, "step": 75675, "train_speed(iter/s)": 1.530381 }, { "acc": 0.97247019, "epoch": 35.472228732130304, "grad_norm": 2.273833990097046, "learning_rate": 2.134432175094042e-06, "loss": 0.05389085, "memory(GiB)": 13.7, "step": 75680, "train_speed(iter/s)": 1.530388 }, { "acc": 0.9791667, "epoch": 35.47457229903914, "grad_norm": 3.0288949012756348, "learning_rate": 2.1337971096839984e-06, "loss": 0.057708, "memory(GiB)": 13.7, "step": 75685, "train_speed(iter/s)": 1.530393 }, { "acc": 0.98883934, "epoch": 35.47691586594797, "grad_norm": 3.479560613632202, "learning_rate": 2.1331621131800744e-06, "loss": 0.05543573, "memory(GiB)": 13.7, "step": 75690, "train_speed(iter/s)": 1.530395 }, { "acc": 0.9833334, "epoch": 35.47925943285681, "grad_norm": 3.049771785736084, "learning_rate": 2.1325271855975345e-06, "loss": 0.04616512, "memory(GiB)": 13.7, "step": 75695, "train_speed(iter/s)": 1.530396 }, { "acc": 0.9708333, "epoch": 35.48160299976564, "grad_norm": 0.015349173918366432, "learning_rate": 2.1318923269516375e-06, "loss": 0.03682624, "memory(GiB)": 13.7, "step": 75700, "train_speed(iter/s)": 1.530403 }, { "acc": 0.98708334, "epoch": 35.483946566674476, "grad_norm": 6.204432964324951, "learning_rate": 2.1312575372576444e-06, "loss": 0.05063329, "memory(GiB)": 13.7, "step": 75705, "train_speed(iter/s)": 1.530404 }, { "acc": 0.98425598, "epoch": 35.48629013358332, "grad_norm": 0.2542334496974945, "learning_rate": 2.1306228165308166e-06, "loss": 0.04746224, "memory(GiB)": 13.7, "step": 75710, "train_speed(iter/s)": 1.530403 }, { "acc": 0.98008928, "epoch": 35.48863370049215, "grad_norm": 2.2588062286376953, "learning_rate": 2.1299881647864055e-06, "loss": 0.04837348, "memory(GiB)": 13.7, "step": 75715, "train_speed(iter/s)": 1.530399 }, { "acc": 0.98336306, "epoch": 35.490977267400986, "grad_norm": 4.109454154968262, "learning_rate": 2.1293535820396706e-06, "loss": 0.05702052, "memory(GiB)": 13.7, "step": 75720, "train_speed(iter/s)": 1.530405 }, { "acc": 0.97881947, "epoch": 35.49332083430982, "grad_norm": 5.403238773345947, "learning_rate": 2.1287190683058612e-06, "loss": 0.03671314, "memory(GiB)": 13.7, "step": 75725, "train_speed(iter/s)": 1.530407 }, { "acc": 0.98892546, "epoch": 35.495664401218654, "grad_norm": 0.007893682457506657, "learning_rate": 2.1280846236002315e-06, "loss": 0.03165497, "memory(GiB)": 13.7, "step": 75730, "train_speed(iter/s)": 1.530405 }, { "acc": 0.996875, "epoch": 35.49800796812749, "grad_norm": 0.0002806037664413452, "learning_rate": 2.1274502479380322e-06, "loss": 0.02618, "memory(GiB)": 13.7, "step": 75735, "train_speed(iter/s)": 1.530402 }, { "acc": 0.98604164, "epoch": 35.50035153503632, "grad_norm": 2.5818235874176025, "learning_rate": 2.1268159413345084e-06, "loss": 0.0280631, "memory(GiB)": 13.7, "step": 75740, "train_speed(iter/s)": 1.530406 }, { "acc": 0.98812504, "epoch": 35.50269510194516, "grad_norm": 0.003005613572895527, "learning_rate": 2.1261817038049086e-06, "loss": 0.02441111, "memory(GiB)": 13.7, "step": 75745, "train_speed(iter/s)": 1.530406 }, { "acc": 0.9708334, "epoch": 35.505038668854, "grad_norm": 5.726613998413086, "learning_rate": 2.1255475353644795e-06, "loss": 0.06227541, "memory(GiB)": 13.7, "step": 75750, "train_speed(iter/s)": 1.53041 }, { "acc": 0.97717266, "epoch": 35.50738223576283, "grad_norm": 7.007373809814453, "learning_rate": 2.1249134360284614e-06, "loss": 0.06128337, "memory(GiB)": 13.7, "step": 75755, "train_speed(iter/s)": 1.53041 }, { "acc": 0.99375, "epoch": 35.50972580267167, "grad_norm": 0.0071201566606760025, "learning_rate": 2.1242794058120965e-06, "loss": 0.0217563, "memory(GiB)": 13.7, "step": 75760, "train_speed(iter/s)": 1.530414 }, { "acc": 0.98999996, "epoch": 35.5120693695805, "grad_norm": 3.2649576663970947, "learning_rate": 2.123645444730628e-06, "loss": 0.05930883, "memory(GiB)": 13.7, "step": 75765, "train_speed(iter/s)": 1.530417 }, { "acc": 0.98008938, "epoch": 35.514412936489336, "grad_norm": 3.289497137069702, "learning_rate": 2.1230115527992914e-06, "loss": 0.05586232, "memory(GiB)": 13.7, "step": 75770, "train_speed(iter/s)": 1.530418 }, { "acc": 0.98061008, "epoch": 35.51675650339817, "grad_norm": 1.303864598274231, "learning_rate": 2.122377730033322e-06, "loss": 0.05862197, "memory(GiB)": 13.7, "step": 75775, "train_speed(iter/s)": 1.530418 }, { "acc": 0.98611107, "epoch": 35.519100070307005, "grad_norm": 1.5520431995391846, "learning_rate": 2.121743976447957e-06, "loss": 0.05927376, "memory(GiB)": 13.7, "step": 75780, "train_speed(iter/s)": 1.530416 }, { "acc": 0.99361115, "epoch": 35.521443637215846, "grad_norm": 1.8748672008514404, "learning_rate": 2.121110292058431e-06, "loss": 0.02278304, "memory(GiB)": 13.7, "step": 75785, "train_speed(iter/s)": 1.530421 }, { "acc": 0.98736115, "epoch": 35.52378720412468, "grad_norm": 0.024013977497816086, "learning_rate": 2.120476676879972e-06, "loss": 0.03141992, "memory(GiB)": 13.7, "step": 75790, "train_speed(iter/s)": 1.530424 }, { "acc": 0.98534727, "epoch": 35.526130771033515, "grad_norm": 1.8397765159606934, "learning_rate": 2.1198431309278115e-06, "loss": 0.03137785, "memory(GiB)": 13.7, "step": 75795, "train_speed(iter/s)": 1.530424 }, { "acc": 0.99627972, "epoch": 35.52847433794235, "grad_norm": 1.8016152381896973, "learning_rate": 2.1192096542171797e-06, "loss": 0.02360129, "memory(GiB)": 13.7, "step": 75800, "train_speed(iter/s)": 1.530429 }, { "acc": 0.9979167, "epoch": 35.53081790485118, "grad_norm": 0.01781904138624668, "learning_rate": 2.1185762467633033e-06, "loss": 0.00938972, "memory(GiB)": 13.7, "step": 75805, "train_speed(iter/s)": 1.53043 }, { "acc": 0.99458332, "epoch": 35.53316147176002, "grad_norm": 2.870100975036621, "learning_rate": 2.1179429085814063e-06, "loss": 0.06454439, "memory(GiB)": 13.7, "step": 75810, "train_speed(iter/s)": 1.530429 }, { "acc": 0.9828125, "epoch": 35.53550503866885, "grad_norm": 0.002592789474874735, "learning_rate": 2.1173096396867095e-06, "loss": 0.03904332, "memory(GiB)": 13.7, "step": 75815, "train_speed(iter/s)": 1.530438 }, { "acc": 0.99077377, "epoch": 35.537848605577686, "grad_norm": 0.3884539306163788, "learning_rate": 2.1166764400944375e-06, "loss": 0.02789376, "memory(GiB)": 13.7, "step": 75820, "train_speed(iter/s)": 1.530439 }, { "acc": 0.98014879, "epoch": 35.54019217248653, "grad_norm": 5.72511625289917, "learning_rate": 2.116043309819811e-06, "loss": 0.0484229, "memory(GiB)": 13.7, "step": 75825, "train_speed(iter/s)": 1.530441 }, { "acc": 0.97520828, "epoch": 35.54253573939536, "grad_norm": 5.537901401519775, "learning_rate": 2.115410248878046e-06, "loss": 0.06531008, "memory(GiB)": 13.7, "step": 75830, "train_speed(iter/s)": 1.530444 }, { "acc": 0.98195515, "epoch": 35.544879306304196, "grad_norm": 3.604624032974243, "learning_rate": 2.11477725728436e-06, "loss": 0.03707743, "memory(GiB)": 13.7, "step": 75835, "train_speed(iter/s)": 1.530444 }, { "acc": 0.996875, "epoch": 35.54722287321303, "grad_norm": 0.3322221636772156, "learning_rate": 2.11414433505397e-06, "loss": 0.02466411, "memory(GiB)": 13.7, "step": 75840, "train_speed(iter/s)": 1.530443 }, { "acc": 0.98820515, "epoch": 35.549566440121865, "grad_norm": 1.162636399269104, "learning_rate": 2.1135114822020865e-06, "loss": 0.02276778, "memory(GiB)": 13.7, "step": 75845, "train_speed(iter/s)": 1.530446 }, { "acc": 0.97895298, "epoch": 35.5519100070307, "grad_norm": 2.896483898162842, "learning_rate": 2.1128786987439238e-06, "loss": 0.06425661, "memory(GiB)": 13.7, "step": 75850, "train_speed(iter/s)": 1.530454 }, { "acc": 0.9942708, "epoch": 35.55425357393953, "grad_norm": 0.015644071623682976, "learning_rate": 2.112245984694689e-06, "loss": 0.02098506, "memory(GiB)": 13.7, "step": 75855, "train_speed(iter/s)": 1.53045 }, { "acc": 0.9916666, "epoch": 35.55659714084837, "grad_norm": 2.7096166610717773, "learning_rate": 2.111613340069594e-06, "loss": 0.03982233, "memory(GiB)": 13.7, "step": 75860, "train_speed(iter/s)": 1.530449 }, { "acc": 0.98395834, "epoch": 35.55894070775721, "grad_norm": 1.3226215839385986, "learning_rate": 2.110980764883841e-06, "loss": 0.03076427, "memory(GiB)": 13.7, "step": 75865, "train_speed(iter/s)": 1.530442 }, { "acc": 0.98633928, "epoch": 35.56128427466604, "grad_norm": 2.8253493309020996, "learning_rate": 2.1103482591526385e-06, "loss": 0.04149248, "memory(GiB)": 13.7, "step": 75870, "train_speed(iter/s)": 1.530441 }, { "acc": 0.98458328, "epoch": 35.56362784157488, "grad_norm": 3.4681878089904785, "learning_rate": 2.1097158228911887e-06, "loss": 0.0255892, "memory(GiB)": 13.7, "step": 75875, "train_speed(iter/s)": 1.530441 }, { "acc": 0.99458332, "epoch": 35.56597140848371, "grad_norm": 4.62169075012207, "learning_rate": 2.109083456114696e-06, "loss": 0.01389156, "memory(GiB)": 13.7, "step": 75880, "train_speed(iter/s)": 1.530446 }, { "acc": 0.97994051, "epoch": 35.568314975392546, "grad_norm": 1.8324953317642212, "learning_rate": 2.1084511588383554e-06, "loss": 0.06563516, "memory(GiB)": 13.7, "step": 75885, "train_speed(iter/s)": 1.530445 }, { "acc": 0.98868055, "epoch": 35.57065854230138, "grad_norm": 3.769684314727783, "learning_rate": 2.1078189310773694e-06, "loss": 0.03710267, "memory(GiB)": 13.7, "step": 75890, "train_speed(iter/s)": 1.530445 }, { "acc": 0.97785053, "epoch": 35.573002109210215, "grad_norm": 3.3511452674865723, "learning_rate": 2.107186772846932e-06, "loss": 0.05369108, "memory(GiB)": 13.7, "step": 75895, "train_speed(iter/s)": 1.530443 }, { "acc": 0.9807065, "epoch": 35.575345676119056, "grad_norm": 1.1045359373092651, "learning_rate": 2.1065546841622405e-06, "loss": 0.06297046, "memory(GiB)": 13.7, "step": 75900, "train_speed(iter/s)": 1.530453 }, { "acc": 0.97347221, "epoch": 35.57768924302789, "grad_norm": 3.948976993560791, "learning_rate": 2.1059226650384858e-06, "loss": 0.1019035, "memory(GiB)": 13.7, "step": 75905, "train_speed(iter/s)": 1.53045 }, { "acc": 0.99153538, "epoch": 35.580032809936725, "grad_norm": 1.3475240468978882, "learning_rate": 2.105290715490861e-06, "loss": 0.02183531, "memory(GiB)": 13.7, "step": 75910, "train_speed(iter/s)": 1.530453 }, { "acc": 0.98604164, "epoch": 35.58237637684556, "grad_norm": 4.459865570068359, "learning_rate": 2.1046588355345568e-06, "loss": 0.04918761, "memory(GiB)": 13.7, "step": 75915, "train_speed(iter/s)": 1.530454 }, { "acc": 0.98604164, "epoch": 35.58471994375439, "grad_norm": 0.0006557137821801007, "learning_rate": 2.1040270251847593e-06, "loss": 0.01822679, "memory(GiB)": 13.7, "step": 75920, "train_speed(iter/s)": 1.530453 }, { "acc": 0.98008928, "epoch": 35.58706351066323, "grad_norm": 0.013725712895393372, "learning_rate": 2.1033952844566565e-06, "loss": 0.03312045, "memory(GiB)": 13.7, "step": 75925, "train_speed(iter/s)": 1.530457 }, { "acc": 0.97872028, "epoch": 35.58940707757206, "grad_norm": 2.9212093353271484, "learning_rate": 2.102763613365435e-06, "loss": 0.06208286, "memory(GiB)": 13.7, "step": 75930, "train_speed(iter/s)": 1.530461 }, { "acc": 0.975, "epoch": 35.591750644480896, "grad_norm": 9.932894706726074, "learning_rate": 2.1021320119262766e-06, "loss": 0.08230964, "memory(GiB)": 13.7, "step": 75935, "train_speed(iter/s)": 1.530469 }, { "acc": 0.98687496, "epoch": 35.59409421138974, "grad_norm": 1.6492316722869873, "learning_rate": 2.1015004801543605e-06, "loss": 0.04292225, "memory(GiB)": 13.7, "step": 75940, "train_speed(iter/s)": 1.53047 }, { "acc": 0.98968754, "epoch": 35.59643777829857, "grad_norm": 0.8889415860176086, "learning_rate": 2.1008690180648693e-06, "loss": 0.03277245, "memory(GiB)": 13.7, "step": 75945, "train_speed(iter/s)": 1.530474 }, { "acc": 0.95958586, "epoch": 35.598781345207406, "grad_norm": 5.986295223236084, "learning_rate": 2.100237625672981e-06, "loss": 0.10323448, "memory(GiB)": 13.7, "step": 75950, "train_speed(iter/s)": 1.530478 }, { "acc": 0.98562498, "epoch": 35.60112491211624, "grad_norm": 1.5633981227874756, "learning_rate": 2.0996063029938737e-06, "loss": 0.03050683, "memory(GiB)": 13.7, "step": 75955, "train_speed(iter/s)": 1.530476 }, { "acc": 0.98817539, "epoch": 35.603468479025075, "grad_norm": 3.129723072052002, "learning_rate": 2.098975050042719e-06, "loss": 0.04588413, "memory(GiB)": 13.7, "step": 75960, "train_speed(iter/s)": 1.530478 }, { "acc": 0.98291664, "epoch": 35.60581204593391, "grad_norm": 2.82255220413208, "learning_rate": 2.098343866834692e-06, "loss": 0.05410104, "memory(GiB)": 13.7, "step": 75965, "train_speed(iter/s)": 1.530478 }, { "acc": 0.98558903, "epoch": 35.608155612842744, "grad_norm": 2.727644205093384, "learning_rate": 2.0977127533849653e-06, "loss": 0.06792337, "memory(GiB)": 13.7, "step": 75970, "train_speed(iter/s)": 1.530481 }, { "acc": 0.9729167, "epoch": 35.610499179751585, "grad_norm": 4.850579738616943, "learning_rate": 2.097081709708708e-06, "loss": 0.08520122, "memory(GiB)": 13.7, "step": 75975, "train_speed(iter/s)": 1.530478 }, { "acc": 0.98145838, "epoch": 35.61284274666042, "grad_norm": 4.2382001876831055, "learning_rate": 2.0964507358210865e-06, "loss": 0.04297987, "memory(GiB)": 13.7, "step": 75980, "train_speed(iter/s)": 1.530481 }, { "acc": 0.9552084, "epoch": 35.615186313569254, "grad_norm": 9.438102722167969, "learning_rate": 2.095819831737268e-06, "loss": 0.1004976, "memory(GiB)": 13.7, "step": 75985, "train_speed(iter/s)": 1.530482 }, { "acc": 0.97250004, "epoch": 35.61752988047809, "grad_norm": 6.115264415740967, "learning_rate": 2.09518899747242e-06, "loss": 0.04543613, "memory(GiB)": 13.7, "step": 75990, "train_speed(iter/s)": 1.530487 }, { "acc": 0.98784723, "epoch": 35.61987344738692, "grad_norm": 0.00039248738903552294, "learning_rate": 2.0945582330417026e-06, "loss": 0.02825523, "memory(GiB)": 13.7, "step": 75995, "train_speed(iter/s)": 1.530486 }, { "acc": 0.97256947, "epoch": 35.62221701429576, "grad_norm": 3.8862507343292236, "learning_rate": 2.0939275384602783e-06, "loss": 0.04448006, "memory(GiB)": 13.7, "step": 76000, "train_speed(iter/s)": 1.530489 }, { "acc": 0.9927083, "epoch": 35.62456058120459, "grad_norm": 0.003929483238607645, "learning_rate": 2.093296913743309e-06, "loss": 0.01187613, "memory(GiB)": 13.7, "step": 76005, "train_speed(iter/s)": 1.530484 }, { "acc": 0.98197918, "epoch": 35.626904148113425, "grad_norm": 6.297896862030029, "learning_rate": 2.092666358905949e-06, "loss": 0.04591306, "memory(GiB)": 13.7, "step": 76010, "train_speed(iter/s)": 1.530482 }, { "acc": 0.97663689, "epoch": 35.62924771502227, "grad_norm": 4.4581074714660645, "learning_rate": 2.0920358739633586e-06, "loss": 0.06306516, "memory(GiB)": 13.7, "step": 76015, "train_speed(iter/s)": 1.53048 }, { "acc": 0.98592262, "epoch": 35.6315912819311, "grad_norm": 3.4715349674224854, "learning_rate": 2.0914054589306894e-06, "loss": 0.0395807, "memory(GiB)": 13.7, "step": 76020, "train_speed(iter/s)": 1.530485 }, { "acc": 0.98599539, "epoch": 35.633934848839935, "grad_norm": 0.07814546674489975, "learning_rate": 2.0907751138230956e-06, "loss": 0.04260406, "memory(GiB)": 13.7, "step": 76025, "train_speed(iter/s)": 1.530491 }, { "acc": 0.97156258, "epoch": 35.63627841574877, "grad_norm": 3.0238709449768066, "learning_rate": 2.0901448386557305e-06, "loss": 0.09668376, "memory(GiB)": 13.7, "step": 76030, "train_speed(iter/s)": 1.530492 }, { "acc": 0.9802084, "epoch": 35.638621982657604, "grad_norm": 5.396126747131348, "learning_rate": 2.08951463344374e-06, "loss": 0.04654148, "memory(GiB)": 13.7, "step": 76035, "train_speed(iter/s)": 1.53049 }, { "acc": 0.98034973, "epoch": 35.64096554956644, "grad_norm": 6.190113067626953, "learning_rate": 2.0888844982022748e-06, "loss": 0.0596793, "memory(GiB)": 13.7, "step": 76040, "train_speed(iter/s)": 1.530494 }, { "acc": 0.9833334, "epoch": 35.64330911647527, "grad_norm": 4.4377055168151855, "learning_rate": 2.088254432946483e-06, "loss": 0.04613843, "memory(GiB)": 13.7, "step": 76045, "train_speed(iter/s)": 1.530493 }, { "acc": 0.96875, "epoch": 35.645652683384114, "grad_norm": 5.690824031829834, "learning_rate": 2.0876244376915047e-06, "loss": 0.06672719, "memory(GiB)": 13.7, "step": 76050, "train_speed(iter/s)": 1.5305 }, { "acc": 0.98883934, "epoch": 35.64799625029295, "grad_norm": 2.6704022884368896, "learning_rate": 2.0869945124524864e-06, "loss": 0.06371489, "memory(GiB)": 13.7, "step": 76055, "train_speed(iter/s)": 1.530506 }, { "acc": 0.97361116, "epoch": 35.65033981720178, "grad_norm": 6.5009765625, "learning_rate": 2.086364657244569e-06, "loss": 0.05826288, "memory(GiB)": 13.7, "step": 76060, "train_speed(iter/s)": 1.530509 }, { "acc": 0.97479162, "epoch": 35.65268338411062, "grad_norm": 5.444285869598389, "learning_rate": 2.085734872082892e-06, "loss": 0.04192796, "memory(GiB)": 13.7, "step": 76065, "train_speed(iter/s)": 1.530514 }, { "acc": 0.97113094, "epoch": 35.65502695101945, "grad_norm": 3.812171220779419, "learning_rate": 2.0851051569825915e-06, "loss": 0.0611675, "memory(GiB)": 13.7, "step": 76070, "train_speed(iter/s)": 1.530519 }, { "acc": 0.98133926, "epoch": 35.657370517928285, "grad_norm": 5.681958198547363, "learning_rate": 2.0844755119588048e-06, "loss": 0.0564679, "memory(GiB)": 13.7, "step": 76075, "train_speed(iter/s)": 1.530521 }, { "acc": 0.98312502, "epoch": 35.65971408483712, "grad_norm": 3.4926912784576416, "learning_rate": 2.083845937026667e-06, "loss": 0.03771222, "memory(GiB)": 13.7, "step": 76080, "train_speed(iter/s)": 1.530521 }, { "acc": 0.97820511, "epoch": 35.662057651745954, "grad_norm": 6.294987201690674, "learning_rate": 2.0832164322013127e-06, "loss": 0.04856177, "memory(GiB)": 13.7, "step": 76085, "train_speed(iter/s)": 1.530525 }, { "acc": 0.97437496, "epoch": 35.664401218654795, "grad_norm": 5.087705612182617, "learning_rate": 2.08258699749787e-06, "loss": 0.06264206, "memory(GiB)": 13.7, "step": 76090, "train_speed(iter/s)": 1.530525 }, { "acc": 0.9854167, "epoch": 35.66674478556363, "grad_norm": 4.203575611114502, "learning_rate": 2.0819576329314688e-06, "loss": 0.04391814, "memory(GiB)": 13.7, "step": 76095, "train_speed(iter/s)": 1.530523 }, { "acc": 0.96279764, "epoch": 35.669088352472464, "grad_norm": 5.726504325866699, "learning_rate": 2.0813283385172406e-06, "loss": 0.0711802, "memory(GiB)": 13.7, "step": 76100, "train_speed(iter/s)": 1.530523 }, { "acc": 0.98467264, "epoch": 35.6714319193813, "grad_norm": 3.238278865814209, "learning_rate": 2.080699114270309e-06, "loss": 0.05943334, "memory(GiB)": 13.7, "step": 76105, "train_speed(iter/s)": 1.530529 }, { "acc": 0.99162769, "epoch": 35.67377548629013, "grad_norm": 4.385123252868652, "learning_rate": 2.080069960205796e-06, "loss": 0.02912975, "memory(GiB)": 13.7, "step": 76110, "train_speed(iter/s)": 1.530531 }, { "acc": 0.97579861, "epoch": 35.67611905319897, "grad_norm": 5.205446720123291, "learning_rate": 2.079440876338827e-06, "loss": 0.06210268, "memory(GiB)": 13.7, "step": 76115, "train_speed(iter/s)": 1.530538 }, { "acc": 0.99333334, "epoch": 35.6784626201078, "grad_norm": 2.062908887863159, "learning_rate": 2.0788118626845254e-06, "loss": 0.0163577, "memory(GiB)": 13.7, "step": 76120, "train_speed(iter/s)": 1.530542 }, { "acc": 0.9791667, "epoch": 35.68080618701664, "grad_norm": 2.8253376483917236, "learning_rate": 2.0781829192580056e-06, "loss": 0.0516507, "memory(GiB)": 13.7, "step": 76125, "train_speed(iter/s)": 1.53054 }, { "acc": 0.98395834, "epoch": 35.68314975392548, "grad_norm": 1.854324460029602, "learning_rate": 2.0775540460743875e-06, "loss": 0.01926731, "memory(GiB)": 13.7, "step": 76130, "train_speed(iter/s)": 1.530542 }, { "acc": 0.9885417, "epoch": 35.68549332083431, "grad_norm": 3.7325282096862793, "learning_rate": 2.07692524314879e-06, "loss": 0.02716459, "memory(GiB)": 13.7, "step": 76135, "train_speed(iter/s)": 1.530548 }, { "acc": 0.9895834, "epoch": 35.687836887743146, "grad_norm": 0.0015621634665876627, "learning_rate": 2.0762965104963246e-06, "loss": 0.03096303, "memory(GiB)": 13.7, "step": 76140, "train_speed(iter/s)": 1.530551 }, { "acc": 0.99437504, "epoch": 35.69018045465198, "grad_norm": 2.2022178173065186, "learning_rate": 2.0756678481321034e-06, "loss": 0.01231341, "memory(GiB)": 13.7, "step": 76145, "train_speed(iter/s)": 1.530556 }, { "acc": 0.98599205, "epoch": 35.692524021560814, "grad_norm": 5.316957950592041, "learning_rate": 2.0750392560712373e-06, "loss": 0.0408236, "memory(GiB)": 13.7, "step": 76150, "train_speed(iter/s)": 1.530561 }, { "acc": 0.9927084, "epoch": 35.69486758846965, "grad_norm": 2.879851818084717, "learning_rate": 2.0744107343288366e-06, "loss": 0.01886994, "memory(GiB)": 13.7, "step": 76155, "train_speed(iter/s)": 1.530563 }, { "acc": 0.98916664, "epoch": 35.69721115537848, "grad_norm": 1.3127330541610718, "learning_rate": 2.0737822829200115e-06, "loss": 0.01382887, "memory(GiB)": 13.7, "step": 76160, "train_speed(iter/s)": 1.530568 }, { "acc": 0.98232136, "epoch": 35.699554722287324, "grad_norm": 0.001722988672554493, "learning_rate": 2.073153901859863e-06, "loss": 0.07186445, "memory(GiB)": 13.7, "step": 76165, "train_speed(iter/s)": 1.530568 }, { "acc": 0.98614578, "epoch": 35.70189828919616, "grad_norm": 0.7815731763839722, "learning_rate": 2.0725255911634975e-06, "loss": 0.09160687, "memory(GiB)": 13.7, "step": 76170, "train_speed(iter/s)": 1.530569 }, { "acc": 0.97458334, "epoch": 35.70424185610499, "grad_norm": 1.290535807609558, "learning_rate": 2.0718973508460194e-06, "loss": 0.04850804, "memory(GiB)": 13.7, "step": 76175, "train_speed(iter/s)": 1.530568 }, { "acc": 0.98038692, "epoch": 35.70658542301383, "grad_norm": 0.7192647457122803, "learning_rate": 2.0712691809225264e-06, "loss": 0.07859311, "memory(GiB)": 13.7, "step": 76180, "train_speed(iter/s)": 1.530568 }, { "acc": 0.9833334, "epoch": 35.70892898992266, "grad_norm": 3.567129135131836, "learning_rate": 2.0706410814081203e-06, "loss": 0.05643439, "memory(GiB)": 13.7, "step": 76185, "train_speed(iter/s)": 1.530575 }, { "acc": 0.98779764, "epoch": 35.711272556831496, "grad_norm": 2.1167609691619873, "learning_rate": 2.0700130523178957e-06, "loss": 0.03202949, "memory(GiB)": 13.7, "step": 76190, "train_speed(iter/s)": 1.530579 }, { "acc": 0.97673607, "epoch": 35.71361612374033, "grad_norm": 2.6129181385040283, "learning_rate": 2.069385093666952e-06, "loss": 0.03816238, "memory(GiB)": 13.7, "step": 76195, "train_speed(iter/s)": 1.530577 }, { "acc": 0.98927078, "epoch": 35.71595969064917, "grad_norm": 2.2547507286071777, "learning_rate": 2.0687572054703795e-06, "loss": 0.03792832, "memory(GiB)": 13.7, "step": 76200, "train_speed(iter/s)": 1.530578 }, { "acc": 0.97994051, "epoch": 35.718303257558006, "grad_norm": 3.30188250541687, "learning_rate": 2.0681293877432715e-06, "loss": 0.03603283, "memory(GiB)": 13.7, "step": 76205, "train_speed(iter/s)": 1.530584 }, { "acc": 0.99020824, "epoch": 35.72064682446684, "grad_norm": 1.2032864093780518, "learning_rate": 2.067501640500722e-06, "loss": 0.0301398, "memory(GiB)": 13.7, "step": 76210, "train_speed(iter/s)": 1.530591 }, { "acc": 0.98916664, "epoch": 35.722990391375674, "grad_norm": 0.9321771264076233, "learning_rate": 2.066873963757815e-06, "loss": 0.03279235, "memory(GiB)": 13.7, "step": 76215, "train_speed(iter/s)": 1.530592 }, { "acc": 0.9854166, "epoch": 35.72533395828451, "grad_norm": 1.112040638923645, "learning_rate": 2.0662463575296405e-06, "loss": 0.03358657, "memory(GiB)": 13.7, "step": 76220, "train_speed(iter/s)": 1.530595 }, { "acc": 0.98708334, "epoch": 35.72767752519334, "grad_norm": 0.06060226634144783, "learning_rate": 2.0656188218312855e-06, "loss": 0.02801459, "memory(GiB)": 13.7, "step": 76225, "train_speed(iter/s)": 1.5306 }, { "acc": 0.98875008, "epoch": 35.73002109210218, "grad_norm": 3.837472438812256, "learning_rate": 2.0649913566778306e-06, "loss": 0.05952916, "memory(GiB)": 13.7, "step": 76230, "train_speed(iter/s)": 1.530605 }, { "acc": 0.98361111, "epoch": 35.73236465901101, "grad_norm": 1.1066662073135376, "learning_rate": 2.0643639620843615e-06, "loss": 0.0397739, "memory(GiB)": 13.7, "step": 76235, "train_speed(iter/s)": 1.530608 }, { "acc": 0.98125, "epoch": 35.73470822591985, "grad_norm": 3.4235823154449463, "learning_rate": 2.063736638065954e-06, "loss": 0.04619488, "memory(GiB)": 13.7, "step": 76240, "train_speed(iter/s)": 1.530613 }, { "acc": 0.98883934, "epoch": 35.73705179282869, "grad_norm": 0.01034422218799591, "learning_rate": 2.0631093846376912e-06, "loss": 0.03607846, "memory(GiB)": 13.7, "step": 76245, "train_speed(iter/s)": 1.530619 }, { "acc": 0.98125, "epoch": 35.73939535973752, "grad_norm": 3.874009847640991, "learning_rate": 2.0624822018146495e-06, "loss": 0.04637476, "memory(GiB)": 13.7, "step": 76250, "train_speed(iter/s)": 1.530618 }, { "acc": 0.97654762, "epoch": 35.741738926646356, "grad_norm": 2.3235878944396973, "learning_rate": 2.061855089611902e-06, "loss": 0.04941872, "memory(GiB)": 13.7, "step": 76255, "train_speed(iter/s)": 1.53062 }, { "acc": 0.98500004, "epoch": 35.74408249355519, "grad_norm": 4.236097812652588, "learning_rate": 2.0612280480445242e-06, "loss": 0.03902811, "memory(GiB)": 13.7, "step": 76260, "train_speed(iter/s)": 1.530622 }, { "acc": 0.98433704, "epoch": 35.746426060464024, "grad_norm": 2.9781694412231445, "learning_rate": 2.0606010771275896e-06, "loss": 0.03754526, "memory(GiB)": 13.7, "step": 76265, "train_speed(iter/s)": 1.530623 }, { "acc": 0.98530636, "epoch": 35.74876962737286, "grad_norm": 4.349602222442627, "learning_rate": 2.0599741768761663e-06, "loss": 0.05306661, "memory(GiB)": 13.7, "step": 76270, "train_speed(iter/s)": 1.530623 }, { "acc": 0.98317413, "epoch": 35.7511131942817, "grad_norm": 3.945518970489502, "learning_rate": 2.0593473473053217e-06, "loss": 0.0553431, "memory(GiB)": 13.7, "step": 76275, "train_speed(iter/s)": 1.530621 }, { "acc": 0.98988094, "epoch": 35.753456761190535, "grad_norm": 1.0324909687042236, "learning_rate": 2.058720588430125e-06, "loss": 0.03301612, "memory(GiB)": 13.7, "step": 76280, "train_speed(iter/s)": 1.530624 }, { "acc": 0.978125, "epoch": 35.75580032809937, "grad_norm": 3.714486598968506, "learning_rate": 2.0580939002656424e-06, "loss": 0.06733488, "memory(GiB)": 13.7, "step": 76285, "train_speed(iter/s)": 1.530631 }, { "acc": 0.9916666, "epoch": 35.7581438950082, "grad_norm": 0.0021551449317485094, "learning_rate": 2.057467282826934e-06, "loss": 0.0363288, "memory(GiB)": 13.7, "step": 76290, "train_speed(iter/s)": 1.530633 }, { "acc": 0.99375, "epoch": 35.76048746191704, "grad_norm": 1.9156550168991089, "learning_rate": 2.0568407361290636e-06, "loss": 0.04550284, "memory(GiB)": 13.7, "step": 76295, "train_speed(iter/s)": 1.530632 }, { "acc": 0.97885418, "epoch": 35.76283102882587, "grad_norm": 6.386012077331543, "learning_rate": 2.056214260187091e-06, "loss": 0.08265781, "memory(GiB)": 13.7, "step": 76300, "train_speed(iter/s)": 1.530635 }, { "acc": 0.98708334, "epoch": 35.765174595734706, "grad_norm": 3.8077595233917236, "learning_rate": 2.0555878550160766e-06, "loss": 0.04359819, "memory(GiB)": 13.7, "step": 76305, "train_speed(iter/s)": 1.530637 }, { "acc": 0.96734371, "epoch": 35.76751816264354, "grad_norm": 0.9163745045661926, "learning_rate": 2.054961520631076e-06, "loss": 0.07267895, "memory(GiB)": 13.7, "step": 76310, "train_speed(iter/s)": 1.530633 }, { "acc": 0.9729167, "epoch": 35.76986172955238, "grad_norm": 2.832042932510376, "learning_rate": 2.054335257047141e-06, "loss": 0.04811286, "memory(GiB)": 13.7, "step": 76315, "train_speed(iter/s)": 1.530642 }, { "acc": 0.9770834, "epoch": 35.772205296461216, "grad_norm": 12.611352920532227, "learning_rate": 2.053709064279328e-06, "loss": 0.19994953, "memory(GiB)": 13.7, "step": 76320, "train_speed(iter/s)": 1.530641 }, { "acc": 0.98204021, "epoch": 35.77454886337005, "grad_norm": 3.6525800228118896, "learning_rate": 2.0530829423426897e-06, "loss": 0.06282668, "memory(GiB)": 13.7, "step": 76325, "train_speed(iter/s)": 1.530646 }, { "acc": 0.95542612, "epoch": 35.776892430278885, "grad_norm": 3.604139566421509, "learning_rate": 2.0524568912522725e-06, "loss": 0.08925599, "memory(GiB)": 13.7, "step": 76330, "train_speed(iter/s)": 1.530653 }, { "acc": 0.99125004, "epoch": 35.77923599718772, "grad_norm": 2.227243185043335, "learning_rate": 2.0518309110231274e-06, "loss": 0.01471126, "memory(GiB)": 13.7, "step": 76335, "train_speed(iter/s)": 1.530651 }, { "acc": 0.97375002, "epoch": 35.78157956409655, "grad_norm": 2.665841579437256, "learning_rate": 2.051205001670301e-06, "loss": 0.06189213, "memory(GiB)": 13.7, "step": 76340, "train_speed(iter/s)": 1.530656 }, { "acc": 0.9739584, "epoch": 35.78392313100539, "grad_norm": 3.6492886543273926, "learning_rate": 2.0505791632088358e-06, "loss": 0.04354796, "memory(GiB)": 13.7, "step": 76345, "train_speed(iter/s)": 1.530662 }, { "acc": 0.98227177, "epoch": 35.78626669791423, "grad_norm": 0.654899001121521, "learning_rate": 2.049953395653778e-06, "loss": 0.06398993, "memory(GiB)": 13.7, "step": 76350, "train_speed(iter/s)": 1.53067 }, { "acc": 0.98277359, "epoch": 35.78861026482306, "grad_norm": 1.700357437133789, "learning_rate": 2.049327699020165e-06, "loss": 0.03676908, "memory(GiB)": 13.7, "step": 76355, "train_speed(iter/s)": 1.530674 }, { "acc": 0.9822916, "epoch": 35.7909538317319, "grad_norm": 2.5754339694976807, "learning_rate": 2.0487020733230416e-06, "loss": 0.02998232, "memory(GiB)": 13.7, "step": 76360, "train_speed(iter/s)": 1.530676 }, { "acc": 0.97406254, "epoch": 35.79329739864073, "grad_norm": 0.0332515574991703, "learning_rate": 2.0480765185774402e-06, "loss": 0.07187234, "memory(GiB)": 13.7, "step": 76365, "train_speed(iter/s)": 1.530677 }, { "acc": 0.98812504, "epoch": 35.795640965549566, "grad_norm": 0.011613896116614342, "learning_rate": 2.0474510347984005e-06, "loss": 0.02784288, "memory(GiB)": 13.7, "step": 76370, "train_speed(iter/s)": 1.530684 }, { "acc": 0.9708334, "epoch": 35.7979845324584, "grad_norm": 6.550114631652832, "learning_rate": 2.046825622000956e-06, "loss": 0.04251074, "memory(GiB)": 13.7, "step": 76375, "train_speed(iter/s)": 1.530684 }, { "acc": 0.98299675, "epoch": 35.800328099367235, "grad_norm": 5.546384811401367, "learning_rate": 2.0462002802001418e-06, "loss": 0.05320306, "memory(GiB)": 13.7, "step": 76380, "train_speed(iter/s)": 1.530681 }, { "acc": 0.9864584, "epoch": 35.80267166627607, "grad_norm": 3.1336748600006104, "learning_rate": 2.0455750094109857e-06, "loss": 0.02490184, "memory(GiB)": 13.7, "step": 76385, "train_speed(iter/s)": 1.530685 }, { "acc": 0.98395834, "epoch": 35.80501523318491, "grad_norm": 7.772560119628906, "learning_rate": 2.0449498096485183e-06, "loss": 0.05546783, "memory(GiB)": 13.7, "step": 76390, "train_speed(iter/s)": 1.530684 }, { "acc": 0.98812504, "epoch": 35.807358800093745, "grad_norm": 5.485183238983154, "learning_rate": 2.04432468092777e-06, "loss": 0.04006875, "memory(GiB)": 13.7, "step": 76395, "train_speed(iter/s)": 1.530684 }, { "acc": 0.98708334, "epoch": 35.80970236700258, "grad_norm": 3.7449095249176025, "learning_rate": 2.0436996232637644e-06, "loss": 0.03661188, "memory(GiB)": 13.7, "step": 76400, "train_speed(iter/s)": 1.530689 }, { "acc": 0.98312502, "epoch": 35.81204593391141, "grad_norm": 3.1704118251800537, "learning_rate": 2.0430746366715234e-06, "loss": 0.05701502, "memory(GiB)": 13.7, "step": 76405, "train_speed(iter/s)": 1.530697 }, { "acc": 0.99652786, "epoch": 35.81438950082025, "grad_norm": 2.176846742630005, "learning_rate": 2.0424497211660735e-06, "loss": 0.0690751, "memory(GiB)": 13.7, "step": 76410, "train_speed(iter/s)": 1.5307 }, { "acc": 0.98708324, "epoch": 35.81673306772908, "grad_norm": 2.904902219772339, "learning_rate": 2.0418248767624356e-06, "loss": 0.02731962, "memory(GiB)": 13.7, "step": 76415, "train_speed(iter/s)": 1.530702 }, { "acc": 0.9895833, "epoch": 35.819076634637916, "grad_norm": 3.2639331817626953, "learning_rate": 2.0412001034756256e-06, "loss": 0.03989073, "memory(GiB)": 13.7, "step": 76420, "train_speed(iter/s)": 1.530709 }, { "acc": 0.98479166, "epoch": 35.82142020154676, "grad_norm": 3.4682650566101074, "learning_rate": 2.0405754013206634e-06, "loss": 0.04383358, "memory(GiB)": 13.7, "step": 76425, "train_speed(iter/s)": 1.530711 }, { "acc": 0.98416672, "epoch": 35.82376376845559, "grad_norm": 3.1760714054107666, "learning_rate": 2.039950770312566e-06, "loss": 0.04509645, "memory(GiB)": 13.7, "step": 76430, "train_speed(iter/s)": 1.530713 }, { "acc": 0.9895834, "epoch": 35.826107335364426, "grad_norm": 3.1878933906555176, "learning_rate": 2.0393262104663443e-06, "loss": 0.01843808, "memory(GiB)": 13.7, "step": 76435, "train_speed(iter/s)": 1.530716 }, { "acc": 0.9927084, "epoch": 35.82845090227326, "grad_norm": 0.0029404691886156797, "learning_rate": 2.0387017217970147e-06, "loss": 0.01584076, "memory(GiB)": 13.7, "step": 76440, "train_speed(iter/s)": 1.530715 }, { "acc": 0.97743053, "epoch": 35.830794469182095, "grad_norm": 0.3179682195186615, "learning_rate": 2.0380773043195824e-06, "loss": 0.04222539, "memory(GiB)": 13.7, "step": 76445, "train_speed(iter/s)": 1.53072 }, { "acc": 0.996875, "epoch": 35.83313803609093, "grad_norm": 2.505370616912842, "learning_rate": 2.0374529580490604e-06, "loss": 0.02573341, "memory(GiB)": 13.7, "step": 76450, "train_speed(iter/s)": 1.530729 }, { "acc": 0.96800594, "epoch": 35.835481602999764, "grad_norm": 2.2986202239990234, "learning_rate": 2.0368286830004562e-06, "loss": 0.07949777, "memory(GiB)": 13.7, "step": 76455, "train_speed(iter/s)": 1.530732 }, { "acc": 0.97208328, "epoch": 35.8378251699086, "grad_norm": 5.00885009765625, "learning_rate": 2.0362044791887727e-06, "loss": 0.05149485, "memory(GiB)": 13.7, "step": 76460, "train_speed(iter/s)": 1.530736 }, { "acc": 0.99020834, "epoch": 35.84016873681744, "grad_norm": 4.949183464050293, "learning_rate": 2.035580346629015e-06, "loss": 0.05027472, "memory(GiB)": 13.7, "step": 76465, "train_speed(iter/s)": 1.530741 }, { "acc": 0.9845623, "epoch": 35.842512303726274, "grad_norm": 1.513763427734375, "learning_rate": 2.0349562853361862e-06, "loss": 0.05928426, "memory(GiB)": 13.7, "step": 76470, "train_speed(iter/s)": 1.530741 }, { "acc": 0.98446426, "epoch": 35.84485587063511, "grad_norm": 1.3955066204071045, "learning_rate": 2.034332295325286e-06, "loss": 0.03751281, "memory(GiB)": 13.7, "step": 76475, "train_speed(iter/s)": 1.530745 }, { "acc": 0.98291664, "epoch": 35.84719943754394, "grad_norm": 3.94167160987854, "learning_rate": 2.0337083766113117e-06, "loss": 0.044372, "memory(GiB)": 13.7, "step": 76480, "train_speed(iter/s)": 1.530747 }, { "acc": 0.98467264, "epoch": 35.84954300445278, "grad_norm": 6.326633930206299, "learning_rate": 2.033084529209261e-06, "loss": 0.03806339, "memory(GiB)": 13.7, "step": 76485, "train_speed(iter/s)": 1.530756 }, { "acc": 0.98812504, "epoch": 35.85188657136161, "grad_norm": 0.09777140617370605, "learning_rate": 2.0324607531341313e-06, "loss": 0.02791735, "memory(GiB)": 13.7, "step": 76490, "train_speed(iter/s)": 1.530761 }, { "acc": 0.991572, "epoch": 35.854230138270445, "grad_norm": 3.786752462387085, "learning_rate": 2.0318370484009124e-06, "loss": 0.0377964, "memory(GiB)": 13.7, "step": 76495, "train_speed(iter/s)": 1.530762 }, { "acc": 0.98309212, "epoch": 35.85657370517928, "grad_norm": 2.4887843132019043, "learning_rate": 2.0312134150245985e-06, "loss": 0.04612928, "memory(GiB)": 13.7, "step": 76500, "train_speed(iter/s)": 1.530763 }, { "acc": 0.97845364, "epoch": 35.85891727208812, "grad_norm": 5.951170444488525, "learning_rate": 2.0305898530201806e-06, "loss": 0.08672512, "memory(GiB)": 13.7, "step": 76505, "train_speed(iter/s)": 1.530767 }, { "acc": 0.98583336, "epoch": 35.861260838996955, "grad_norm": 5.496042251586914, "learning_rate": 2.029966362402644e-06, "loss": 0.03113068, "memory(GiB)": 13.7, "step": 76510, "train_speed(iter/s)": 1.53077 }, { "acc": 0.98425598, "epoch": 35.86360440590579, "grad_norm": 3.232036590576172, "learning_rate": 2.0293429431869772e-06, "loss": 0.0478792, "memory(GiB)": 13.7, "step": 76515, "train_speed(iter/s)": 1.530773 }, { "acc": 0.97333336, "epoch": 35.865947972814624, "grad_norm": 2.820035457611084, "learning_rate": 2.0287195953881666e-06, "loss": 0.06262975, "memory(GiB)": 13.7, "step": 76520, "train_speed(iter/s)": 1.530777 }, { "acc": 0.99375, "epoch": 35.86829153972346, "grad_norm": 2.7552902698516846, "learning_rate": 2.0280963190211923e-06, "loss": 0.01540258, "memory(GiB)": 13.7, "step": 76525, "train_speed(iter/s)": 1.530777 }, { "acc": 0.99125004, "epoch": 35.87063510663229, "grad_norm": 3.911749839782715, "learning_rate": 2.027473114101039e-06, "loss": 0.02912807, "memory(GiB)": 13.7, "step": 76530, "train_speed(iter/s)": 1.53078 }, { "acc": 0.9916667, "epoch": 35.87297867354113, "grad_norm": 0.09263766556978226, "learning_rate": 2.0268499806426836e-06, "loss": 0.0269042, "memory(GiB)": 13.7, "step": 76535, "train_speed(iter/s)": 1.530786 }, { "acc": 0.98916664, "epoch": 35.87532224044997, "grad_norm": 0.7429578900337219, "learning_rate": 2.0262269186611048e-06, "loss": 0.04948124, "memory(GiB)": 13.7, "step": 76540, "train_speed(iter/s)": 1.530788 }, { "acc": 0.9875, "epoch": 35.8776658073588, "grad_norm": 0.024612827226519585, "learning_rate": 2.025603928171282e-06, "loss": 0.02605892, "memory(GiB)": 13.7, "step": 76545, "train_speed(iter/s)": 1.530783 }, { "acc": 0.996875, "epoch": 35.88000937426764, "grad_norm": 2.8811697959899902, "learning_rate": 2.0249810091881854e-06, "loss": 0.00978536, "memory(GiB)": 13.7, "step": 76550, "train_speed(iter/s)": 1.530784 }, { "acc": 0.98395834, "epoch": 35.88235294117647, "grad_norm": 3.1412622928619385, "learning_rate": 2.02435816172679e-06, "loss": 0.02746159, "memory(GiB)": 13.7, "step": 76555, "train_speed(iter/s)": 1.53078 }, { "acc": 0.97861118, "epoch": 35.884696508085305, "grad_norm": 4.325421333312988, "learning_rate": 2.0237353858020688e-06, "loss": 0.05688339, "memory(GiB)": 13.7, "step": 76560, "train_speed(iter/s)": 1.530781 }, { "acc": 0.9885417, "epoch": 35.88704007499414, "grad_norm": 2.8955698013305664, "learning_rate": 2.023112681428989e-06, "loss": 0.01544908, "memory(GiB)": 13.7, "step": 76565, "train_speed(iter/s)": 1.530779 }, { "acc": 0.98279762, "epoch": 35.889383641902974, "grad_norm": 3.437922716140747, "learning_rate": 2.0224900486225176e-06, "loss": 0.03031654, "memory(GiB)": 13.7, "step": 76570, "train_speed(iter/s)": 1.530787 }, { "acc": 0.9895834, "epoch": 35.89172720881181, "grad_norm": 2.1110353469848633, "learning_rate": 2.0218674873976222e-06, "loss": 0.03218564, "memory(GiB)": 13.7, "step": 76575, "train_speed(iter/s)": 1.530786 }, { "acc": 0.98570518, "epoch": 35.89407077572065, "grad_norm": 3.1424756050109863, "learning_rate": 2.0212449977692667e-06, "loss": 0.03165237, "memory(GiB)": 13.7, "step": 76580, "train_speed(iter/s)": 1.530785 }, { "acc": 0.99174671, "epoch": 35.896414342629484, "grad_norm": 2.5947506427764893, "learning_rate": 2.020622579752415e-06, "loss": 0.02060103, "memory(GiB)": 13.7, "step": 76585, "train_speed(iter/s)": 1.530785 }, { "acc": 0.98154764, "epoch": 35.89875790953832, "grad_norm": 8.655778884887695, "learning_rate": 2.0200002333620253e-06, "loss": 0.07864059, "memory(GiB)": 13.7, "step": 76590, "train_speed(iter/s)": 1.530791 }, { "acc": 0.98205357, "epoch": 35.90110147644715, "grad_norm": 0.002517017535865307, "learning_rate": 2.0193779586130583e-06, "loss": 0.03278447, "memory(GiB)": 13.7, "step": 76595, "train_speed(iter/s)": 1.530796 }, { "acc": 0.9895834, "epoch": 35.90344504335599, "grad_norm": 3.3771636486053467, "learning_rate": 2.0187557555204735e-06, "loss": 0.04587542, "memory(GiB)": 13.7, "step": 76600, "train_speed(iter/s)": 1.530802 }, { "acc": 0.98661861, "epoch": 35.90578861026482, "grad_norm": 6.173781871795654, "learning_rate": 2.0181336240992246e-06, "loss": 0.05467507, "memory(GiB)": 13.7, "step": 76605, "train_speed(iter/s)": 1.530807 }, { "acc": 0.9739584, "epoch": 35.908132177173655, "grad_norm": 4.009970664978027, "learning_rate": 2.017511564364264e-06, "loss": 0.04065992, "memory(GiB)": 13.7, "step": 76610, "train_speed(iter/s)": 1.530815 }, { "acc": 0.98285179, "epoch": 35.9104757440825, "grad_norm": 2.6029489040374756, "learning_rate": 2.0168895763305444e-06, "loss": 0.06938682, "memory(GiB)": 13.7, "step": 76615, "train_speed(iter/s)": 1.530825 }, { "acc": 0.9916667, "epoch": 35.91281931099133, "grad_norm": 0.034827087074518204, "learning_rate": 2.01626766001302e-06, "loss": 0.01089185, "memory(GiB)": 13.7, "step": 76620, "train_speed(iter/s)": 1.530828 }, { "acc": 0.9869791, "epoch": 35.915162877900165, "grad_norm": 4.634591102600098, "learning_rate": 2.0156458154266353e-06, "loss": 0.03542833, "memory(GiB)": 13.7, "step": 76625, "train_speed(iter/s)": 1.530833 }, { "acc": 0.97590275, "epoch": 35.917506444809, "grad_norm": 4.716086387634277, "learning_rate": 2.015024042586339e-06, "loss": 0.06509681, "memory(GiB)": 13.7, "step": 76630, "train_speed(iter/s)": 1.530838 }, { "acc": 0.9864584, "epoch": 35.919850011717834, "grad_norm": 3.443108081817627, "learning_rate": 2.014402341507079e-06, "loss": 0.02993664, "memory(GiB)": 13.7, "step": 76635, "train_speed(iter/s)": 1.530838 }, { "acc": 0.98145828, "epoch": 35.92219357862667, "grad_norm": 4.626410007476807, "learning_rate": 2.013780712203794e-06, "loss": 0.07418319, "memory(GiB)": 13.7, "step": 76640, "train_speed(iter/s)": 1.530843 }, { "acc": 0.98706875, "epoch": 35.9245371455355, "grad_norm": 2.5235865116119385, "learning_rate": 2.0131591546914308e-06, "loss": 0.02844323, "memory(GiB)": 13.7, "step": 76645, "train_speed(iter/s)": 1.53085 }, { "acc": 0.96719704, "epoch": 35.92688071244434, "grad_norm": 4.445363998413086, "learning_rate": 2.0125376689849253e-06, "loss": 0.10438213, "memory(GiB)": 13.7, "step": 76650, "train_speed(iter/s)": 1.530858 }, { "acc": 0.9861805, "epoch": 35.92922427935318, "grad_norm": 4.709685325622559, "learning_rate": 2.011916255099219e-06, "loss": 0.04352404, "memory(GiB)": 13.7, "step": 76655, "train_speed(iter/s)": 1.53086 }, { "acc": 0.9802084, "epoch": 35.93156784626201, "grad_norm": 0.10080099105834961, "learning_rate": 2.011294913049248e-06, "loss": 0.05165236, "memory(GiB)": 13.7, "step": 76660, "train_speed(iter/s)": 1.530865 }, { "acc": 0.990625, "epoch": 35.93391141317085, "grad_norm": 0.22822487354278564, "learning_rate": 2.010673642849946e-06, "loss": 0.01343474, "memory(GiB)": 13.7, "step": 76665, "train_speed(iter/s)": 1.530862 }, { "acc": 0.99017859, "epoch": 35.93625498007968, "grad_norm": 3.465426445007324, "learning_rate": 2.0100524445162474e-06, "loss": 0.0349523, "memory(GiB)": 13.7, "step": 76670, "train_speed(iter/s)": 1.530866 }, { "acc": 0.97852678, "epoch": 35.938598546988516, "grad_norm": 3.547149419784546, "learning_rate": 2.009431318063085e-06, "loss": 0.05753832, "memory(GiB)": 13.7, "step": 76675, "train_speed(iter/s)": 1.530866 }, { "acc": 0.98500004, "epoch": 35.94094211389735, "grad_norm": 2.1260480880737305, "learning_rate": 2.0088102635053862e-06, "loss": 0.03350905, "memory(GiB)": 13.7, "step": 76680, "train_speed(iter/s)": 1.530868 }, { "acc": 0.98061962, "epoch": 35.943285680806184, "grad_norm": 3.9379866123199463, "learning_rate": 2.008189280858081e-06, "loss": 0.05335763, "memory(GiB)": 13.7, "step": 76685, "train_speed(iter/s)": 1.530869 }, { "acc": 0.97418127, "epoch": 35.945629247715026, "grad_norm": 5.612659931182861, "learning_rate": 2.007568370136094e-06, "loss": 0.08524869, "memory(GiB)": 13.7, "step": 76690, "train_speed(iter/s)": 1.530875 }, { "acc": 0.9833333, "epoch": 35.94797281462386, "grad_norm": 2.649003028869629, "learning_rate": 2.006947531354352e-06, "loss": 0.07807154, "memory(GiB)": 13.7, "step": 76695, "train_speed(iter/s)": 1.53088 }, { "acc": 0.99552269, "epoch": 35.950316381532694, "grad_norm": 3.0827736854553223, "learning_rate": 2.0063267645277755e-06, "loss": 0.01623658, "memory(GiB)": 13.7, "step": 76700, "train_speed(iter/s)": 1.530887 }, { "acc": 0.9848959, "epoch": 35.95265994844153, "grad_norm": 3.8379859924316406, "learning_rate": 2.005706069671287e-06, "loss": 0.04390162, "memory(GiB)": 13.7, "step": 76705, "train_speed(iter/s)": 1.530889 }, { "acc": 0.98038692, "epoch": 35.95500351535036, "grad_norm": 3.7228431701660156, "learning_rate": 2.0050854467998075e-06, "loss": 0.06927981, "memory(GiB)": 13.7, "step": 76710, "train_speed(iter/s)": 1.530889 }, { "acc": 0.98104162, "epoch": 35.9573470822592, "grad_norm": 1.5324076414108276, "learning_rate": 2.004464895928251e-06, "loss": 0.04327043, "memory(GiB)": 13.7, "step": 76715, "train_speed(iter/s)": 1.530891 }, { "acc": 0.990625, "epoch": 35.95969064916803, "grad_norm": 3.050309658050537, "learning_rate": 2.003844417071536e-06, "loss": 0.03308692, "memory(GiB)": 13.7, "step": 76720, "train_speed(iter/s)": 1.530891 }, { "acc": 0.97383928, "epoch": 35.962034216076866, "grad_norm": 3.8247904777526855, "learning_rate": 2.0032240102445763e-06, "loss": 0.04367697, "memory(GiB)": 13.7, "step": 76725, "train_speed(iter/s)": 1.530893 }, { "acc": 0.9794445, "epoch": 35.96437778298571, "grad_norm": 3.515011787414551, "learning_rate": 2.0026036754622864e-06, "loss": 0.06216053, "memory(GiB)": 13.7, "step": 76730, "train_speed(iter/s)": 1.530893 }, { "acc": 0.97979164, "epoch": 35.96672134989454, "grad_norm": 4.419898986816406, "learning_rate": 2.001983412739576e-06, "loss": 0.03458581, "memory(GiB)": 13.7, "step": 76735, "train_speed(iter/s)": 1.530893 }, { "acc": 0.97919016, "epoch": 35.969064916803376, "grad_norm": 0.006131765898317099, "learning_rate": 2.001363222091351e-06, "loss": 0.04698138, "memory(GiB)": 13.7, "step": 76740, "train_speed(iter/s)": 1.530896 }, { "acc": 0.99750004, "epoch": 35.97140848371221, "grad_norm": 4.1090006828308105, "learning_rate": 2.0007431035325215e-06, "loss": 0.03420814, "memory(GiB)": 13.7, "step": 76745, "train_speed(iter/s)": 1.5309 }, { "acc": 0.98550596, "epoch": 35.973752050621044, "grad_norm": 0.0012334233615547419, "learning_rate": 2.0001230570779938e-06, "loss": 0.05156168, "memory(GiB)": 13.7, "step": 76750, "train_speed(iter/s)": 1.530904 }, { "acc": 0.98613968, "epoch": 35.97609561752988, "grad_norm": 4.308710098266602, "learning_rate": 1.9995030827426696e-06, "loss": 0.05516347, "memory(GiB)": 13.7, "step": 76755, "train_speed(iter/s)": 1.530904 }, { "acc": 0.97738094, "epoch": 35.97843918443871, "grad_norm": 0.822512149810791, "learning_rate": 1.9988831805414527e-06, "loss": 0.04472698, "memory(GiB)": 13.7, "step": 76760, "train_speed(iter/s)": 1.530908 }, { "acc": 0.98402786, "epoch": 35.980782751347554, "grad_norm": 4.155264377593994, "learning_rate": 1.9982633504892448e-06, "loss": 0.04067165, "memory(GiB)": 13.7, "step": 76765, "train_speed(iter/s)": 1.530912 }, { "acc": 0.9875, "epoch": 35.98312631825639, "grad_norm": 0.32029297947883606, "learning_rate": 1.9976435926009422e-06, "loss": 0.06671085, "memory(GiB)": 13.7, "step": 76770, "train_speed(iter/s)": 1.530918 }, { "acc": 0.99125004, "epoch": 35.98546988516522, "grad_norm": 0.018566347658634186, "learning_rate": 1.997023906891441e-06, "loss": 0.01431988, "memory(GiB)": 13.7, "step": 76775, "train_speed(iter/s)": 1.530919 }, { "acc": 0.98145828, "epoch": 35.98781345207406, "grad_norm": 0.0017487471923232079, "learning_rate": 1.996404293375638e-06, "loss": 0.02919556, "memory(GiB)": 13.7, "step": 76780, "train_speed(iter/s)": 1.530923 }, { "acc": 0.99208336, "epoch": 35.99015701898289, "grad_norm": 0.0010322416201233864, "learning_rate": 1.9957847520684277e-06, "loss": 0.02028704, "memory(GiB)": 13.7, "step": 76785, "train_speed(iter/s)": 1.530924 }, { "acc": 0.98291664, "epoch": 35.992500585891726, "grad_norm": 2.289924383163452, "learning_rate": 1.995165282984699e-06, "loss": 0.02976273, "memory(GiB)": 13.7, "step": 76790, "train_speed(iter/s)": 1.530932 }, { "acc": 0.984375, "epoch": 35.99484415280056, "grad_norm": 0.12681928277015686, "learning_rate": 1.9945458861393445e-06, "loss": 0.02243687, "memory(GiB)": 13.7, "step": 76795, "train_speed(iter/s)": 1.53094 }, { "acc": 0.98291664, "epoch": 35.997187719709395, "grad_norm": 3.668426752090454, "learning_rate": 1.9939265615472512e-06, "loss": 0.04841733, "memory(GiB)": 13.7, "step": 76800, "train_speed(iter/s)": 1.530945 }, { "acc": 0.98526211, "epoch": 35.999531286618236, "grad_norm": 2.1315176486968994, "learning_rate": 1.993307309223308e-06, "loss": 0.04772542, "memory(GiB)": 13.7, "step": 76805, "train_speed(iter/s)": 1.530953 }, { "acc": 0.97696438, "epoch": 36.00187485352707, "grad_norm": 4.099541664123535, "learning_rate": 1.9926881291823967e-06, "loss": 0.07216079, "memory(GiB)": 13.7, "step": 76810, "train_speed(iter/s)": 1.530935 }, { "acc": 0.99312496, "epoch": 36.004218420435905, "grad_norm": 1.1301144361495972, "learning_rate": 1.9920690214394006e-06, "loss": 0.01764291, "memory(GiB)": 13.7, "step": 76815, "train_speed(iter/s)": 1.530939 }, { "acc": 0.99187498, "epoch": 36.00656198734474, "grad_norm": 0.5127867460250854, "learning_rate": 1.9914499860092013e-06, "loss": 0.01895227, "memory(GiB)": 13.7, "step": 76820, "train_speed(iter/s)": 1.530943 }, { "acc": 0.97613096, "epoch": 36.00890555425357, "grad_norm": 6.593596458435059, "learning_rate": 1.990831022906681e-06, "loss": 0.07526503, "memory(GiB)": 13.7, "step": 76825, "train_speed(iter/s)": 1.530945 }, { "acc": 0.9802083, "epoch": 36.01124912116241, "grad_norm": 1.9595866203308105, "learning_rate": 1.990212132146713e-06, "loss": 0.03517978, "memory(GiB)": 13.7, "step": 76830, "train_speed(iter/s)": 1.530952 }, { "acc": 0.97770834, "epoch": 36.01359268807124, "grad_norm": 6.212282657623291, "learning_rate": 1.9895933137441762e-06, "loss": 0.07833923, "memory(GiB)": 13.7, "step": 76835, "train_speed(iter/s)": 1.530948 }, { "acc": 0.98135414, "epoch": 36.01593625498008, "grad_norm": 3.391127109527588, "learning_rate": 1.988974567713946e-06, "loss": 0.03569234, "memory(GiB)": 13.7, "step": 76840, "train_speed(iter/s)": 1.530956 }, { "acc": 0.984375, "epoch": 36.01827982188892, "grad_norm": 3.4594483375549316, "learning_rate": 1.9883558940708926e-06, "loss": 0.02736834, "memory(GiB)": 13.7, "step": 76845, "train_speed(iter/s)": 1.530958 }, { "acc": 0.98154764, "epoch": 36.02062338879775, "grad_norm": 5.076489448547363, "learning_rate": 1.9877372928298867e-06, "loss": 0.05390257, "memory(GiB)": 13.7, "step": 76850, "train_speed(iter/s)": 1.53096 }, { "acc": 0.9833334, "epoch": 36.022966955706586, "grad_norm": 0.7958007454872131, "learning_rate": 1.987118764005801e-06, "loss": 0.04391682, "memory(GiB)": 13.7, "step": 76855, "train_speed(iter/s)": 1.530965 }, { "acc": 0.96625004, "epoch": 36.02531052261542, "grad_norm": 7.116192817687988, "learning_rate": 1.986500307613499e-06, "loss": 0.07000089, "memory(GiB)": 13.7, "step": 76860, "train_speed(iter/s)": 1.530968 }, { "acc": 0.9916667, "epoch": 36.027654089524255, "grad_norm": 1.6425633430480957, "learning_rate": 1.985881923667849e-06, "loss": 0.02664291, "memory(GiB)": 13.7, "step": 76865, "train_speed(iter/s)": 1.53097 }, { "acc": 0.98764877, "epoch": 36.02999765643309, "grad_norm": 4.278152942657471, "learning_rate": 1.9852636121837126e-06, "loss": 0.04638008, "memory(GiB)": 13.7, "step": 76870, "train_speed(iter/s)": 1.53097 }, { "acc": 0.98583336, "epoch": 36.03234122334192, "grad_norm": 3.0453290939331055, "learning_rate": 1.9846453731759522e-06, "loss": 0.04398027, "memory(GiB)": 13.7, "step": 76875, "train_speed(iter/s)": 1.53097 }, { "acc": 0.98791122, "epoch": 36.034684790250765, "grad_norm": 3.7311458587646484, "learning_rate": 1.984027206659432e-06, "loss": 0.03335416, "memory(GiB)": 13.7, "step": 76880, "train_speed(iter/s)": 1.530969 }, { "acc": 0.97686958, "epoch": 36.0370283571596, "grad_norm": 2.9235193729400635, "learning_rate": 1.983409112649006e-06, "loss": 0.04447568, "memory(GiB)": 13.7, "step": 76885, "train_speed(iter/s)": 1.530971 }, { "acc": 0.990625, "epoch": 36.03937192406843, "grad_norm": 2.04032301902771, "learning_rate": 1.982791091159532e-06, "loss": 0.0331193, "memory(GiB)": 13.7, "step": 76890, "train_speed(iter/s)": 1.530974 }, { "acc": 0.9890625, "epoch": 36.04171549097727, "grad_norm": 4.514805793762207, "learning_rate": 1.982173142205869e-06, "loss": 0.0312266, "memory(GiB)": 13.7, "step": 76895, "train_speed(iter/s)": 1.53097 }, { "acc": 0.97666664, "epoch": 36.0440590578861, "grad_norm": 4.85088586807251, "learning_rate": 1.9815552658028674e-06, "loss": 0.05279613, "memory(GiB)": 13.7, "step": 76900, "train_speed(iter/s)": 1.530972 }, { "acc": 0.9947917, "epoch": 36.046402624794936, "grad_norm": 2.985659122467041, "learning_rate": 1.9809374619653772e-06, "loss": 0.01599814, "memory(GiB)": 13.7, "step": 76905, "train_speed(iter/s)": 1.530975 }, { "acc": 0.9854166, "epoch": 36.04874619170377, "grad_norm": 3.4031291007995605, "learning_rate": 1.9803197307082505e-06, "loss": 0.02449023, "memory(GiB)": 13.7, "step": 76910, "train_speed(iter/s)": 1.530978 }, { "acc": 0.99375, "epoch": 36.051089758612605, "grad_norm": 5.313893795013428, "learning_rate": 1.979702072046337e-06, "loss": 0.06442876, "memory(GiB)": 13.7, "step": 76915, "train_speed(iter/s)": 1.53098 }, { "acc": 0.98071423, "epoch": 36.053433325521446, "grad_norm": 4.386467456817627, "learning_rate": 1.9790844859944804e-06, "loss": 0.06722841, "memory(GiB)": 13.7, "step": 76920, "train_speed(iter/s)": 1.530985 }, { "acc": 0.9916667, "epoch": 36.05577689243028, "grad_norm": 4.215854167938232, "learning_rate": 1.9784669725675263e-06, "loss": 0.05720345, "memory(GiB)": 13.7, "step": 76925, "train_speed(iter/s)": 1.530986 }, { "acc": 0.98604164, "epoch": 36.058120459339115, "grad_norm": 4.078763484954834, "learning_rate": 1.9778495317803195e-06, "loss": 0.0276881, "memory(GiB)": 13.7, "step": 76930, "train_speed(iter/s)": 1.530987 }, { "acc": 0.9770833, "epoch": 36.06046402624795, "grad_norm": 3.195941686630249, "learning_rate": 1.9772321636476988e-06, "loss": 0.04337546, "memory(GiB)": 13.7, "step": 76935, "train_speed(iter/s)": 1.530994 }, { "acc": 0.99300594, "epoch": 36.06280759315678, "grad_norm": 2.716128349304199, "learning_rate": 1.976614868184506e-06, "loss": 0.0260164, "memory(GiB)": 13.7, "step": 76940, "train_speed(iter/s)": 1.530991 }, { "acc": 0.98729162, "epoch": 36.06515116006562, "grad_norm": 0.003579452633857727, "learning_rate": 1.9759976454055755e-06, "loss": 0.05411863, "memory(GiB)": 13.7, "step": 76945, "train_speed(iter/s)": 1.530993 }, { "acc": 0.98681545, "epoch": 36.06749472697445, "grad_norm": 2.453514814376831, "learning_rate": 1.975380495325746e-06, "loss": 0.04069695, "memory(GiB)": 13.7, "step": 76950, "train_speed(iter/s)": 1.530996 }, { "acc": 0.99093752, "epoch": 36.06983829388329, "grad_norm": 1.0494307279586792, "learning_rate": 1.974763417959852e-06, "loss": 0.02355787, "memory(GiB)": 13.7, "step": 76955, "train_speed(iter/s)": 1.530997 }, { "acc": 0.98758926, "epoch": 36.07218186079213, "grad_norm": 2.9560513496398926, "learning_rate": 1.9741464133227238e-06, "loss": 0.04026852, "memory(GiB)": 13.7, "step": 76960, "train_speed(iter/s)": 1.530997 }, { "acc": 0.98625002, "epoch": 36.07452542770096, "grad_norm": 2.94576358795166, "learning_rate": 1.9735294814291936e-06, "loss": 0.03762145, "memory(GiB)": 13.7, "step": 76965, "train_speed(iter/s)": 1.530996 }, { "acc": 0.99020834, "epoch": 36.0768689946098, "grad_norm": 3.1849982738494873, "learning_rate": 1.972912622294092e-06, "loss": 0.04050231, "memory(GiB)": 13.7, "step": 76970, "train_speed(iter/s)": 1.530996 }, { "acc": 0.9916667, "epoch": 36.07921256151863, "grad_norm": 0.08356496691703796, "learning_rate": 1.9722958359322423e-06, "loss": 0.02362072, "memory(GiB)": 13.7, "step": 76975, "train_speed(iter/s)": 1.531 }, { "acc": 0.98291664, "epoch": 36.081556128427465, "grad_norm": 0.9383438229560852, "learning_rate": 1.9716791223584753e-06, "loss": 0.04379825, "memory(GiB)": 13.7, "step": 76980, "train_speed(iter/s)": 1.531005 }, { "acc": 0.98354168, "epoch": 36.0838996953363, "grad_norm": 4.043283462524414, "learning_rate": 1.971062481587609e-06, "loss": 0.04251212, "memory(GiB)": 13.7, "step": 76985, "train_speed(iter/s)": 1.531009 }, { "acc": 0.98469696, "epoch": 36.086243262245134, "grad_norm": 3.2990975379943848, "learning_rate": 1.97044591363447e-06, "loss": 0.03681314, "memory(GiB)": 13.7, "step": 76990, "train_speed(iter/s)": 1.531009 }, { "acc": 0.98604164, "epoch": 36.088586829153975, "grad_norm": 0.00348359951749444, "learning_rate": 1.969829418513875e-06, "loss": 0.01795317, "memory(GiB)": 13.7, "step": 76995, "train_speed(iter/s)": 1.531011 }, { "acc": 0.9862957, "epoch": 36.09093039606281, "grad_norm": 1.910968542098999, "learning_rate": 1.9692129962406444e-06, "loss": 0.0503354, "memory(GiB)": 13.7, "step": 77000, "train_speed(iter/s)": 1.531013 }, { "acc": 0.98083334, "epoch": 36.093273962971644, "grad_norm": 4.7025604248046875, "learning_rate": 1.9685966468295947e-06, "loss": 0.04615116, "memory(GiB)": 13.7, "step": 77005, "train_speed(iter/s)": 1.531015 }, { "acc": 0.99078522, "epoch": 36.09561752988048, "grad_norm": 4.767092227935791, "learning_rate": 1.9679803702955427e-06, "loss": 0.0446718, "memory(GiB)": 13.7, "step": 77010, "train_speed(iter/s)": 1.531013 }, { "acc": 0.98006449, "epoch": 36.09796109678931, "grad_norm": 2.0635366439819336, "learning_rate": 1.967364166653298e-06, "loss": 0.07032281, "memory(GiB)": 13.7, "step": 77015, "train_speed(iter/s)": 1.531018 }, { "acc": 0.96763897, "epoch": 36.10030466369815, "grad_norm": 3.425570249557495, "learning_rate": 1.966748035917677e-06, "loss": 0.05265087, "memory(GiB)": 13.7, "step": 77020, "train_speed(iter/s)": 1.531023 }, { "acc": 0.97465773, "epoch": 36.10264823060698, "grad_norm": 3.194368839263916, "learning_rate": 1.9661319781034836e-06, "loss": 0.05842694, "memory(GiB)": 13.7, "step": 77025, "train_speed(iter/s)": 1.531026 }, { "acc": 0.97517853, "epoch": 36.10499179751582, "grad_norm": 4.776232719421387, "learning_rate": 1.965515993225531e-06, "loss": 0.04025132, "memory(GiB)": 13.7, "step": 77030, "train_speed(iter/s)": 1.531031 }, { "acc": 0.99508934, "epoch": 36.10733536442466, "grad_norm": 3.7621660232543945, "learning_rate": 1.9649000812986217e-06, "loss": 0.02435178, "memory(GiB)": 13.7, "step": 77035, "train_speed(iter/s)": 1.531032 }, { "acc": 0.98756828, "epoch": 36.10967893133349, "grad_norm": 2.373622179031372, "learning_rate": 1.9642842423375615e-06, "loss": 0.04419249, "memory(GiB)": 13.7, "step": 77040, "train_speed(iter/s)": 1.531035 }, { "acc": 0.98361111, "epoch": 36.112022498242325, "grad_norm": 2.923938274383545, "learning_rate": 1.963668476357155e-06, "loss": 0.02310593, "memory(GiB)": 13.7, "step": 77045, "train_speed(iter/s)": 1.531037 }, { "acc": 0.98601189, "epoch": 36.11436606515116, "grad_norm": 2.5848097801208496, "learning_rate": 1.9630527833722e-06, "loss": 0.05210332, "memory(GiB)": 13.7, "step": 77050, "train_speed(iter/s)": 1.531038 }, { "acc": 0.98989048, "epoch": 36.116709632059994, "grad_norm": 1.6318110227584839, "learning_rate": 1.9624371633974976e-06, "loss": 0.02001452, "memory(GiB)": 13.7, "step": 77055, "train_speed(iter/s)": 1.531034 }, { "acc": 0.9621726, "epoch": 36.11905319896883, "grad_norm": 5.278563499450684, "learning_rate": 1.9618216164478463e-06, "loss": 0.09460196, "memory(GiB)": 13.7, "step": 77060, "train_speed(iter/s)": 1.531041 }, { "acc": 0.98357954, "epoch": 36.12139676587766, "grad_norm": 2.7822487354278564, "learning_rate": 1.9612061425380404e-06, "loss": 0.07935737, "memory(GiB)": 13.7, "step": 77065, "train_speed(iter/s)": 1.531045 }, { "acc": 0.9802084, "epoch": 36.123740332786504, "grad_norm": 4.932856559753418, "learning_rate": 1.9605907416828726e-06, "loss": 0.05585624, "memory(GiB)": 13.7, "step": 77070, "train_speed(iter/s)": 1.531046 }, { "acc": 0.98125, "epoch": 36.12608389969534, "grad_norm": 2.1649863719940186, "learning_rate": 1.9599754138971357e-06, "loss": 0.04252446, "memory(GiB)": 13.7, "step": 77075, "train_speed(iter/s)": 1.531048 }, { "acc": 0.98639956, "epoch": 36.12842746660417, "grad_norm": 4.050367832183838, "learning_rate": 1.9593601591956217e-06, "loss": 0.05648617, "memory(GiB)": 13.7, "step": 77080, "train_speed(iter/s)": 1.531049 }, { "acc": 0.9958334, "epoch": 36.13077103351301, "grad_norm": 1.134223222732544, "learning_rate": 1.9587449775931204e-06, "loss": 0.01335158, "memory(GiB)": 13.7, "step": 77085, "train_speed(iter/s)": 1.531056 }, { "acc": 0.9895833, "epoch": 36.13311460042184, "grad_norm": 1.1006884574890137, "learning_rate": 1.9581298691044148e-06, "loss": 0.02055804, "memory(GiB)": 13.7, "step": 77090, "train_speed(iter/s)": 1.531057 }, { "acc": 0.9864584, "epoch": 36.135458167330675, "grad_norm": 4.145576477050781, "learning_rate": 1.9575148337442927e-06, "loss": 0.03567231, "memory(GiB)": 13.7, "step": 77095, "train_speed(iter/s)": 1.531056 }, { "acc": 0.9742857, "epoch": 36.13780173423951, "grad_norm": 2.526249408721924, "learning_rate": 1.956899871527538e-06, "loss": 0.05456783, "memory(GiB)": 13.7, "step": 77100, "train_speed(iter/s)": 1.531064 }, { "acc": 0.98988094, "epoch": 36.14014530114835, "grad_norm": 0.0013265935704112053, "learning_rate": 1.956284982468932e-06, "loss": 0.01756918, "memory(GiB)": 13.7, "step": 77105, "train_speed(iter/s)": 1.531064 }, { "acc": 0.97423611, "epoch": 36.142488868057185, "grad_norm": 0.01215850468724966, "learning_rate": 1.955670166583252e-06, "loss": 0.04025619, "memory(GiB)": 13.7, "step": 77110, "train_speed(iter/s)": 1.531064 }, { "acc": 0.97988091, "epoch": 36.14483243496602, "grad_norm": 3.1133944988250732, "learning_rate": 1.9550554238852777e-06, "loss": 0.05644755, "memory(GiB)": 13.7, "step": 77115, "train_speed(iter/s)": 1.531068 }, { "acc": 0.9833334, "epoch": 36.147176001874854, "grad_norm": 4.738534450531006, "learning_rate": 1.954440754389788e-06, "loss": 0.04888414, "memory(GiB)": 13.7, "step": 77120, "train_speed(iter/s)": 1.531073 }, { "acc": 0.9802084, "epoch": 36.14951956878369, "grad_norm": 3.7135391235351562, "learning_rate": 1.9538261581115543e-06, "loss": 0.08461446, "memory(GiB)": 13.7, "step": 77125, "train_speed(iter/s)": 1.531073 }, { "acc": 0.98872595, "epoch": 36.15186313569252, "grad_norm": 0.9557492733001709, "learning_rate": 1.9532116350653505e-06, "loss": 0.02774506, "memory(GiB)": 13.7, "step": 77130, "train_speed(iter/s)": 1.531073 }, { "acc": 0.9895833, "epoch": 36.15420670260136, "grad_norm": 2.4215426445007324, "learning_rate": 1.9525971852659493e-06, "loss": 0.02434881, "memory(GiB)": 13.7, "step": 77135, "train_speed(iter/s)": 1.531077 }, { "acc": 0.98386364, "epoch": 36.15655026951019, "grad_norm": 4.05526876449585, "learning_rate": 1.9519828087281163e-06, "loss": 0.06629111, "memory(GiB)": 13.7, "step": 77140, "train_speed(iter/s)": 1.531083 }, { "acc": 0.97744045, "epoch": 36.15889383641903, "grad_norm": 2.621202230453491, "learning_rate": 1.9513685054666225e-06, "loss": 0.05206356, "memory(GiB)": 13.7, "step": 77145, "train_speed(iter/s)": 1.531085 }, { "acc": 0.9885416, "epoch": 36.16123740332787, "grad_norm": 4.713726997375488, "learning_rate": 1.950754275496234e-06, "loss": 0.03337556, "memory(GiB)": 13.7, "step": 77150, "train_speed(iter/s)": 1.531087 }, { "acc": 0.97946434, "epoch": 36.1635809702367, "grad_norm": 6.384786128997803, "learning_rate": 1.9501401188317116e-06, "loss": 0.08158245, "memory(GiB)": 13.7, "step": 77155, "train_speed(iter/s)": 1.531092 }, { "acc": 0.98321428, "epoch": 36.165924537145536, "grad_norm": 2.6361796855926514, "learning_rate": 1.9495260354878213e-06, "loss": 0.03418854, "memory(GiB)": 13.7, "step": 77160, "train_speed(iter/s)": 1.531092 }, { "acc": 0.98716354, "epoch": 36.16826810405437, "grad_norm": 1.1634057760238647, "learning_rate": 1.9489120254793203e-06, "loss": 0.04298918, "memory(GiB)": 13.7, "step": 77165, "train_speed(iter/s)": 1.531094 }, { "acc": 0.96967258, "epoch": 36.170611670963204, "grad_norm": 5.803356647491455, "learning_rate": 1.948298088820969e-06, "loss": 0.07015354, "memory(GiB)": 13.7, "step": 77170, "train_speed(iter/s)": 1.531092 }, { "acc": 0.98729172, "epoch": 36.17295523787204, "grad_norm": 5.476888656616211, "learning_rate": 1.947684225527526e-06, "loss": 0.03129312, "memory(GiB)": 13.7, "step": 77175, "train_speed(iter/s)": 1.531093 }, { "acc": 0.983144, "epoch": 36.17529880478088, "grad_norm": 4.319159507751465, "learning_rate": 1.947070435613743e-06, "loss": 0.06551077, "memory(GiB)": 13.7, "step": 77180, "train_speed(iter/s)": 1.53109 }, { "acc": 0.96965284, "epoch": 36.177642371689714, "grad_norm": 5.614955902099609, "learning_rate": 1.9464567190943757e-06, "loss": 0.04555253, "memory(GiB)": 13.7, "step": 77185, "train_speed(iter/s)": 1.531093 }, { "acc": 0.97902775, "epoch": 36.17998593859855, "grad_norm": 4.509751319885254, "learning_rate": 1.9458430759841775e-06, "loss": 0.07064079, "memory(GiB)": 13.7, "step": 77190, "train_speed(iter/s)": 1.531096 }, { "acc": 0.98202381, "epoch": 36.18232950550738, "grad_norm": 2.085143804550171, "learning_rate": 1.945229506297896e-06, "loss": 0.07019405, "memory(GiB)": 13.7, "step": 77195, "train_speed(iter/s)": 1.531095 }, { "acc": 0.97251463, "epoch": 36.18467307241622, "grad_norm": 4.504392623901367, "learning_rate": 1.9446160100502784e-06, "loss": 0.07282873, "memory(GiB)": 13.7, "step": 77200, "train_speed(iter/s)": 1.531095 }, { "acc": 0.9853838, "epoch": 36.18701663932505, "grad_norm": 6.695958614349365, "learning_rate": 1.9440025872560726e-06, "loss": 0.03328812, "memory(GiB)": 13.7, "step": 77205, "train_speed(iter/s)": 1.531101 }, { "acc": 0.98309526, "epoch": 36.189360206233886, "grad_norm": 4.002007484436035, "learning_rate": 1.943389237930025e-06, "loss": 0.0447437, "memory(GiB)": 13.7, "step": 77210, "train_speed(iter/s)": 1.531105 }, { "acc": 0.98883934, "epoch": 36.19170377314272, "grad_norm": 3.838242530822754, "learning_rate": 1.9427759620868748e-06, "loss": 0.02079851, "memory(GiB)": 13.7, "step": 77215, "train_speed(iter/s)": 1.531106 }, { "acc": 0.98125, "epoch": 36.19404734005156, "grad_norm": 2.5575568675994873, "learning_rate": 1.942162759741365e-06, "loss": 0.08145982, "memory(GiB)": 13.7, "step": 77220, "train_speed(iter/s)": 1.531106 }, { "acc": 0.98903847, "epoch": 36.196390906960396, "grad_norm": 1.877942681312561, "learning_rate": 1.941549630908235e-06, "loss": 0.03844274, "memory(GiB)": 13.7, "step": 77225, "train_speed(iter/s)": 1.531109 }, { "acc": 0.98277283, "epoch": 36.19873447386923, "grad_norm": 0.9618930220603943, "learning_rate": 1.9409365756022245e-06, "loss": 0.03218445, "memory(GiB)": 13.7, "step": 77230, "train_speed(iter/s)": 1.531104 }, { "acc": 0.99050598, "epoch": 36.201078040778064, "grad_norm": 0.06947112083435059, "learning_rate": 1.940323593838067e-06, "loss": 0.01650405, "memory(GiB)": 13.7, "step": 77235, "train_speed(iter/s)": 1.531104 }, { "acc": 0.9840477, "epoch": 36.2034216076869, "grad_norm": 3.139073371887207, "learning_rate": 1.9397106856304953e-06, "loss": 0.05066057, "memory(GiB)": 13.7, "step": 77240, "train_speed(iter/s)": 1.531101 }, { "acc": 0.98296127, "epoch": 36.20576517459573, "grad_norm": 0.8492636680603027, "learning_rate": 1.939097850994243e-06, "loss": 0.04414498, "memory(GiB)": 13.7, "step": 77245, "train_speed(iter/s)": 1.531099 }, { "acc": 0.98145828, "epoch": 36.20810874150457, "grad_norm": 5.203840255737305, "learning_rate": 1.938485089944043e-06, "loss": 0.05591651, "memory(GiB)": 13.7, "step": 77250, "train_speed(iter/s)": 1.531102 }, { "acc": 0.99372482, "epoch": 36.21045230841341, "grad_norm": 2.093881845474243, "learning_rate": 1.9378724024946195e-06, "loss": 0.02482227, "memory(GiB)": 13.7, "step": 77255, "train_speed(iter/s)": 1.531103 }, { "acc": 0.96811008, "epoch": 36.21279587532224, "grad_norm": 4.970002174377441, "learning_rate": 1.9372597886607025e-06, "loss": 0.05628999, "memory(GiB)": 13.7, "step": 77260, "train_speed(iter/s)": 1.531104 }, { "acc": 0.98104172, "epoch": 36.21513944223108, "grad_norm": 1.0332682132720947, "learning_rate": 1.936647248457018e-06, "loss": 0.04042606, "memory(GiB)": 13.7, "step": 77265, "train_speed(iter/s)": 1.531107 }, { "acc": 0.97895832, "epoch": 36.21748300913991, "grad_norm": 2.6822009086608887, "learning_rate": 1.9360347818982874e-06, "loss": 0.03145377, "memory(GiB)": 13.7, "step": 77270, "train_speed(iter/s)": 1.531111 }, { "acc": 0.9791666, "epoch": 36.219826576048746, "grad_norm": 0.01802903227508068, "learning_rate": 1.9354223889992334e-06, "loss": 0.05998161, "memory(GiB)": 13.7, "step": 77275, "train_speed(iter/s)": 1.531116 }, { "acc": 0.98728628, "epoch": 36.22217014295758, "grad_norm": 4.283509731292725, "learning_rate": 1.934810069774575e-06, "loss": 0.02986306, "memory(GiB)": 13.7, "step": 77280, "train_speed(iter/s)": 1.531119 }, { "acc": 0.98562498, "epoch": 36.224513709866415, "grad_norm": 4.08488655090332, "learning_rate": 1.9341978242390304e-06, "loss": 0.02475279, "memory(GiB)": 13.7, "step": 77285, "train_speed(iter/s)": 1.531123 }, { "acc": 0.98633928, "epoch": 36.22685727677525, "grad_norm": 4.016382217407227, "learning_rate": 1.9335856524073183e-06, "loss": 0.02352957, "memory(GiB)": 13.7, "step": 77290, "train_speed(iter/s)": 1.531127 }, { "acc": 0.98760414, "epoch": 36.22920084368409, "grad_norm": 0.7920079231262207, "learning_rate": 1.93297355429415e-06, "loss": 0.03114084, "memory(GiB)": 13.7, "step": 77295, "train_speed(iter/s)": 1.531132 }, { "acc": 0.97458334, "epoch": 36.231544410592925, "grad_norm": 2.2312605381011963, "learning_rate": 1.932361529914239e-06, "loss": 0.03943703, "memory(GiB)": 13.7, "step": 77300, "train_speed(iter/s)": 1.531136 }, { "acc": 0.99092264, "epoch": 36.23388797750176, "grad_norm": 2.924964666366577, "learning_rate": 1.9317495792823e-06, "loss": 0.01953527, "memory(GiB)": 13.7, "step": 77305, "train_speed(iter/s)": 1.531137 }, { "acc": 0.98238096, "epoch": 36.23623154441059, "grad_norm": 2.6089861392974854, "learning_rate": 1.9311377024130376e-06, "loss": 0.06820825, "memory(GiB)": 13.7, "step": 77310, "train_speed(iter/s)": 1.531139 }, { "acc": 0.98500004, "epoch": 36.23857511131943, "grad_norm": 2.613065481185913, "learning_rate": 1.9305258993211627e-06, "loss": 0.02214511, "memory(GiB)": 13.7, "step": 77315, "train_speed(iter/s)": 1.53114 }, { "acc": 0.98916664, "epoch": 36.24091867822826, "grad_norm": 3.551513910293579, "learning_rate": 1.929914170021378e-06, "loss": 0.03028677, "memory(GiB)": 13.7, "step": 77320, "train_speed(iter/s)": 1.531142 }, { "acc": 0.98165178, "epoch": 36.243262245137096, "grad_norm": 2.160752534866333, "learning_rate": 1.9293025145283905e-06, "loss": 0.03960533, "memory(GiB)": 13.7, "step": 77325, "train_speed(iter/s)": 1.531147 }, { "acc": 0.97979164, "epoch": 36.24560581204594, "grad_norm": 3.1291303634643555, "learning_rate": 1.9286909328568988e-06, "loss": 0.04827844, "memory(GiB)": 13.7, "step": 77330, "train_speed(iter/s)": 1.531151 }, { "acc": 0.98258934, "epoch": 36.24794937895477, "grad_norm": 5.489139556884766, "learning_rate": 1.928079425021606e-06, "loss": 0.03664571, "memory(GiB)": 13.7, "step": 77335, "train_speed(iter/s)": 1.531157 }, { "acc": 0.98104162, "epoch": 36.250292945863606, "grad_norm": 3.456868886947632, "learning_rate": 1.9274679910372116e-06, "loss": 0.05399152, "memory(GiB)": 13.7, "step": 77340, "train_speed(iter/s)": 1.531159 }, { "acc": 0.99375, "epoch": 36.25263651277244, "grad_norm": 0.004199262708425522, "learning_rate": 1.926856630918409e-06, "loss": 0.01510586, "memory(GiB)": 13.7, "step": 77345, "train_speed(iter/s)": 1.531158 }, { "acc": 0.9894886, "epoch": 36.254980079681275, "grad_norm": 6.106432914733887, "learning_rate": 1.926245344679896e-06, "loss": 0.05587651, "memory(GiB)": 13.7, "step": 77350, "train_speed(iter/s)": 1.531161 }, { "acc": 0.9842804, "epoch": 36.25732364659011, "grad_norm": 2.570615530014038, "learning_rate": 1.9256341323363665e-06, "loss": 0.03463148, "memory(GiB)": 13.7, "step": 77355, "train_speed(iter/s)": 1.531159 }, { "acc": 0.99278984, "epoch": 36.25966721349894, "grad_norm": 2.7539453506469727, "learning_rate": 1.925022993902509e-06, "loss": 0.04940467, "memory(GiB)": 13.7, "step": 77360, "train_speed(iter/s)": 1.531165 }, { "acc": 0.97658548, "epoch": 36.26201078040778, "grad_norm": 0.00012083386536687613, "learning_rate": 1.924411929393017e-06, "loss": 0.06841628, "memory(GiB)": 13.7, "step": 77365, "train_speed(iter/s)": 1.531172 }, { "acc": 0.98961315, "epoch": 36.26435434731662, "grad_norm": 2.321870803833008, "learning_rate": 1.9238009388225744e-06, "loss": 0.03665017, "memory(GiB)": 13.7, "step": 77370, "train_speed(iter/s)": 1.531183 }, { "acc": 0.98727674, "epoch": 36.26669791422545, "grad_norm": 3.0774762630462646, "learning_rate": 1.9231900222058697e-06, "loss": 0.03594035, "memory(GiB)": 13.7, "step": 77375, "train_speed(iter/s)": 1.531188 }, { "acc": 0.98395834, "epoch": 36.26904148113429, "grad_norm": 2.742286205291748, "learning_rate": 1.9225791795575886e-06, "loss": 0.0480271, "memory(GiB)": 13.7, "step": 77380, "train_speed(iter/s)": 1.531199 }, { "acc": 0.97093754, "epoch": 36.27138504804312, "grad_norm": 8.70392894744873, "learning_rate": 1.921968410892411e-06, "loss": 0.07470815, "memory(GiB)": 13.7, "step": 77385, "train_speed(iter/s)": 1.531206 }, { "acc": 0.99278851, "epoch": 36.273728614951956, "grad_norm": 1.499127745628357, "learning_rate": 1.921357716225019e-06, "loss": 0.01880047, "memory(GiB)": 13.7, "step": 77390, "train_speed(iter/s)": 1.531206 }, { "acc": 0.98048611, "epoch": 36.27607218186079, "grad_norm": 5.269365310668945, "learning_rate": 1.920747095570093e-06, "loss": 0.04579534, "memory(GiB)": 13.7, "step": 77395, "train_speed(iter/s)": 1.53121 }, { "acc": 0.97032204, "epoch": 36.278415748769625, "grad_norm": 5.521140098571777, "learning_rate": 1.9201365489423094e-06, "loss": 0.0634685, "memory(GiB)": 13.7, "step": 77400, "train_speed(iter/s)": 1.531213 }, { "acc": 0.9958333, "epoch": 36.28075931567846, "grad_norm": 0.9535105228424072, "learning_rate": 1.919526076356341e-06, "loss": 0.01388947, "memory(GiB)": 13.7, "step": 77405, "train_speed(iter/s)": 1.531215 }, { "acc": 0.99125004, "epoch": 36.2831028825873, "grad_norm": 1.6473021507263184, "learning_rate": 1.9189156778268646e-06, "loss": 0.05391397, "memory(GiB)": 13.7, "step": 77410, "train_speed(iter/s)": 1.531213 }, { "acc": 0.9856945, "epoch": 36.285446449496135, "grad_norm": 4.6200408935546875, "learning_rate": 1.9183053533685535e-06, "loss": 0.02995533, "memory(GiB)": 13.7, "step": 77415, "train_speed(iter/s)": 1.531217 }, { "acc": 0.98916664, "epoch": 36.28779001640497, "grad_norm": 2.343294382095337, "learning_rate": 1.917695102996074e-06, "loss": 0.03043415, "memory(GiB)": 13.7, "step": 77420, "train_speed(iter/s)": 1.531221 }, { "acc": 0.98611107, "epoch": 36.2901335833138, "grad_norm": 2.7659406661987305, "learning_rate": 1.9170849267240963e-06, "loss": 0.03132096, "memory(GiB)": 13.7, "step": 77425, "train_speed(iter/s)": 1.531219 }, { "acc": 0.97394352, "epoch": 36.29247715022264, "grad_norm": 3.5181691646575928, "learning_rate": 1.9164748245672867e-06, "loss": 0.07463802, "memory(GiB)": 13.7, "step": 77430, "train_speed(iter/s)": 1.531218 }, { "acc": 0.97624998, "epoch": 36.29482071713147, "grad_norm": 4.031917095184326, "learning_rate": 1.9158647965403125e-06, "loss": 0.04411326, "memory(GiB)": 13.7, "step": 77435, "train_speed(iter/s)": 1.531223 }, { "acc": 0.97342262, "epoch": 36.297164284040306, "grad_norm": 4.341434001922607, "learning_rate": 1.915254842657835e-06, "loss": 0.06818538, "memory(GiB)": 13.7, "step": 77440, "train_speed(iter/s)": 1.53123 }, { "acc": 0.97904758, "epoch": 36.29950785094915, "grad_norm": 4.303799152374268, "learning_rate": 1.9146449629345138e-06, "loss": 0.05328178, "memory(GiB)": 13.7, "step": 77445, "train_speed(iter/s)": 1.53123 }, { "acc": 0.98656254, "epoch": 36.30185141785798, "grad_norm": 2.3497564792633057, "learning_rate": 1.9140351573850093e-06, "loss": 0.04713274, "memory(GiB)": 13.7, "step": 77450, "train_speed(iter/s)": 1.531231 }, { "acc": 0.98041668, "epoch": 36.304194984766816, "grad_norm": 1.865273118019104, "learning_rate": 1.9134254260239815e-06, "loss": 0.02960893, "memory(GiB)": 13.7, "step": 77455, "train_speed(iter/s)": 1.531236 }, { "acc": 0.9802083, "epoch": 36.30653855167565, "grad_norm": 2.3578431606292725, "learning_rate": 1.9128157688660833e-06, "loss": 0.05508392, "memory(GiB)": 13.7, "step": 77460, "train_speed(iter/s)": 1.53124 }, { "acc": 0.99750004, "epoch": 36.308882118584485, "grad_norm": 0.0020793969742953777, "learning_rate": 1.91220618592597e-06, "loss": 0.00523447, "memory(GiB)": 13.7, "step": 77465, "train_speed(iter/s)": 1.531243 }, { "acc": 0.98465281, "epoch": 36.31122568549332, "grad_norm": 3.2751171588897705, "learning_rate": 1.911596677218296e-06, "loss": 0.06206889, "memory(GiB)": 13.7, "step": 77470, "train_speed(iter/s)": 1.531252 }, { "acc": 0.98946428, "epoch": 36.313569252402154, "grad_norm": 0.14633750915527344, "learning_rate": 1.910987242757708e-06, "loss": 0.01831189, "memory(GiB)": 13.7, "step": 77475, "train_speed(iter/s)": 1.531258 }, { "acc": 0.98791666, "epoch": 36.31591281931099, "grad_norm": 1.2156965732574463, "learning_rate": 1.910377882558857e-06, "loss": 0.01706944, "memory(GiB)": 13.7, "step": 77480, "train_speed(iter/s)": 1.531263 }, { "acc": 0.98239584, "epoch": 36.31825638621983, "grad_norm": 0.0057556177489459515, "learning_rate": 1.9097685966363914e-06, "loss": 0.02814746, "memory(GiB)": 13.7, "step": 77485, "train_speed(iter/s)": 1.531269 }, { "acc": 0.9791667, "epoch": 36.320599953128664, "grad_norm": 3.2902839183807373, "learning_rate": 1.9091593850049554e-06, "loss": 0.05466154, "memory(GiB)": 13.7, "step": 77490, "train_speed(iter/s)": 1.53127 }, { "acc": 0.9679018, "epoch": 36.3229435200375, "grad_norm": 3.3831675052642822, "learning_rate": 1.9085502476791903e-06, "loss": 0.06392865, "memory(GiB)": 13.7, "step": 77495, "train_speed(iter/s)": 1.531279 }, { "acc": 0.98151474, "epoch": 36.32528708694633, "grad_norm": 4.532800197601318, "learning_rate": 1.907941184673739e-06, "loss": 0.04887597, "memory(GiB)": 13.7, "step": 77500, "train_speed(iter/s)": 1.531285 }, { "acc": 0.99695644, "epoch": 36.32763065385517, "grad_norm": 1.0813829898834229, "learning_rate": 1.907332196003242e-06, "loss": 0.01627028, "memory(GiB)": 13.7, "step": 77505, "train_speed(iter/s)": 1.53129 }, { "acc": 0.99083328, "epoch": 36.329974220764, "grad_norm": 0.0021113366819918156, "learning_rate": 1.906723281682339e-06, "loss": 0.01635009, "memory(GiB)": 13.7, "step": 77510, "train_speed(iter/s)": 1.531294 }, { "acc": 0.98142853, "epoch": 36.332317787672835, "grad_norm": 3.7665274143218994, "learning_rate": 1.9061144417256623e-06, "loss": 0.04335489, "memory(GiB)": 13.7, "step": 77515, "train_speed(iter/s)": 1.531295 }, { "acc": 0.98777771, "epoch": 36.33466135458168, "grad_norm": 3.622978925704956, "learning_rate": 1.9055056761478482e-06, "loss": 0.03830742, "memory(GiB)": 13.7, "step": 77520, "train_speed(iter/s)": 1.531297 }, { "acc": 0.99142857, "epoch": 36.33700492149051, "grad_norm": 0.7707590460777283, "learning_rate": 1.9048969849635326e-06, "loss": 0.02837964, "memory(GiB)": 13.7, "step": 77525, "train_speed(iter/s)": 1.5313 }, { "acc": 0.97770834, "epoch": 36.339348488399345, "grad_norm": 0.013061183504760265, "learning_rate": 1.904288368187343e-06, "loss": 0.04502937, "memory(GiB)": 13.7, "step": 77530, "train_speed(iter/s)": 1.531308 }, { "acc": 0.982197, "epoch": 36.34169205530818, "grad_norm": 0.010648835450410843, "learning_rate": 1.9036798258339076e-06, "loss": 0.040917, "memory(GiB)": 13.7, "step": 77535, "train_speed(iter/s)": 1.531311 }, { "acc": 0.9769886, "epoch": 36.344035622217014, "grad_norm": 5.487481117248535, "learning_rate": 1.9030713579178555e-06, "loss": 0.07137802, "memory(GiB)": 13.7, "step": 77540, "train_speed(iter/s)": 1.531312 }, { "acc": 0.98872471, "epoch": 36.34637918912585, "grad_norm": 4.947486877441406, "learning_rate": 1.9024629644538135e-06, "loss": 0.04633541, "memory(GiB)": 13.7, "step": 77545, "train_speed(iter/s)": 1.531316 }, { "acc": 0.96967258, "epoch": 36.34872275603468, "grad_norm": 0.0024719033390283585, "learning_rate": 1.9018546454564024e-06, "loss": 0.06572156, "memory(GiB)": 13.7, "step": 77550, "train_speed(iter/s)": 1.531318 }, { "acc": 0.98395834, "epoch": 36.35106632294352, "grad_norm": 0.006063301581889391, "learning_rate": 1.9012464009402458e-06, "loss": 0.03388663, "memory(GiB)": 13.7, "step": 77555, "train_speed(iter/s)": 1.531315 }, { "acc": 0.97889881, "epoch": 36.35340988985236, "grad_norm": 3.6969313621520996, "learning_rate": 1.9006382309199655e-06, "loss": 0.06717381, "memory(GiB)": 13.7, "step": 77560, "train_speed(iter/s)": 1.531317 }, { "acc": 0.98520832, "epoch": 36.35575345676119, "grad_norm": 4.3788533210754395, "learning_rate": 1.9000301354101786e-06, "loss": 0.02644347, "memory(GiB)": 13.7, "step": 77565, "train_speed(iter/s)": 1.531316 }, { "acc": 0.98500004, "epoch": 36.35809702367003, "grad_norm": 4.378842353820801, "learning_rate": 1.8994221144254995e-06, "loss": 0.04302194, "memory(GiB)": 13.7, "step": 77570, "train_speed(iter/s)": 1.531317 }, { "acc": 0.988447, "epoch": 36.36044059057886, "grad_norm": 4.911028861999512, "learning_rate": 1.898814167980545e-06, "loss": 0.03776233, "memory(GiB)": 13.7, "step": 77575, "train_speed(iter/s)": 1.53132 }, { "acc": 0.99236107, "epoch": 36.362784157487695, "grad_norm": 2.84389328956604, "learning_rate": 1.8982062960899279e-06, "loss": 0.0290053, "memory(GiB)": 13.7, "step": 77580, "train_speed(iter/s)": 1.531322 }, { "acc": 0.98430557, "epoch": 36.36512772439653, "grad_norm": 0.2651347219944, "learning_rate": 1.8975984987682615e-06, "loss": 0.03850424, "memory(GiB)": 13.7, "step": 77585, "train_speed(iter/s)": 1.531326 }, { "acc": 0.9885417, "epoch": 36.367471291305364, "grad_norm": 7.279666423797607, "learning_rate": 1.8969907760301518e-06, "loss": 0.04397367, "memory(GiB)": 13.7, "step": 77590, "train_speed(iter/s)": 1.531327 }, { "acc": 0.9854167, "epoch": 36.369814858214205, "grad_norm": 0.7254289984703064, "learning_rate": 1.8963831278902075e-06, "loss": 0.02782355, "memory(GiB)": 13.7, "step": 77595, "train_speed(iter/s)": 1.531325 }, { "acc": 0.97421875, "epoch": 36.37215842512304, "grad_norm": 4.489038467407227, "learning_rate": 1.8957755543630368e-06, "loss": 0.05675, "memory(GiB)": 13.7, "step": 77600, "train_speed(iter/s)": 1.531327 }, { "acc": 0.9802084, "epoch": 36.374501992031874, "grad_norm": 1.3671339750289917, "learning_rate": 1.8951680554632412e-06, "loss": 0.02993471, "memory(GiB)": 13.7, "step": 77605, "train_speed(iter/s)": 1.531328 }, { "acc": 0.9791667, "epoch": 36.37684555894071, "grad_norm": 3.5631043910980225, "learning_rate": 1.894560631205424e-06, "loss": 0.04281965, "memory(GiB)": 13.7, "step": 77610, "train_speed(iter/s)": 1.531328 }, { "acc": 0.97666664, "epoch": 36.37918912584954, "grad_norm": 3.415450096130371, "learning_rate": 1.8939532816041848e-06, "loss": 0.03553457, "memory(GiB)": 13.7, "step": 77615, "train_speed(iter/s)": 1.531329 }, { "acc": 0.98125, "epoch": 36.38153269275838, "grad_norm": 4.8885178565979, "learning_rate": 1.8933460066741245e-06, "loss": 0.04676352, "memory(GiB)": 13.7, "step": 77620, "train_speed(iter/s)": 1.531334 }, { "acc": 0.9697916, "epoch": 36.38387625966721, "grad_norm": 4.34905481338501, "learning_rate": 1.8927388064298367e-06, "loss": 0.06208555, "memory(GiB)": 13.7, "step": 77625, "train_speed(iter/s)": 1.531339 }, { "acc": 0.99750004, "epoch": 36.386219826576045, "grad_norm": 1.6628166122245602e-05, "learning_rate": 1.8921316808859177e-06, "loss": 0.00888604, "memory(GiB)": 13.7, "step": 77630, "train_speed(iter/s)": 1.531345 }, { "acc": 0.9958334, "epoch": 36.38856339348489, "grad_norm": 6.787506580352783, "learning_rate": 1.8915246300569632e-06, "loss": 0.02778986, "memory(GiB)": 13.7, "step": 77635, "train_speed(iter/s)": 1.531341 }, { "acc": 0.96875, "epoch": 36.39090696039372, "grad_norm": 0.15296213328838348, "learning_rate": 1.890917653957561e-06, "loss": 0.09011731, "memory(GiB)": 13.7, "step": 77640, "train_speed(iter/s)": 1.531351 }, { "acc": 0.98311014, "epoch": 36.393250527302555, "grad_norm": 0.0021214792504906654, "learning_rate": 1.8903107526023029e-06, "loss": 0.04033387, "memory(GiB)": 13.7, "step": 77645, "train_speed(iter/s)": 1.531349 }, { "acc": 0.98104162, "epoch": 36.39559409421139, "grad_norm": 4.2240166664123535, "learning_rate": 1.8897039260057782e-06, "loss": 0.08216228, "memory(GiB)": 13.7, "step": 77650, "train_speed(iter/s)": 1.531349 }, { "acc": 0.99452114, "epoch": 36.397937661120224, "grad_norm": 4.862489223480225, "learning_rate": 1.8890971741825699e-06, "loss": 0.02826469, "memory(GiB)": 13.7, "step": 77655, "train_speed(iter/s)": 1.531348 }, { "acc": 0.98923607, "epoch": 36.40028122802906, "grad_norm": 2.5773165225982666, "learning_rate": 1.888490497147265e-06, "loss": 0.03625002, "memory(GiB)": 13.7, "step": 77660, "train_speed(iter/s)": 1.531352 }, { "acc": 0.9916666, "epoch": 36.40262479493789, "grad_norm": 0.03429460525512695, "learning_rate": 1.8878838949144434e-06, "loss": 0.02219566, "memory(GiB)": 13.7, "step": 77665, "train_speed(iter/s)": 1.531353 }, { "acc": 0.98676472, "epoch": 36.404968361846734, "grad_norm": 5.440448760986328, "learning_rate": 1.8872773674986871e-06, "loss": 0.03954836, "memory(GiB)": 13.7, "step": 77670, "train_speed(iter/s)": 1.531358 }, { "acc": 0.97437496, "epoch": 36.40731192875557, "grad_norm": 0.9803155660629272, "learning_rate": 1.886670914914577e-06, "loss": 0.06035122, "memory(GiB)": 13.7, "step": 77675, "train_speed(iter/s)": 1.53136 }, { "acc": 0.9911232, "epoch": 36.4096554956644, "grad_norm": 4.621993064880371, "learning_rate": 1.8860645371766867e-06, "loss": 0.03592598, "memory(GiB)": 13.7, "step": 77680, "train_speed(iter/s)": 1.531366 }, { "acc": 0.98968754, "epoch": 36.41199906257324, "grad_norm": 3.479607105255127, "learning_rate": 1.8854582342995931e-06, "loss": 0.06849737, "memory(GiB)": 13.7, "step": 77685, "train_speed(iter/s)": 1.531366 }, { "acc": 0.98604164, "epoch": 36.41434262948207, "grad_norm": 3.7227578163146973, "learning_rate": 1.8848520062978715e-06, "loss": 0.02936109, "memory(GiB)": 13.7, "step": 77690, "train_speed(iter/s)": 1.531372 }, { "acc": 0.978125, "epoch": 36.416686196390906, "grad_norm": 1.975957989692688, "learning_rate": 1.8842458531860915e-06, "loss": 0.04860114, "memory(GiB)": 13.7, "step": 77695, "train_speed(iter/s)": 1.53138 }, { "acc": 0.9770833, "epoch": 36.41902976329974, "grad_norm": 3.6505331993103027, "learning_rate": 1.883639774978823e-06, "loss": 0.06937594, "memory(GiB)": 13.7, "step": 77700, "train_speed(iter/s)": 1.531384 }, { "acc": 0.9791666, "epoch": 36.421373330208574, "grad_norm": 4.3246259689331055, "learning_rate": 1.8830337716906336e-06, "loss": 0.05787009, "memory(GiB)": 13.7, "step": 77705, "train_speed(iter/s)": 1.531388 }, { "acc": 0.98833332, "epoch": 36.423716897117416, "grad_norm": 0.14244762063026428, "learning_rate": 1.882427843336093e-06, "loss": 0.01593682, "memory(GiB)": 13.7, "step": 77710, "train_speed(iter/s)": 1.531389 }, { "acc": 0.98365536, "epoch": 36.42606046402625, "grad_norm": 2.1659607887268066, "learning_rate": 1.881821989929761e-06, "loss": 0.03927868, "memory(GiB)": 13.7, "step": 77715, "train_speed(iter/s)": 1.531391 }, { "acc": 0.9880209, "epoch": 36.428404030935084, "grad_norm": 1.8617392778396606, "learning_rate": 1.8812162114862034e-06, "loss": 0.03166481, "memory(GiB)": 13.7, "step": 77720, "train_speed(iter/s)": 1.531392 }, { "acc": 0.98597755, "epoch": 36.43074759784392, "grad_norm": 3.7589516639709473, "learning_rate": 1.8806105080199799e-06, "loss": 0.02639281, "memory(GiB)": 13.7, "step": 77725, "train_speed(iter/s)": 1.531391 }, { "acc": 0.9864584, "epoch": 36.43309116475275, "grad_norm": 2.145550489425659, "learning_rate": 1.880004879545652e-06, "loss": 0.03796577, "memory(GiB)": 13.7, "step": 77730, "train_speed(iter/s)": 1.531393 }, { "acc": 0.9869566, "epoch": 36.43543473166159, "grad_norm": 5.0163679122924805, "learning_rate": 1.8793993260777755e-06, "loss": 0.03324369, "memory(GiB)": 13.7, "step": 77735, "train_speed(iter/s)": 1.531391 }, { "acc": 0.98395834, "epoch": 36.43777829857042, "grad_norm": 4.869585037231445, "learning_rate": 1.8787938476309033e-06, "loss": 0.05122906, "memory(GiB)": 13.7, "step": 77740, "train_speed(iter/s)": 1.53139 }, { "acc": 0.97609844, "epoch": 36.44012186547926, "grad_norm": 5.64428186416626, "learning_rate": 1.8781884442195921e-06, "loss": 0.05550325, "memory(GiB)": 13.7, "step": 77745, "train_speed(iter/s)": 1.531396 }, { "acc": 0.97416668, "epoch": 36.4424654323881, "grad_norm": 1.7515100240707397, "learning_rate": 1.8775831158583943e-06, "loss": 0.05569041, "memory(GiB)": 13.7, "step": 77750, "train_speed(iter/s)": 1.531399 }, { "acc": 0.985322, "epoch": 36.44480899929693, "grad_norm": 3.261512279510498, "learning_rate": 1.8769778625618572e-06, "loss": 0.05580035, "memory(GiB)": 13.7, "step": 77755, "train_speed(iter/s)": 1.531393 }, { "acc": 0.98898811, "epoch": 36.447152566205766, "grad_norm": 0.031352270394563675, "learning_rate": 1.876372684344531e-06, "loss": 0.0324982, "memory(GiB)": 13.7, "step": 77760, "train_speed(iter/s)": 1.531398 }, { "acc": 0.99161711, "epoch": 36.4494961331146, "grad_norm": 2.4435007572174072, "learning_rate": 1.8757675812209633e-06, "loss": 0.05521475, "memory(GiB)": 13.7, "step": 77765, "train_speed(iter/s)": 1.531402 }, { "acc": 0.9791667, "epoch": 36.451839700023434, "grad_norm": 5.456810474395752, "learning_rate": 1.8751625532056959e-06, "loss": 0.04248128, "memory(GiB)": 13.7, "step": 77770, "train_speed(iter/s)": 1.531406 }, { "acc": 0.98506947, "epoch": 36.45418326693227, "grad_norm": 3.384459972381592, "learning_rate": 1.8745576003132749e-06, "loss": 0.04450211, "memory(GiB)": 13.7, "step": 77775, "train_speed(iter/s)": 1.531409 }, { "acc": 0.96958332, "epoch": 36.4565268338411, "grad_norm": 4.0109453201293945, "learning_rate": 1.8739527225582377e-06, "loss": 0.05362948, "memory(GiB)": 13.7, "step": 77780, "train_speed(iter/s)": 1.531409 }, { "acc": 0.9895833, "epoch": 36.458870400749944, "grad_norm": 0.010750002227723598, "learning_rate": 1.873347919955126e-06, "loss": 0.03629991, "memory(GiB)": 13.7, "step": 77785, "train_speed(iter/s)": 1.531413 }, { "acc": 0.9854166, "epoch": 36.46121396765878, "grad_norm": 3.339179515838623, "learning_rate": 1.8727431925184782e-06, "loss": 0.03575858, "memory(GiB)": 13.7, "step": 77790, "train_speed(iter/s)": 1.531413 }, { "acc": 0.9885417, "epoch": 36.46355753456761, "grad_norm": 1.2113357782363892, "learning_rate": 1.8721385402628268e-06, "loss": 0.04033796, "memory(GiB)": 13.7, "step": 77795, "train_speed(iter/s)": 1.531413 }, { "acc": 0.9864584, "epoch": 36.46590110147645, "grad_norm": 2.359541416168213, "learning_rate": 1.8715339632027077e-06, "loss": 0.05564008, "memory(GiB)": 13.7, "step": 77800, "train_speed(iter/s)": 1.531416 }, { "acc": 0.98779755, "epoch": 36.46824466838528, "grad_norm": 4.179118633270264, "learning_rate": 1.8709294613526543e-06, "loss": 0.05661402, "memory(GiB)": 13.7, "step": 77805, "train_speed(iter/s)": 1.531414 }, { "acc": 0.9833333, "epoch": 36.470588235294116, "grad_norm": 3.523179531097412, "learning_rate": 1.8703250347271933e-06, "loss": 0.08311426, "memory(GiB)": 13.7, "step": 77810, "train_speed(iter/s)": 1.531416 }, { "acc": 0.98718748, "epoch": 36.47293180220295, "grad_norm": 2.9839954376220703, "learning_rate": 1.869720683340855e-06, "loss": 0.01763943, "memory(GiB)": 13.7, "step": 77815, "train_speed(iter/s)": 1.531416 }, { "acc": 0.98556547, "epoch": 36.475275369111785, "grad_norm": 2.695284605026245, "learning_rate": 1.8691164072081682e-06, "loss": 0.02589355, "memory(GiB)": 13.7, "step": 77820, "train_speed(iter/s)": 1.531422 }, { "acc": 0.98145828, "epoch": 36.477618936020626, "grad_norm": 3.037118911743164, "learning_rate": 1.8685122063436552e-06, "loss": 0.05953087, "memory(GiB)": 13.7, "step": 77825, "train_speed(iter/s)": 1.531431 }, { "acc": 0.98861609, "epoch": 36.47996250292946, "grad_norm": 3.655863046646118, "learning_rate": 1.867908080761838e-06, "loss": 0.04633762, "memory(GiB)": 13.7, "step": 77830, "train_speed(iter/s)": 1.531429 }, { "acc": 0.9863636, "epoch": 36.482306069838295, "grad_norm": 3.4088854789733887, "learning_rate": 1.8673040304772386e-06, "loss": 0.03904338, "memory(GiB)": 13.7, "step": 77835, "train_speed(iter/s)": 1.531425 }, { "acc": 0.97987013, "epoch": 36.48464963674713, "grad_norm": 3.944924831390381, "learning_rate": 1.8667000555043793e-06, "loss": 0.08649018, "memory(GiB)": 13.7, "step": 77840, "train_speed(iter/s)": 1.531429 }, { "acc": 0.98131943, "epoch": 36.48699320365596, "grad_norm": 2.90644907951355, "learning_rate": 1.8660961558577735e-06, "loss": 0.05524826, "memory(GiB)": 13.7, "step": 77845, "train_speed(iter/s)": 1.531428 }, { "acc": 0.98125, "epoch": 36.4893367705648, "grad_norm": 0.04124332591891289, "learning_rate": 1.8654923315519383e-06, "loss": 0.05433064, "memory(GiB)": 13.7, "step": 77850, "train_speed(iter/s)": 1.531427 }, { "acc": 0.9895833, "epoch": 36.49168033747363, "grad_norm": 2.129427909851074, "learning_rate": 1.8648885826013885e-06, "loss": 0.02008126, "memory(GiB)": 13.7, "step": 77855, "train_speed(iter/s)": 1.531423 }, { "acc": 0.98571424, "epoch": 36.49402390438247, "grad_norm": 2.770216941833496, "learning_rate": 1.8642849090206378e-06, "loss": 0.03685749, "memory(GiB)": 13.7, "step": 77860, "train_speed(iter/s)": 1.531424 }, { "acc": 0.9802083, "epoch": 36.49636747129131, "grad_norm": 5.62339448928833, "learning_rate": 1.8636813108241944e-06, "loss": 0.04600605, "memory(GiB)": 13.7, "step": 77865, "train_speed(iter/s)": 1.531429 }, { "acc": 0.984375, "epoch": 36.49871103820014, "grad_norm": 3.510343551635742, "learning_rate": 1.8630777880265652e-06, "loss": 0.03843618, "memory(GiB)": 13.7, "step": 77870, "train_speed(iter/s)": 1.531428 }, { "acc": 0.98552084, "epoch": 36.501054605108976, "grad_norm": 4.988253593444824, "learning_rate": 1.8624743406422588e-06, "loss": 0.02821962, "memory(GiB)": 13.7, "step": 77875, "train_speed(iter/s)": 1.531432 }, { "acc": 0.98351192, "epoch": 36.50339817201781, "grad_norm": 5.9304704666137695, "learning_rate": 1.8618709686857823e-06, "loss": 0.09693615, "memory(GiB)": 13.7, "step": 77880, "train_speed(iter/s)": 1.531433 }, { "acc": 0.98668871, "epoch": 36.505741738926645, "grad_norm": 4.75136661529541, "learning_rate": 1.8612676721716342e-06, "loss": 0.03127265, "memory(GiB)": 13.7, "step": 77885, "train_speed(iter/s)": 1.531428 }, { "acc": 0.9779356, "epoch": 36.50808530583548, "grad_norm": 3.376443862915039, "learning_rate": 1.8606644511143178e-06, "loss": 0.05410719, "memory(GiB)": 13.7, "step": 77890, "train_speed(iter/s)": 1.531432 }, { "acc": 0.97166662, "epoch": 36.51042887274431, "grad_norm": 1.3666162490844727, "learning_rate": 1.860061305528335e-06, "loss": 0.076572, "memory(GiB)": 13.7, "step": 77895, "train_speed(iter/s)": 1.53143 }, { "acc": 0.98395834, "epoch": 36.512772439653155, "grad_norm": 3.204585552215576, "learning_rate": 1.8594582354281815e-06, "loss": 0.03165944, "memory(GiB)": 13.7, "step": 77900, "train_speed(iter/s)": 1.531434 }, { "acc": 0.98354168, "epoch": 36.51511600656199, "grad_norm": 3.052633047103882, "learning_rate": 1.8588552408283512e-06, "loss": 0.03942227, "memory(GiB)": 13.7, "step": 77905, "train_speed(iter/s)": 1.531438 }, { "acc": 0.98954439, "epoch": 36.51745957347082, "grad_norm": 3.7300796508789062, "learning_rate": 1.8582523217433396e-06, "loss": 0.03048201, "memory(GiB)": 13.7, "step": 77910, "train_speed(iter/s)": 1.531442 }, { "acc": 0.98500004, "epoch": 36.51980314037966, "grad_norm": 4.073974132537842, "learning_rate": 1.8576494781876406e-06, "loss": 0.03554926, "memory(GiB)": 13.7, "step": 77915, "train_speed(iter/s)": 1.531446 }, { "acc": 0.99750004, "epoch": 36.52214670728849, "grad_norm": 2.634366989135742, "learning_rate": 1.8570467101757418e-06, "loss": 0.00799223, "memory(GiB)": 13.7, "step": 77920, "train_speed(iter/s)": 1.531449 }, { "acc": 0.9770834, "epoch": 36.524490274197326, "grad_norm": 8.890084266662598, "learning_rate": 1.8564440177221325e-06, "loss": 0.0439758, "memory(GiB)": 13.7, "step": 77925, "train_speed(iter/s)": 1.531453 }, { "acc": 0.98055553, "epoch": 36.52683384110616, "grad_norm": 1.816205382347107, "learning_rate": 1.8558414008413004e-06, "loss": 0.03111227, "memory(GiB)": 13.7, "step": 77930, "train_speed(iter/s)": 1.531459 }, { "acc": 0.98705807, "epoch": 36.529177408015, "grad_norm": 0.0038395063020288944, "learning_rate": 1.8552388595477316e-06, "loss": 0.04098669, "memory(GiB)": 13.7, "step": 77935, "train_speed(iter/s)": 1.53147 }, { "acc": 0.97458324, "epoch": 36.531520974923836, "grad_norm": 6.07175874710083, "learning_rate": 1.8546363938559058e-06, "loss": 0.06586497, "memory(GiB)": 13.7, "step": 77940, "train_speed(iter/s)": 1.531472 }, { "acc": 0.98715277, "epoch": 36.53386454183267, "grad_norm": 0.011222447268664837, "learning_rate": 1.854034003780308e-06, "loss": 0.02942518, "memory(GiB)": 13.7, "step": 77945, "train_speed(iter/s)": 1.531476 }, { "acc": 0.97641945, "epoch": 36.536208108741505, "grad_norm": 4.029865264892578, "learning_rate": 1.8534316893354138e-06, "loss": 0.04293053, "memory(GiB)": 13.7, "step": 77950, "train_speed(iter/s)": 1.531473 }, { "acc": 0.97523813, "epoch": 36.53855167565034, "grad_norm": 5.784832954406738, "learning_rate": 1.8528294505357051e-06, "loss": 0.05108996, "memory(GiB)": 13.7, "step": 77955, "train_speed(iter/s)": 1.531474 }, { "acc": 0.9802084, "epoch": 36.54089524255917, "grad_norm": 4.509335041046143, "learning_rate": 1.8522272873956538e-06, "loss": 0.03818916, "memory(GiB)": 13.7, "step": 77960, "train_speed(iter/s)": 1.531478 }, { "acc": 0.9958334, "epoch": 36.54323880946801, "grad_norm": 0.6552690267562866, "learning_rate": 1.8516251999297351e-06, "loss": 0.00812858, "memory(GiB)": 13.7, "step": 77965, "train_speed(iter/s)": 1.531479 }, { "acc": 0.99271774, "epoch": 36.54558237637684, "grad_norm": 3.0868945121765137, "learning_rate": 1.8510231881524236e-06, "loss": 0.06228542, "memory(GiB)": 13.7, "step": 77970, "train_speed(iter/s)": 1.531484 }, { "acc": 0.9875, "epoch": 36.547925943285684, "grad_norm": 3.5119080543518066, "learning_rate": 1.8504212520781861e-06, "loss": 0.02877023, "memory(GiB)": 13.7, "step": 77975, "train_speed(iter/s)": 1.531492 }, { "acc": 0.97665176, "epoch": 36.55026951019452, "grad_norm": 0.9569887518882751, "learning_rate": 1.8498193917214931e-06, "loss": 0.0671349, "memory(GiB)": 13.7, "step": 77980, "train_speed(iter/s)": 1.53149 }, { "acc": 0.9842804, "epoch": 36.55261307710335, "grad_norm": 7.036979675292969, "learning_rate": 1.8492176070968132e-06, "loss": 0.05145318, "memory(GiB)": 13.7, "step": 77985, "train_speed(iter/s)": 1.5315 }, { "acc": 0.99333334, "epoch": 36.55495664401219, "grad_norm": 4.453835964202881, "learning_rate": 1.8486158982186084e-06, "loss": 0.02542577, "memory(GiB)": 13.7, "step": 77990, "train_speed(iter/s)": 1.531501 }, { "acc": 0.97998514, "epoch": 36.55730021092102, "grad_norm": 2.184560775756836, "learning_rate": 1.8480142651013417e-06, "loss": 0.03696693, "memory(GiB)": 13.7, "step": 77995, "train_speed(iter/s)": 1.531501 }, { "acc": 0.9885416, "epoch": 36.559643777829855, "grad_norm": 0.766730546951294, "learning_rate": 1.8474127077594746e-06, "loss": 0.03194293, "memory(GiB)": 13.7, "step": 78000, "train_speed(iter/s)": 1.531499 }, { "acc": 0.98287039, "epoch": 36.56198734473869, "grad_norm": 2.496624231338501, "learning_rate": 1.8468112262074677e-06, "loss": 0.03732104, "memory(GiB)": 13.7, "step": 78005, "train_speed(iter/s)": 1.531497 }, { "acc": 0.98988094, "epoch": 36.56433091164753, "grad_norm": 2.247659683227539, "learning_rate": 1.8462098204597801e-06, "loss": 0.03093412, "memory(GiB)": 13.7, "step": 78010, "train_speed(iter/s)": 1.531504 }, { "acc": 0.9859375, "epoch": 36.566674478556365, "grad_norm": 0.014980844222009182, "learning_rate": 1.8456084905308644e-06, "loss": 0.02951541, "memory(GiB)": 13.7, "step": 78015, "train_speed(iter/s)": 1.531507 }, { "acc": 0.9833333, "epoch": 36.5690180454652, "grad_norm": 0.0018665067618712783, "learning_rate": 1.8450072364351754e-06, "loss": 0.05983238, "memory(GiB)": 13.7, "step": 78020, "train_speed(iter/s)": 1.531512 }, { "acc": 0.96937504, "epoch": 36.571361612374034, "grad_norm": 4.284073352813721, "learning_rate": 1.8444060581871676e-06, "loss": 0.04961157, "memory(GiB)": 13.7, "step": 78025, "train_speed(iter/s)": 1.531518 }, { "acc": 0.98154764, "epoch": 36.57370517928287, "grad_norm": 3.105133295059204, "learning_rate": 1.8438049558012897e-06, "loss": 0.0439589, "memory(GiB)": 13.7, "step": 78030, "train_speed(iter/s)": 1.53152 }, { "acc": 0.98916664, "epoch": 36.5760487461917, "grad_norm": 2.0907318592071533, "learning_rate": 1.843203929291988e-06, "loss": 0.03521484, "memory(GiB)": 13.7, "step": 78035, "train_speed(iter/s)": 1.531527 }, { "acc": 0.96446428, "epoch": 36.57839231310054, "grad_norm": 2.9114084243774414, "learning_rate": 1.8426029786737117e-06, "loss": 0.08276011, "memory(GiB)": 13.7, "step": 78040, "train_speed(iter/s)": 1.531531 }, { "acc": 0.9848959, "epoch": 36.58073588000937, "grad_norm": 6.435004234313965, "learning_rate": 1.842002103960907e-06, "loss": 0.02844535, "memory(GiB)": 13.7, "step": 78045, "train_speed(iter/s)": 1.531538 }, { "acc": 0.99031248, "epoch": 36.58307944691821, "grad_norm": 4.02531099319458, "learning_rate": 1.8414013051680135e-06, "loss": 0.06839477, "memory(GiB)": 13.7, "step": 78050, "train_speed(iter/s)": 1.53154 }, { "acc": 0.98249998, "epoch": 36.58542301382705, "grad_norm": 3.4030442237854004, "learning_rate": 1.840800582309474e-06, "loss": 0.03638598, "memory(GiB)": 13.7, "step": 78055, "train_speed(iter/s)": 1.531543 }, { "acc": 0.98604164, "epoch": 36.58776658073588, "grad_norm": 2.471630096435547, "learning_rate": 1.8401999353997302e-06, "loss": 0.02316193, "memory(GiB)": 13.7, "step": 78060, "train_speed(iter/s)": 1.531547 }, { "acc": 0.996875, "epoch": 36.590110147644715, "grad_norm": 9.629072883399203e-05, "learning_rate": 1.839599364453216e-06, "loss": 0.02429778, "memory(GiB)": 13.7, "step": 78065, "train_speed(iter/s)": 1.531548 }, { "acc": 0.99162769, "epoch": 36.59245371455355, "grad_norm": 3.5497233867645264, "learning_rate": 1.8389988694843698e-06, "loss": 0.03509884, "memory(GiB)": 13.7, "step": 78070, "train_speed(iter/s)": 1.531547 }, { "acc": 0.98571434, "epoch": 36.594797281462384, "grad_norm": 2.4036054611206055, "learning_rate": 1.8383984505076232e-06, "loss": 0.05402218, "memory(GiB)": 13.7, "step": 78075, "train_speed(iter/s)": 1.531549 }, { "acc": 0.98708324, "epoch": 36.59714084837122, "grad_norm": 1.6827635765075684, "learning_rate": 1.8377981075374094e-06, "loss": 0.03853877, "memory(GiB)": 13.7, "step": 78080, "train_speed(iter/s)": 1.531549 }, { "acc": 0.97458334, "epoch": 36.59948441528006, "grad_norm": 3.7085120677948, "learning_rate": 1.8371978405881602e-06, "loss": 0.04913147, "memory(GiB)": 13.7, "step": 78085, "train_speed(iter/s)": 1.531549 }, { "acc": 0.96550598, "epoch": 36.601827982188894, "grad_norm": 6.583360195159912, "learning_rate": 1.8365976496743013e-06, "loss": 0.07315713, "memory(GiB)": 13.7, "step": 78090, "train_speed(iter/s)": 1.531553 }, { "acc": 0.990625, "epoch": 36.60417154909773, "grad_norm": 2.2654926776885986, "learning_rate": 1.8359975348102607e-06, "loss": 0.02658053, "memory(GiB)": 13.7, "step": 78095, "train_speed(iter/s)": 1.531559 }, { "acc": 0.9864584, "epoch": 36.60651511600656, "grad_norm": 5.5705766677856445, "learning_rate": 1.8353974960104645e-06, "loss": 0.06850999, "memory(GiB)": 13.7, "step": 78100, "train_speed(iter/s)": 1.531564 }, { "acc": 0.98654766, "epoch": 36.6088586829154, "grad_norm": 2.2151107788085938, "learning_rate": 1.8347975332893332e-06, "loss": 0.03661078, "memory(GiB)": 13.7, "step": 78105, "train_speed(iter/s)": 1.531562 }, { "acc": 0.98611107, "epoch": 36.61120224982423, "grad_norm": 0.019602010026574135, "learning_rate": 1.8341976466612901e-06, "loss": 0.0401843, "memory(GiB)": 13.7, "step": 78110, "train_speed(iter/s)": 1.531561 }, { "acc": 0.98552074, "epoch": 36.613545816733065, "grad_norm": 3.1034207344055176, "learning_rate": 1.833597836140752e-06, "loss": 0.03834535, "memory(GiB)": 13.7, "step": 78115, "train_speed(iter/s)": 1.531569 }, { "acc": 0.97029228, "epoch": 36.6158893836419, "grad_norm": 5.009171485900879, "learning_rate": 1.8329981017421396e-06, "loss": 0.06250256, "memory(GiB)": 13.7, "step": 78120, "train_speed(iter/s)": 1.531572 }, { "acc": 0.98194447, "epoch": 36.61823295055074, "grad_norm": 2.180514335632324, "learning_rate": 1.8323984434798653e-06, "loss": 0.04547786, "memory(GiB)": 13.7, "step": 78125, "train_speed(iter/s)": 1.531574 }, { "acc": 0.984375, "epoch": 36.620576517459575, "grad_norm": 3.081538438796997, "learning_rate": 1.8317988613683443e-06, "loss": 0.02230956, "memory(GiB)": 13.7, "step": 78130, "train_speed(iter/s)": 1.531573 }, { "acc": 0.98249998, "epoch": 36.62292008436841, "grad_norm": 0.5445806980133057, "learning_rate": 1.8311993554219906e-06, "loss": 0.02515923, "memory(GiB)": 13.7, "step": 78135, "train_speed(iter/s)": 1.531574 }, { "acc": 0.9875, "epoch": 36.625263651277244, "grad_norm": 0.047024600207805634, "learning_rate": 1.8305999256552104e-06, "loss": 0.01880139, "memory(GiB)": 13.7, "step": 78140, "train_speed(iter/s)": 1.531573 }, { "acc": 0.9746726, "epoch": 36.62760721818608, "grad_norm": 2.433576822280884, "learning_rate": 1.8300005720824138e-06, "loss": 0.06262217, "memory(GiB)": 13.7, "step": 78145, "train_speed(iter/s)": 1.531579 }, { "acc": 0.97217264, "epoch": 36.62995078509491, "grad_norm": 3.8217153549194336, "learning_rate": 1.829401294718008e-06, "loss": 0.05580038, "memory(GiB)": 13.7, "step": 78150, "train_speed(iter/s)": 1.531581 }, { "acc": 0.97666664, "epoch": 36.63229435200375, "grad_norm": 9.002525329589844, "learning_rate": 1.8288020935763983e-06, "loss": 0.04130693, "memory(GiB)": 13.7, "step": 78155, "train_speed(iter/s)": 1.531583 }, { "acc": 0.96374998, "epoch": 36.63463791891259, "grad_norm": 3.9384920597076416, "learning_rate": 1.8282029686719871e-06, "loss": 0.05763807, "memory(GiB)": 13.7, "step": 78160, "train_speed(iter/s)": 1.531584 }, { "acc": 0.97136364, "epoch": 36.63698148582142, "grad_norm": 4.597865104675293, "learning_rate": 1.8276039200191731e-06, "loss": 0.07299426, "memory(GiB)": 13.7, "step": 78165, "train_speed(iter/s)": 1.531585 }, { "acc": 0.99821434, "epoch": 36.63932505273026, "grad_norm": 2.2803404331207275, "learning_rate": 1.8270049476323566e-06, "loss": 0.03450902, "memory(GiB)": 13.7, "step": 78170, "train_speed(iter/s)": 1.531582 }, { "acc": 0.98145294, "epoch": 36.64166861963909, "grad_norm": 0.004925610963255167, "learning_rate": 1.8264060515259374e-06, "loss": 0.06162605, "memory(GiB)": 13.7, "step": 78175, "train_speed(iter/s)": 1.531589 }, { "acc": 0.98687496, "epoch": 36.644012186547926, "grad_norm": 3.3248229026794434, "learning_rate": 1.8258072317143078e-06, "loss": 0.04752798, "memory(GiB)": 13.7, "step": 78180, "train_speed(iter/s)": 1.531588 }, { "acc": 0.97615528, "epoch": 36.64635575345676, "grad_norm": 2.517387866973877, "learning_rate": 1.8252084882118618e-06, "loss": 0.062881, "memory(GiB)": 13.7, "step": 78185, "train_speed(iter/s)": 1.531592 }, { "acc": 0.96840277, "epoch": 36.648699320365594, "grad_norm": 0.6636656522750854, "learning_rate": 1.824609821032994e-06, "loss": 0.05308259, "memory(GiB)": 13.7, "step": 78190, "train_speed(iter/s)": 1.531595 }, { "acc": 0.98114586, "epoch": 36.65104288727443, "grad_norm": 2.449592113494873, "learning_rate": 1.8240112301920928e-06, "loss": 0.05820549, "memory(GiB)": 13.7, "step": 78195, "train_speed(iter/s)": 1.531597 }, { "acc": 0.99002972, "epoch": 36.65338645418327, "grad_norm": 0.8991870284080505, "learning_rate": 1.823412715703544e-06, "loss": 0.02466299, "memory(GiB)": 13.7, "step": 78200, "train_speed(iter/s)": 1.531599 }, { "acc": 0.9916666, "epoch": 36.655730021092104, "grad_norm": 3.813798189163208, "learning_rate": 1.8228142775817358e-06, "loss": 0.01761589, "memory(GiB)": 13.7, "step": 78205, "train_speed(iter/s)": 1.531606 }, { "acc": 0.97562504, "epoch": 36.65807358800094, "grad_norm": 7.450700759887695, "learning_rate": 1.8222159158410534e-06, "loss": 0.06466696, "memory(GiB)": 13.7, "step": 78210, "train_speed(iter/s)": 1.531612 }, { "acc": 0.9875, "epoch": 36.66041715490977, "grad_norm": 4.342532157897949, "learning_rate": 1.8216176304958805e-06, "loss": 0.02443795, "memory(GiB)": 13.7, "step": 78215, "train_speed(iter/s)": 1.531612 }, { "acc": 0.98562498, "epoch": 36.66276072181861, "grad_norm": 4.334705352783203, "learning_rate": 1.8210194215605948e-06, "loss": 0.05037073, "memory(GiB)": 13.7, "step": 78220, "train_speed(iter/s)": 1.531617 }, { "acc": 0.97594242, "epoch": 36.66510428872744, "grad_norm": 0.0011769823031499982, "learning_rate": 1.8204212890495764e-06, "loss": 0.0595477, "memory(GiB)": 13.7, "step": 78225, "train_speed(iter/s)": 1.53162 }, { "acc": 0.98675594, "epoch": 36.667447855636276, "grad_norm": 5.269979953765869, "learning_rate": 1.8198232329772047e-06, "loss": 0.03743973, "memory(GiB)": 13.7, "step": 78230, "train_speed(iter/s)": 1.531624 }, { "acc": 0.98383923, "epoch": 36.66979142254512, "grad_norm": 1.095414161682129, "learning_rate": 1.8192252533578514e-06, "loss": 0.03106963, "memory(GiB)": 13.7, "step": 78235, "train_speed(iter/s)": 1.531628 }, { "acc": 0.9794445, "epoch": 36.67213498945395, "grad_norm": 4.146942138671875, "learning_rate": 1.8186273502058935e-06, "loss": 0.0540396, "memory(GiB)": 13.7, "step": 78240, "train_speed(iter/s)": 1.531631 }, { "acc": 0.96441288, "epoch": 36.674478556362786, "grad_norm": 8.3897066116333, "learning_rate": 1.8180295235356993e-06, "loss": 0.08321932, "memory(GiB)": 13.7, "step": 78245, "train_speed(iter/s)": 1.531638 }, { "acc": 0.9791667, "epoch": 36.67682212327162, "grad_norm": 5.675900936126709, "learning_rate": 1.8174317733616413e-06, "loss": 0.09332308, "memory(GiB)": 13.7, "step": 78250, "train_speed(iter/s)": 1.531644 }, { "acc": 0.97241068, "epoch": 36.679165690180454, "grad_norm": 4.60565185546875, "learning_rate": 1.8168340996980848e-06, "loss": 0.07651055, "memory(GiB)": 13.7, "step": 78255, "train_speed(iter/s)": 1.531648 }, { "acc": 0.99092264, "epoch": 36.68150925708929, "grad_norm": 0.9082376956939697, "learning_rate": 1.8162365025593978e-06, "loss": 0.0231914, "memory(GiB)": 13.7, "step": 78260, "train_speed(iter/s)": 1.531651 }, { "acc": 0.985322, "epoch": 36.68385282399812, "grad_norm": 5.119955539703369, "learning_rate": 1.8156389819599455e-06, "loss": 0.02658626, "memory(GiB)": 13.7, "step": 78265, "train_speed(iter/s)": 1.531659 }, { "acc": 0.98249998, "epoch": 36.68619639090696, "grad_norm": 3.672546625137329, "learning_rate": 1.815041537914087e-06, "loss": 0.05145783, "memory(GiB)": 13.7, "step": 78270, "train_speed(iter/s)": 1.53166 }, { "acc": 0.99244785, "epoch": 36.6885399578158, "grad_norm": 2.8274545669555664, "learning_rate": 1.814444170436185e-06, "loss": 0.0180638, "memory(GiB)": 13.7, "step": 78275, "train_speed(iter/s)": 1.531664 }, { "acc": 0.9807292, "epoch": 36.69088352472463, "grad_norm": 6.183273792266846, "learning_rate": 1.8138468795406004e-06, "loss": 0.05053564, "memory(GiB)": 13.7, "step": 78280, "train_speed(iter/s)": 1.531669 }, { "acc": 0.98562498, "epoch": 36.69322709163347, "grad_norm": 1.8749719858169556, "learning_rate": 1.813249665241686e-06, "loss": 0.02988835, "memory(GiB)": 13.7, "step": 78285, "train_speed(iter/s)": 1.531672 }, { "acc": 0.97904758, "epoch": 36.6955706585423, "grad_norm": 9.709481239318848, "learning_rate": 1.8126525275537998e-06, "loss": 0.0500737, "memory(GiB)": 13.7, "step": 78290, "train_speed(iter/s)": 1.531671 }, { "acc": 0.986269, "epoch": 36.697914225451136, "grad_norm": 6.013944149017334, "learning_rate": 1.812055466491293e-06, "loss": 0.06909609, "memory(GiB)": 13.7, "step": 78295, "train_speed(iter/s)": 1.531668 }, { "acc": 0.97270832, "epoch": 36.70025779235997, "grad_norm": 2.9120633602142334, "learning_rate": 1.8114584820685172e-06, "loss": 0.04799272, "memory(GiB)": 13.7, "step": 78300, "train_speed(iter/s)": 1.531669 }, { "acc": 0.9770833, "epoch": 36.702601359268805, "grad_norm": 0.00590060418471694, "learning_rate": 1.8108615742998251e-06, "loss": 0.03728284, "memory(GiB)": 13.7, "step": 78305, "train_speed(iter/s)": 1.531667 }, { "acc": 0.98354168, "epoch": 36.704944926177646, "grad_norm": 2.744661569595337, "learning_rate": 1.81026474319956e-06, "loss": 0.02682369, "memory(GiB)": 13.7, "step": 78310, "train_speed(iter/s)": 1.531674 }, { "acc": 0.98125, "epoch": 36.70728849308648, "grad_norm": 4.936309337615967, "learning_rate": 1.8096679887820695e-06, "loss": 0.05029387, "memory(GiB)": 13.7, "step": 78315, "train_speed(iter/s)": 1.531676 }, { "acc": 0.9927083, "epoch": 36.709632059995315, "grad_norm": 0.04793086275458336, "learning_rate": 1.8090713110616998e-06, "loss": 0.03754629, "memory(GiB)": 13.7, "step": 78320, "train_speed(iter/s)": 1.531678 }, { "acc": 0.98594704, "epoch": 36.71197562690415, "grad_norm": 4.007760524749756, "learning_rate": 1.808474710052791e-06, "loss": 0.04601789, "memory(GiB)": 13.7, "step": 78325, "train_speed(iter/s)": 1.531689 }, { "acc": 0.98842258, "epoch": 36.71431919381298, "grad_norm": 3.247697114944458, "learning_rate": 1.8078781857696824e-06, "loss": 0.01867325, "memory(GiB)": 13.7, "step": 78330, "train_speed(iter/s)": 1.53169 }, { "acc": 0.98520832, "epoch": 36.71666276072182, "grad_norm": 0.540863573551178, "learning_rate": 1.8072817382267129e-06, "loss": 0.05267612, "memory(GiB)": 13.7, "step": 78335, "train_speed(iter/s)": 1.531689 }, { "acc": 0.99750004, "epoch": 36.71900632763065, "grad_norm": 0.002534017199650407, "learning_rate": 1.806685367438222e-06, "loss": 0.01740561, "memory(GiB)": 13.7, "step": 78340, "train_speed(iter/s)": 1.531693 }, { "acc": 0.97563848, "epoch": 36.721349894539486, "grad_norm": 1.6327886581420898, "learning_rate": 1.8060890734185402e-06, "loss": 0.08589008, "memory(GiB)": 13.7, "step": 78345, "train_speed(iter/s)": 1.5317 }, { "acc": 0.98299675, "epoch": 36.72369346144833, "grad_norm": 4.493253231048584, "learning_rate": 1.8054928561820029e-06, "loss": 0.05238605, "memory(GiB)": 13.7, "step": 78350, "train_speed(iter/s)": 1.531703 }, { "acc": 0.99474211, "epoch": 36.72603702835716, "grad_norm": 0.0017047204310074449, "learning_rate": 1.8048967157429412e-06, "loss": 0.01599507, "memory(GiB)": 13.7, "step": 78355, "train_speed(iter/s)": 1.531709 }, { "acc": 0.98594704, "epoch": 36.728380595265996, "grad_norm": 3.775360345840454, "learning_rate": 1.8043006521156855e-06, "loss": 0.06070694, "memory(GiB)": 13.7, "step": 78360, "train_speed(iter/s)": 1.531709 }, { "acc": 0.98127975, "epoch": 36.73072416217483, "grad_norm": 2.800875186920166, "learning_rate": 1.8037046653145619e-06, "loss": 0.07507654, "memory(GiB)": 13.7, "step": 78365, "train_speed(iter/s)": 1.531712 }, { "acc": 0.9864584, "epoch": 36.733067729083665, "grad_norm": 3.6043248176574707, "learning_rate": 1.8031087553538942e-06, "loss": 0.02962637, "memory(GiB)": 13.7, "step": 78370, "train_speed(iter/s)": 1.531714 }, { "acc": 0.9739584, "epoch": 36.7354112959925, "grad_norm": 4.37721586227417, "learning_rate": 1.8025129222480076e-06, "loss": 0.0503713, "memory(GiB)": 13.7, "step": 78375, "train_speed(iter/s)": 1.531716 }, { "acc": 0.98604164, "epoch": 36.73775486290133, "grad_norm": 4.470408916473389, "learning_rate": 1.8019171660112256e-06, "loss": 0.04511179, "memory(GiB)": 13.7, "step": 78380, "train_speed(iter/s)": 1.531718 }, { "acc": 0.99258928, "epoch": 36.74009842981017, "grad_norm": 1.0286109447479248, "learning_rate": 1.8013214866578657e-06, "loss": 0.00964547, "memory(GiB)": 13.7, "step": 78385, "train_speed(iter/s)": 1.531723 }, { "acc": 0.99020834, "epoch": 36.74244199671901, "grad_norm": 2.4386045932769775, "learning_rate": 1.8007258842022467e-06, "loss": 0.02973547, "memory(GiB)": 13.7, "step": 78390, "train_speed(iter/s)": 1.531728 }, { "acc": 0.99020834, "epoch": 36.74478556362784, "grad_norm": 3.7752838134765625, "learning_rate": 1.8001303586586868e-06, "loss": 0.03651309, "memory(GiB)": 13.7, "step": 78395, "train_speed(iter/s)": 1.53173 }, { "acc": 0.99258928, "epoch": 36.74712913053668, "grad_norm": 3.839946985244751, "learning_rate": 1.7995349100414984e-06, "loss": 0.02188785, "memory(GiB)": 13.7, "step": 78400, "train_speed(iter/s)": 1.531726 }, { "acc": 0.98946428, "epoch": 36.74947269744551, "grad_norm": 3.6107101440429688, "learning_rate": 1.7989395383649955e-06, "loss": 0.03864392, "memory(GiB)": 13.7, "step": 78405, "train_speed(iter/s)": 1.531727 }, { "acc": 0.97630482, "epoch": 36.751816264354346, "grad_norm": 6.1191582679748535, "learning_rate": 1.798344243643487e-06, "loss": 0.07474856, "memory(GiB)": 13.7, "step": 78410, "train_speed(iter/s)": 1.531727 }, { "acc": 0.97895832, "epoch": 36.75415983126318, "grad_norm": 1.8602370023727417, "learning_rate": 1.797749025891284e-06, "loss": 0.05246615, "memory(GiB)": 13.7, "step": 78415, "train_speed(iter/s)": 1.531729 }, { "acc": 0.98500004, "epoch": 36.756503398172015, "grad_norm": 3.384038209915161, "learning_rate": 1.7971538851226913e-06, "loss": 0.02555055, "memory(GiB)": 13.7, "step": 78420, "train_speed(iter/s)": 1.531736 }, { "acc": 0.98410177, "epoch": 36.758846965080856, "grad_norm": 4.139230728149414, "learning_rate": 1.7965588213520156e-06, "loss": 0.06652559, "memory(GiB)": 13.7, "step": 78425, "train_speed(iter/s)": 1.531734 }, { "acc": 0.9921875, "epoch": 36.76119053198969, "grad_norm": 0.44229209423065186, "learning_rate": 1.7959638345935602e-06, "loss": 0.05077236, "memory(GiB)": 13.7, "step": 78430, "train_speed(iter/s)": 1.531732 }, { "acc": 0.98800602, "epoch": 36.763534098898525, "grad_norm": 1.0199673175811768, "learning_rate": 1.7953689248616277e-06, "loss": 0.02532607, "memory(GiB)": 13.7, "step": 78435, "train_speed(iter/s)": 1.531736 }, { "acc": 0.98458328, "epoch": 36.76587766580736, "grad_norm": 4.330410480499268, "learning_rate": 1.7947740921705151e-06, "loss": 0.03432049, "memory(GiB)": 13.7, "step": 78440, "train_speed(iter/s)": 1.531733 }, { "acc": 0.9958333, "epoch": 36.76822123271619, "grad_norm": 1.3470780849456787, "learning_rate": 1.7941793365345234e-06, "loss": 0.00827512, "memory(GiB)": 13.7, "step": 78445, "train_speed(iter/s)": 1.531735 }, { "acc": 0.9921875, "epoch": 36.77056479962503, "grad_norm": 2.5045876502990723, "learning_rate": 1.793584657967945e-06, "loss": 0.01550896, "memory(GiB)": 13.7, "step": 78450, "train_speed(iter/s)": 1.531736 }, { "acc": 0.9869792, "epoch": 36.77290836653386, "grad_norm": 0.0005959945847280324, "learning_rate": 1.7929900564850777e-06, "loss": 0.04940378, "memory(GiB)": 13.7, "step": 78455, "train_speed(iter/s)": 1.531739 }, { "acc": 0.98318453, "epoch": 36.775251933442696, "grad_norm": 3.394759178161621, "learning_rate": 1.79239553210021e-06, "loss": 0.03241154, "memory(GiB)": 13.7, "step": 78460, "train_speed(iter/s)": 1.531736 }, { "acc": 0.97872028, "epoch": 36.77759550035154, "grad_norm": 1.5352442264556885, "learning_rate": 1.7918010848276349e-06, "loss": 0.04799958, "memory(GiB)": 13.7, "step": 78465, "train_speed(iter/s)": 1.531739 }, { "acc": 0.99571428, "epoch": 36.77993906726037, "grad_norm": 2.6096951961517334, "learning_rate": 1.7912067146816416e-06, "loss": 0.01636147, "memory(GiB)": 13.7, "step": 78470, "train_speed(iter/s)": 1.531742 }, { "acc": 0.98633938, "epoch": 36.782282634169206, "grad_norm": 1.561142086982727, "learning_rate": 1.790612421676514e-06, "loss": 0.03682408, "memory(GiB)": 13.7, "step": 78475, "train_speed(iter/s)": 1.531741 }, { "acc": 0.9895834, "epoch": 36.78462620107804, "grad_norm": 0.004721542354673147, "learning_rate": 1.790018205826538e-06, "loss": 0.02272732, "memory(GiB)": 13.7, "step": 78480, "train_speed(iter/s)": 1.531741 }, { "acc": 0.98145828, "epoch": 36.786969767986875, "grad_norm": 2.935865879058838, "learning_rate": 1.7894240671459994e-06, "loss": 0.03695179, "memory(GiB)": 13.7, "step": 78485, "train_speed(iter/s)": 1.531744 }, { "acc": 0.98708324, "epoch": 36.78931333489571, "grad_norm": 4.958203315734863, "learning_rate": 1.7888300056491754e-06, "loss": 0.03941352, "memory(GiB)": 13.7, "step": 78490, "train_speed(iter/s)": 1.531747 }, { "acc": 0.9876442, "epoch": 36.791656901804544, "grad_norm": 1.0362566709518433, "learning_rate": 1.788236021350348e-06, "loss": 0.03029887, "memory(GiB)": 13.7, "step": 78495, "train_speed(iter/s)": 1.531745 }, { "acc": 0.98125, "epoch": 36.794000468713385, "grad_norm": 0.10028936713933945, "learning_rate": 1.787642114263792e-06, "loss": 0.06426427, "memory(GiB)": 13.7, "step": 78500, "train_speed(iter/s)": 1.531747 }, { "acc": 0.97111111, "epoch": 36.79634403562222, "grad_norm": 3.7620675563812256, "learning_rate": 1.7870482844037849e-06, "loss": 0.06765853, "memory(GiB)": 13.7, "step": 78505, "train_speed(iter/s)": 1.531754 }, { "acc": 0.99154758, "epoch": 36.798687602531054, "grad_norm": 1.849183201789856, "learning_rate": 1.7864545317846014e-06, "loss": 0.02141961, "memory(GiB)": 13.7, "step": 78510, "train_speed(iter/s)": 1.531757 }, { "acc": 0.97738094, "epoch": 36.80103116943989, "grad_norm": 0.012342103756964207, "learning_rate": 1.7858608564205108e-06, "loss": 0.03975232, "memory(GiB)": 13.7, "step": 78515, "train_speed(iter/s)": 1.53176 }, { "acc": 0.9942708, "epoch": 36.80337473634872, "grad_norm": 1.0322648286819458, "learning_rate": 1.7852672583257836e-06, "loss": 0.06279361, "memory(GiB)": 13.7, "step": 78520, "train_speed(iter/s)": 1.531764 }, { "acc": 0.98604164, "epoch": 36.80571830325756, "grad_norm": 0.004814382176846266, "learning_rate": 1.7846737375146906e-06, "loss": 0.02304678, "memory(GiB)": 13.7, "step": 78525, "train_speed(iter/s)": 1.531769 }, { "acc": 0.98770828, "epoch": 36.80806187016639, "grad_norm": 0.4602688252925873, "learning_rate": 1.7840802940014962e-06, "loss": 0.04081175, "memory(GiB)": 13.7, "step": 78530, "train_speed(iter/s)": 1.531768 }, { "acc": 0.97817707, "epoch": 36.810405437075225, "grad_norm": 3.4240829944610596, "learning_rate": 1.7834869278004631e-06, "loss": 0.03479872, "memory(GiB)": 13.7, "step": 78535, "train_speed(iter/s)": 1.531772 }, { "acc": 0.98395834, "epoch": 36.81274900398407, "grad_norm": 3.3054590225219727, "learning_rate": 1.7828936389258558e-06, "loss": 0.04351198, "memory(GiB)": 13.7, "step": 78540, "train_speed(iter/s)": 1.531774 }, { "acc": 0.99437504, "epoch": 36.8150925708929, "grad_norm": 1.1004353761672974, "learning_rate": 1.7823004273919355e-06, "loss": 0.02428347, "memory(GiB)": 13.7, "step": 78545, "train_speed(iter/s)": 1.531775 }, { "acc": 0.98355522, "epoch": 36.817436137801735, "grad_norm": 0.8661547303199768, "learning_rate": 1.78170729321296e-06, "loss": 0.08424662, "memory(GiB)": 13.7, "step": 78550, "train_speed(iter/s)": 1.531785 }, { "acc": 0.99111109, "epoch": 36.81977970471057, "grad_norm": 2.2872402667999268, "learning_rate": 1.7811142364031857e-06, "loss": 0.03407785, "memory(GiB)": 13.7, "step": 78555, "train_speed(iter/s)": 1.531785 }, { "acc": 0.98916664, "epoch": 36.822123271619404, "grad_norm": 3.4285075664520264, "learning_rate": 1.7805212569768704e-06, "loss": 0.03472309, "memory(GiB)": 13.7, "step": 78560, "train_speed(iter/s)": 1.53179 }, { "acc": 0.98576393, "epoch": 36.82446683852824, "grad_norm": 6.368778705596924, "learning_rate": 1.7799283549482638e-06, "loss": 0.05277538, "memory(GiB)": 13.7, "step": 78565, "train_speed(iter/s)": 1.531795 }, { "acc": 0.990625, "epoch": 36.82681040543707, "grad_norm": 1.0041133165359497, "learning_rate": 1.7793355303316197e-06, "loss": 0.02277487, "memory(GiB)": 13.7, "step": 78570, "train_speed(iter/s)": 1.531794 }, { "acc": 0.96062498, "epoch": 36.829153972345914, "grad_norm": 2.7369463443756104, "learning_rate": 1.7787427831411888e-06, "loss": 0.07393792, "memory(GiB)": 13.7, "step": 78575, "train_speed(iter/s)": 1.5318 }, { "acc": 0.97562504, "epoch": 36.83149753925475, "grad_norm": 5.268712520599365, "learning_rate": 1.7781501133912148e-06, "loss": 0.05826722, "memory(GiB)": 13.7, "step": 78580, "train_speed(iter/s)": 1.531806 }, { "acc": 0.9957386, "epoch": 36.83384110616358, "grad_norm": 0.00772911636158824, "learning_rate": 1.7775575210959484e-06, "loss": 0.01686058, "memory(GiB)": 13.7, "step": 78585, "train_speed(iter/s)": 1.53181 }, { "acc": 0.98049679, "epoch": 36.83618467307242, "grad_norm": 0.8801566958427429, "learning_rate": 1.7769650062696284e-06, "loss": 0.06858316, "memory(GiB)": 13.7, "step": 78590, "train_speed(iter/s)": 1.531813 }, { "acc": 0.98864574, "epoch": 36.83852823998125, "grad_norm": 0.014296566136181355, "learning_rate": 1.7763725689265e-06, "loss": 0.03390708, "memory(GiB)": 13.7, "step": 78595, "train_speed(iter/s)": 1.531817 }, { "acc": 0.9864584, "epoch": 36.840871806890085, "grad_norm": 2.386746406555176, "learning_rate": 1.7757802090808043e-06, "loss": 0.04910422, "memory(GiB)": 13.7, "step": 78600, "train_speed(iter/s)": 1.53182 }, { "acc": 0.98696423, "epoch": 36.84321537379892, "grad_norm": 3.923593282699585, "learning_rate": 1.775187926746777e-06, "loss": 0.02291821, "memory(GiB)": 13.7, "step": 78605, "train_speed(iter/s)": 1.531822 }, { "acc": 0.97361603, "epoch": 36.845558940707754, "grad_norm": 2.2261409759521484, "learning_rate": 1.7745957219386558e-06, "loss": 0.08333575, "memory(GiB)": 13.7, "step": 78610, "train_speed(iter/s)": 1.531829 }, { "acc": 0.98125, "epoch": 36.847902507616595, "grad_norm": 5.827097415924072, "learning_rate": 1.7740035946706775e-06, "loss": 0.04197396, "memory(GiB)": 13.7, "step": 78615, "train_speed(iter/s)": 1.531827 }, { "acc": 0.99222755, "epoch": 36.85024607452543, "grad_norm": 5.707856178283691, "learning_rate": 1.7734115449570729e-06, "loss": 0.0251408, "memory(GiB)": 13.7, "step": 78620, "train_speed(iter/s)": 1.531834 }, { "acc": 0.98569441, "epoch": 36.852589641434264, "grad_norm": 4.73221492767334, "learning_rate": 1.7728195728120709e-06, "loss": 0.05658116, "memory(GiB)": 13.7, "step": 78625, "train_speed(iter/s)": 1.531832 }, { "acc": 0.98218136, "epoch": 36.8549332083431, "grad_norm": 3.502312183380127, "learning_rate": 1.7722276782499029e-06, "loss": 0.05093517, "memory(GiB)": 13.7, "step": 78630, "train_speed(iter/s)": 1.531831 }, { "acc": 0.98708334, "epoch": 36.85727677525193, "grad_norm": 0.0037078033201396465, "learning_rate": 1.7716358612847958e-06, "loss": 0.03318101, "memory(GiB)": 13.7, "step": 78635, "train_speed(iter/s)": 1.531836 }, { "acc": 0.9884985, "epoch": 36.85962034216077, "grad_norm": 3.9191479682922363, "learning_rate": 1.7710441219309762e-06, "loss": 0.03171657, "memory(GiB)": 13.7, "step": 78640, "train_speed(iter/s)": 1.53184 }, { "acc": 0.99383011, "epoch": 36.8619639090696, "grad_norm": 0.003446266520768404, "learning_rate": 1.7704524602026655e-06, "loss": 0.0274082, "memory(GiB)": 13.7, "step": 78645, "train_speed(iter/s)": 1.531845 }, { "acc": 1.0, "epoch": 36.86430747597844, "grad_norm": 1.7978525161743164, "learning_rate": 1.7698608761140855e-06, "loss": 0.00538876, "memory(GiB)": 13.7, "step": 78650, "train_speed(iter/s)": 1.531846 }, { "acc": 0.97833338, "epoch": 36.86665104288728, "grad_norm": 0.8475833535194397, "learning_rate": 1.7692693696794584e-06, "loss": 0.03063928, "memory(GiB)": 13.7, "step": 78655, "train_speed(iter/s)": 1.531844 }, { "acc": 0.97145824, "epoch": 36.86899460979611, "grad_norm": 3.7367751598358154, "learning_rate": 1.768677940913e-06, "loss": 0.05979496, "memory(GiB)": 13.7, "step": 78660, "train_speed(iter/s)": 1.531847 }, { "acc": 0.96579857, "epoch": 36.871338176704946, "grad_norm": 5.087759494781494, "learning_rate": 1.768086589828925e-06, "loss": 0.06560583, "memory(GiB)": 13.7, "step": 78665, "train_speed(iter/s)": 1.531848 }, { "acc": 0.9958334, "epoch": 36.87368174361378, "grad_norm": 4.212922096252441, "learning_rate": 1.7674953164414494e-06, "loss": 0.00767987, "memory(GiB)": 13.7, "step": 78670, "train_speed(iter/s)": 1.531845 }, { "acc": 0.99020834, "epoch": 36.876025310522614, "grad_norm": 2.2817444801330566, "learning_rate": 1.7669041207647864e-06, "loss": 0.02008453, "memory(GiB)": 13.7, "step": 78675, "train_speed(iter/s)": 1.531853 }, { "acc": 0.9760417, "epoch": 36.87836887743145, "grad_norm": 3.6559360027313232, "learning_rate": 1.7663130028131437e-06, "loss": 0.04517727, "memory(GiB)": 13.7, "step": 78680, "train_speed(iter/s)": 1.531852 }, { "acc": 0.96958332, "epoch": 36.88071244434028, "grad_norm": 4.1926093101501465, "learning_rate": 1.7657219626007314e-06, "loss": 0.09436473, "memory(GiB)": 13.7, "step": 78685, "train_speed(iter/s)": 1.531857 }, { "acc": 0.99071426, "epoch": 36.883056011249124, "grad_norm": 2.824708938598633, "learning_rate": 1.7651310001417576e-06, "loss": 0.02250149, "memory(GiB)": 13.7, "step": 78690, "train_speed(iter/s)": 1.531861 }, { "acc": 0.98599205, "epoch": 36.88539957815796, "grad_norm": 3.203979730606079, "learning_rate": 1.7645401154504235e-06, "loss": 0.03967006, "memory(GiB)": 13.7, "step": 78695, "train_speed(iter/s)": 1.531867 }, { "acc": 0.96550598, "epoch": 36.88774314506679, "grad_norm": 3.8465380668640137, "learning_rate": 1.7639493085409365e-06, "loss": 0.06402643, "memory(GiB)": 13.7, "step": 78700, "train_speed(iter/s)": 1.531876 }, { "acc": 0.98898811, "epoch": 36.89008671197563, "grad_norm": 1.9677233695983887, "learning_rate": 1.7633585794274926e-06, "loss": 0.03480479, "memory(GiB)": 13.7, "step": 78705, "train_speed(iter/s)": 1.531881 }, { "acc": 0.98395834, "epoch": 36.89243027888446, "grad_norm": 1.407061219215393, "learning_rate": 1.7627679281242943e-06, "loss": 0.03126199, "memory(GiB)": 13.7, "step": 78710, "train_speed(iter/s)": 1.531886 }, { "acc": 0.98874998, "epoch": 36.894773845793296, "grad_norm": 3.862640380859375, "learning_rate": 1.7621773546455397e-06, "loss": 0.04369155, "memory(GiB)": 13.7, "step": 78715, "train_speed(iter/s)": 1.531887 }, { "acc": 0.9921875, "epoch": 36.89711741270213, "grad_norm": 1.578943133354187, "learning_rate": 1.7615868590054213e-06, "loss": 0.03258817, "memory(GiB)": 13.7, "step": 78720, "train_speed(iter/s)": 1.53189 }, { "acc": 0.98988094, "epoch": 36.89946097961097, "grad_norm": 2.2460458278656006, "learning_rate": 1.7609964412181341e-06, "loss": 0.02474613, "memory(GiB)": 13.7, "step": 78725, "train_speed(iter/s)": 1.531892 }, { "acc": 0.988447, "epoch": 36.901804546519806, "grad_norm": 1.9898271560668945, "learning_rate": 1.7604061012978715e-06, "loss": 0.05044215, "memory(GiB)": 13.7, "step": 78730, "train_speed(iter/s)": 1.531893 }, { "acc": 0.97029762, "epoch": 36.90414811342864, "grad_norm": 0.0005869031883776188, "learning_rate": 1.7598158392588198e-06, "loss": 0.06055465, "memory(GiB)": 13.7, "step": 78735, "train_speed(iter/s)": 1.531898 }, { "acc": 0.97767868, "epoch": 36.906491680337474, "grad_norm": 5.304058074951172, "learning_rate": 1.7592256551151707e-06, "loss": 0.05569338, "memory(GiB)": 13.7, "step": 78740, "train_speed(iter/s)": 1.531899 }, { "acc": 0.98000002, "epoch": 36.90883524724631, "grad_norm": 2.574735403060913, "learning_rate": 1.7586355488811065e-06, "loss": 0.05235509, "memory(GiB)": 13.7, "step": 78745, "train_speed(iter/s)": 1.531903 }, { "acc": 0.9802084, "epoch": 36.91117881415514, "grad_norm": 3.4189019203186035, "learning_rate": 1.7580455205708156e-06, "loss": 0.03625926, "memory(GiB)": 13.7, "step": 78750, "train_speed(iter/s)": 1.5319 }, { "acc": 0.98809528, "epoch": 36.91352238106398, "grad_norm": 1.5760204792022705, "learning_rate": 1.757455570198476e-06, "loss": 0.03007745, "memory(GiB)": 13.7, "step": 78755, "train_speed(iter/s)": 1.531903 }, { "acc": 0.98967266, "epoch": 36.91586594797281, "grad_norm": 3.741969585418701, "learning_rate": 1.7568656977782706e-06, "loss": 0.03077957, "memory(GiB)": 13.7, "step": 78760, "train_speed(iter/s)": 1.531904 }, { "acc": 0.99333324, "epoch": 36.91820951488165, "grad_norm": 5.090126991271973, "learning_rate": 1.756275903324379e-06, "loss": 0.02916027, "memory(GiB)": 13.7, "step": 78765, "train_speed(iter/s)": 1.531905 }, { "acc": 0.98194447, "epoch": 36.92055308179049, "grad_norm": 2.8760149478912354, "learning_rate": 1.7556861868509747e-06, "loss": 0.05411217, "memory(GiB)": 13.7, "step": 78770, "train_speed(iter/s)": 1.53191 }, { "acc": 0.9743453, "epoch": 36.92289664869932, "grad_norm": 5.606823921203613, "learning_rate": 1.7550965483722347e-06, "loss": 0.04862916, "memory(GiB)": 13.7, "step": 78775, "train_speed(iter/s)": 1.531911 }, { "acc": 0.98556824, "epoch": 36.925240215608156, "grad_norm": 2.888975143432617, "learning_rate": 1.7545069879023318e-06, "loss": 0.06941307, "memory(GiB)": 13.7, "step": 78780, "train_speed(iter/s)": 1.531912 }, { "acc": 0.98883772, "epoch": 36.92758378251699, "grad_norm": 6.565776824951172, "learning_rate": 1.7539175054554385e-06, "loss": 0.06349758, "memory(GiB)": 13.7, "step": 78785, "train_speed(iter/s)": 1.531918 }, { "acc": 0.9739584, "epoch": 36.929927349425824, "grad_norm": 4.717651844024658, "learning_rate": 1.7533281010457228e-06, "loss": 0.03954263, "memory(GiB)": 13.7, "step": 78790, "train_speed(iter/s)": 1.531924 }, { "acc": 0.98270206, "epoch": 36.93227091633466, "grad_norm": 0.9504157900810242, "learning_rate": 1.75273877468735e-06, "loss": 0.06200104, "memory(GiB)": 13.7, "step": 78795, "train_speed(iter/s)": 1.531927 }, { "acc": 0.96238098, "epoch": 36.93461448324349, "grad_norm": 5.361050605773926, "learning_rate": 1.7521495263944872e-06, "loss": 0.11139646, "memory(GiB)": 13.7, "step": 78800, "train_speed(iter/s)": 1.531927 }, { "acc": 0.9708333, "epoch": 36.936958050152334, "grad_norm": 1.486857533454895, "learning_rate": 1.7515603561812998e-06, "loss": 0.0931537, "memory(GiB)": 13.7, "step": 78805, "train_speed(iter/s)": 1.531923 }, { "acc": 0.9875, "epoch": 36.93930161706117, "grad_norm": 3.4264652729034424, "learning_rate": 1.7509712640619465e-06, "loss": 0.04556976, "memory(GiB)": 13.7, "step": 78810, "train_speed(iter/s)": 1.531923 }, { "acc": 0.97198486, "epoch": 36.94164518397, "grad_norm": 3.550344944000244, "learning_rate": 1.750382250050588e-06, "loss": 0.06901044, "memory(GiB)": 13.7, "step": 78815, "train_speed(iter/s)": 1.531926 }, { "acc": 0.97458334, "epoch": 36.94398875087884, "grad_norm": 2.815338134765625, "learning_rate": 1.7497933141613846e-06, "loss": 0.06023959, "memory(GiB)": 13.7, "step": 78820, "train_speed(iter/s)": 1.531925 }, { "acc": 0.98121529, "epoch": 36.94633231778767, "grad_norm": 4.789160251617432, "learning_rate": 1.7492044564084907e-06, "loss": 0.06900512, "memory(GiB)": 13.7, "step": 78825, "train_speed(iter/s)": 1.531929 }, { "acc": 0.96706848, "epoch": 36.948675884696506, "grad_norm": 4.010583877563477, "learning_rate": 1.7486156768060581e-06, "loss": 0.07049009, "memory(GiB)": 13.7, "step": 78830, "train_speed(iter/s)": 1.53193 }, { "acc": 0.98187504, "epoch": 36.95101945160534, "grad_norm": 2.914924144744873, "learning_rate": 1.7480269753682418e-06, "loss": 0.06128773, "memory(GiB)": 13.7, "step": 78835, "train_speed(iter/s)": 1.531928 }, { "acc": 0.98735123, "epoch": 36.95336301851418, "grad_norm": 1.2305867671966553, "learning_rate": 1.7474383521091938e-06, "loss": 0.03442709, "memory(GiB)": 13.7, "step": 78840, "train_speed(iter/s)": 1.531933 }, { "acc": 0.9895833, "epoch": 36.955706585423016, "grad_norm": 4.925955295562744, "learning_rate": 1.746849807043059e-06, "loss": 0.04753946, "memory(GiB)": 13.7, "step": 78845, "train_speed(iter/s)": 1.531933 }, { "acc": 0.9869791, "epoch": 36.95805015233185, "grad_norm": 2.994320869445801, "learning_rate": 1.7462613401839851e-06, "loss": 0.02816379, "memory(GiB)": 13.7, "step": 78850, "train_speed(iter/s)": 1.531937 }, { "acc": 0.987257, "epoch": 36.960393719240685, "grad_norm": 0.011970242485404015, "learning_rate": 1.7456729515461176e-06, "loss": 0.0381331, "memory(GiB)": 13.7, "step": 78855, "train_speed(iter/s)": 1.531943 }, { "acc": 0.9822917, "epoch": 36.96273728614952, "grad_norm": 3.568108320236206, "learning_rate": 1.7450846411436018e-06, "loss": 0.05098183, "memory(GiB)": 13.7, "step": 78860, "train_speed(iter/s)": 1.531942 }, { "acc": 0.98328371, "epoch": 36.96508085305835, "grad_norm": 2.8855199813842773, "learning_rate": 1.7444964089905755e-06, "loss": 0.04222528, "memory(GiB)": 13.7, "step": 78865, "train_speed(iter/s)": 1.53194 }, { "acc": 0.99118061, "epoch": 36.96742441996719, "grad_norm": 0.04592452570796013, "learning_rate": 1.743908255101178e-06, "loss": 0.02413671, "memory(GiB)": 13.7, "step": 78870, "train_speed(iter/s)": 1.531942 }, { "acc": 0.99020834, "epoch": 36.96976798687602, "grad_norm": 5.187206745147705, "learning_rate": 1.7433201794895462e-06, "loss": 0.05228615, "memory(GiB)": 13.7, "step": 78875, "train_speed(iter/s)": 1.531947 }, { "acc": 0.99541664, "epoch": 36.97211155378486, "grad_norm": 3.906352996826172, "learning_rate": 1.7427321821698192e-06, "loss": 0.03413335, "memory(GiB)": 13.7, "step": 78880, "train_speed(iter/s)": 1.531945 }, { "acc": 0.99125004, "epoch": 36.9744551206937, "grad_norm": 3.859523057937622, "learning_rate": 1.7421442631561261e-06, "loss": 0.02906681, "memory(GiB)": 13.7, "step": 78885, "train_speed(iter/s)": 1.531944 }, { "acc": 0.9895834, "epoch": 36.97679868760253, "grad_norm": 5.108623027801514, "learning_rate": 1.7415564224626e-06, "loss": 0.05424001, "memory(GiB)": 13.7, "step": 78890, "train_speed(iter/s)": 1.53194 }, { "acc": 0.975, "epoch": 36.979142254511366, "grad_norm": 5.810803413391113, "learning_rate": 1.7409686601033736e-06, "loss": 0.08552718, "memory(GiB)": 13.7, "step": 78895, "train_speed(iter/s)": 1.531944 }, { "acc": 0.97473211, "epoch": 36.9814858214202, "grad_norm": 3.280890464782715, "learning_rate": 1.7403809760925694e-06, "loss": 0.0742926, "memory(GiB)": 13.7, "step": 78900, "train_speed(iter/s)": 1.531941 }, { "acc": 0.99258928, "epoch": 36.983829388329035, "grad_norm": 1.7136906385421753, "learning_rate": 1.7397933704443168e-06, "loss": 0.02201957, "memory(GiB)": 13.7, "step": 78905, "train_speed(iter/s)": 1.53194 }, { "acc": 0.9875, "epoch": 36.98617295523787, "grad_norm": 4.230127334594727, "learning_rate": 1.7392058431727406e-06, "loss": 0.04021667, "memory(GiB)": 13.7, "step": 78910, "train_speed(iter/s)": 1.531939 }, { "acc": 0.9895833, "epoch": 36.98851652214671, "grad_norm": 3.5878024101257324, "learning_rate": 1.7386183942919623e-06, "loss": 0.04447535, "memory(GiB)": 13.7, "step": 78915, "train_speed(iter/s)": 1.531946 }, { "acc": 0.99508934, "epoch": 36.990860089055545, "grad_norm": 1.2924433946609497, "learning_rate": 1.7380310238161003e-06, "loss": 0.02689435, "memory(GiB)": 13.7, "step": 78920, "train_speed(iter/s)": 1.531949 }, { "acc": 0.978125, "epoch": 36.99320365596438, "grad_norm": 3.008265972137451, "learning_rate": 1.7374437317592736e-06, "loss": 0.05445143, "memory(GiB)": 13.7, "step": 78925, "train_speed(iter/s)": 1.531952 }, { "acc": 0.99548607, "epoch": 36.99554722287321, "grad_norm": 2.873351812362671, "learning_rate": 1.7368565181356e-06, "loss": 0.01728911, "memory(GiB)": 13.7, "step": 78930, "train_speed(iter/s)": 1.531946 }, { "acc": 0.98936958, "epoch": 36.99789078978205, "grad_norm": 3.3864684104919434, "learning_rate": 1.7362693829591957e-06, "loss": 0.06197962, "memory(GiB)": 13.7, "step": 78935, "train_speed(iter/s)": 1.531949 }, { "acc": 0.98458328, "epoch": 37.00023435669088, "grad_norm": 5.116295337677002, "learning_rate": 1.7356823262441691e-06, "loss": 0.02748465, "memory(GiB)": 13.7, "step": 78940, "train_speed(iter/s)": 1.531939 }, { "acc": 0.99750004, "epoch": 37.002577923599716, "grad_norm": 1.8593467473983765, "learning_rate": 1.7350953480046343e-06, "loss": 0.03929359, "memory(GiB)": 13.7, "step": 78945, "train_speed(iter/s)": 1.531945 }, { "acc": 0.98187504, "epoch": 37.00492149050855, "grad_norm": 0.0017745019868016243, "learning_rate": 1.7345084482547013e-06, "loss": 0.06025399, "memory(GiB)": 13.7, "step": 78950, "train_speed(iter/s)": 1.531949 }, { "acc": 0.97738094, "epoch": 37.00726505741739, "grad_norm": 6.482333660125732, "learning_rate": 1.7339216270084759e-06, "loss": 0.08333275, "memory(GiB)": 13.7, "step": 78955, "train_speed(iter/s)": 1.531954 }, { "acc": 0.98833332, "epoch": 37.009608624326226, "grad_norm": 3.1723833084106445, "learning_rate": 1.7333348842800608e-06, "loss": 0.03045164, "memory(GiB)": 13.7, "step": 78960, "train_speed(iter/s)": 1.53195 }, { "acc": 0.9802083, "epoch": 37.01195219123506, "grad_norm": 6.475409984588623, "learning_rate": 1.7327482200835617e-06, "loss": 0.05514584, "memory(GiB)": 13.7, "step": 78965, "train_speed(iter/s)": 1.531955 }, { "acc": 0.97455044, "epoch": 37.014295758143895, "grad_norm": 2.847673177719116, "learning_rate": 1.7321616344330812e-06, "loss": 0.09595627, "memory(GiB)": 13.7, "step": 78970, "train_speed(iter/s)": 1.531955 }, { "acc": 0.98395834, "epoch": 37.01663932505273, "grad_norm": 3.1387569904327393, "learning_rate": 1.7315751273427169e-06, "loss": 0.04926145, "memory(GiB)": 13.7, "step": 78975, "train_speed(iter/s)": 1.531958 }, { "acc": 0.99229164, "epoch": 37.018982891961564, "grad_norm": 1.824580430984497, "learning_rate": 1.7309886988265673e-06, "loss": 0.02405106, "memory(GiB)": 13.7, "step": 78980, "train_speed(iter/s)": 1.531967 }, { "acc": 0.98217258, "epoch": 37.0213264588704, "grad_norm": 3.74113392829895, "learning_rate": 1.7304023488987295e-06, "loss": 0.04073582, "memory(GiB)": 13.7, "step": 78985, "train_speed(iter/s)": 1.531966 }, { "acc": 0.97749996, "epoch": 37.02367002577924, "grad_norm": 2.7931690216064453, "learning_rate": 1.7298160775732938e-06, "loss": 0.06115685, "memory(GiB)": 13.7, "step": 78990, "train_speed(iter/s)": 1.531972 }, { "acc": 0.97302084, "epoch": 37.026013592688074, "grad_norm": 6.019174575805664, "learning_rate": 1.7292298848643574e-06, "loss": 0.05366779, "memory(GiB)": 13.7, "step": 78995, "train_speed(iter/s)": 1.531975 }, { "acc": 0.98675594, "epoch": 37.02835715959691, "grad_norm": 3.2529456615448, "learning_rate": 1.728643770786005e-06, "loss": 0.02899157, "memory(GiB)": 13.7, "step": 79000, "train_speed(iter/s)": 1.531976 }, { "acc": 0.98208332, "epoch": 37.03070072650574, "grad_norm": 3.5366475582122803, "learning_rate": 1.7280577353523281e-06, "loss": 0.04131653, "memory(GiB)": 13.7, "step": 79005, "train_speed(iter/s)": 1.531979 }, { "acc": 0.99145832, "epoch": 37.03304429341458, "grad_norm": 0.7530468702316284, "learning_rate": 1.727471778577414e-06, "loss": 0.02021113, "memory(GiB)": 13.7, "step": 79010, "train_speed(iter/s)": 1.531979 }, { "acc": 0.99072914, "epoch": 37.03538786032341, "grad_norm": 2.862125873565674, "learning_rate": 1.726885900475344e-06, "loss": 0.04349844, "memory(GiB)": 13.7, "step": 79015, "train_speed(iter/s)": 1.531981 }, { "acc": 0.99333334, "epoch": 37.037731427232245, "grad_norm": 1.0638350248336792, "learning_rate": 1.7263001010602017e-06, "loss": 0.01997949, "memory(GiB)": 13.7, "step": 79020, "train_speed(iter/s)": 1.531979 }, { "acc": 0.98198862, "epoch": 37.04007499414108, "grad_norm": 7.292360782623291, "learning_rate": 1.725714380346071e-06, "loss": 0.03728825, "memory(GiB)": 13.7, "step": 79025, "train_speed(iter/s)": 1.531981 }, { "acc": 0.99092264, "epoch": 37.04241856104992, "grad_norm": 0.7430696487426758, "learning_rate": 1.7251287383470259e-06, "loss": 0.03416945, "memory(GiB)": 13.7, "step": 79030, "train_speed(iter/s)": 1.531986 }, { "acc": 0.97937498, "epoch": 37.044762127958755, "grad_norm": 5.748486042022705, "learning_rate": 1.7245431750771473e-06, "loss": 0.02658161, "memory(GiB)": 13.7, "step": 79035, "train_speed(iter/s)": 1.531985 }, { "acc": 0.99020834, "epoch": 37.04710569486759, "grad_norm": 2.8919765949249268, "learning_rate": 1.7239576905505068e-06, "loss": 0.02913664, "memory(GiB)": 13.7, "step": 79040, "train_speed(iter/s)": 1.531991 }, { "acc": 0.96791668, "epoch": 37.049449261776424, "grad_norm": 3.551305055618286, "learning_rate": 1.7233722847811807e-06, "loss": 0.06895801, "memory(GiB)": 13.7, "step": 79045, "train_speed(iter/s)": 1.531987 }, { "acc": 0.99504871, "epoch": 37.05179282868526, "grad_norm": 1.9934368133544922, "learning_rate": 1.7227869577832366e-06, "loss": 0.0597471, "memory(GiB)": 13.7, "step": 79050, "train_speed(iter/s)": 1.531988 }, { "acc": 0.98781252, "epoch": 37.05413639559409, "grad_norm": 3.9258482456207275, "learning_rate": 1.7222017095707464e-06, "loss": 0.03214646, "memory(GiB)": 13.7, "step": 79055, "train_speed(iter/s)": 1.531989 }, { "acc": 0.98842258, "epoch": 37.05647996250293, "grad_norm": 4.212163925170898, "learning_rate": 1.7216165401577766e-06, "loss": 0.05630791, "memory(GiB)": 13.7, "step": 79060, "train_speed(iter/s)": 1.531991 }, { "acc": 0.99090271, "epoch": 37.05882352941177, "grad_norm": 2.9617815017700195, "learning_rate": 1.7210314495583954e-06, "loss": 0.03073551, "memory(GiB)": 13.7, "step": 79065, "train_speed(iter/s)": 1.531992 }, { "acc": 0.99508934, "epoch": 37.0611670963206, "grad_norm": 1.3523892164230347, "learning_rate": 1.7204464377866624e-06, "loss": 0.02710084, "memory(GiB)": 13.7, "step": 79070, "train_speed(iter/s)": 1.531991 }, { "acc": 1.0, "epoch": 37.06351066322944, "grad_norm": 1.1712698936462402, "learning_rate": 1.7198615048566431e-06, "loss": 0.00769595, "memory(GiB)": 13.7, "step": 79075, "train_speed(iter/s)": 1.531999 }, { "acc": 0.99926472, "epoch": 37.06585423013827, "grad_norm": 1.6776525974273682, "learning_rate": 1.7192766507823935e-06, "loss": 0.01791112, "memory(GiB)": 13.7, "step": 79080, "train_speed(iter/s)": 1.532003 }, { "acc": 0.99454861, "epoch": 37.068197797047105, "grad_norm": 2.22153639793396, "learning_rate": 1.7186918755779753e-06, "loss": 0.04314392, "memory(GiB)": 13.7, "step": 79085, "train_speed(iter/s)": 1.532002 }, { "acc": 0.97061014, "epoch": 37.07054136395594, "grad_norm": 4.582711219787598, "learning_rate": 1.718107179257441e-06, "loss": 0.09472055, "memory(GiB)": 13.7, "step": 79090, "train_speed(iter/s)": 1.532002 }, { "acc": 0.98312492, "epoch": 37.072884930864774, "grad_norm": 3.556581974029541, "learning_rate": 1.7175225618348474e-06, "loss": 0.04578657, "memory(GiB)": 13.7, "step": 79095, "train_speed(iter/s)": 1.532002 }, { "acc": 0.98729172, "epoch": 37.07522849777361, "grad_norm": 0.010747171007096767, "learning_rate": 1.7169380233242472e-06, "loss": 0.04661043, "memory(GiB)": 13.7, "step": 79100, "train_speed(iter/s)": 1.532003 }, { "acc": 0.98562498, "epoch": 37.07757206468245, "grad_norm": 2.412290334701538, "learning_rate": 1.7163535637396874e-06, "loss": 0.05226854, "memory(GiB)": 13.7, "step": 79105, "train_speed(iter/s)": 1.532001 }, { "acc": 0.9854167, "epoch": 37.079915631591284, "grad_norm": 8.149991989135742, "learning_rate": 1.715769183095219e-06, "loss": 0.04343542, "memory(GiB)": 13.7, "step": 79110, "train_speed(iter/s)": 1.531998 }, { "acc": 0.98916664, "epoch": 37.08225919850012, "grad_norm": 0.47145307064056396, "learning_rate": 1.7151848814048902e-06, "loss": 0.02442306, "memory(GiB)": 13.7, "step": 79115, "train_speed(iter/s)": 1.531998 }, { "acc": 0.98708334, "epoch": 37.08460276540895, "grad_norm": 4.086810111999512, "learning_rate": 1.7146006586827438e-06, "loss": 0.04374068, "memory(GiB)": 13.7, "step": 79120, "train_speed(iter/s)": 1.531999 }, { "acc": 0.98806553, "epoch": 37.08694633231779, "grad_norm": 2.5601019859313965, "learning_rate": 1.7140165149428203e-06, "loss": 0.04280385, "memory(GiB)": 13.7, "step": 79125, "train_speed(iter/s)": 1.531996 }, { "acc": 0.98946428, "epoch": 37.08928989922662, "grad_norm": 4.314251899719238, "learning_rate": 1.713432450199163e-06, "loss": 0.03180965, "memory(GiB)": 13.7, "step": 79130, "train_speed(iter/s)": 1.531998 }, { "acc": 0.98239584, "epoch": 37.091633466135455, "grad_norm": 1.6984190940856934, "learning_rate": 1.7128484644658111e-06, "loss": 0.03971406, "memory(GiB)": 13.7, "step": 79135, "train_speed(iter/s)": 1.532 }, { "acc": 0.97145824, "epoch": 37.0939770330443, "grad_norm": 6.128233432769775, "learning_rate": 1.7122645577568027e-06, "loss": 0.0799703, "memory(GiB)": 13.7, "step": 79140, "train_speed(iter/s)": 1.532003 }, { "acc": 0.97979164, "epoch": 37.09632059995313, "grad_norm": 2.9626739025115967, "learning_rate": 1.7116807300861704e-06, "loss": 0.06599619, "memory(GiB)": 13.7, "step": 79145, "train_speed(iter/s)": 1.532001 }, { "acc": 0.99092255, "epoch": 37.098664166861965, "grad_norm": 1.5477886199951172, "learning_rate": 1.7110969814679488e-06, "loss": 0.03954093, "memory(GiB)": 13.7, "step": 79150, "train_speed(iter/s)": 1.532004 }, { "acc": 0.98988972, "epoch": 37.1010077337708, "grad_norm": 3.418074369430542, "learning_rate": 1.7105133119161704e-06, "loss": 0.02247985, "memory(GiB)": 13.7, "step": 79155, "train_speed(iter/s)": 1.532006 }, { "acc": 0.99125004, "epoch": 37.103351300679634, "grad_norm": 0.005113054532557726, "learning_rate": 1.7099297214448635e-06, "loss": 0.05719128, "memory(GiB)": 13.7, "step": 79160, "train_speed(iter/s)": 1.532011 }, { "acc": 0.99535847, "epoch": 37.10569486758847, "grad_norm": 3.669940233230591, "learning_rate": 1.7093462100680544e-06, "loss": 0.04261984, "memory(GiB)": 13.7, "step": 79165, "train_speed(iter/s)": 1.532016 }, { "acc": 0.96154766, "epoch": 37.1080384344973, "grad_norm": 3.701054096221924, "learning_rate": 1.7087627777997698e-06, "loss": 0.1211951, "memory(GiB)": 13.7, "step": 79170, "train_speed(iter/s)": 1.53202 }, { "acc": 0.9802084, "epoch": 37.11038200140614, "grad_norm": 3.9093306064605713, "learning_rate": 1.7081794246540356e-06, "loss": 0.04264984, "memory(GiB)": 13.7, "step": 79175, "train_speed(iter/s)": 1.532022 }, { "acc": 0.98350697, "epoch": 37.11272556831498, "grad_norm": 7.230010032653809, "learning_rate": 1.7075961506448705e-06, "loss": 0.03888897, "memory(GiB)": 13.7, "step": 79180, "train_speed(iter/s)": 1.532023 }, { "acc": 0.99300594, "epoch": 37.11506913522381, "grad_norm": 2.2568609714508057, "learning_rate": 1.707012955786296e-06, "loss": 0.02787624, "memory(GiB)": 13.7, "step": 79185, "train_speed(iter/s)": 1.532024 }, { "acc": 0.98353634, "epoch": 37.11741270213265, "grad_norm": 3.184156656265259, "learning_rate": 1.7064298400923312e-06, "loss": 0.0309128, "memory(GiB)": 13.7, "step": 79190, "train_speed(iter/s)": 1.532023 }, { "acc": 0.9905303, "epoch": 37.11975626904148, "grad_norm": 3.7410852909088135, "learning_rate": 1.70584680357699e-06, "loss": 0.03428368, "memory(GiB)": 13.7, "step": 79195, "train_speed(iter/s)": 1.532028 }, { "acc": 0.98266945, "epoch": 37.122099835950316, "grad_norm": 5.975574493408203, "learning_rate": 1.7052638462542892e-06, "loss": 0.05266438, "memory(GiB)": 13.7, "step": 79200, "train_speed(iter/s)": 1.532031 }, { "acc": 0.97979164, "epoch": 37.12444340285915, "grad_norm": 6.79774808883667, "learning_rate": 1.704680968138238e-06, "loss": 0.04689083, "memory(GiB)": 13.7, "step": 79205, "train_speed(iter/s)": 1.532034 }, { "acc": 0.97995033, "epoch": 37.126786969767984, "grad_norm": 2.217691421508789, "learning_rate": 1.7040981692428496e-06, "loss": 0.07912229, "memory(GiB)": 13.7, "step": 79210, "train_speed(iter/s)": 1.53204 }, { "acc": 0.9916667, "epoch": 37.129130536676826, "grad_norm": 0.0022045779041945934, "learning_rate": 1.7035154495821324e-06, "loss": 0.0261975, "memory(GiB)": 13.7, "step": 79215, "train_speed(iter/s)": 1.532038 }, { "acc": 0.99125004, "epoch": 37.13147410358566, "grad_norm": 0.6684476733207703, "learning_rate": 1.7029328091700918e-06, "loss": 0.01585715, "memory(GiB)": 13.7, "step": 79220, "train_speed(iter/s)": 1.532037 }, { "acc": 0.98312492, "epoch": 37.133817670494494, "grad_norm": 0.003137107938528061, "learning_rate": 1.7023502480207321e-06, "loss": 0.06436935, "memory(GiB)": 13.7, "step": 79225, "train_speed(iter/s)": 1.532041 }, { "acc": 0.98571434, "epoch": 37.13616123740333, "grad_norm": 0.015645073726773262, "learning_rate": 1.7017677661480592e-06, "loss": 0.03143454, "memory(GiB)": 13.7, "step": 79230, "train_speed(iter/s)": 1.532044 }, { "acc": 0.98312492, "epoch": 37.13850480431216, "grad_norm": 2.8758299350738525, "learning_rate": 1.7011853635660705e-06, "loss": 0.03111205, "memory(GiB)": 13.7, "step": 79235, "train_speed(iter/s)": 1.532043 }, { "acc": 0.98768425, "epoch": 37.140848371221, "grad_norm": 3.035578966140747, "learning_rate": 1.7006030402887661e-06, "loss": 0.02993608, "memory(GiB)": 13.7, "step": 79240, "train_speed(iter/s)": 1.532042 }, { "acc": 0.9791667, "epoch": 37.14319193812983, "grad_norm": 3.7645785808563232, "learning_rate": 1.700020796330146e-06, "loss": 0.05312741, "memory(GiB)": 13.7, "step": 79245, "train_speed(iter/s)": 1.532047 }, { "acc": 0.98834324, "epoch": 37.145535505038666, "grad_norm": 2.5118567943573, "learning_rate": 1.6994386317042025e-06, "loss": 0.04371533, "memory(GiB)": 13.7, "step": 79250, "train_speed(iter/s)": 1.532049 }, { "acc": 0.9770834, "epoch": 37.14787907194751, "grad_norm": 0.04006870836019516, "learning_rate": 1.6988565464249277e-06, "loss": 0.07149539, "memory(GiB)": 13.7, "step": 79255, "train_speed(iter/s)": 1.532051 }, { "acc": 0.9916667, "epoch": 37.15022263885634, "grad_norm": 2.6924045085906982, "learning_rate": 1.6982745405063153e-06, "loss": 0.01329555, "memory(GiB)": 13.7, "step": 79260, "train_speed(iter/s)": 1.532055 }, { "acc": 0.98125, "epoch": 37.152566205765176, "grad_norm": 2.9508659839630127, "learning_rate": 1.697692613962355e-06, "loss": 0.03237411, "memory(GiB)": 13.7, "step": 79265, "train_speed(iter/s)": 1.532053 }, { "acc": 0.98633919, "epoch": 37.15490977267401, "grad_norm": 1.9633419513702393, "learning_rate": 1.697110766807033e-06, "loss": 0.03794102, "memory(GiB)": 13.7, "step": 79270, "train_speed(iter/s)": 1.532053 }, { "acc": 0.9947916, "epoch": 37.157253339582844, "grad_norm": 3.5547373294830322, "learning_rate": 1.6965289990543355e-06, "loss": 0.01571743, "memory(GiB)": 13.7, "step": 79275, "train_speed(iter/s)": 1.532054 }, { "acc": 0.98447914, "epoch": 37.15959690649168, "grad_norm": 4.212042808532715, "learning_rate": 1.6959473107182465e-06, "loss": 0.04539833, "memory(GiB)": 13.7, "step": 79280, "train_speed(iter/s)": 1.532052 }, { "acc": 0.96711311, "epoch": 37.16194047340051, "grad_norm": 2.4506161212921143, "learning_rate": 1.6953657018127496e-06, "loss": 0.08120791, "memory(GiB)": 13.7, "step": 79285, "train_speed(iter/s)": 1.532058 }, { "acc": 0.97808609, "epoch": 37.164284040309354, "grad_norm": 4.944238185882568, "learning_rate": 1.6947841723518224e-06, "loss": 0.04414694, "memory(GiB)": 13.7, "step": 79290, "train_speed(iter/s)": 1.532058 }, { "acc": 0.99392853, "epoch": 37.16662760721819, "grad_norm": 1.3965458869934082, "learning_rate": 1.6942027223494425e-06, "loss": 0.01703836, "memory(GiB)": 13.7, "step": 79295, "train_speed(iter/s)": 1.53206 }, { "acc": 0.9864584, "epoch": 37.16897117412702, "grad_norm": 4.262012958526611, "learning_rate": 1.6936213518195868e-06, "loss": 0.03833782, "memory(GiB)": 13.7, "step": 79300, "train_speed(iter/s)": 1.53206 }, { "acc": 0.9822916, "epoch": 37.17131474103586, "grad_norm": 2.873070478439331, "learning_rate": 1.6930400607762318e-06, "loss": 0.04313565, "memory(GiB)": 13.7, "step": 79305, "train_speed(iter/s)": 1.532063 }, { "acc": 0.99004116, "epoch": 37.17365830794469, "grad_norm": 3.893186330795288, "learning_rate": 1.692458849233346e-06, "loss": 0.06059435, "memory(GiB)": 13.7, "step": 79310, "train_speed(iter/s)": 1.532064 }, { "acc": 0.97000008, "epoch": 37.176001874853526, "grad_norm": 2.779355525970459, "learning_rate": 1.691877717204902e-06, "loss": 0.04851871, "memory(GiB)": 13.7, "step": 79315, "train_speed(iter/s)": 1.532069 }, { "acc": 0.9832386, "epoch": 37.17834544176236, "grad_norm": 3.064678430557251, "learning_rate": 1.6912966647048692e-06, "loss": 0.03812889, "memory(GiB)": 13.7, "step": 79320, "train_speed(iter/s)": 1.532069 }, { "acc": 0.98154764, "epoch": 37.180689008671195, "grad_norm": 0.14205974340438843, "learning_rate": 1.690715691747212e-06, "loss": 0.05634854, "memory(GiB)": 13.7, "step": 79325, "train_speed(iter/s)": 1.532076 }, { "acc": 0.98640871, "epoch": 37.183032575580036, "grad_norm": 2.9268574714660645, "learning_rate": 1.690134798345897e-06, "loss": 0.05626239, "memory(GiB)": 13.7, "step": 79330, "train_speed(iter/s)": 1.532081 }, { "acc": 0.97928028, "epoch": 37.18537614248887, "grad_norm": 3.5968430042266846, "learning_rate": 1.6895539845148847e-06, "loss": 0.04897894, "memory(GiB)": 13.7, "step": 79335, "train_speed(iter/s)": 1.53208 }, { "acc": 0.99508934, "epoch": 37.187719709397705, "grad_norm": 1.6958034038543701, "learning_rate": 1.688973250268139e-06, "loss": 0.03117586, "memory(GiB)": 13.7, "step": 79340, "train_speed(iter/s)": 1.532083 }, { "acc": 0.98520832, "epoch": 37.19006327630654, "grad_norm": 0.6819194555282593, "learning_rate": 1.6883925956196158e-06, "loss": 0.03688256, "memory(GiB)": 13.7, "step": 79345, "train_speed(iter/s)": 1.532085 }, { "acc": 0.97885418, "epoch": 37.19240684321537, "grad_norm": 2.9831900596618652, "learning_rate": 1.6878120205832734e-06, "loss": 0.07202393, "memory(GiB)": 13.7, "step": 79350, "train_speed(iter/s)": 1.532088 }, { "acc": 0.98586311, "epoch": 37.19475041012421, "grad_norm": 2.5516302585601807, "learning_rate": 1.6872315251730664e-06, "loss": 0.050913, "memory(GiB)": 13.7, "step": 79355, "train_speed(iter/s)": 1.53209 }, { "acc": 0.97562504, "epoch": 37.19709397703304, "grad_norm": 3.609165906906128, "learning_rate": 1.6866511094029508e-06, "loss": 0.04411817, "memory(GiB)": 13.7, "step": 79360, "train_speed(iter/s)": 1.532099 }, { "acc": 0.99375, "epoch": 37.199437543941876, "grad_norm": 0.009054767899215221, "learning_rate": 1.6860707732868735e-06, "loss": 0.02956936, "memory(GiB)": 13.7, "step": 79365, "train_speed(iter/s)": 1.532101 }, { "acc": 0.98743057, "epoch": 37.20178111085072, "grad_norm": 1.7144695520401, "learning_rate": 1.6854905168387887e-06, "loss": 0.05617607, "memory(GiB)": 13.7, "step": 79370, "train_speed(iter/s)": 1.532104 }, { "acc": 0.97686014, "epoch": 37.20412467775955, "grad_norm": 2.834014654159546, "learning_rate": 1.6849103400726388e-06, "loss": 0.04037064, "memory(GiB)": 13.7, "step": 79375, "train_speed(iter/s)": 1.532112 }, { "acc": 0.99437504, "epoch": 37.206468244668386, "grad_norm": 3.7017550468444824, "learning_rate": 1.6843302430023736e-06, "loss": 0.01965865, "memory(GiB)": 13.7, "step": 79380, "train_speed(iter/s)": 1.532114 }, { "acc": 0.97857151, "epoch": 37.20881181157722, "grad_norm": 4.730184078216553, "learning_rate": 1.6837502256419335e-06, "loss": 0.04566275, "memory(GiB)": 13.7, "step": 79385, "train_speed(iter/s)": 1.532118 }, { "acc": 0.9926136, "epoch": 37.211155378486055, "grad_norm": 4.211467742919922, "learning_rate": 1.6831702880052614e-06, "loss": 0.03879743, "memory(GiB)": 13.7, "step": 79390, "train_speed(iter/s)": 1.532114 }, { "acc": 0.98312492, "epoch": 37.21349894539489, "grad_norm": 5.192643642425537, "learning_rate": 1.6825904301062986e-06, "loss": 0.06109863, "memory(GiB)": 13.7, "step": 79395, "train_speed(iter/s)": 1.532116 }, { "acc": 0.98916664, "epoch": 37.21584251230372, "grad_norm": 2.8524160385131836, "learning_rate": 1.6820106519589808e-06, "loss": 0.03458149, "memory(GiB)": 13.7, "step": 79400, "train_speed(iter/s)": 1.532116 }, { "acc": 0.98125, "epoch": 37.218186079212565, "grad_norm": 5.399511337280273, "learning_rate": 1.6814309535772441e-06, "loss": 0.04154719, "memory(GiB)": 13.7, "step": 79405, "train_speed(iter/s)": 1.532122 }, { "acc": 0.99131556, "epoch": 37.2205296461214, "grad_norm": 2.0703744888305664, "learning_rate": 1.6808513349750255e-06, "loss": 0.03166948, "memory(GiB)": 13.7, "step": 79410, "train_speed(iter/s)": 1.532127 }, { "acc": 0.9838542, "epoch": 37.22287321303023, "grad_norm": 1.8531984090805054, "learning_rate": 1.6802717961662527e-06, "loss": 0.03829103, "memory(GiB)": 13.7, "step": 79415, "train_speed(iter/s)": 1.532129 }, { "acc": 0.98370476, "epoch": 37.22521677993907, "grad_norm": 4.278121471405029, "learning_rate": 1.67969233716486e-06, "loss": 0.04350799, "memory(GiB)": 13.7, "step": 79420, "train_speed(iter/s)": 1.532129 }, { "acc": 0.98734379, "epoch": 37.2275603468479, "grad_norm": 3.9279227256774902, "learning_rate": 1.6791129579847724e-06, "loss": 0.03562317, "memory(GiB)": 13.7, "step": 79425, "train_speed(iter/s)": 1.532133 }, { "acc": 0.9958334, "epoch": 37.229903913756736, "grad_norm": 0.40597769618034363, "learning_rate": 1.6785336586399175e-06, "loss": 0.0309688, "memory(GiB)": 13.7, "step": 79430, "train_speed(iter/s)": 1.532133 }, { "acc": 0.97250004, "epoch": 37.23224748066557, "grad_norm": 2.209155559539795, "learning_rate": 1.6779544391442218e-06, "loss": 0.04803559, "memory(GiB)": 13.7, "step": 79435, "train_speed(iter/s)": 1.532132 }, { "acc": 0.99437504, "epoch": 37.234591047574405, "grad_norm": 0.003156149061396718, "learning_rate": 1.6773752995116042e-06, "loss": 0.02257934, "memory(GiB)": 13.7, "step": 79440, "train_speed(iter/s)": 1.532135 }, { "acc": 0.99020834, "epoch": 37.236934614483246, "grad_norm": 2.6475532054901123, "learning_rate": 1.6767962397559866e-06, "loss": 0.03081349, "memory(GiB)": 13.7, "step": 79445, "train_speed(iter/s)": 1.532137 }, { "acc": 0.99375, "epoch": 37.23927818139208, "grad_norm": 3.680218458175659, "learning_rate": 1.6762172598912902e-06, "loss": 0.02583463, "memory(GiB)": 13.7, "step": 79450, "train_speed(iter/s)": 1.532137 }, { "acc": 0.99437504, "epoch": 37.241621748300915, "grad_norm": 2.21097469329834, "learning_rate": 1.6756383599314298e-06, "loss": 0.02351576, "memory(GiB)": 13.7, "step": 79455, "train_speed(iter/s)": 1.532139 }, { "acc": 0.98362179, "epoch": 37.24396531520975, "grad_norm": 6.552834987640381, "learning_rate": 1.6750595398903188e-06, "loss": 0.02477821, "memory(GiB)": 13.7, "step": 79460, "train_speed(iter/s)": 1.532146 }, { "acc": 0.98675594, "epoch": 37.24630888211858, "grad_norm": 2.049886465072632, "learning_rate": 1.6744807997818715e-06, "loss": 0.03867872, "memory(GiB)": 13.7, "step": 79465, "train_speed(iter/s)": 1.532145 }, { "acc": 0.99750004, "epoch": 37.24865244902742, "grad_norm": 1.6926730871200562, "learning_rate": 1.6739021396200004e-06, "loss": 0.00818278, "memory(GiB)": 13.7, "step": 79470, "train_speed(iter/s)": 1.532144 }, { "acc": 0.98770828, "epoch": 37.25099601593625, "grad_norm": 2.3686182498931885, "learning_rate": 1.6733235594186113e-06, "loss": 0.02557403, "memory(GiB)": 13.7, "step": 79475, "train_speed(iter/s)": 1.53215 }, { "acc": 0.97546082, "epoch": 37.25333958284509, "grad_norm": 10.388375282287598, "learning_rate": 1.6727450591916134e-06, "loss": 0.05741085, "memory(GiB)": 13.7, "step": 79480, "train_speed(iter/s)": 1.532153 }, { "acc": 0.98416061, "epoch": 37.25568314975393, "grad_norm": 3.387784004211426, "learning_rate": 1.6721666389529135e-06, "loss": 0.03816857, "memory(GiB)": 13.7, "step": 79485, "train_speed(iter/s)": 1.532157 }, { "acc": 0.98202457, "epoch": 37.25802671666276, "grad_norm": 5.3485822677612305, "learning_rate": 1.6715882987164115e-06, "loss": 0.06238948, "memory(GiB)": 13.7, "step": 79490, "train_speed(iter/s)": 1.532158 }, { "acc": 0.97770834, "epoch": 37.260370283571596, "grad_norm": 7.060896396636963, "learning_rate": 1.671010038496012e-06, "loss": 0.04393388, "memory(GiB)": 13.7, "step": 79495, "train_speed(iter/s)": 1.532162 }, { "acc": 0.95854168, "epoch": 37.26271385048043, "grad_norm": 7.539798736572266, "learning_rate": 1.6704318583056117e-06, "loss": 0.07388242, "memory(GiB)": 13.7, "step": 79500, "train_speed(iter/s)": 1.532159 }, { "acc": 0.98916664, "epoch": 37.265057417389265, "grad_norm": 2.116542339324951, "learning_rate": 1.6698537581591086e-06, "loss": 0.06252719, "memory(GiB)": 13.7, "step": 79505, "train_speed(iter/s)": 1.532161 }, { "acc": 0.98194447, "epoch": 37.2674009842981, "grad_norm": 1.128456711769104, "learning_rate": 1.6692757380704015e-06, "loss": 0.04555842, "memory(GiB)": 13.7, "step": 79510, "train_speed(iter/s)": 1.53216 }, { "acc": 0.97145834, "epoch": 37.269744551206934, "grad_norm": 0.8041320443153381, "learning_rate": 1.66869779805338e-06, "loss": 0.04303857, "memory(GiB)": 13.7, "step": 79515, "train_speed(iter/s)": 1.532165 }, { "acc": 0.98291664, "epoch": 37.272088118115775, "grad_norm": 4.823355674743652, "learning_rate": 1.6681199381219372e-06, "loss": 0.03313585, "memory(GiB)": 13.7, "step": 79520, "train_speed(iter/s)": 1.532167 }, { "acc": 0.98864584, "epoch": 37.27443168502461, "grad_norm": 5.646128177642822, "learning_rate": 1.6675421582899654e-06, "loss": 0.03427359, "memory(GiB)": 13.7, "step": 79525, "train_speed(iter/s)": 1.532169 }, { "acc": 0.9927084, "epoch": 37.276775251933444, "grad_norm": 4.538097381591797, "learning_rate": 1.6669644585713488e-06, "loss": 0.01506434, "memory(GiB)": 13.7, "step": 79530, "train_speed(iter/s)": 1.532169 }, { "acc": 0.98386364, "epoch": 37.27911881884228, "grad_norm": 3.9158079624176025, "learning_rate": 1.6663868389799764e-06, "loss": 0.04714801, "memory(GiB)": 13.7, "step": 79535, "train_speed(iter/s)": 1.532171 }, { "acc": 0.9731945, "epoch": 37.28146238575111, "grad_norm": 3.6646294593811035, "learning_rate": 1.6658092995297301e-06, "loss": 0.05538154, "memory(GiB)": 13.7, "step": 79540, "train_speed(iter/s)": 1.532172 }, { "acc": 0.98374996, "epoch": 37.28380595265995, "grad_norm": 2.2793571949005127, "learning_rate": 1.665231840234494e-06, "loss": 0.03843369, "memory(GiB)": 13.7, "step": 79545, "train_speed(iter/s)": 1.532173 }, { "acc": 0.98652782, "epoch": 37.28614951956878, "grad_norm": 2.04544734954834, "learning_rate": 1.6646544611081458e-06, "loss": 0.03157562, "memory(GiB)": 13.7, "step": 79550, "train_speed(iter/s)": 1.532177 }, { "acc": 0.97791672, "epoch": 37.28849308647762, "grad_norm": 7.832448482513428, "learning_rate": 1.664077162164565e-06, "loss": 0.06343604, "memory(GiB)": 13.7, "step": 79555, "train_speed(iter/s)": 1.532177 }, { "acc": 0.97770824, "epoch": 37.29083665338646, "grad_norm": 1.2557131052017212, "learning_rate": 1.6634999434176285e-06, "loss": 0.04313628, "memory(GiB)": 13.7, "step": 79560, "train_speed(iter/s)": 1.532182 }, { "acc": 0.99196434, "epoch": 37.29318022029529, "grad_norm": 2.4846067428588867, "learning_rate": 1.662922804881212e-06, "loss": 0.01810894, "memory(GiB)": 13.7, "step": 79565, "train_speed(iter/s)": 1.53219 }, { "acc": 0.99541664, "epoch": 37.295523787204125, "grad_norm": 2.287926435470581, "learning_rate": 1.6623457465691848e-06, "loss": 0.01117079, "memory(GiB)": 13.7, "step": 79570, "train_speed(iter/s)": 1.53219 }, { "acc": 0.9833334, "epoch": 37.29786735411296, "grad_norm": 5.940863132476807, "learning_rate": 1.6617687684954189e-06, "loss": 0.03715407, "memory(GiB)": 13.7, "step": 79575, "train_speed(iter/s)": 1.532195 }, { "acc": 0.97402782, "epoch": 37.300210921021794, "grad_norm": 4.524386882781982, "learning_rate": 1.6611918706737857e-06, "loss": 0.03340992, "memory(GiB)": 13.7, "step": 79580, "train_speed(iter/s)": 1.532201 }, { "acc": 0.9854167, "epoch": 37.30255448793063, "grad_norm": 0.33704090118408203, "learning_rate": 1.6606150531181487e-06, "loss": 0.0316223, "memory(GiB)": 13.7, "step": 79585, "train_speed(iter/s)": 1.532206 }, { "acc": 0.98947306, "epoch": 37.30489805483946, "grad_norm": 3.0530643463134766, "learning_rate": 1.660038315842372e-06, "loss": 0.04277623, "memory(GiB)": 13.7, "step": 79590, "train_speed(iter/s)": 1.532206 }, { "acc": 0.97145834, "epoch": 37.307241621748304, "grad_norm": 4.179459095001221, "learning_rate": 1.6594616588603201e-06, "loss": 0.05903257, "memory(GiB)": 13.7, "step": 79595, "train_speed(iter/s)": 1.532208 }, { "acc": 0.97544641, "epoch": 37.30958518865714, "grad_norm": 2.59198260307312, "learning_rate": 1.6588850821858551e-06, "loss": 0.07202206, "memory(GiB)": 13.7, "step": 79600, "train_speed(iter/s)": 1.532211 }, { "acc": 0.98187504, "epoch": 37.31192875556597, "grad_norm": 1.965994954109192, "learning_rate": 1.6583085858328328e-06, "loss": 0.03883948, "memory(GiB)": 13.7, "step": 79605, "train_speed(iter/s)": 1.53221 }, { "acc": 0.9822917, "epoch": 37.31427232247481, "grad_norm": 3.892932891845703, "learning_rate": 1.657732169815113e-06, "loss": 0.04521305, "memory(GiB)": 13.7, "step": 79610, "train_speed(iter/s)": 1.532211 }, { "acc": 0.99184971, "epoch": 37.31661588938364, "grad_norm": 1.994552731513977, "learning_rate": 1.657155834146551e-06, "loss": 0.06642351, "memory(GiB)": 13.7, "step": 79615, "train_speed(iter/s)": 1.532211 }, { "acc": 0.99333334, "epoch": 37.318959456292475, "grad_norm": 0.007470280397683382, "learning_rate": 1.6565795788409989e-06, "loss": 0.02342357, "memory(GiB)": 13.7, "step": 79620, "train_speed(iter/s)": 1.532214 }, { "acc": 0.9770834, "epoch": 37.32130302320131, "grad_norm": 5.232789993286133, "learning_rate": 1.6560034039123069e-06, "loss": 0.06051837, "memory(GiB)": 13.7, "step": 79625, "train_speed(iter/s)": 1.532216 }, { "acc": 0.99020824, "epoch": 37.32364659011015, "grad_norm": 6.481282711029053, "learning_rate": 1.655427309374325e-06, "loss": 0.02995966, "memory(GiB)": 13.7, "step": 79630, "train_speed(iter/s)": 1.532217 }, { "acc": 0.99093132, "epoch": 37.325990157018985, "grad_norm": 2.240828037261963, "learning_rate": 1.6548512952409017e-06, "loss": 0.02756346, "memory(GiB)": 13.7, "step": 79635, "train_speed(iter/s)": 1.532214 }, { "acc": 0.98953381, "epoch": 37.32833372392782, "grad_norm": 1.5220431089401245, "learning_rate": 1.6542753615258839e-06, "loss": 0.02217514, "memory(GiB)": 13.7, "step": 79640, "train_speed(iter/s)": 1.532213 }, { "acc": 0.98619127, "epoch": 37.330677290836654, "grad_norm": 0.0016007605008780956, "learning_rate": 1.6536995082431106e-06, "loss": 0.06280019, "memory(GiB)": 13.7, "step": 79645, "train_speed(iter/s)": 1.532219 }, { "acc": 0.99298611, "epoch": 37.33302085774549, "grad_norm": 0.8128178119659424, "learning_rate": 1.6531237354064269e-06, "loss": 0.0253719, "memory(GiB)": 13.7, "step": 79650, "train_speed(iter/s)": 1.532222 }, { "acc": 0.98520832, "epoch": 37.33536442465432, "grad_norm": 0.0018495713593438268, "learning_rate": 1.6525480430296726e-06, "loss": 0.02608145, "memory(GiB)": 13.7, "step": 79655, "train_speed(iter/s)": 1.532224 }, { "acc": 0.97321424, "epoch": 37.33770799156316, "grad_norm": 2.1829280853271484, "learning_rate": 1.651972431126683e-06, "loss": 0.04611598, "memory(GiB)": 13.7, "step": 79660, "train_speed(iter/s)": 1.532223 }, { "acc": 0.9875, "epoch": 37.34005155847199, "grad_norm": 2.0898072719573975, "learning_rate": 1.6513968997112972e-06, "loss": 0.03206379, "memory(GiB)": 13.7, "step": 79665, "train_speed(iter/s)": 1.532224 }, { "acc": 0.971875, "epoch": 37.34239512538083, "grad_norm": 6.816630840301514, "learning_rate": 1.6508214487973458e-06, "loss": 0.05546699, "memory(GiB)": 13.7, "step": 79670, "train_speed(iter/s)": 1.532221 }, { "acc": 0.9927084, "epoch": 37.34473869228967, "grad_norm": 2.133139133453369, "learning_rate": 1.6502460783986634e-06, "loss": 0.01945564, "memory(GiB)": 13.7, "step": 79675, "train_speed(iter/s)": 1.532229 }, { "acc": 0.97104168, "epoch": 37.3470822591985, "grad_norm": 11.151482582092285, "learning_rate": 1.6496707885290775e-06, "loss": 0.08729227, "memory(GiB)": 13.7, "step": 79680, "train_speed(iter/s)": 1.532233 }, { "acc": 0.9822917, "epoch": 37.349425826107336, "grad_norm": 0.000580880674533546, "learning_rate": 1.6490955792024173e-06, "loss": 0.03728957, "memory(GiB)": 13.7, "step": 79685, "train_speed(iter/s)": 1.532234 }, { "acc": 0.98770828, "epoch": 37.35176939301617, "grad_norm": 2.723999261856079, "learning_rate": 1.6485204504325114e-06, "loss": 0.02678577, "memory(GiB)": 13.7, "step": 79690, "train_speed(iter/s)": 1.532233 }, { "acc": 0.98611107, "epoch": 37.354112959925004, "grad_norm": 0.5126209259033203, "learning_rate": 1.6479454022331792e-06, "loss": 0.02933081, "memory(GiB)": 13.7, "step": 79695, "train_speed(iter/s)": 1.532238 }, { "acc": 0.996875, "epoch": 37.35645652683384, "grad_norm": 0.5546888709068298, "learning_rate": 1.6473704346182464e-06, "loss": 0.00805295, "memory(GiB)": 13.7, "step": 79700, "train_speed(iter/s)": 1.532243 }, { "acc": 0.98294716, "epoch": 37.35880009374268, "grad_norm": 3.517442464828491, "learning_rate": 1.6467955476015335e-06, "loss": 0.03932951, "memory(GiB)": 13.7, "step": 79705, "train_speed(iter/s)": 1.532243 }, { "acc": 0.98874998, "epoch": 37.361143660651514, "grad_norm": 2.9015631675720215, "learning_rate": 1.6462207411968561e-06, "loss": 0.01874961, "memory(GiB)": 13.7, "step": 79710, "train_speed(iter/s)": 1.532252 }, { "acc": 0.98223219, "epoch": 37.36348722756035, "grad_norm": 3.7872743606567383, "learning_rate": 1.6456460154180346e-06, "loss": 0.05188003, "memory(GiB)": 13.7, "step": 79715, "train_speed(iter/s)": 1.532256 }, { "acc": 0.97592258, "epoch": 37.36583079446918, "grad_norm": 2.7531986236572266, "learning_rate": 1.6450713702788795e-06, "loss": 0.04316943, "memory(GiB)": 13.7, "step": 79720, "train_speed(iter/s)": 1.532259 }, { "acc": 0.9958334, "epoch": 37.36817436137802, "grad_norm": 1.8630691766738892, "learning_rate": 1.6444968057932051e-06, "loss": 0.03487628, "memory(GiB)": 13.7, "step": 79725, "train_speed(iter/s)": 1.532259 }, { "acc": 0.9947917, "epoch": 37.37051792828685, "grad_norm": 0.5436200499534607, "learning_rate": 1.643922321974824e-06, "loss": 0.03443992, "memory(GiB)": 13.7, "step": 79730, "train_speed(iter/s)": 1.53226 }, { "acc": 0.98120041, "epoch": 37.372861495195686, "grad_norm": 2.233863353729248, "learning_rate": 1.6433479188375415e-06, "loss": 0.04325867, "memory(GiB)": 13.7, "step": 79735, "train_speed(iter/s)": 1.532258 }, { "acc": 0.99333334, "epoch": 37.37520506210452, "grad_norm": 1.6765717267990112, "learning_rate": 1.642773596395166e-06, "loss": 0.0205487, "memory(GiB)": 13.7, "step": 79740, "train_speed(iter/s)": 1.532262 }, { "acc": 0.98093758, "epoch": 37.37754862901336, "grad_norm": 2.387199640274048, "learning_rate": 1.6421993546615034e-06, "loss": 0.03391946, "memory(GiB)": 13.7, "step": 79745, "train_speed(iter/s)": 1.532266 }, { "acc": 0.9760417, "epoch": 37.379892195922196, "grad_norm": 7.812667369842529, "learning_rate": 1.6416251936503558e-06, "loss": 0.04577926, "memory(GiB)": 13.7, "step": 79750, "train_speed(iter/s)": 1.532271 }, { "acc": 0.9833334, "epoch": 37.38223576283103, "grad_norm": 3.2166030406951904, "learning_rate": 1.6410511133755222e-06, "loss": 0.07253809, "memory(GiB)": 13.7, "step": 79755, "train_speed(iter/s)": 1.532281 }, { "acc": 0.98708334, "epoch": 37.384579329739864, "grad_norm": 4.9862751960754395, "learning_rate": 1.6404771138508028e-06, "loss": 0.03848684, "memory(GiB)": 13.7, "step": 79760, "train_speed(iter/s)": 1.532284 }, { "acc": 0.98187504, "epoch": 37.3869228966487, "grad_norm": 3.111149549484253, "learning_rate": 1.6399031950899968e-06, "loss": 0.02673662, "memory(GiB)": 13.7, "step": 79765, "train_speed(iter/s)": 1.532288 }, { "acc": 0.98708334, "epoch": 37.38926646355753, "grad_norm": 0.7297520637512207, "learning_rate": 1.639329357106896e-06, "loss": 0.02539279, "memory(GiB)": 13.7, "step": 79770, "train_speed(iter/s)": 1.532293 }, { "acc": 0.98125, "epoch": 37.39161003046637, "grad_norm": 1.5297951698303223, "learning_rate": 1.6387555999152956e-06, "loss": 0.03909807, "memory(GiB)": 13.7, "step": 79775, "train_speed(iter/s)": 1.532296 }, { "acc": 0.97719698, "epoch": 37.3939535973752, "grad_norm": 5.421430587768555, "learning_rate": 1.6381819235289858e-06, "loss": 0.04609453, "memory(GiB)": 13.7, "step": 79780, "train_speed(iter/s)": 1.532293 }, { "acc": 0.9791666, "epoch": 37.39629716428404, "grad_norm": 8.20822811126709, "learning_rate": 1.6376083279617585e-06, "loss": 0.08609294, "memory(GiB)": 13.7, "step": 79785, "train_speed(iter/s)": 1.532296 }, { "acc": 0.9958334, "epoch": 37.39864073119288, "grad_norm": 0.012738394550979137, "learning_rate": 1.6370348132273988e-06, "loss": 0.01224098, "memory(GiB)": 13.7, "step": 79790, "train_speed(iter/s)": 1.532295 }, { "acc": 0.98217258, "epoch": 37.40098429810171, "grad_norm": 6.96317195892334, "learning_rate": 1.6364613793396908e-06, "loss": 0.04412303, "memory(GiB)": 13.7, "step": 79795, "train_speed(iter/s)": 1.532295 }, { "acc": 0.9916666, "epoch": 37.403327865010546, "grad_norm": 0.002592161763459444, "learning_rate": 1.635888026312419e-06, "loss": 0.04357303, "memory(GiB)": 13.7, "step": 79800, "train_speed(iter/s)": 1.532298 }, { "acc": 0.98154602, "epoch": 37.40567143191938, "grad_norm": 5.605550765991211, "learning_rate": 1.6353147541593678e-06, "loss": 0.07162305, "memory(GiB)": 13.7, "step": 79805, "train_speed(iter/s)": 1.532303 }, { "acc": 0.98162918, "epoch": 37.408014998828214, "grad_norm": 5.9704413414001465, "learning_rate": 1.6347415628943128e-06, "loss": 0.05921038, "memory(GiB)": 13.7, "step": 79810, "train_speed(iter/s)": 1.532305 }, { "acc": 0.9864584, "epoch": 37.41035856573705, "grad_norm": 3.6181557178497314, "learning_rate": 1.6341684525310328e-06, "loss": 0.06213605, "memory(GiB)": 13.7, "step": 79815, "train_speed(iter/s)": 1.532307 }, { "acc": 0.99548607, "epoch": 37.41270213264589, "grad_norm": 1.5304737091064453, "learning_rate": 1.6335954230833058e-06, "loss": 0.00680833, "memory(GiB)": 13.7, "step": 79820, "train_speed(iter/s)": 1.532314 }, { "acc": 0.98298607, "epoch": 37.415045699554724, "grad_norm": 2.9847259521484375, "learning_rate": 1.633022474564902e-06, "loss": 0.04832649, "memory(GiB)": 13.7, "step": 79825, "train_speed(iter/s)": 1.532318 }, { "acc": 0.98348217, "epoch": 37.41738926646356, "grad_norm": 0.887654721736908, "learning_rate": 1.6324496069895968e-06, "loss": 0.04473391, "memory(GiB)": 13.7, "step": 79830, "train_speed(iter/s)": 1.532324 }, { "acc": 0.98604164, "epoch": 37.41973283337239, "grad_norm": 0.01885097101330757, "learning_rate": 1.6318768203711563e-06, "loss": 0.03699821, "memory(GiB)": 13.7, "step": 79835, "train_speed(iter/s)": 1.532327 }, { "acc": 0.99258928, "epoch": 37.42207640028123, "grad_norm": 2.148667335510254, "learning_rate": 1.6313041147233499e-06, "loss": 0.03375726, "memory(GiB)": 13.7, "step": 79840, "train_speed(iter/s)": 1.532332 }, { "acc": 0.99020834, "epoch": 37.42441996719006, "grad_norm": 0.003025568788871169, "learning_rate": 1.630731490059946e-06, "loss": 0.0290346, "memory(GiB)": 13.7, "step": 79845, "train_speed(iter/s)": 1.532336 }, { "acc": 0.99508934, "epoch": 37.426763534098896, "grad_norm": 3.250667095184326, "learning_rate": 1.6301589463947049e-06, "loss": 0.02453832, "memory(GiB)": 13.7, "step": 79850, "train_speed(iter/s)": 1.532337 }, { "acc": 0.98592262, "epoch": 37.42910710100773, "grad_norm": 3.8934261798858643, "learning_rate": 1.6295864837413902e-06, "loss": 0.03904417, "memory(GiB)": 13.7, "step": 79855, "train_speed(iter/s)": 1.532337 }, { "acc": 0.98208332, "epoch": 37.43145066791657, "grad_norm": 3.694908857345581, "learning_rate": 1.629014102113764e-06, "loss": 0.10136315, "memory(GiB)": 13.7, "step": 79860, "train_speed(iter/s)": 1.532338 }, { "acc": 0.97832794, "epoch": 37.433794234825406, "grad_norm": 0.7697132229804993, "learning_rate": 1.6284418015255804e-06, "loss": 0.06191454, "memory(GiB)": 13.7, "step": 79865, "train_speed(iter/s)": 1.532336 }, { "acc": 0.98675594, "epoch": 37.43613780173424, "grad_norm": 2.466967821121216, "learning_rate": 1.6278695819905978e-06, "loss": 0.0444544, "memory(GiB)": 13.7, "step": 79870, "train_speed(iter/s)": 1.532345 }, { "acc": 0.99437504, "epoch": 37.438481368643075, "grad_norm": 0.012667321600019932, "learning_rate": 1.6272974435225725e-06, "loss": 0.02607165, "memory(GiB)": 13.7, "step": 79875, "train_speed(iter/s)": 1.532345 }, { "acc": 0.98883934, "epoch": 37.44082493555191, "grad_norm": 2.8865458965301514, "learning_rate": 1.626725386135255e-06, "loss": 0.03277763, "memory(GiB)": 13.7, "step": 79880, "train_speed(iter/s)": 1.532348 }, { "acc": 0.98178034, "epoch": 37.44316850246074, "grad_norm": 7.258642673492432, "learning_rate": 1.6261534098423934e-06, "loss": 0.04127447, "memory(GiB)": 13.7, "step": 79885, "train_speed(iter/s)": 1.532354 }, { "acc": 0.97833328, "epoch": 37.44551206936958, "grad_norm": 0.05562319606542587, "learning_rate": 1.6255815146577389e-06, "loss": 0.05153553, "memory(GiB)": 13.7, "step": 79890, "train_speed(iter/s)": 1.532359 }, { "acc": 0.98469696, "epoch": 37.44785563627842, "grad_norm": 5.020891189575195, "learning_rate": 1.6250097005950385e-06, "loss": 0.0454106, "memory(GiB)": 13.7, "step": 79895, "train_speed(iter/s)": 1.532363 }, { "acc": 0.98069439, "epoch": 37.45019920318725, "grad_norm": 4.011064052581787, "learning_rate": 1.6244379676680338e-06, "loss": 0.07489244, "memory(GiB)": 13.7, "step": 79900, "train_speed(iter/s)": 1.532359 }, { "acc": 0.9582386, "epoch": 37.45254277009609, "grad_norm": 5.306337356567383, "learning_rate": 1.6238663158904693e-06, "loss": 0.09544246, "memory(GiB)": 13.7, "step": 79905, "train_speed(iter/s)": 1.532358 }, { "acc": 0.98496246, "epoch": 37.45488633700492, "grad_norm": 2.849893569946289, "learning_rate": 1.6232947452760873e-06, "loss": 0.05615723, "memory(GiB)": 13.7, "step": 79910, "train_speed(iter/s)": 1.532355 }, { "acc": 0.978125, "epoch": 37.457229903913756, "grad_norm": 3.0715317726135254, "learning_rate": 1.6227232558386227e-06, "loss": 0.05760642, "memory(GiB)": 13.7, "step": 79915, "train_speed(iter/s)": 1.532354 }, { "acc": 0.98490524, "epoch": 37.45957347082259, "grad_norm": 4.190071105957031, "learning_rate": 1.6221518475918158e-06, "loss": 0.04285234, "memory(GiB)": 13.7, "step": 79920, "train_speed(iter/s)": 1.532361 }, { "acc": 0.99541664, "epoch": 37.461917037731425, "grad_norm": 1.0730087757110596, "learning_rate": 1.6215805205493984e-06, "loss": 0.00786118, "memory(GiB)": 13.7, "step": 79925, "train_speed(iter/s)": 1.532364 }, { "acc": 0.97416668, "epoch": 37.46426060464026, "grad_norm": 2.1884241104125977, "learning_rate": 1.6210092747251045e-06, "loss": 0.04731363, "memory(GiB)": 13.7, "step": 79930, "train_speed(iter/s)": 1.532366 }, { "acc": 0.98013887, "epoch": 37.4666041715491, "grad_norm": 0.003186931600794196, "learning_rate": 1.6204381101326667e-06, "loss": 0.04920877, "memory(GiB)": 13.7, "step": 79935, "train_speed(iter/s)": 1.532362 }, { "acc": 0.98291664, "epoch": 37.468947738457935, "grad_norm": 2.179152727127075, "learning_rate": 1.6198670267858109e-06, "loss": 0.08266029, "memory(GiB)": 13.7, "step": 79940, "train_speed(iter/s)": 1.532365 }, { "acc": 0.99196434, "epoch": 37.47129130536677, "grad_norm": 3.3603222370147705, "learning_rate": 1.619296024698265e-06, "loss": 0.02640194, "memory(GiB)": 13.7, "step": 79945, "train_speed(iter/s)": 1.532367 }, { "acc": 0.99508934, "epoch": 37.4736348722756, "grad_norm": 1.8307323455810547, "learning_rate": 1.6187251038837573e-06, "loss": 0.03266005, "memory(GiB)": 13.7, "step": 79950, "train_speed(iter/s)": 1.532367 }, { "acc": 0.98425598, "epoch": 37.47597843918444, "grad_norm": 3.7870090007781982, "learning_rate": 1.6181542643560072e-06, "loss": 0.04933782, "memory(GiB)": 13.7, "step": 79955, "train_speed(iter/s)": 1.532368 }, { "acc": 0.95611115, "epoch": 37.47832200609327, "grad_norm": 5.341716289520264, "learning_rate": 1.6175835061287351e-06, "loss": 0.07814227, "memory(GiB)": 13.7, "step": 79960, "train_speed(iter/s)": 1.532373 }, { "acc": 0.98217258, "epoch": 37.480665573002106, "grad_norm": 4.006199836730957, "learning_rate": 1.6170128292156618e-06, "loss": 0.03850419, "memory(GiB)": 13.7, "step": 79965, "train_speed(iter/s)": 1.53237 }, { "acc": 0.9840476, "epoch": 37.48300913991095, "grad_norm": 5.779938697814941, "learning_rate": 1.6164422336305069e-06, "loss": 0.04518933, "memory(GiB)": 13.7, "step": 79970, "train_speed(iter/s)": 1.532377 }, { "acc": 0.97706852, "epoch": 37.48535270681978, "grad_norm": 4.31104850769043, "learning_rate": 1.6158717193869808e-06, "loss": 0.03947577, "memory(GiB)": 13.7, "step": 79975, "train_speed(iter/s)": 1.532381 }, { "acc": 0.98068447, "epoch": 37.487696273728616, "grad_norm": 2.2188055515289307, "learning_rate": 1.615301286498799e-06, "loss": 0.07912611, "memory(GiB)": 13.7, "step": 79980, "train_speed(iter/s)": 1.532386 }, { "acc": 0.97205362, "epoch": 37.49003984063745, "grad_norm": 1.2602293491363525, "learning_rate": 1.6147309349796738e-06, "loss": 0.05342304, "memory(GiB)": 13.7, "step": 79985, "train_speed(iter/s)": 1.532387 }, { "acc": 0.98832798, "epoch": 37.492383407546285, "grad_norm": 2.452392101287842, "learning_rate": 1.6141606648433147e-06, "loss": 0.08529013, "memory(GiB)": 13.7, "step": 79990, "train_speed(iter/s)": 1.53239 }, { "acc": 0.99333324, "epoch": 37.49472697445512, "grad_norm": 0.07619619369506836, "learning_rate": 1.613590476103427e-06, "loss": 0.02638578, "memory(GiB)": 13.7, "step": 79995, "train_speed(iter/s)": 1.532395 }, { "acc": 0.9729167, "epoch": 37.497070541363954, "grad_norm": 4.606560707092285, "learning_rate": 1.6130203687737191e-06, "loss": 0.08845398, "memory(GiB)": 13.7, "step": 80000, "train_speed(iter/s)": 1.532396 }, { "epoch": 37.497070541363954, "eval_acc": 0.7787482295811695, "eval_loss": 1.2415568828582764, "eval_runtime": 143.9544, "eval_samples_per_second": 56.046, "eval_steps_per_second": 7.009, "step": 80000 }, { "acc": 0.9817709, "epoch": 37.49941410827279, "grad_norm": 4.798710346221924, "learning_rate": 1.6124503428678908e-06, "loss": 0.04682907, "memory(GiB)": 13.7, "step": 80005, "train_speed(iter/s)": 1.527305 }, { "acc": 0.98083334, "epoch": 37.50175767518163, "grad_norm": 5.292287826538086, "learning_rate": 1.6118803983996475e-06, "loss": 0.03039321, "memory(GiB)": 13.7, "step": 80010, "train_speed(iter/s)": 1.527308 }, { "acc": 0.990625, "epoch": 37.504101242090464, "grad_norm": 0.0014401618391275406, "learning_rate": 1.6113105353826858e-06, "loss": 0.0317152, "memory(GiB)": 13.7, "step": 80015, "train_speed(iter/s)": 1.527309 }, { "acc": 0.99821434, "epoch": 37.5064448089993, "grad_norm": 3.205521821975708, "learning_rate": 1.610740753830704e-06, "loss": 0.01555801, "memory(GiB)": 13.7, "step": 80020, "train_speed(iter/s)": 1.52731 }, { "acc": 0.9879735, "epoch": 37.50878837590813, "grad_norm": 3.9082138538360596, "learning_rate": 1.6101710537573995e-06, "loss": 0.03270392, "memory(GiB)": 13.7, "step": 80025, "train_speed(iter/s)": 1.527308 }, { "acc": 0.97822914, "epoch": 37.51113194281697, "grad_norm": 1.7769391536712646, "learning_rate": 1.6096014351764635e-06, "loss": 0.0334961, "memory(GiB)": 13.7, "step": 80030, "train_speed(iter/s)": 1.527309 }, { "acc": 0.98184519, "epoch": 37.5134755097258, "grad_norm": 0.05524930730462074, "learning_rate": 1.6090318981015896e-06, "loss": 0.04442688, "memory(GiB)": 13.7, "step": 80035, "train_speed(iter/s)": 1.527311 }, { "acc": 0.96363096, "epoch": 37.515819076634635, "grad_norm": 4.475292205810547, "learning_rate": 1.6084624425464682e-06, "loss": 0.05587074, "memory(GiB)": 13.7, "step": 80040, "train_speed(iter/s)": 1.527319 }, { "acc": 0.97979164, "epoch": 37.51816264354348, "grad_norm": 1.5059149265289307, "learning_rate": 1.6078930685247853e-06, "loss": 0.03021604, "memory(GiB)": 13.7, "step": 80045, "train_speed(iter/s)": 1.527323 }, { "acc": 0.9895834, "epoch": 37.52050621045231, "grad_norm": 0.021870601922273636, "learning_rate": 1.6073237760502268e-06, "loss": 0.02778697, "memory(GiB)": 13.7, "step": 80050, "train_speed(iter/s)": 1.527325 }, { "acc": 0.98708334, "epoch": 37.522849777361145, "grad_norm": 6.106324195861816, "learning_rate": 1.6067545651364765e-06, "loss": 0.08605974, "memory(GiB)": 13.7, "step": 80055, "train_speed(iter/s)": 1.527331 }, { "acc": 0.9802084, "epoch": 37.52519334426998, "grad_norm": 1.9607599973678589, "learning_rate": 1.606185435797218e-06, "loss": 0.03597161, "memory(GiB)": 13.7, "step": 80060, "train_speed(iter/s)": 1.527331 }, { "acc": 0.99499998, "epoch": 37.527536911178814, "grad_norm": 3.2018837928771973, "learning_rate": 1.6056163880461318e-06, "loss": 0.01993699, "memory(GiB)": 13.7, "step": 80065, "train_speed(iter/s)": 1.52733 }, { "acc": 0.9854167, "epoch": 37.52988047808765, "grad_norm": 7.176655292510986, "learning_rate": 1.6050474218968925e-06, "loss": 0.04185273, "memory(GiB)": 13.7, "step": 80070, "train_speed(iter/s)": 1.527334 }, { "acc": 0.9890625, "epoch": 37.53222404499648, "grad_norm": 0.9971855282783508, "learning_rate": 1.6044785373631783e-06, "loss": 0.02426986, "memory(GiB)": 13.7, "step": 80075, "train_speed(iter/s)": 1.527339 }, { "acc": 0.96794643, "epoch": 37.53456761190532, "grad_norm": 2.319469928741455, "learning_rate": 1.6039097344586651e-06, "loss": 0.0599958, "memory(GiB)": 13.7, "step": 80080, "train_speed(iter/s)": 1.527341 }, { "acc": 0.97946434, "epoch": 37.53691117881416, "grad_norm": 0.005308789201080799, "learning_rate": 1.6033410131970237e-06, "loss": 0.0606609, "memory(GiB)": 13.7, "step": 80085, "train_speed(iter/s)": 1.527347 }, { "acc": 0.98500004, "epoch": 37.53925474572299, "grad_norm": 3.9179978370666504, "learning_rate": 1.6027723735919212e-06, "loss": 0.04778125, "memory(GiB)": 13.7, "step": 80090, "train_speed(iter/s)": 1.527354 }, { "acc": 0.98696423, "epoch": 37.54159831263183, "grad_norm": 4.965671062469482, "learning_rate": 1.6022038156570291e-06, "loss": 0.03975881, "memory(GiB)": 13.7, "step": 80095, "train_speed(iter/s)": 1.527357 }, { "acc": 0.9933897, "epoch": 37.54394187954066, "grad_norm": 2.395029306411743, "learning_rate": 1.6016353394060147e-06, "loss": 0.02689911, "memory(GiB)": 13.7, "step": 80100, "train_speed(iter/s)": 1.527354 }, { "acc": 0.98083334, "epoch": 37.546285446449495, "grad_norm": 5.38957405090332, "learning_rate": 1.6010669448525386e-06, "loss": 0.05322274, "memory(GiB)": 13.7, "step": 80105, "train_speed(iter/s)": 1.527356 }, { "acc": 0.98758926, "epoch": 37.54862901335833, "grad_norm": 1.681778907775879, "learning_rate": 1.6004986320102653e-06, "loss": 0.03697093, "memory(GiB)": 13.7, "step": 80110, "train_speed(iter/s)": 1.527361 }, { "acc": 0.9864583, "epoch": 37.550972580267164, "grad_norm": 2.8641059398651123, "learning_rate": 1.5999304008928569e-06, "loss": 0.03869535, "memory(GiB)": 13.7, "step": 80115, "train_speed(iter/s)": 1.527363 }, { "acc": 0.99035797, "epoch": 37.553316147176005, "grad_norm": 1.7140213251113892, "learning_rate": 1.5993622515139678e-06, "loss": 0.02499497, "memory(GiB)": 13.7, "step": 80120, "train_speed(iter/s)": 1.527365 }, { "acc": 0.9858366, "epoch": 37.55565971408484, "grad_norm": 2.509845018386841, "learning_rate": 1.5987941838872581e-06, "loss": 0.05004715, "memory(GiB)": 13.7, "step": 80125, "train_speed(iter/s)": 1.527367 }, { "acc": 1.0, "epoch": 37.558003280993674, "grad_norm": 1.3240299224853516, "learning_rate": 1.5982261980263797e-06, "loss": 0.02825125, "memory(GiB)": 13.7, "step": 80130, "train_speed(iter/s)": 1.52737 }, { "acc": 0.98506947, "epoch": 37.56034684790251, "grad_norm": 4.325578212738037, "learning_rate": 1.5976582939449855e-06, "loss": 0.0421105, "memory(GiB)": 13.7, "step": 80135, "train_speed(iter/s)": 1.527377 }, { "acc": 0.99229164, "epoch": 37.56269041481134, "grad_norm": 2.5716371536254883, "learning_rate": 1.597090471656729e-06, "loss": 0.01480913, "memory(GiB)": 13.7, "step": 80140, "train_speed(iter/s)": 1.527381 }, { "acc": 0.97342262, "epoch": 37.56503398172018, "grad_norm": 7.649136066436768, "learning_rate": 1.5965227311752546e-06, "loss": 0.06410484, "memory(GiB)": 13.7, "step": 80145, "train_speed(iter/s)": 1.527382 }, { "acc": 0.99219704, "epoch": 37.56737754862901, "grad_norm": 2.815006732940674, "learning_rate": 1.5959550725142112e-06, "loss": 0.03710262, "memory(GiB)": 13.7, "step": 80150, "train_speed(iter/s)": 1.527383 }, { "acc": 0.98553028, "epoch": 37.569721115537845, "grad_norm": 1.0758808851242065, "learning_rate": 1.5953874956872443e-06, "loss": 0.03632059, "memory(GiB)": 13.7, "step": 80155, "train_speed(iter/s)": 1.527393 }, { "acc": 0.99321423, "epoch": 37.57206468244669, "grad_norm": 2.3721559047698975, "learning_rate": 1.5948200007079936e-06, "loss": 0.0337312, "memory(GiB)": 13.7, "step": 80160, "train_speed(iter/s)": 1.527395 }, { "acc": 0.9832386, "epoch": 37.57440824935552, "grad_norm": 3.3343777656555176, "learning_rate": 1.5942525875901035e-06, "loss": 0.0426163, "memory(GiB)": 13.7, "step": 80165, "train_speed(iter/s)": 1.527398 }, { "acc": 0.98614578, "epoch": 37.576751816264355, "grad_norm": 3.056241512298584, "learning_rate": 1.5936852563472085e-06, "loss": 0.02451855, "memory(GiB)": 13.7, "step": 80170, "train_speed(iter/s)": 1.5274 }, { "acc": 0.98907204, "epoch": 37.57909538317319, "grad_norm": 2.1488261222839355, "learning_rate": 1.59311800699295e-06, "loss": 0.03264587, "memory(GiB)": 13.7, "step": 80175, "train_speed(iter/s)": 1.527401 }, { "acc": 0.99437504, "epoch": 37.581438950082024, "grad_norm": 0.0008942665299400687, "learning_rate": 1.5925508395409585e-06, "loss": 0.01960651, "memory(GiB)": 13.7, "step": 80180, "train_speed(iter/s)": 1.527408 }, { "acc": 0.97994938, "epoch": 37.58378251699086, "grad_norm": 1.6469651460647583, "learning_rate": 1.5919837540048693e-06, "loss": 0.04009865, "memory(GiB)": 13.7, "step": 80185, "train_speed(iter/s)": 1.527412 }, { "acc": 0.99619045, "epoch": 37.58612608389969, "grad_norm": 3.679044246673584, "learning_rate": 1.5914167503983144e-06, "loss": 0.01664307, "memory(GiB)": 13.7, "step": 80190, "train_speed(iter/s)": 1.527416 }, { "acc": 0.9994318, "epoch": 37.588469650808534, "grad_norm": 0.003659689100459218, "learning_rate": 1.5908498287349186e-06, "loss": 0.00972883, "memory(GiB)": 13.7, "step": 80195, "train_speed(iter/s)": 1.527423 }, { "acc": 0.97674675, "epoch": 37.59081321771737, "grad_norm": 4.767640590667725, "learning_rate": 1.590282989028312e-06, "loss": 0.05324562, "memory(GiB)": 13.7, "step": 80200, "train_speed(iter/s)": 1.52743 }, { "acc": 0.990625, "epoch": 37.5931567846262, "grad_norm": 6.454934597015381, "learning_rate": 1.5897162312921191e-06, "loss": 0.02537311, "memory(GiB)": 13.7, "step": 80205, "train_speed(iter/s)": 1.527433 }, { "acc": 0.97637768, "epoch": 37.59550035153504, "grad_norm": 1.3043376207351685, "learning_rate": 1.589149555539964e-06, "loss": 0.04722528, "memory(GiB)": 13.7, "step": 80210, "train_speed(iter/s)": 1.527431 }, { "acc": 0.9862649, "epoch": 37.59784391844387, "grad_norm": 0.05388757586479187, "learning_rate": 1.5885829617854664e-06, "loss": 0.03528355, "memory(GiB)": 13.7, "step": 80215, "train_speed(iter/s)": 1.527434 }, { "acc": 0.9879261, "epoch": 37.600187485352706, "grad_norm": 3.608733654022217, "learning_rate": 1.5880164500422443e-06, "loss": 0.0274729, "memory(GiB)": 13.7, "step": 80220, "train_speed(iter/s)": 1.527441 }, { "acc": 0.99020824, "epoch": 37.60253105226154, "grad_norm": 0.0007177985389716923, "learning_rate": 1.5874500203239152e-06, "loss": 0.02378565, "memory(GiB)": 13.7, "step": 80225, "train_speed(iter/s)": 1.527448 }, { "acc": 0.99354172, "epoch": 37.604874619170374, "grad_norm": 0.7453346848487854, "learning_rate": 1.586883672644097e-06, "loss": 0.03746333, "memory(GiB)": 13.7, "step": 80230, "train_speed(iter/s)": 1.527449 }, { "acc": 0.98820515, "epoch": 37.607218186079216, "grad_norm": 3.3227527141571045, "learning_rate": 1.5863174070163984e-06, "loss": 0.03247984, "memory(GiB)": 13.7, "step": 80235, "train_speed(iter/s)": 1.52745 }, { "acc": 0.98133926, "epoch": 37.60956175298805, "grad_norm": 0.00086398416897282, "learning_rate": 1.5857512234544337e-06, "loss": 0.04514534, "memory(GiB)": 13.7, "step": 80240, "train_speed(iter/s)": 1.527455 }, { "acc": 0.99279766, "epoch": 37.611905319896884, "grad_norm": 2.806173086166382, "learning_rate": 1.5851851219718123e-06, "loss": 0.02701392, "memory(GiB)": 13.7, "step": 80245, "train_speed(iter/s)": 1.527455 }, { "acc": 0.99002972, "epoch": 37.61424888680572, "grad_norm": 4.106847763061523, "learning_rate": 1.5846191025821406e-06, "loss": 0.06210694, "memory(GiB)": 13.7, "step": 80250, "train_speed(iter/s)": 1.52746 }, { "acc": 0.996875, "epoch": 37.61659245371455, "grad_norm": 2.378310203552246, "learning_rate": 1.5840531652990224e-06, "loss": 0.01934648, "memory(GiB)": 13.7, "step": 80255, "train_speed(iter/s)": 1.527462 }, { "acc": 0.99344692, "epoch": 37.61893602062339, "grad_norm": 2.7036521434783936, "learning_rate": 1.5834873101360615e-06, "loss": 0.05088855, "memory(GiB)": 13.7, "step": 80260, "train_speed(iter/s)": 1.527466 }, { "acc": 0.98173618, "epoch": 37.62127958753222, "grad_norm": 1.0885205268859863, "learning_rate": 1.5829215371068604e-06, "loss": 0.05032341, "memory(GiB)": 13.7, "step": 80265, "train_speed(iter/s)": 1.527476 }, { "acc": 0.98842258, "epoch": 37.62362315444106, "grad_norm": 2.8780453205108643, "learning_rate": 1.5823558462250192e-06, "loss": 0.04552725, "memory(GiB)": 13.7, "step": 80270, "train_speed(iter/s)": 1.52748 }, { "acc": 0.9885417, "epoch": 37.6259667213499, "grad_norm": 5.342648029327393, "learning_rate": 1.5817902375041327e-06, "loss": 0.02688069, "memory(GiB)": 13.7, "step": 80275, "train_speed(iter/s)": 1.527482 }, { "acc": 0.99340277, "epoch": 37.62831028825873, "grad_norm": 0.0019989560823887587, "learning_rate": 1.5812247109577972e-06, "loss": 0.0165721, "memory(GiB)": 13.7, "step": 80280, "train_speed(iter/s)": 1.527484 }, { "acc": 0.97071428, "epoch": 37.630653855167566, "grad_norm": 6.004054546356201, "learning_rate": 1.5806592665996087e-06, "loss": 0.05038362, "memory(GiB)": 13.7, "step": 80285, "train_speed(iter/s)": 1.527486 }, { "acc": 0.990625, "epoch": 37.6329974220764, "grad_norm": 1.858207106590271, "learning_rate": 1.5800939044431556e-06, "loss": 0.02091738, "memory(GiB)": 13.7, "step": 80290, "train_speed(iter/s)": 1.527486 }, { "acc": 0.97874994, "epoch": 37.635340988985234, "grad_norm": 2.5473363399505615, "learning_rate": 1.5795286245020273e-06, "loss": 0.04042505, "memory(GiB)": 13.7, "step": 80295, "train_speed(iter/s)": 1.527492 }, { "acc": 0.98479166, "epoch": 37.63768455589407, "grad_norm": 0.0020367256365716457, "learning_rate": 1.5789634267898116e-06, "loss": 0.02439997, "memory(GiB)": 13.7, "step": 80300, "train_speed(iter/s)": 1.527489 }, { "acc": 0.97583332, "epoch": 37.6400281228029, "grad_norm": 2.619870662689209, "learning_rate": 1.578398311320096e-06, "loss": 0.07348928, "memory(GiB)": 13.7, "step": 80305, "train_speed(iter/s)": 1.527496 }, { "acc": 0.98871117, "epoch": 37.642371689711744, "grad_norm": 5.549473762512207, "learning_rate": 1.577833278106461e-06, "loss": 0.05574183, "memory(GiB)": 13.7, "step": 80310, "train_speed(iter/s)": 1.5275 }, { "acc": 0.98074656, "epoch": 37.64471525662058, "grad_norm": 2.0238115787506104, "learning_rate": 1.5772683271624898e-06, "loss": 0.05110538, "memory(GiB)": 13.7, "step": 80315, "train_speed(iter/s)": 1.527503 }, { "acc": 0.98681545, "epoch": 37.64705882352941, "grad_norm": 5.401265621185303, "learning_rate": 1.5767034585017632e-06, "loss": 0.02510348, "memory(GiB)": 13.7, "step": 80320, "train_speed(iter/s)": 1.527508 }, { "acc": 0.99035711, "epoch": 37.64940239043825, "grad_norm": 0.04677338898181915, "learning_rate": 1.576138672137856e-06, "loss": 0.02393568, "memory(GiB)": 13.7, "step": 80325, "train_speed(iter/s)": 1.527516 }, { "acc": 0.96270838, "epoch": 37.65174595734708, "grad_norm": 6.288804531097412, "learning_rate": 1.5755739680843452e-06, "loss": 0.075606, "memory(GiB)": 13.7, "step": 80330, "train_speed(iter/s)": 1.527519 }, { "acc": 0.99375, "epoch": 37.654089524255916, "grad_norm": 1.770644187927246, "learning_rate": 1.5750093463548066e-06, "loss": 0.04518477, "memory(GiB)": 13.7, "step": 80335, "train_speed(iter/s)": 1.527521 }, { "acc": 0.97979164, "epoch": 37.65643309116475, "grad_norm": 0.007716840133070946, "learning_rate": 1.5744448069628073e-06, "loss": 0.04927211, "memory(GiB)": 13.7, "step": 80340, "train_speed(iter/s)": 1.527527 }, { "acc": 0.98094692, "epoch": 37.658776658073585, "grad_norm": 3.7491626739501953, "learning_rate": 1.573880349921921e-06, "loss": 0.05035007, "memory(GiB)": 13.7, "step": 80345, "train_speed(iter/s)": 1.527529 }, { "acc": 0.97008924, "epoch": 37.661120224982426, "grad_norm": 3.5039546489715576, "learning_rate": 1.5733159752457125e-06, "loss": 0.06417025, "memory(GiB)": 13.7, "step": 80350, "train_speed(iter/s)": 1.52753 }, { "acc": 0.9905303, "epoch": 37.66346379189126, "grad_norm": 1.7056446075439453, "learning_rate": 1.5727516829477488e-06, "loss": 0.03636228, "memory(GiB)": 13.7, "step": 80355, "train_speed(iter/s)": 1.52753 }, { "acc": 0.96750946, "epoch": 37.665807358800095, "grad_norm": 2.3446922302246094, "learning_rate": 1.5721874730415952e-06, "loss": 0.07483683, "memory(GiB)": 13.7, "step": 80360, "train_speed(iter/s)": 1.527535 }, { "acc": 0.98943901, "epoch": 37.66815092570893, "grad_norm": 3.9862146377563477, "learning_rate": 1.5716233455408102e-06, "loss": 0.04191773, "memory(GiB)": 13.7, "step": 80365, "train_speed(iter/s)": 1.527538 }, { "acc": 0.96958332, "epoch": 37.67049449261776, "grad_norm": 2.9359307289123535, "learning_rate": 1.5710593004589549e-06, "loss": 0.08226748, "memory(GiB)": 13.7, "step": 80370, "train_speed(iter/s)": 1.527542 }, { "acc": 0.9916667, "epoch": 37.6728380595266, "grad_norm": 4.673007965087891, "learning_rate": 1.5704953378095894e-06, "loss": 0.02099231, "memory(GiB)": 13.7, "step": 80375, "train_speed(iter/s)": 1.527545 }, { "acc": 0.98036709, "epoch": 37.67518162643543, "grad_norm": 4.448879241943359, "learning_rate": 1.569931457606268e-06, "loss": 0.06643398, "memory(GiB)": 13.7, "step": 80380, "train_speed(iter/s)": 1.52755 }, { "acc": 0.98187008, "epoch": 37.67752519334427, "grad_norm": 8.122062718030065e-05, "learning_rate": 1.5693676598625418e-06, "loss": 0.04970909, "memory(GiB)": 13.7, "step": 80385, "train_speed(iter/s)": 1.52755 }, { "acc": 0.97175598, "epoch": 37.67986876025311, "grad_norm": 6.477694034576416, "learning_rate": 1.568803944591965e-06, "loss": 0.06168052, "memory(GiB)": 13.7, "step": 80390, "train_speed(iter/s)": 1.527554 }, { "acc": 0.98386364, "epoch": 37.68221232716194, "grad_norm": 5.222804546356201, "learning_rate": 1.5682403118080888e-06, "loss": 0.05834441, "memory(GiB)": 13.7, "step": 80395, "train_speed(iter/s)": 1.527554 }, { "acc": 0.98425598, "epoch": 37.684555894070776, "grad_norm": 3.0063908100128174, "learning_rate": 1.5676767615244584e-06, "loss": 0.05345827, "memory(GiB)": 13.7, "step": 80400, "train_speed(iter/s)": 1.527556 }, { "acc": 0.99437504, "epoch": 37.68689946097961, "grad_norm": 2.505309820175171, "learning_rate": 1.567113293754621e-06, "loss": 0.0231721, "memory(GiB)": 13.7, "step": 80405, "train_speed(iter/s)": 1.527555 }, { "acc": 0.9833333, "epoch": 37.689243027888445, "grad_norm": 4.026091575622559, "learning_rate": 1.5665499085121203e-06, "loss": 0.0606661, "memory(GiB)": 13.7, "step": 80410, "train_speed(iter/s)": 1.527561 }, { "acc": 0.9895834, "epoch": 37.69158659479728, "grad_norm": 0.030819766223430634, "learning_rate": 1.5659866058105003e-06, "loss": 0.029776, "memory(GiB)": 13.7, "step": 80415, "train_speed(iter/s)": 1.527567 }, { "acc": 0.98506947, "epoch": 37.69393016170611, "grad_norm": 0.002038393635302782, "learning_rate": 1.5654233856632987e-06, "loss": 0.04595511, "memory(GiB)": 13.7, "step": 80420, "train_speed(iter/s)": 1.52757 }, { "acc": 0.99273815, "epoch": 37.696273728614955, "grad_norm": 2.1303703784942627, "learning_rate": 1.5648602480840522e-06, "loss": 0.01303125, "memory(GiB)": 13.7, "step": 80425, "train_speed(iter/s)": 1.527571 }, { "acc": 0.984375, "epoch": 37.69861729552379, "grad_norm": 0.0010125677799805999, "learning_rate": 1.5642971930862988e-06, "loss": 0.04333802, "memory(GiB)": 13.7, "step": 80430, "train_speed(iter/s)": 1.527575 }, { "acc": 0.97622023, "epoch": 37.70096086243262, "grad_norm": 3.7693111896514893, "learning_rate": 1.563734220683573e-06, "loss": 0.06178766, "memory(GiB)": 13.7, "step": 80435, "train_speed(iter/s)": 1.527579 }, { "acc": 0.98934212, "epoch": 37.70330442934146, "grad_norm": 2.1633856296539307, "learning_rate": 1.5631713308894044e-06, "loss": 0.02901855, "memory(GiB)": 13.7, "step": 80440, "train_speed(iter/s)": 1.527578 }, { "acc": 0.98708324, "epoch": 37.70564799625029, "grad_norm": 3.668610095977783, "learning_rate": 1.5626085237173249e-06, "loss": 0.03462442, "memory(GiB)": 13.7, "step": 80445, "train_speed(iter/s)": 1.527579 }, { "acc": 0.9947917, "epoch": 37.707991563159126, "grad_norm": 1.1993392705917358, "learning_rate": 1.5620457991808638e-06, "loss": 0.02772432, "memory(GiB)": 13.7, "step": 80450, "train_speed(iter/s)": 1.527579 }, { "acc": 0.9927083, "epoch": 37.71033513006796, "grad_norm": 2.9263858795166016, "learning_rate": 1.5614831572935435e-06, "loss": 0.02497693, "memory(GiB)": 13.7, "step": 80455, "train_speed(iter/s)": 1.527582 }, { "acc": 0.97780704, "epoch": 37.7126786969768, "grad_norm": 1.5892959833145142, "learning_rate": 1.5609205980688917e-06, "loss": 0.04547244, "memory(GiB)": 13.7, "step": 80460, "train_speed(iter/s)": 1.527591 }, { "acc": 0.98571434, "epoch": 37.715022263885636, "grad_norm": 0.005398353096097708, "learning_rate": 1.5603581215204276e-06, "loss": 0.03138598, "memory(GiB)": 13.7, "step": 80465, "train_speed(iter/s)": 1.527597 }, { "acc": 0.9704895, "epoch": 37.71736583079447, "grad_norm": 5.595581531524658, "learning_rate": 1.5597957276616741e-06, "loss": 0.07052736, "memory(GiB)": 13.7, "step": 80470, "train_speed(iter/s)": 1.527597 }, { "acc": 0.96369324, "epoch": 37.719709397703305, "grad_norm": 5.163996696472168, "learning_rate": 1.5592334165061453e-06, "loss": 0.07579221, "memory(GiB)": 13.7, "step": 80475, "train_speed(iter/s)": 1.527595 }, { "acc": 0.98463535, "epoch": 37.72205296461214, "grad_norm": 2.85550594329834, "learning_rate": 1.5586711880673608e-06, "loss": 0.0324101, "memory(GiB)": 13.7, "step": 80480, "train_speed(iter/s)": 1.527595 }, { "acc": 0.97846985, "epoch": 37.72439653152097, "grad_norm": 1.545967698097229, "learning_rate": 1.558109042358833e-06, "loss": 0.04242671, "memory(GiB)": 13.7, "step": 80485, "train_speed(iter/s)": 1.527594 }, { "acc": 0.98812494, "epoch": 37.72674009842981, "grad_norm": 0.04828876256942749, "learning_rate": 1.5575469793940768e-06, "loss": 0.04143001, "memory(GiB)": 13.7, "step": 80490, "train_speed(iter/s)": 1.527596 }, { "acc": 0.98110571, "epoch": 37.72908366533864, "grad_norm": 3.700653076171875, "learning_rate": 1.556984999186598e-06, "loss": 0.0659076, "memory(GiB)": 13.7, "step": 80495, "train_speed(iter/s)": 1.527602 }, { "acc": 0.97979164, "epoch": 37.73142723224748, "grad_norm": 4.524590492248535, "learning_rate": 1.556423101749909e-06, "loss": 0.03833716, "memory(GiB)": 13.7, "step": 80500, "train_speed(iter/s)": 1.527606 }, { "acc": 0.984375, "epoch": 37.73377079915632, "grad_norm": 1.990023136138916, "learning_rate": 1.5558612870975118e-06, "loss": 0.03378735, "memory(GiB)": 13.7, "step": 80505, "train_speed(iter/s)": 1.527611 }, { "acc": 0.98083334, "epoch": 37.73611436606515, "grad_norm": 0.05892718583345413, "learning_rate": 1.5552995552429148e-06, "loss": 0.05885335, "memory(GiB)": 13.7, "step": 80510, "train_speed(iter/s)": 1.527615 }, { "acc": 0.98291664, "epoch": 37.738457932973986, "grad_norm": 0.00674436567351222, "learning_rate": 1.5547379061996169e-06, "loss": 0.07425557, "memory(GiB)": 13.7, "step": 80515, "train_speed(iter/s)": 1.527613 }, { "acc": 0.99020834, "epoch": 37.74080149988282, "grad_norm": 1.9320781230926514, "learning_rate": 1.5541763399811188e-06, "loss": 0.02745861, "memory(GiB)": 13.7, "step": 80520, "train_speed(iter/s)": 1.527615 }, { "acc": 1.0, "epoch": 37.743145066791655, "grad_norm": 0.9331222772598267, "learning_rate": 1.5536148566009215e-06, "loss": 0.0123445, "memory(GiB)": 13.7, "step": 80525, "train_speed(iter/s)": 1.527619 }, { "acc": 0.98270836, "epoch": 37.74548863370049, "grad_norm": 4.491763591766357, "learning_rate": 1.5530534560725178e-06, "loss": 0.04113578, "memory(GiB)": 13.7, "step": 80530, "train_speed(iter/s)": 1.527621 }, { "acc": 0.97911711, "epoch": 37.74783220060933, "grad_norm": 4.874019145965576, "learning_rate": 1.5524921384094028e-06, "loss": 0.04448881, "memory(GiB)": 13.7, "step": 80535, "train_speed(iter/s)": 1.527617 }, { "acc": 0.97666664, "epoch": 37.750175767518165, "grad_norm": 2.5565738677978516, "learning_rate": 1.5519309036250706e-06, "loss": 0.04043218, "memory(GiB)": 13.7, "step": 80540, "train_speed(iter/s)": 1.527623 }, { "acc": 0.99245033, "epoch": 37.752519334427, "grad_norm": 2.08215069770813, "learning_rate": 1.5513697517330104e-06, "loss": 0.01894212, "memory(GiB)": 13.7, "step": 80545, "train_speed(iter/s)": 1.527632 }, { "acc": 0.99375, "epoch": 37.754862901335834, "grad_norm": 0.001855193288065493, "learning_rate": 1.5508086827467084e-06, "loss": 0.01443275, "memory(GiB)": 13.7, "step": 80550, "train_speed(iter/s)": 1.527631 }, { "acc": 0.99416666, "epoch": 37.75720646824467, "grad_norm": 0.186712384223938, "learning_rate": 1.5502476966796529e-06, "loss": 0.01103144, "memory(GiB)": 13.7, "step": 80555, "train_speed(iter/s)": 1.527632 }, { "acc": 0.97458324, "epoch": 37.7595500351535, "grad_norm": 3.849512815475464, "learning_rate": 1.5496867935453274e-06, "loss": 0.05624258, "memory(GiB)": 13.7, "step": 80560, "train_speed(iter/s)": 1.527638 }, { "acc": 0.99645834, "epoch": 37.76189360206234, "grad_norm": 0.9128473997116089, "learning_rate": 1.5491259733572165e-06, "loss": 0.01775508, "memory(GiB)": 13.7, "step": 80565, "train_speed(iter/s)": 1.52764 }, { "acc": 0.98139963, "epoch": 37.76423716897117, "grad_norm": 2.647735595703125, "learning_rate": 1.5485652361287964e-06, "loss": 0.04721067, "memory(GiB)": 13.7, "step": 80570, "train_speed(iter/s)": 1.527647 }, { "acc": 0.9791667, "epoch": 37.76658073588001, "grad_norm": 2.302384853363037, "learning_rate": 1.5480045818735479e-06, "loss": 0.05347243, "memory(GiB)": 13.7, "step": 80575, "train_speed(iter/s)": 1.527647 }, { "acc": 0.96750002, "epoch": 37.76892430278885, "grad_norm": 5.226780891418457, "learning_rate": 1.5474440106049488e-06, "loss": 0.10220227, "memory(GiB)": 13.7, "step": 80580, "train_speed(iter/s)": 1.52765 }, { "acc": 0.98654518, "epoch": 37.77126786969768, "grad_norm": 6.154401779174805, "learning_rate": 1.5468835223364715e-06, "loss": 0.04300257, "memory(GiB)": 13.7, "step": 80585, "train_speed(iter/s)": 1.527652 }, { "acc": 0.96758928, "epoch": 37.773611436606515, "grad_norm": 1.5864818096160889, "learning_rate": 1.5463231170815864e-06, "loss": 0.07227271, "memory(GiB)": 13.7, "step": 80590, "train_speed(iter/s)": 1.527651 }, { "acc": 0.98249998, "epoch": 37.77595500351535, "grad_norm": 4.4831862449646, "learning_rate": 1.5457627948537656e-06, "loss": 0.02950188, "memory(GiB)": 13.7, "step": 80595, "train_speed(iter/s)": 1.527652 }, { "acc": 0.98698864, "epoch": 37.778298570424184, "grad_norm": 4.437637805938721, "learning_rate": 1.5452025556664793e-06, "loss": 0.03160677, "memory(GiB)": 13.7, "step": 80600, "train_speed(iter/s)": 1.527653 }, { "acc": 0.98083334, "epoch": 37.78064213733302, "grad_norm": 3.2438926696777344, "learning_rate": 1.5446423995331903e-06, "loss": 0.03531748, "memory(GiB)": 13.7, "step": 80605, "train_speed(iter/s)": 1.527655 }, { "acc": 0.9895833, "epoch": 37.78298570424186, "grad_norm": 0.005403583403676748, "learning_rate": 1.5440823264673648e-06, "loss": 0.03908592, "memory(GiB)": 13.7, "step": 80610, "train_speed(iter/s)": 1.527656 }, { "acc": 0.9833333, "epoch": 37.785329271150694, "grad_norm": 1.9049575328826904, "learning_rate": 1.5435223364824664e-06, "loss": 0.05442131, "memory(GiB)": 13.7, "step": 80615, "train_speed(iter/s)": 1.527653 }, { "acc": 0.99298611, "epoch": 37.78767283805953, "grad_norm": 1.4413050413131714, "learning_rate": 1.5429624295919526e-06, "loss": 0.0167781, "memory(GiB)": 13.7, "step": 80620, "train_speed(iter/s)": 1.527659 }, { "acc": 0.98530636, "epoch": 37.79001640496836, "grad_norm": 4.952706813812256, "learning_rate": 1.5424026058092846e-06, "loss": 0.07129672, "memory(GiB)": 13.7, "step": 80625, "train_speed(iter/s)": 1.527662 }, { "acc": 0.9769887, "epoch": 37.7923599718772, "grad_norm": 4.415256500244141, "learning_rate": 1.5418428651479156e-06, "loss": 0.05362505, "memory(GiB)": 13.7, "step": 80630, "train_speed(iter/s)": 1.527668 }, { "acc": 0.98842258, "epoch": 37.79470353878603, "grad_norm": 2.8928210735321045, "learning_rate": 1.541283207621302e-06, "loss": 0.02601429, "memory(GiB)": 13.7, "step": 80635, "train_speed(iter/s)": 1.527669 }, { "acc": 0.99125004, "epoch": 37.797047105694865, "grad_norm": 1.6707943677902222, "learning_rate": 1.5407236332428964e-06, "loss": 0.04103843, "memory(GiB)": 13.7, "step": 80640, "train_speed(iter/s)": 1.52767 }, { "acc": 0.98447914, "epoch": 37.7993906726037, "grad_norm": 1.8904272317886353, "learning_rate": 1.5401641420261475e-06, "loss": 0.04601911, "memory(GiB)": 13.7, "step": 80645, "train_speed(iter/s)": 1.527672 }, { "acc": 0.98760414, "epoch": 37.80173423951254, "grad_norm": 2.0747785568237305, "learning_rate": 1.539604733984504e-06, "loss": 0.04418979, "memory(GiB)": 13.7, "step": 80650, "train_speed(iter/s)": 1.52767 }, { "acc": 0.97342262, "epoch": 37.804077806421375, "grad_norm": 6.265517711639404, "learning_rate": 1.5390454091314153e-06, "loss": 0.04605138, "memory(GiB)": 13.7, "step": 80655, "train_speed(iter/s)": 1.527674 }, { "acc": 0.9864584, "epoch": 37.80642137333021, "grad_norm": 0.06764479726552963, "learning_rate": 1.5384861674803205e-06, "loss": 0.02994934, "memory(GiB)": 13.7, "step": 80660, "train_speed(iter/s)": 1.527683 }, { "acc": 0.9677083, "epoch": 37.808764940239044, "grad_norm": 4.382863998413086, "learning_rate": 1.5379270090446655e-06, "loss": 0.06082644, "memory(GiB)": 13.7, "step": 80665, "train_speed(iter/s)": 1.527685 }, { "acc": 0.95500002, "epoch": 37.81110850714788, "grad_norm": 6.58872652053833, "learning_rate": 1.5373679338378908e-06, "loss": 0.10226268, "memory(GiB)": 13.7, "step": 80670, "train_speed(iter/s)": 1.527691 }, { "acc": 0.97426548, "epoch": 37.81345207405671, "grad_norm": 3.3844940662384033, "learning_rate": 1.536808941873434e-06, "loss": 0.05882725, "memory(GiB)": 13.7, "step": 80675, "train_speed(iter/s)": 1.527695 }, { "acc": 0.9875, "epoch": 37.81579564096555, "grad_norm": 3.4871699810028076, "learning_rate": 1.5362500331647296e-06, "loss": 0.01978911, "memory(GiB)": 13.7, "step": 80680, "train_speed(iter/s)": 1.527696 }, { "acc": 0.97303028, "epoch": 37.81813920787438, "grad_norm": 0.004116402938961983, "learning_rate": 1.535691207725213e-06, "loss": 0.04719526, "memory(GiB)": 13.7, "step": 80685, "train_speed(iter/s)": 1.527698 }, { "acc": 0.99125004, "epoch": 37.82048277478322, "grad_norm": 3.207610845565796, "learning_rate": 1.5351324655683172e-06, "loss": 0.03069455, "memory(GiB)": 13.7, "step": 80690, "train_speed(iter/s)": 1.527703 }, { "acc": 0.98145828, "epoch": 37.82282634169206, "grad_norm": 2.5041348934173584, "learning_rate": 1.5345738067074738e-06, "loss": 0.04464384, "memory(GiB)": 13.7, "step": 80695, "train_speed(iter/s)": 1.527706 }, { "acc": 0.98761368, "epoch": 37.82516990860089, "grad_norm": 2.2838525772094727, "learning_rate": 1.5340152311561087e-06, "loss": 0.03165105, "memory(GiB)": 13.7, "step": 80700, "train_speed(iter/s)": 1.527707 }, { "acc": 0.98648815, "epoch": 37.827513475509726, "grad_norm": 0.4488692581653595, "learning_rate": 1.5334567389276482e-06, "loss": 0.06319616, "memory(GiB)": 13.7, "step": 80705, "train_speed(iter/s)": 1.527705 }, { "acc": 0.98812504, "epoch": 37.82985704241856, "grad_norm": 0.004592373035848141, "learning_rate": 1.5328983300355202e-06, "loss": 0.02306451, "memory(GiB)": 13.7, "step": 80710, "train_speed(iter/s)": 1.5277 }, { "acc": 0.98298607, "epoch": 37.832200609327394, "grad_norm": 2.869441509246826, "learning_rate": 1.5323400044931432e-06, "loss": 0.0923961, "memory(GiB)": 13.7, "step": 80715, "train_speed(iter/s)": 1.527699 }, { "acc": 0.98625002, "epoch": 37.83454417623623, "grad_norm": 2.9781789779663086, "learning_rate": 1.5317817623139377e-06, "loss": 0.01844671, "memory(GiB)": 13.7, "step": 80720, "train_speed(iter/s)": 1.527702 }, { "acc": 0.99048615, "epoch": 37.83688774314507, "grad_norm": 3.625715494155884, "learning_rate": 1.5312236035113235e-06, "loss": 0.04217108, "memory(GiB)": 13.7, "step": 80725, "train_speed(iter/s)": 1.527711 }, { "acc": 0.9807292, "epoch": 37.839231310053904, "grad_norm": 7.701054096221924, "learning_rate": 1.5306655280987176e-06, "loss": 0.04037074, "memory(GiB)": 13.7, "step": 80730, "train_speed(iter/s)": 1.527716 }, { "acc": 0.97562504, "epoch": 37.84157487696274, "grad_norm": 3.092674493789673, "learning_rate": 1.5301075360895312e-06, "loss": 0.05627755, "memory(GiB)": 13.7, "step": 80735, "train_speed(iter/s)": 1.527716 }, { "acc": 0.98449812, "epoch": 37.84391844387157, "grad_norm": 2.258582592010498, "learning_rate": 1.5295496274971792e-06, "loss": 0.04378183, "memory(GiB)": 13.7, "step": 80740, "train_speed(iter/s)": 1.527718 }, { "acc": 0.97758923, "epoch": 37.84626201078041, "grad_norm": 0.34791699051856995, "learning_rate": 1.5289918023350718e-06, "loss": 0.04714096, "memory(GiB)": 13.7, "step": 80745, "train_speed(iter/s)": 1.527717 }, { "acc": 0.99489088, "epoch": 37.84860557768924, "grad_norm": 1.2841230630874634, "learning_rate": 1.5284340606166162e-06, "loss": 0.02235595, "memory(GiB)": 13.7, "step": 80750, "train_speed(iter/s)": 1.527719 }, { "acc": 0.9864583, "epoch": 37.850949144598076, "grad_norm": 5.835583686828613, "learning_rate": 1.5278764023552197e-06, "loss": 0.03647341, "memory(GiB)": 13.7, "step": 80755, "train_speed(iter/s)": 1.527725 }, { "acc": 0.9793498, "epoch": 37.85329271150691, "grad_norm": 2.9393036365509033, "learning_rate": 1.5273188275642853e-06, "loss": 0.05464722, "memory(GiB)": 13.7, "step": 80760, "train_speed(iter/s)": 1.52773 }, { "acc": 0.97833328, "epoch": 37.85563627841575, "grad_norm": 2.8991119861602783, "learning_rate": 1.5267613362572156e-06, "loss": 0.03681982, "memory(GiB)": 13.7, "step": 80765, "train_speed(iter/s)": 1.527732 }, { "acc": 0.96625004, "epoch": 37.857979845324586, "grad_norm": 4.099353790283203, "learning_rate": 1.526203928447413e-06, "loss": 0.10551566, "memory(GiB)": 13.7, "step": 80770, "train_speed(iter/s)": 1.527738 }, { "acc": 0.98104172, "epoch": 37.86032341223342, "grad_norm": 1.0306191444396973, "learning_rate": 1.525646604148272e-06, "loss": 0.04738101, "memory(GiB)": 13.7, "step": 80775, "train_speed(iter/s)": 1.527737 }, { "acc": 0.98458328, "epoch": 37.862666979142254, "grad_norm": 5.437530040740967, "learning_rate": 1.5250893633731914e-06, "loss": 0.03738533, "memory(GiB)": 13.7, "step": 80780, "train_speed(iter/s)": 1.527743 }, { "acc": 0.97666664, "epoch": 37.86501054605109, "grad_norm": 5.947206497192383, "learning_rate": 1.5245322061355663e-06, "loss": 0.0763134, "memory(GiB)": 13.7, "step": 80785, "train_speed(iter/s)": 1.527744 }, { "acc": 0.97853556, "epoch": 37.86735411295992, "grad_norm": 3.081603527069092, "learning_rate": 1.523975132448786e-06, "loss": 0.06151047, "memory(GiB)": 13.7, "step": 80790, "train_speed(iter/s)": 1.527748 }, { "acc": 0.98206472, "epoch": 37.86969767986876, "grad_norm": 3.7970540523529053, "learning_rate": 1.523418142326244e-06, "loss": 0.06700384, "memory(GiB)": 13.7, "step": 80795, "train_speed(iter/s)": 1.527753 }, { "acc": 0.97383928, "epoch": 37.8720412467776, "grad_norm": 4.701742649078369, "learning_rate": 1.5228612357813247e-06, "loss": 0.05599244, "memory(GiB)": 13.7, "step": 80800, "train_speed(iter/s)": 1.527756 }, { "acc": 0.97208328, "epoch": 37.87438481368643, "grad_norm": 6.194064140319824, "learning_rate": 1.5223044128274183e-06, "loss": 0.04812486, "memory(GiB)": 13.7, "step": 80805, "train_speed(iter/s)": 1.527755 }, { "acc": 0.97550602, "epoch": 37.87672838059527, "grad_norm": 3.2737014293670654, "learning_rate": 1.5217476734779055e-06, "loss": 0.04187289, "memory(GiB)": 13.7, "step": 80810, "train_speed(iter/s)": 1.527759 }, { "acc": 0.9895834, "epoch": 37.8790719475041, "grad_norm": 3.6124377250671387, "learning_rate": 1.52119101774617e-06, "loss": 0.01783465, "memory(GiB)": 13.7, "step": 80815, "train_speed(iter/s)": 1.527758 }, { "acc": 0.98104162, "epoch": 37.881415514412936, "grad_norm": 5.0253214836120605, "learning_rate": 1.5206344456455935e-06, "loss": 0.03415521, "memory(GiB)": 13.7, "step": 80820, "train_speed(iter/s)": 1.527762 }, { "acc": 0.98354168, "epoch": 37.88375908132177, "grad_norm": 1.7601579427719116, "learning_rate": 1.5200779571895506e-06, "loss": 0.02538068, "memory(GiB)": 13.7, "step": 80825, "train_speed(iter/s)": 1.527762 }, { "acc": 0.96883926, "epoch": 37.886102648230604, "grad_norm": 4.444388389587402, "learning_rate": 1.51952155239142e-06, "loss": 0.06292823, "memory(GiB)": 13.7, "step": 80830, "train_speed(iter/s)": 1.527768 }, { "acc": 0.96854162, "epoch": 37.88844621513944, "grad_norm": 1.9949028491973877, "learning_rate": 1.5189652312645773e-06, "loss": 0.062061, "memory(GiB)": 13.7, "step": 80835, "train_speed(iter/s)": 1.527773 }, { "acc": 0.98625002, "epoch": 37.89078978204828, "grad_norm": 4.032060623168945, "learning_rate": 1.5184089938223907e-06, "loss": 0.0260392, "memory(GiB)": 13.7, "step": 80840, "train_speed(iter/s)": 1.52778 }, { "acc": 0.9895833, "epoch": 37.893133348957114, "grad_norm": 3.492905378341675, "learning_rate": 1.5178528400782342e-06, "loss": 0.02599911, "memory(GiB)": 13.7, "step": 80845, "train_speed(iter/s)": 1.527789 }, { "acc": 0.98312502, "epoch": 37.89547691586595, "grad_norm": 5.690352439880371, "learning_rate": 1.517296770045472e-06, "loss": 0.03869628, "memory(GiB)": 13.7, "step": 80850, "train_speed(iter/s)": 1.527789 }, { "acc": 0.96955357, "epoch": 37.89782048277478, "grad_norm": 3.0147604942321777, "learning_rate": 1.5167407837374729e-06, "loss": 0.07873693, "memory(GiB)": 13.7, "step": 80855, "train_speed(iter/s)": 1.527793 }, { "acc": 0.98152781, "epoch": 37.90016404968362, "grad_norm": 4.856649398803711, "learning_rate": 1.516184881167601e-06, "loss": 0.04059277, "memory(GiB)": 13.7, "step": 80860, "train_speed(iter/s)": 1.527797 }, { "acc": 0.984375, "epoch": 37.90250761659245, "grad_norm": 4.647604465484619, "learning_rate": 1.5156290623492168e-06, "loss": 0.05653286, "memory(GiB)": 13.7, "step": 80865, "train_speed(iter/s)": 1.527797 }, { "acc": 0.97175598, "epoch": 37.904851183501286, "grad_norm": 2.2834744453430176, "learning_rate": 1.5150733272956814e-06, "loss": 0.03848651, "memory(GiB)": 13.7, "step": 80870, "train_speed(iter/s)": 1.527799 }, { "acc": 0.9833334, "epoch": 37.90719475041013, "grad_norm": 5.429982662200928, "learning_rate": 1.514517676020354e-06, "loss": 0.02964233, "memory(GiB)": 13.7, "step": 80875, "train_speed(iter/s)": 1.527803 }, { "acc": 0.9888195, "epoch": 37.90953831731896, "grad_norm": 4.385352611541748, "learning_rate": 1.5139621085365888e-06, "loss": 0.0361646, "memory(GiB)": 13.7, "step": 80880, "train_speed(iter/s)": 1.527813 }, { "acc": 0.990625, "epoch": 37.911881884227796, "grad_norm": 0.0017895251512527466, "learning_rate": 1.5134066248577395e-06, "loss": 0.0312961, "memory(GiB)": 13.7, "step": 80885, "train_speed(iter/s)": 1.52782 }, { "acc": 0.983848, "epoch": 37.91422545113663, "grad_norm": 0.774422824382782, "learning_rate": 1.5128512249971586e-06, "loss": 0.04165163, "memory(GiB)": 13.7, "step": 80890, "train_speed(iter/s)": 1.527821 }, { "acc": 0.98154764, "epoch": 37.916569018045465, "grad_norm": 4.488186836242676, "learning_rate": 1.5122959089681986e-06, "loss": 0.0659155, "memory(GiB)": 13.7, "step": 80895, "train_speed(iter/s)": 1.527822 }, { "acc": 0.97835321, "epoch": 37.9189125849543, "grad_norm": 3.436948776245117, "learning_rate": 1.511740676784203e-06, "loss": 0.04903517, "memory(GiB)": 13.7, "step": 80900, "train_speed(iter/s)": 1.527822 }, { "acc": 0.98675594, "epoch": 37.92125615186313, "grad_norm": 1.1646490097045898, "learning_rate": 1.5111855284585206e-06, "loss": 0.03201807, "memory(GiB)": 13.7, "step": 80905, "train_speed(iter/s)": 1.52782 }, { "acc": 0.97789574, "epoch": 37.92359971877197, "grad_norm": 3.6966452598571777, "learning_rate": 1.5106304640044939e-06, "loss": 0.051393, "memory(GiB)": 13.7, "step": 80910, "train_speed(iter/s)": 1.527821 }, { "acc": 0.98668728, "epoch": 37.92594328568081, "grad_norm": 0.9370936751365662, "learning_rate": 1.510075483435468e-06, "loss": 0.0688055, "memory(GiB)": 13.7, "step": 80915, "train_speed(iter/s)": 1.527822 }, { "acc": 0.98542614, "epoch": 37.92828685258964, "grad_norm": 0.00386626785621047, "learning_rate": 1.509520586764781e-06, "loss": 0.02998535, "memory(GiB)": 13.7, "step": 80920, "train_speed(iter/s)": 1.527823 }, { "acc": 0.97238102, "epoch": 37.93063041949848, "grad_norm": 3.3534255027770996, "learning_rate": 1.5089657740057679e-06, "loss": 0.06113955, "memory(GiB)": 13.7, "step": 80925, "train_speed(iter/s)": 1.527826 }, { "acc": 0.98552084, "epoch": 37.93297398640731, "grad_norm": 4.046199321746826, "learning_rate": 1.5084110451717677e-06, "loss": 0.02136429, "memory(GiB)": 13.7, "step": 80930, "train_speed(iter/s)": 1.527829 }, { "acc": 0.98916664, "epoch": 37.935317553316146, "grad_norm": 1.6447319984436035, "learning_rate": 1.5078564002761143e-06, "loss": 0.02373307, "memory(GiB)": 13.7, "step": 80935, "train_speed(iter/s)": 1.527831 }, { "acc": 0.98425598, "epoch": 37.93766112022498, "grad_norm": 4.764044761657715, "learning_rate": 1.5073018393321379e-06, "loss": 0.04325264, "memory(GiB)": 13.7, "step": 80940, "train_speed(iter/s)": 1.527831 }, { "acc": 0.99041672, "epoch": 37.940004687133815, "grad_norm": 2.4338362216949463, "learning_rate": 1.5067473623531693e-06, "loss": 0.03674743, "memory(GiB)": 13.7, "step": 80945, "train_speed(iter/s)": 1.527838 }, { "acc": 0.99092264, "epoch": 37.942348254042656, "grad_norm": 5.093374252319336, "learning_rate": 1.5061929693525375e-06, "loss": 0.03110403, "memory(GiB)": 13.7, "step": 80950, "train_speed(iter/s)": 1.527836 }, { "acc": 0.98273811, "epoch": 37.94469182095149, "grad_norm": 5.977853298187256, "learning_rate": 1.5056386603435658e-06, "loss": 0.03235614, "memory(GiB)": 13.7, "step": 80955, "train_speed(iter/s)": 1.527844 }, { "acc": 0.9770833, "epoch": 37.947035387860325, "grad_norm": 2.6301825046539307, "learning_rate": 1.5050844353395784e-06, "loss": 0.03987381, "memory(GiB)": 13.7, "step": 80960, "train_speed(iter/s)": 1.527846 }, { "acc": 0.97565479, "epoch": 37.94937895476916, "grad_norm": 2.423299789428711, "learning_rate": 1.5045302943539e-06, "loss": 0.06785125, "memory(GiB)": 13.7, "step": 80965, "train_speed(iter/s)": 1.527849 }, { "acc": 0.97688484, "epoch": 37.95172252167799, "grad_norm": 1.7249317169189453, "learning_rate": 1.5039762373998487e-06, "loss": 0.0763381, "memory(GiB)": 13.7, "step": 80970, "train_speed(iter/s)": 1.52785 }, { "acc": 0.98410254, "epoch": 37.95406608858683, "grad_norm": 5.644534111022949, "learning_rate": 1.5034222644907395e-06, "loss": 0.06114415, "memory(GiB)": 13.7, "step": 80975, "train_speed(iter/s)": 1.527856 }, { "acc": 0.97999992, "epoch": 37.95640965549566, "grad_norm": 6.889784336090088, "learning_rate": 1.5028683756398898e-06, "loss": 0.04302428, "memory(GiB)": 13.7, "step": 80980, "train_speed(iter/s)": 1.527863 }, { "acc": 0.98803024, "epoch": 37.958753222404496, "grad_norm": 3.0924510955810547, "learning_rate": 1.5023145708606144e-06, "loss": 0.02957116, "memory(GiB)": 13.7, "step": 80985, "train_speed(iter/s)": 1.527869 }, { "acc": 0.97798615, "epoch": 37.96109678931334, "grad_norm": 2.3902971744537354, "learning_rate": 1.5017608501662256e-06, "loss": 0.07626811, "memory(GiB)": 13.7, "step": 80990, "train_speed(iter/s)": 1.527871 }, { "acc": 0.98445511, "epoch": 37.96344035622217, "grad_norm": 6.8792266845703125, "learning_rate": 1.5012072135700307e-06, "loss": 0.03029551, "memory(GiB)": 13.7, "step": 80995, "train_speed(iter/s)": 1.527873 }, { "acc": 0.97520838, "epoch": 37.965783923131006, "grad_norm": 3.135470390319824, "learning_rate": 1.5006536610853375e-06, "loss": 0.07479296, "memory(GiB)": 13.7, "step": 81000, "train_speed(iter/s)": 1.527878 }, { "acc": 0.99236107, "epoch": 37.96812749003984, "grad_norm": 1.5410308837890625, "learning_rate": 1.5001001927254544e-06, "loss": 0.02778955, "memory(GiB)": 13.7, "step": 81005, "train_speed(iter/s)": 1.527875 }, { "acc": 0.991572, "epoch": 37.970471056948675, "grad_norm": 1.0320390462875366, "learning_rate": 1.499546808503683e-06, "loss": 0.02151337, "memory(GiB)": 13.7, "step": 81010, "train_speed(iter/s)": 1.527878 }, { "acc": 0.97312508, "epoch": 37.97281462385751, "grad_norm": 4.277688503265381, "learning_rate": 1.4989935084333236e-06, "loss": 0.05519779, "memory(GiB)": 13.7, "step": 81015, "train_speed(iter/s)": 1.527886 }, { "acc": 0.97364988, "epoch": 37.975158190766344, "grad_norm": 5.364124298095703, "learning_rate": 1.498440292527676e-06, "loss": 0.07429496, "memory(GiB)": 13.7, "step": 81020, "train_speed(iter/s)": 1.527892 }, { "acc": 0.98500004, "epoch": 37.977501757675185, "grad_norm": 3.577523708343506, "learning_rate": 1.497887160800041e-06, "loss": 0.03650954, "memory(GiB)": 13.7, "step": 81025, "train_speed(iter/s)": 1.527894 }, { "acc": 0.98812504, "epoch": 37.97984532458402, "grad_norm": 3.9257850646972656, "learning_rate": 1.4973341132637095e-06, "loss": 0.02983681, "memory(GiB)": 13.7, "step": 81030, "train_speed(iter/s)": 1.527895 }, { "acc": 0.96569443, "epoch": 37.982188891492854, "grad_norm": 5.0577898025512695, "learning_rate": 1.4967811499319774e-06, "loss": 0.07898732, "memory(GiB)": 13.7, "step": 81035, "train_speed(iter/s)": 1.527901 }, { "acc": 0.98181553, "epoch": 37.98453245840169, "grad_norm": 1.2598999738693237, "learning_rate": 1.4962282708181373e-06, "loss": 0.03892547, "memory(GiB)": 13.7, "step": 81040, "train_speed(iter/s)": 1.527904 }, { "acc": 0.98039265, "epoch": 37.98687602531052, "grad_norm": 2.534446954727173, "learning_rate": 1.495675475935476e-06, "loss": 0.03353969, "memory(GiB)": 13.7, "step": 81045, "train_speed(iter/s)": 1.527911 }, { "acc": 0.98708324, "epoch": 37.98921959221936, "grad_norm": 4.0330681800842285, "learning_rate": 1.495122765297283e-06, "loss": 0.04947538, "memory(GiB)": 13.7, "step": 81050, "train_speed(iter/s)": 1.527911 }, { "acc": 0.9833334, "epoch": 37.99156315912819, "grad_norm": 3.614199638366699, "learning_rate": 1.4945701389168413e-06, "loss": 0.0612311, "memory(GiB)": 13.7, "step": 81055, "train_speed(iter/s)": 1.527916 }, { "acc": 0.98583336, "epoch": 37.993906726037025, "grad_norm": 3.0216050148010254, "learning_rate": 1.4940175968074353e-06, "loss": 0.02706077, "memory(GiB)": 13.7, "step": 81060, "train_speed(iter/s)": 1.52792 }, { "acc": 0.98624458, "epoch": 37.99625029294587, "grad_norm": 3.293816566467285, "learning_rate": 1.4934651389823485e-06, "loss": 0.04339696, "memory(GiB)": 13.7, "step": 81065, "train_speed(iter/s)": 1.527924 }, { "acc": 0.96821423, "epoch": 37.9985938598547, "grad_norm": 4.231037139892578, "learning_rate": 1.4929127654548569e-06, "loss": 0.06252298, "memory(GiB)": 13.7, "step": 81070, "train_speed(iter/s)": 1.527928 }, { "acc": 0.9604166, "epoch": 38.000937426763535, "grad_norm": 4.222538948059082, "learning_rate": 1.4923604762382387e-06, "loss": 0.0933016, "memory(GiB)": 13.7, "step": 81075, "train_speed(iter/s)": 1.527906 }, { "acc": 0.97743511, "epoch": 38.00328099367237, "grad_norm": 7.403954982757568, "learning_rate": 1.4918082713457712e-06, "loss": 0.05484878, "memory(GiB)": 13.7, "step": 81080, "train_speed(iter/s)": 1.527912 }, { "acc": 0.97590275, "epoch": 38.005624560581204, "grad_norm": 5.243793487548828, "learning_rate": 1.4912561507907245e-06, "loss": 0.04847533, "memory(GiB)": 13.7, "step": 81085, "train_speed(iter/s)": 1.527911 }, { "acc": 0.9763772, "epoch": 38.00796812749004, "grad_norm": 2.0644240379333496, "learning_rate": 1.4907041145863726e-06, "loss": 0.06980171, "memory(GiB)": 13.7, "step": 81090, "train_speed(iter/s)": 1.527906 }, { "acc": 0.984375, "epoch": 38.01031169439887, "grad_norm": 0.6774303913116455, "learning_rate": 1.490152162745982e-06, "loss": 0.04448681, "memory(GiB)": 13.7, "step": 81095, "train_speed(iter/s)": 1.527908 }, { "acc": 0.98017864, "epoch": 38.012655261307714, "grad_norm": 1.1970865726470947, "learning_rate": 1.4896002952828222e-06, "loss": 0.04851257, "memory(GiB)": 13.7, "step": 81100, "train_speed(iter/s)": 1.527907 }, { "acc": 0.98458328, "epoch": 38.01499882821655, "grad_norm": 0.00526017602533102, "learning_rate": 1.489048512210156e-06, "loss": 0.04133627, "memory(GiB)": 13.7, "step": 81105, "train_speed(iter/s)": 1.527912 }, { "acc": 0.99104166, "epoch": 38.01734239512538, "grad_norm": 4.43350076675415, "learning_rate": 1.4884968135412466e-06, "loss": 0.06143235, "memory(GiB)": 13.7, "step": 81110, "train_speed(iter/s)": 1.527916 }, { "acc": 0.990625, "epoch": 38.01968596203422, "grad_norm": 2.2909188270568848, "learning_rate": 1.4879451992893578e-06, "loss": 0.02660207, "memory(GiB)": 13.7, "step": 81115, "train_speed(iter/s)": 1.527914 }, { "acc": 0.996875, "epoch": 38.02202952894305, "grad_norm": 0.012772519141435623, "learning_rate": 1.4873936694677457e-06, "loss": 0.00499145, "memory(GiB)": 13.7, "step": 81120, "train_speed(iter/s)": 1.527917 }, { "acc": 0.97976189, "epoch": 38.024373095851885, "grad_norm": 0.004457259550690651, "learning_rate": 1.4868422240896681e-06, "loss": 0.04529209, "memory(GiB)": 13.7, "step": 81125, "train_speed(iter/s)": 1.527921 }, { "acc": 0.99083328, "epoch": 38.02671666276072, "grad_norm": 4.2832417488098145, "learning_rate": 1.4862908631683824e-06, "loss": 0.03615922, "memory(GiB)": 13.7, "step": 81130, "train_speed(iter/s)": 1.527928 }, { "acc": 0.97217264, "epoch": 38.029060229669554, "grad_norm": 6.155732154846191, "learning_rate": 1.4857395867171365e-06, "loss": 0.05557189, "memory(GiB)": 13.7, "step": 81135, "train_speed(iter/s)": 1.527936 }, { "acc": 0.98467264, "epoch": 38.031403796578395, "grad_norm": 2.0370001792907715, "learning_rate": 1.4851883947491865e-06, "loss": 0.03427355, "memory(GiB)": 13.7, "step": 81140, "train_speed(iter/s)": 1.527938 }, { "acc": 0.97770834, "epoch": 38.03374736348723, "grad_norm": 3.394986867904663, "learning_rate": 1.4846372872777768e-06, "loss": 0.05978872, "memory(GiB)": 13.7, "step": 81145, "train_speed(iter/s)": 1.527944 }, { "acc": 0.97416668, "epoch": 38.036090930396064, "grad_norm": 5.085193634033203, "learning_rate": 1.4840862643161564e-06, "loss": 0.06189713, "memory(GiB)": 13.7, "step": 81150, "train_speed(iter/s)": 1.527947 }, { "acc": 0.9854167, "epoch": 38.0384344973049, "grad_norm": 4.322713851928711, "learning_rate": 1.4835353258775717e-06, "loss": 0.05393105, "memory(GiB)": 13.7, "step": 81155, "train_speed(iter/s)": 1.527952 }, { "acc": 0.96809521, "epoch": 38.04077806421373, "grad_norm": 4.392057418823242, "learning_rate": 1.4829844719752618e-06, "loss": 0.04146871, "memory(GiB)": 13.7, "step": 81160, "train_speed(iter/s)": 1.527955 }, { "acc": 0.97923603, "epoch": 38.04312163112257, "grad_norm": 3.6051197052001953, "learning_rate": 1.482433702622469e-06, "loss": 0.0474882, "memory(GiB)": 13.7, "step": 81165, "train_speed(iter/s)": 1.527958 }, { "acc": 0.9859375, "epoch": 38.0454651980314, "grad_norm": 2.854471206665039, "learning_rate": 1.4818830178324336e-06, "loss": 0.04274439, "memory(GiB)": 13.7, "step": 81170, "train_speed(iter/s)": 1.527961 }, { "acc": 0.9885417, "epoch": 38.04780876494024, "grad_norm": 6.369647026062012, "learning_rate": 1.481332417618392e-06, "loss": 0.03234708, "memory(GiB)": 13.7, "step": 81175, "train_speed(iter/s)": 1.527964 }, { "acc": 0.99375, "epoch": 38.05015233184908, "grad_norm": 0.5823405385017395, "learning_rate": 1.4807819019935753e-06, "loss": 0.04236952, "memory(GiB)": 13.7, "step": 81180, "train_speed(iter/s)": 1.527967 }, { "acc": 0.97951393, "epoch": 38.05249589875791, "grad_norm": 2.473263740539551, "learning_rate": 1.4802314709712186e-06, "loss": 0.03057466, "memory(GiB)": 13.7, "step": 81185, "train_speed(iter/s)": 1.527967 }, { "acc": 0.9838542, "epoch": 38.054839465666745, "grad_norm": 2.827310800552368, "learning_rate": 1.4796811245645523e-06, "loss": 0.04272416, "memory(GiB)": 13.7, "step": 81190, "train_speed(iter/s)": 1.527965 }, { "acc": 0.98497028, "epoch": 38.05718303257558, "grad_norm": 1.4516499042510986, "learning_rate": 1.479130862786807e-06, "loss": 0.03204013, "memory(GiB)": 13.7, "step": 81195, "train_speed(iter/s)": 1.527969 }, { "acc": 0.97171879, "epoch": 38.059526599484414, "grad_norm": 6.316928386688232, "learning_rate": 1.4785806856512055e-06, "loss": 0.06040473, "memory(GiB)": 13.7, "step": 81200, "train_speed(iter/s)": 1.527972 }, { "acc": 0.990625, "epoch": 38.06187016639325, "grad_norm": 2.5019893646240234, "learning_rate": 1.4780305931709734e-06, "loss": 0.02430888, "memory(GiB)": 13.7, "step": 81205, "train_speed(iter/s)": 1.527977 }, { "acc": 0.99750004, "epoch": 38.06421373330208, "grad_norm": 2.960783004760742, "learning_rate": 1.4774805853593358e-06, "loss": 0.01041237, "memory(GiB)": 13.7, "step": 81210, "train_speed(iter/s)": 1.527979 }, { "acc": 0.97875004, "epoch": 38.066557300210924, "grad_norm": 2.57932448387146, "learning_rate": 1.4769306622295104e-06, "loss": 0.04191612, "memory(GiB)": 13.7, "step": 81215, "train_speed(iter/s)": 1.527979 }, { "acc": 0.99229164, "epoch": 38.06890086711976, "grad_norm": 0.847713053226471, "learning_rate": 1.4763808237947146e-06, "loss": 0.01578945, "memory(GiB)": 13.7, "step": 81220, "train_speed(iter/s)": 1.527978 }, { "acc": 0.97587833, "epoch": 38.07124443402859, "grad_norm": 1.5461214780807495, "learning_rate": 1.4758310700681664e-06, "loss": 0.07738848, "memory(GiB)": 13.7, "step": 81225, "train_speed(iter/s)": 1.527978 }, { "acc": 0.99541664, "epoch": 38.07358800093743, "grad_norm": 0.002038513543084264, "learning_rate": 1.4752814010630814e-06, "loss": 0.005961, "memory(GiB)": 13.7, "step": 81230, "train_speed(iter/s)": 1.527978 }, { "acc": 0.9833333, "epoch": 38.07593156784626, "grad_norm": 1.8430609703063965, "learning_rate": 1.4747318167926685e-06, "loss": 0.0527083, "memory(GiB)": 13.7, "step": 81235, "train_speed(iter/s)": 1.527978 }, { "acc": 0.99508934, "epoch": 38.078275134755096, "grad_norm": 1.2053124904632568, "learning_rate": 1.47418231727014e-06, "loss": 0.02641767, "memory(GiB)": 13.7, "step": 81240, "train_speed(iter/s)": 1.527979 }, { "acc": 0.98425598, "epoch": 38.08061870166393, "grad_norm": 2.7393877506256104, "learning_rate": 1.4736329025087052e-06, "loss": 0.03547376, "memory(GiB)": 13.7, "step": 81245, "train_speed(iter/s)": 1.527984 }, { "acc": 0.9838541, "epoch": 38.082962268572764, "grad_norm": 5.337080001831055, "learning_rate": 1.4730835725215665e-06, "loss": 0.04699598, "memory(GiB)": 13.7, "step": 81250, "train_speed(iter/s)": 1.52799 }, { "acc": 0.996875, "epoch": 38.085305835481606, "grad_norm": 0.000671820598654449, "learning_rate": 1.4725343273219324e-06, "loss": 0.02163997, "memory(GiB)": 13.7, "step": 81255, "train_speed(iter/s)": 1.52799 }, { "acc": 0.9858036, "epoch": 38.08764940239044, "grad_norm": 2.800340414047241, "learning_rate": 1.4719851669230017e-06, "loss": 0.01807762, "memory(GiB)": 13.7, "step": 81260, "train_speed(iter/s)": 1.527987 }, { "acc": 0.99750004, "epoch": 38.089992969299274, "grad_norm": 1.0027110576629639, "learning_rate": 1.4714360913379752e-06, "loss": 0.0163881, "memory(GiB)": 13.7, "step": 81265, "train_speed(iter/s)": 1.527983 }, { "acc": 0.98319445, "epoch": 38.09233653620811, "grad_norm": 7.408854961395264, "learning_rate": 1.4708871005800524e-06, "loss": 0.05596302, "memory(GiB)": 13.7, "step": 81270, "train_speed(iter/s)": 1.527983 }, { "acc": 0.97842264, "epoch": 38.09468010311694, "grad_norm": 2.630375385284424, "learning_rate": 1.4703381946624265e-06, "loss": 0.05442145, "memory(GiB)": 13.7, "step": 81275, "train_speed(iter/s)": 1.527987 }, { "acc": 0.9802084, "epoch": 38.09702367002578, "grad_norm": 2.5504159927368164, "learning_rate": 1.4697893735982932e-06, "loss": 0.0415532, "memory(GiB)": 13.7, "step": 81280, "train_speed(iter/s)": 1.52799 }, { "acc": 0.9720623, "epoch": 38.09936723693461, "grad_norm": 5.470061302185059, "learning_rate": 1.4692406374008458e-06, "loss": 0.06840411, "memory(GiB)": 13.7, "step": 81285, "train_speed(iter/s)": 1.527994 }, { "acc": 0.9822916, "epoch": 38.10171080384345, "grad_norm": 7.69797945022583, "learning_rate": 1.4686919860832703e-06, "loss": 0.05650755, "memory(GiB)": 13.7, "step": 81290, "train_speed(iter/s)": 1.527997 }, { "acc": 0.98362465, "epoch": 38.10405437075229, "grad_norm": 5.661579132080078, "learning_rate": 1.4681434196587566e-06, "loss": 0.07185341, "memory(GiB)": 13.7, "step": 81295, "train_speed(iter/s)": 1.527997 }, { "acc": 0.98874998, "epoch": 38.10639793766112, "grad_norm": 2.2632203102111816, "learning_rate": 1.4675949381404923e-06, "loss": 0.02127565, "memory(GiB)": 13.7, "step": 81300, "train_speed(iter/s)": 1.528003 }, { "acc": 0.98224211, "epoch": 38.108741504569956, "grad_norm": 7.129696846008301, "learning_rate": 1.4670465415416594e-06, "loss": 0.02399343, "memory(GiB)": 13.7, "step": 81305, "train_speed(iter/s)": 1.528009 }, { "acc": 0.99412775, "epoch": 38.11108507147879, "grad_norm": 2.0267374515533447, "learning_rate": 1.4664982298754374e-06, "loss": 0.0414877, "memory(GiB)": 13.7, "step": 81310, "train_speed(iter/s)": 1.528012 }, { "acc": 0.98654757, "epoch": 38.113428638387624, "grad_norm": 1.7457941770553589, "learning_rate": 1.4659500031550075e-06, "loss": 0.04016556, "memory(GiB)": 13.7, "step": 81315, "train_speed(iter/s)": 1.528012 }, { "acc": 0.98703375, "epoch": 38.11577220529646, "grad_norm": 3.114863157272339, "learning_rate": 1.4654018613935498e-06, "loss": 0.02780535, "memory(GiB)": 13.7, "step": 81320, "train_speed(iter/s)": 1.528013 }, { "acc": 0.9864584, "epoch": 38.11811577220529, "grad_norm": 2.7953901290893555, "learning_rate": 1.4648538046042363e-06, "loss": 0.04983265, "memory(GiB)": 13.7, "step": 81325, "train_speed(iter/s)": 1.528014 }, { "acc": 0.97629871, "epoch": 38.120459339114134, "grad_norm": 4.988153457641602, "learning_rate": 1.4643058328002408e-06, "loss": 0.07111159, "memory(GiB)": 13.7, "step": 81330, "train_speed(iter/s)": 1.528016 }, { "acc": 0.98779755, "epoch": 38.12280290602297, "grad_norm": 1.6799051761627197, "learning_rate": 1.4637579459947367e-06, "loss": 0.0262774, "memory(GiB)": 13.7, "step": 81335, "train_speed(iter/s)": 1.528017 }, { "acc": 0.97624998, "epoch": 38.1251464729318, "grad_norm": 1.8320780992507935, "learning_rate": 1.463210144200894e-06, "loss": 0.05685683, "memory(GiB)": 13.7, "step": 81340, "train_speed(iter/s)": 1.528016 }, { "acc": 0.97937498, "epoch": 38.12749003984064, "grad_norm": 4.032726764678955, "learning_rate": 1.4626624274318781e-06, "loss": 0.06035143, "memory(GiB)": 13.7, "step": 81345, "train_speed(iter/s)": 1.528019 }, { "acc": 0.99236107, "epoch": 38.12983360674947, "grad_norm": 2.647113561630249, "learning_rate": 1.4621147957008536e-06, "loss": 0.02840944, "memory(GiB)": 13.7, "step": 81350, "train_speed(iter/s)": 1.528021 }, { "acc": 0.97946424, "epoch": 38.132177173658306, "grad_norm": 1.9916222095489502, "learning_rate": 1.4615672490209846e-06, "loss": 0.03268613, "memory(GiB)": 13.7, "step": 81355, "train_speed(iter/s)": 1.528028 }, { "acc": 0.987257, "epoch": 38.13452074056714, "grad_norm": 4.42354679107666, "learning_rate": 1.4610197874054344e-06, "loss": 0.05702076, "memory(GiB)": 13.7, "step": 81360, "train_speed(iter/s)": 1.528027 }, { "acc": 0.96416664, "epoch": 38.13686430747598, "grad_norm": 3.525259256362915, "learning_rate": 1.4604724108673587e-06, "loss": 0.05561673, "memory(GiB)": 13.7, "step": 81365, "train_speed(iter/s)": 1.52803 }, { "acc": 0.98490524, "epoch": 38.139207874384816, "grad_norm": 2.2637832164764404, "learning_rate": 1.4599251194199165e-06, "loss": 0.05306748, "memory(GiB)": 13.7, "step": 81370, "train_speed(iter/s)": 1.52803 }, { "acc": 0.98598213, "epoch": 38.14155144129365, "grad_norm": 4.177961826324463, "learning_rate": 1.4593779130762643e-06, "loss": 0.03056783, "memory(GiB)": 13.7, "step": 81375, "train_speed(iter/s)": 1.528035 }, { "acc": 0.9958334, "epoch": 38.143895008202485, "grad_norm": 2.1706721782684326, "learning_rate": 1.4588307918495532e-06, "loss": 0.03761803, "memory(GiB)": 13.7, "step": 81380, "train_speed(iter/s)": 1.528034 }, { "acc": 0.9864584, "epoch": 38.14623857511132, "grad_norm": 6.321686744689941, "learning_rate": 1.4582837557529334e-06, "loss": 0.03460594, "memory(GiB)": 13.7, "step": 81385, "train_speed(iter/s)": 1.528041 }, { "acc": 0.9895834, "epoch": 38.14858214202015, "grad_norm": 5.76658821105957, "learning_rate": 1.4577368047995546e-06, "loss": 0.05386854, "memory(GiB)": 13.7, "step": 81390, "train_speed(iter/s)": 1.528044 }, { "acc": 0.98770828, "epoch": 38.15092570892899, "grad_norm": 3.490353584289551, "learning_rate": 1.4571899390025661e-06, "loss": 0.02276006, "memory(GiB)": 13.7, "step": 81395, "train_speed(iter/s)": 1.528046 }, { "acc": 0.9958333, "epoch": 38.15326927583782, "grad_norm": 4.544920444488525, "learning_rate": 1.4566431583751094e-06, "loss": 0.04121127, "memory(GiB)": 13.7, "step": 81400, "train_speed(iter/s)": 1.528048 }, { "acc": 0.978125, "epoch": 38.15561284274666, "grad_norm": 5.408450126647949, "learning_rate": 1.4560964629303285e-06, "loss": 0.05261116, "memory(GiB)": 13.7, "step": 81405, "train_speed(iter/s)": 1.52805 }, { "acc": 0.98500004, "epoch": 38.1579564096555, "grad_norm": 1.3473526239395142, "learning_rate": 1.4555498526813642e-06, "loss": 0.02902694, "memory(GiB)": 13.7, "step": 81410, "train_speed(iter/s)": 1.528053 }, { "acc": 0.97634802, "epoch": 38.16029997656433, "grad_norm": 4.538300037384033, "learning_rate": 1.455003327641357e-06, "loss": 0.05443369, "memory(GiB)": 13.7, "step": 81415, "train_speed(iter/s)": 1.52806 }, { "acc": 0.9848959, "epoch": 38.162643543473166, "grad_norm": 4.787582874298096, "learning_rate": 1.4544568878234405e-06, "loss": 0.0710777, "memory(GiB)": 13.7, "step": 81420, "train_speed(iter/s)": 1.528064 }, { "acc": 0.9927083, "epoch": 38.164987110382, "grad_norm": 0.5774582028388977, "learning_rate": 1.4539105332407521e-06, "loss": 0.04219226, "memory(GiB)": 13.7, "step": 81425, "train_speed(iter/s)": 1.528069 }, { "acc": 0.97791672, "epoch": 38.167330677290835, "grad_norm": 7.254391670227051, "learning_rate": 1.4533642639064218e-06, "loss": 0.03571033, "memory(GiB)": 13.7, "step": 81430, "train_speed(iter/s)": 1.528072 }, { "acc": 0.9882143, "epoch": 38.16967424419967, "grad_norm": 1.8595584630966187, "learning_rate": 1.4528180798335828e-06, "loss": 0.02684981, "memory(GiB)": 13.7, "step": 81435, "train_speed(iter/s)": 1.528073 }, { "acc": 0.98698864, "epoch": 38.17201781110851, "grad_norm": 3.6907052993774414, "learning_rate": 1.4522719810353606e-06, "loss": 0.02460863, "memory(GiB)": 13.7, "step": 81440, "train_speed(iter/s)": 1.528075 }, { "acc": 0.990625, "epoch": 38.174361378017345, "grad_norm": 3.3600261211395264, "learning_rate": 1.4517259675248829e-06, "loss": 0.02887588, "memory(GiB)": 13.7, "step": 81445, "train_speed(iter/s)": 1.528079 }, { "acc": 0.99105511, "epoch": 38.17670494492618, "grad_norm": 1.1325279474258423, "learning_rate": 1.451180039315276e-06, "loss": 0.03051148, "memory(GiB)": 13.7, "step": 81450, "train_speed(iter/s)": 1.528082 }, { "acc": 0.98770838, "epoch": 38.17904851183501, "grad_norm": 3.2540178298950195, "learning_rate": 1.4506341964196588e-06, "loss": 0.0403392, "memory(GiB)": 13.7, "step": 81455, "train_speed(iter/s)": 1.528083 }, { "acc": 0.99092264, "epoch": 38.18139207874385, "grad_norm": 2.2010366916656494, "learning_rate": 1.4500884388511532e-06, "loss": 0.02615277, "memory(GiB)": 13.7, "step": 81460, "train_speed(iter/s)": 1.528089 }, { "acc": 0.9901701, "epoch": 38.18373564565268, "grad_norm": 3.8497095108032227, "learning_rate": 1.44954276662288e-06, "loss": 0.04395348, "memory(GiB)": 13.7, "step": 81465, "train_speed(iter/s)": 1.528092 }, { "acc": 0.990625, "epoch": 38.186079212561516, "grad_norm": 4.541492938995361, "learning_rate": 1.4489971797479503e-06, "loss": 0.04230653, "memory(GiB)": 13.7, "step": 81470, "train_speed(iter/s)": 1.528096 }, { "acc": 0.9817709, "epoch": 38.18842277947035, "grad_norm": 3.0812034606933594, "learning_rate": 1.4484516782394827e-06, "loss": 0.0357163, "memory(GiB)": 13.7, "step": 81475, "train_speed(iter/s)": 1.528099 }, { "acc": 0.97145834, "epoch": 38.19076634637919, "grad_norm": 7.269381523132324, "learning_rate": 1.447906262110586e-06, "loss": 0.0588372, "memory(GiB)": 13.7, "step": 81480, "train_speed(iter/s)": 1.528099 }, { "acc": 0.96625004, "epoch": 38.193109913288026, "grad_norm": 3.0002200603485107, "learning_rate": 1.447360931374372e-06, "loss": 0.0512348, "memory(GiB)": 13.7, "step": 81485, "train_speed(iter/s)": 1.528101 }, { "acc": 0.99125004, "epoch": 38.19545348019686, "grad_norm": 0.2099957913160324, "learning_rate": 1.446815686043949e-06, "loss": 0.02218739, "memory(GiB)": 13.7, "step": 81490, "train_speed(iter/s)": 1.528102 }, { "acc": 0.98482141, "epoch": 38.197797047105695, "grad_norm": 6.811428070068359, "learning_rate": 1.446270526132421e-06, "loss": 0.04494549, "memory(GiB)": 13.7, "step": 81495, "train_speed(iter/s)": 1.528105 }, { "acc": 0.9885417, "epoch": 38.20014061401453, "grad_norm": 5.195834159851074, "learning_rate": 1.4457254516528927e-06, "loss": 0.03919728, "memory(GiB)": 13.7, "step": 81500, "train_speed(iter/s)": 1.528109 }, { "acc": 0.98291664, "epoch": 38.20248418092336, "grad_norm": 0.2839505970478058, "learning_rate": 1.4451804626184677e-06, "loss": 0.03164064, "memory(GiB)": 13.7, "step": 81505, "train_speed(iter/s)": 1.528115 }, { "acc": 0.98391752, "epoch": 38.2048277478322, "grad_norm": 4.924657821655273, "learning_rate": 1.4446355590422447e-06, "loss": 0.02495295, "memory(GiB)": 13.7, "step": 81510, "train_speed(iter/s)": 1.528121 }, { "acc": 0.98812504, "epoch": 38.20717131474104, "grad_norm": 2.827113389968872, "learning_rate": 1.4440907409373192e-06, "loss": 0.03823683, "memory(GiB)": 13.7, "step": 81515, "train_speed(iter/s)": 1.528123 }, { "acc": 0.9854166, "epoch": 38.20951488164987, "grad_norm": 2.673499822616577, "learning_rate": 1.4435460083167883e-06, "loss": 0.04244299, "memory(GiB)": 13.7, "step": 81520, "train_speed(iter/s)": 1.528125 }, { "acc": 0.9864584, "epoch": 38.21185844855871, "grad_norm": 4.3834228515625, "learning_rate": 1.443001361193748e-06, "loss": 0.03741291, "memory(GiB)": 13.7, "step": 81525, "train_speed(iter/s)": 1.528136 }, { "acc": 0.975947, "epoch": 38.21420201546754, "grad_norm": 4.053126335144043, "learning_rate": 1.4424567995812857e-06, "loss": 0.08436387, "memory(GiB)": 13.7, "step": 81530, "train_speed(iter/s)": 1.528134 }, { "acc": 0.9854167, "epoch": 38.216545582376376, "grad_norm": 1.9897778034210205, "learning_rate": 1.4419123234924925e-06, "loss": 0.06410694, "memory(GiB)": 13.7, "step": 81535, "train_speed(iter/s)": 1.52814 }, { "acc": 0.9890625, "epoch": 38.21888914928521, "grad_norm": 1.7246272563934326, "learning_rate": 1.4413679329404586e-06, "loss": 0.05495948, "memory(GiB)": 13.7, "step": 81540, "train_speed(iter/s)": 1.528141 }, { "acc": 0.98604164, "epoch": 38.221232716194045, "grad_norm": 2.5082502365112305, "learning_rate": 1.4408236279382648e-06, "loss": 0.02621009, "memory(GiB)": 13.7, "step": 81545, "train_speed(iter/s)": 1.528139 }, { "acc": 0.9544445, "epoch": 38.22357628310288, "grad_norm": 9.841611862182617, "learning_rate": 1.440279408498998e-06, "loss": 0.07163494, "memory(GiB)": 13.7, "step": 81550, "train_speed(iter/s)": 1.528142 }, { "acc": 0.98695507, "epoch": 38.22591985001172, "grad_norm": 0.9350124001502991, "learning_rate": 1.4397352746357368e-06, "loss": 0.03637677, "memory(GiB)": 13.7, "step": 81555, "train_speed(iter/s)": 1.528143 }, { "acc": 0.99083328, "epoch": 38.228263416920555, "grad_norm": 0.18804723024368286, "learning_rate": 1.4391912263615616e-06, "loss": 0.01948239, "memory(GiB)": 13.7, "step": 81560, "train_speed(iter/s)": 1.528147 }, { "acc": 0.98125, "epoch": 38.23060698382939, "grad_norm": 7.1179518699646, "learning_rate": 1.4386472636895512e-06, "loss": 0.04035696, "memory(GiB)": 13.7, "step": 81565, "train_speed(iter/s)": 1.528149 }, { "acc": 0.9864584, "epoch": 38.232950550738224, "grad_norm": 5.600637435913086, "learning_rate": 1.4381033866327769e-06, "loss": 0.03476108, "memory(GiB)": 13.7, "step": 81570, "train_speed(iter/s)": 1.528152 }, { "acc": 0.98657742, "epoch": 38.23529411764706, "grad_norm": 2.0396976470947266, "learning_rate": 1.437559595204314e-06, "loss": 0.02835925, "memory(GiB)": 13.7, "step": 81575, "train_speed(iter/s)": 1.528159 }, { "acc": 0.98291664, "epoch": 38.23763768455589, "grad_norm": 2.401557683944702, "learning_rate": 1.437015889417235e-06, "loss": 0.04933363, "memory(GiB)": 13.7, "step": 81580, "train_speed(iter/s)": 1.528158 }, { "acc": 0.97140865, "epoch": 38.23998125146473, "grad_norm": 3.137364387512207, "learning_rate": 1.4364722692846052e-06, "loss": 0.07690414, "memory(GiB)": 13.7, "step": 81585, "train_speed(iter/s)": 1.52816 }, { "acc": 0.98812504, "epoch": 38.24232481837357, "grad_norm": 3.0683979988098145, "learning_rate": 1.4359287348194952e-06, "loss": 0.02720903, "memory(GiB)": 13.7, "step": 81590, "train_speed(iter/s)": 1.528164 }, { "acc": 0.9856945, "epoch": 38.2446683852824, "grad_norm": 3.6526169776916504, "learning_rate": 1.4353852860349666e-06, "loss": 0.02775936, "memory(GiB)": 13.7, "step": 81595, "train_speed(iter/s)": 1.528168 }, { "acc": 0.98516026, "epoch": 38.24701195219124, "grad_norm": 1.8577444553375244, "learning_rate": 1.4348419229440841e-06, "loss": 0.04898819, "memory(GiB)": 13.7, "step": 81600, "train_speed(iter/s)": 1.528171 }, { "acc": 0.9807292, "epoch": 38.24935551910007, "grad_norm": 5.449644565582275, "learning_rate": 1.4342986455599064e-06, "loss": 0.05125754, "memory(GiB)": 13.7, "step": 81605, "train_speed(iter/s)": 1.528173 }, { "acc": 0.9958333, "epoch": 38.251699086008905, "grad_norm": 1.8705397844314575, "learning_rate": 1.433755453895494e-06, "loss": 0.02471104, "memory(GiB)": 13.7, "step": 81610, "train_speed(iter/s)": 1.528176 }, { "acc": 0.99187508, "epoch": 38.25404265291774, "grad_norm": 2.3680598735809326, "learning_rate": 1.4332123479639024e-06, "loss": 0.03556522, "memory(GiB)": 13.7, "step": 81615, "train_speed(iter/s)": 1.528177 }, { "acc": 0.99125004, "epoch": 38.256386219826574, "grad_norm": 2.7033238410949707, "learning_rate": 1.4326693277781879e-06, "loss": 0.01836917, "memory(GiB)": 13.7, "step": 81620, "train_speed(iter/s)": 1.528174 }, { "acc": 0.9854167, "epoch": 38.25872978673541, "grad_norm": 0.004840229172259569, "learning_rate": 1.4321263933514002e-06, "loss": 0.04058309, "memory(GiB)": 13.7, "step": 81625, "train_speed(iter/s)": 1.528172 }, { "acc": 0.9809227, "epoch": 38.26107335364425, "grad_norm": 1.105871319770813, "learning_rate": 1.4315835446965908e-06, "loss": 0.04274307, "memory(GiB)": 13.7, "step": 81630, "train_speed(iter/s)": 1.528175 }, { "acc": 0.99607029, "epoch": 38.263416920553084, "grad_norm": 0.0029188666958361864, "learning_rate": 1.43104078182681e-06, "loss": 0.02371926, "memory(GiB)": 13.7, "step": 81635, "train_speed(iter/s)": 1.528178 }, { "acc": 0.99437504, "epoch": 38.26576048746192, "grad_norm": 0.13609212636947632, "learning_rate": 1.430498104755102e-06, "loss": 0.02206425, "memory(GiB)": 13.7, "step": 81640, "train_speed(iter/s)": 1.528186 }, { "acc": 0.99229164, "epoch": 38.26810405437075, "grad_norm": 0.9839093685150146, "learning_rate": 1.4299555134945094e-06, "loss": 0.04108786, "memory(GiB)": 13.7, "step": 81645, "train_speed(iter/s)": 1.528185 }, { "acc": 0.99154758, "epoch": 38.27044762127959, "grad_norm": 4.402431011199951, "learning_rate": 1.4294130080580768e-06, "loss": 0.03639849, "memory(GiB)": 13.7, "step": 81650, "train_speed(iter/s)": 1.528194 }, { "acc": 0.98104162, "epoch": 38.27279118818842, "grad_norm": 5.842469215393066, "learning_rate": 1.4288705884588443e-06, "loss": 0.0433187, "memory(GiB)": 13.7, "step": 81655, "train_speed(iter/s)": 1.528199 }, { "acc": 0.99092255, "epoch": 38.275134755097255, "grad_norm": 0.918144702911377, "learning_rate": 1.4283282547098485e-06, "loss": 0.04658318, "memory(GiB)": 13.7, "step": 81660, "train_speed(iter/s)": 1.528204 }, { "acc": 0.97895832, "epoch": 38.2774783220061, "grad_norm": 6.610439300537109, "learning_rate": 1.4277860068241251e-06, "loss": 0.06352472, "memory(GiB)": 13.7, "step": 81665, "train_speed(iter/s)": 1.528207 }, { "acc": 0.9807291, "epoch": 38.27982188891493, "grad_norm": 4.317802429199219, "learning_rate": 1.427243844814711e-06, "loss": 0.03512018, "memory(GiB)": 13.7, "step": 81670, "train_speed(iter/s)": 1.528209 }, { "acc": 0.98738098, "epoch": 38.282165455823765, "grad_norm": 0.002784284995868802, "learning_rate": 1.4267017686946357e-06, "loss": 0.03822262, "memory(GiB)": 13.7, "step": 81675, "train_speed(iter/s)": 1.528213 }, { "acc": 0.98258934, "epoch": 38.2845090227326, "grad_norm": 2.036202907562256, "learning_rate": 1.4261597784769274e-06, "loss": 0.02782415, "memory(GiB)": 13.7, "step": 81680, "train_speed(iter/s)": 1.528209 }, { "acc": 0.98249998, "epoch": 38.286852589641434, "grad_norm": 2.59342098236084, "learning_rate": 1.4256178741746152e-06, "loss": 0.02923469, "memory(GiB)": 13.7, "step": 81685, "train_speed(iter/s)": 1.52821 }, { "acc": 0.98833332, "epoch": 38.28919615655027, "grad_norm": 0.004881892818957567, "learning_rate": 1.4250760558007248e-06, "loss": 0.02135432, "memory(GiB)": 13.7, "step": 81690, "train_speed(iter/s)": 1.528219 }, { "acc": 0.9967804, "epoch": 38.2915397234591, "grad_norm": 0.0006541712791658938, "learning_rate": 1.4245343233682818e-06, "loss": 0.0256386, "memory(GiB)": 13.7, "step": 81695, "train_speed(iter/s)": 1.528225 }, { "acc": 0.98562508, "epoch": 38.29388329036794, "grad_norm": 2.2225725650787354, "learning_rate": 1.4239926768903042e-06, "loss": 0.04763793, "memory(GiB)": 13.7, "step": 81700, "train_speed(iter/s)": 1.528224 }, { "acc": 0.9822917, "epoch": 38.29622685727678, "grad_norm": 0.0038294813130050898, "learning_rate": 1.4234511163798127e-06, "loss": 0.0551981, "memory(GiB)": 13.7, "step": 81705, "train_speed(iter/s)": 1.528226 }, { "acc": 0.9791667, "epoch": 38.29857042418561, "grad_norm": 3.971055030822754, "learning_rate": 1.4229096418498261e-06, "loss": 0.05439956, "memory(GiB)": 13.7, "step": 81710, "train_speed(iter/s)": 1.528224 }, { "acc": 0.9875, "epoch": 38.30091399109445, "grad_norm": 4.211096286773682, "learning_rate": 1.4223682533133586e-06, "loss": 0.04624031, "memory(GiB)": 13.7, "step": 81715, "train_speed(iter/s)": 1.528227 }, { "acc": 0.98145828, "epoch": 38.30325755800328, "grad_norm": 3.2561941146850586, "learning_rate": 1.421826950783422e-06, "loss": 0.04627583, "memory(GiB)": 13.7, "step": 81720, "train_speed(iter/s)": 1.528227 }, { "acc": 0.9864584, "epoch": 38.305601124912116, "grad_norm": 0.0073935482650995255, "learning_rate": 1.4212857342730283e-06, "loss": 0.02454642, "memory(GiB)": 13.7, "step": 81725, "train_speed(iter/s)": 1.528228 }, { "acc": 0.97854166, "epoch": 38.30794469182095, "grad_norm": 2.080420732498169, "learning_rate": 1.4207446037951885e-06, "loss": 0.07559694, "memory(GiB)": 13.7, "step": 81730, "train_speed(iter/s)": 1.528233 }, { "acc": 0.98249464, "epoch": 38.310288258729784, "grad_norm": 1.2930822372436523, "learning_rate": 1.4202035593629069e-06, "loss": 0.03406279, "memory(GiB)": 13.7, "step": 81735, "train_speed(iter/s)": 1.528236 }, { "acc": 0.9885417, "epoch": 38.31263182563862, "grad_norm": 0.00546384509652853, "learning_rate": 1.4196626009891892e-06, "loss": 0.01979038, "memory(GiB)": 13.7, "step": 81740, "train_speed(iter/s)": 1.52824 }, { "acc": 0.98038378, "epoch": 38.31497539254746, "grad_norm": 0.012092715129256248, "learning_rate": 1.4191217286870402e-06, "loss": 0.06111424, "memory(GiB)": 13.7, "step": 81745, "train_speed(iter/s)": 1.528244 }, { "acc": 0.98458338, "epoch": 38.317318959456294, "grad_norm": 7.499263286590576, "learning_rate": 1.4185809424694574e-06, "loss": 0.06352164, "memory(GiB)": 13.7, "step": 81750, "train_speed(iter/s)": 1.528249 }, { "acc": 0.9854166, "epoch": 38.31966252636513, "grad_norm": 2.2939884662628174, "learning_rate": 1.4180402423494416e-06, "loss": 0.03024091, "memory(GiB)": 13.7, "step": 81755, "train_speed(iter/s)": 1.528251 }, { "acc": 0.97217264, "epoch": 38.32200609327396, "grad_norm": 0.0012889900244772434, "learning_rate": 1.4174996283399902e-06, "loss": 0.06425674, "memory(GiB)": 13.7, "step": 81760, "train_speed(iter/s)": 1.528252 }, { "acc": 0.9916667, "epoch": 38.3243496601828, "grad_norm": 2.3363091945648193, "learning_rate": 1.4169591004540952e-06, "loss": 0.01842693, "memory(GiB)": 13.7, "step": 81765, "train_speed(iter/s)": 1.528255 }, { "acc": 0.99676476, "epoch": 38.32669322709163, "grad_norm": 2.960766315460205, "learning_rate": 1.4164186587047526e-06, "loss": 0.02033484, "memory(GiB)": 13.7, "step": 81770, "train_speed(iter/s)": 1.528257 }, { "acc": 0.9864584, "epoch": 38.329036794000466, "grad_norm": 0.3197784721851349, "learning_rate": 1.415878303104949e-06, "loss": 0.07004399, "memory(GiB)": 13.7, "step": 81775, "train_speed(iter/s)": 1.528261 }, { "acc": 0.98543015, "epoch": 38.33138036090931, "grad_norm": 3.3933777809143066, "learning_rate": 1.4153380336676742e-06, "loss": 0.06322561, "memory(GiB)": 13.7, "step": 81780, "train_speed(iter/s)": 1.528263 }, { "acc": 0.98552084, "epoch": 38.33372392781814, "grad_norm": 3.036658763885498, "learning_rate": 1.414797850405917e-06, "loss": 0.03694195, "memory(GiB)": 13.7, "step": 81785, "train_speed(iter/s)": 1.528266 }, { "acc": 0.97704258, "epoch": 38.336067494726976, "grad_norm": 4.127108573913574, "learning_rate": 1.414257753332658e-06, "loss": 0.03446358, "memory(GiB)": 13.7, "step": 81790, "train_speed(iter/s)": 1.528272 }, { "acc": 0.9895834, "epoch": 38.33841106163581, "grad_norm": 2.58023738861084, "learning_rate": 1.4137177424608803e-06, "loss": 0.02920705, "memory(GiB)": 13.7, "step": 81795, "train_speed(iter/s)": 1.528275 }, { "acc": 0.99437504, "epoch": 38.340754628544644, "grad_norm": 0.0011749062687158585, "learning_rate": 1.4131778178035666e-06, "loss": 0.02413306, "memory(GiB)": 13.7, "step": 81800, "train_speed(iter/s)": 1.528277 }, { "acc": 0.98093128, "epoch": 38.34309819545348, "grad_norm": 4.41975212097168, "learning_rate": 1.4126379793736928e-06, "loss": 0.08377699, "memory(GiB)": 13.7, "step": 81805, "train_speed(iter/s)": 1.528284 }, { "acc": 0.98222227, "epoch": 38.34544176236231, "grad_norm": 3.1066110134124756, "learning_rate": 1.412098227184233e-06, "loss": 0.0560639, "memory(GiB)": 13.7, "step": 81810, "train_speed(iter/s)": 1.528289 }, { "acc": 0.97405643, "epoch": 38.34778532927115, "grad_norm": 2.4639220237731934, "learning_rate": 1.4115585612481636e-06, "loss": 0.04895024, "memory(GiB)": 13.7, "step": 81815, "train_speed(iter/s)": 1.528295 }, { "acc": 0.98425598, "epoch": 38.35012889617999, "grad_norm": 5.085476398468018, "learning_rate": 1.4110189815784569e-06, "loss": 0.04642686, "memory(GiB)": 13.7, "step": 81820, "train_speed(iter/s)": 1.528297 }, { "acc": 0.99333334, "epoch": 38.35247246308882, "grad_norm": 2.093888998031616, "learning_rate": 1.41047948818808e-06, "loss": 0.01874011, "memory(GiB)": 13.7, "step": 81825, "train_speed(iter/s)": 1.5283 }, { "acc": 0.975, "epoch": 38.35481602999766, "grad_norm": 3.348637580871582, "learning_rate": 1.4099400810900026e-06, "loss": 0.06013114, "memory(GiB)": 13.7, "step": 81830, "train_speed(iter/s)": 1.528303 }, { "acc": 0.98299675, "epoch": 38.35715959690649, "grad_norm": 7.551735877990723, "learning_rate": 1.409400760297189e-06, "loss": 0.02969643, "memory(GiB)": 13.7, "step": 81835, "train_speed(iter/s)": 1.528301 }, { "acc": 0.99750004, "epoch": 38.359503163815326, "grad_norm": 2.7147998809814453, "learning_rate": 1.4088615258226054e-06, "loss": 0.01608339, "memory(GiB)": 13.7, "step": 81840, "train_speed(iter/s)": 1.528299 }, { "acc": 0.99125004, "epoch": 38.36184673072416, "grad_norm": 2.2493138313293457, "learning_rate": 1.4083223776792115e-06, "loss": 0.04307034, "memory(GiB)": 13.7, "step": 81845, "train_speed(iter/s)": 1.528299 }, { "acc": 0.98874998, "epoch": 38.364190297632994, "grad_norm": 1.017853856086731, "learning_rate": 1.4077833158799646e-06, "loss": 0.05258329, "memory(GiB)": 13.7, "step": 81850, "train_speed(iter/s)": 1.528299 }, { "acc": 0.9854167, "epoch": 38.366533864541836, "grad_norm": 4.440614223480225, "learning_rate": 1.4072443404378239e-06, "loss": 0.04404665, "memory(GiB)": 13.7, "step": 81855, "train_speed(iter/s)": 1.528299 }, { "acc": 0.97830353, "epoch": 38.36887743145067, "grad_norm": 2.253967046737671, "learning_rate": 1.4067054513657463e-06, "loss": 0.07236692, "memory(GiB)": 13.7, "step": 81860, "train_speed(iter/s)": 1.528301 }, { "acc": 0.97695513, "epoch": 38.371220998359505, "grad_norm": 4.577898979187012, "learning_rate": 1.406166648676681e-06, "loss": 0.06043273, "memory(GiB)": 13.7, "step": 81865, "train_speed(iter/s)": 1.5283 }, { "acc": 0.98625002, "epoch": 38.37356456526834, "grad_norm": 2.0680041313171387, "learning_rate": 1.4056279323835814e-06, "loss": 0.03268059, "memory(GiB)": 13.7, "step": 81870, "train_speed(iter/s)": 1.528305 }, { "acc": 0.99111109, "epoch": 38.37590813217717, "grad_norm": 3.2247660160064697, "learning_rate": 1.4050893024993978e-06, "loss": 0.05761075, "memory(GiB)": 13.7, "step": 81875, "train_speed(iter/s)": 1.528313 }, { "acc": 0.98916664, "epoch": 38.37825169908601, "grad_norm": 2.434049129486084, "learning_rate": 1.4045507590370739e-06, "loss": 0.04127226, "memory(GiB)": 13.7, "step": 81880, "train_speed(iter/s)": 1.528319 }, { "acc": 0.9770834, "epoch": 38.38059526599484, "grad_norm": 3.5255229473114014, "learning_rate": 1.4040123020095579e-06, "loss": 0.05721304, "memory(GiB)": 13.7, "step": 81885, "train_speed(iter/s)": 1.528318 }, { "acc": 0.99189482, "epoch": 38.382938832903676, "grad_norm": 0.002985661616548896, "learning_rate": 1.4034739314297887e-06, "loss": 0.02719571, "memory(GiB)": 13.7, "step": 81890, "train_speed(iter/s)": 1.528321 }, { "acc": 0.98178034, "epoch": 38.38528239981252, "grad_norm": 3.2058682441711426, "learning_rate": 1.4029356473107097e-06, "loss": 0.03308526, "memory(GiB)": 13.7, "step": 81895, "train_speed(iter/s)": 1.528317 }, { "acc": 0.98425598, "epoch": 38.38762596672135, "grad_norm": 2.5608997344970703, "learning_rate": 1.4023974496652602e-06, "loss": 0.04581203, "memory(GiB)": 13.7, "step": 81900, "train_speed(iter/s)": 1.528315 }, { "acc": 0.98944445, "epoch": 38.389969533630186, "grad_norm": 4.340097427368164, "learning_rate": 1.4018593385063734e-06, "loss": 0.03458042, "memory(GiB)": 13.7, "step": 81905, "train_speed(iter/s)": 1.528321 }, { "acc": 0.9864584, "epoch": 38.39231310053902, "grad_norm": 3.4441635608673096, "learning_rate": 1.4013213138469859e-06, "loss": 0.01779145, "memory(GiB)": 13.7, "step": 81910, "train_speed(iter/s)": 1.528321 }, { "acc": 0.98104172, "epoch": 38.394656667447855, "grad_norm": 3.3219552040100098, "learning_rate": 1.4007833757000314e-06, "loss": 0.03653817, "memory(GiB)": 13.7, "step": 81915, "train_speed(iter/s)": 1.528321 }, { "acc": 0.98291664, "epoch": 38.39700023435669, "grad_norm": 3.592132806777954, "learning_rate": 1.4002455240784369e-06, "loss": 0.06027504, "memory(GiB)": 13.7, "step": 81920, "train_speed(iter/s)": 1.528328 }, { "acc": 0.984375, "epoch": 38.39934380126552, "grad_norm": 3.329930543899536, "learning_rate": 1.3997077589951338e-06, "loss": 0.03987068, "memory(GiB)": 13.7, "step": 81925, "train_speed(iter/s)": 1.528332 }, { "acc": 0.98916664, "epoch": 38.401687368174365, "grad_norm": 1.0002126693725586, "learning_rate": 1.3991700804630454e-06, "loss": 0.01656625, "memory(GiB)": 13.7, "step": 81930, "train_speed(iter/s)": 1.528334 }, { "acc": 0.98354168, "epoch": 38.4040309350832, "grad_norm": 3.4660563468933105, "learning_rate": 1.3986324884950985e-06, "loss": 0.02741329, "memory(GiB)": 13.7, "step": 81935, "train_speed(iter/s)": 1.528334 }, { "acc": 0.9854167, "epoch": 38.40637450199203, "grad_norm": 1.5799554586410522, "learning_rate": 1.3980949831042119e-06, "loss": 0.04325534, "memory(GiB)": 13.7, "step": 81940, "train_speed(iter/s)": 1.528333 }, { "acc": 0.98529758, "epoch": 38.40871806890087, "grad_norm": 2.3031129837036133, "learning_rate": 1.3975575643033066e-06, "loss": 0.02915478, "memory(GiB)": 13.7, "step": 81945, "train_speed(iter/s)": 1.52834 }, { "acc": 0.99666672, "epoch": 38.4110616358097, "grad_norm": 0.8011402487754822, "learning_rate": 1.397020232105303e-06, "loss": 0.01855923, "memory(GiB)": 13.7, "step": 81950, "train_speed(iter/s)": 1.528344 }, { "acc": 0.97416124, "epoch": 38.413405202718536, "grad_norm": 4.864564895629883, "learning_rate": 1.3964829865231127e-06, "loss": 0.05358237, "memory(GiB)": 13.7, "step": 81955, "train_speed(iter/s)": 1.528347 }, { "acc": 0.98976192, "epoch": 38.41574876962737, "grad_norm": 2.1697163581848145, "learning_rate": 1.3959458275696518e-06, "loss": 0.03092412, "memory(GiB)": 13.7, "step": 81960, "train_speed(iter/s)": 1.528351 }, { "acc": 0.9669445, "epoch": 38.418092336536205, "grad_norm": 4.739574909210205, "learning_rate": 1.3954087552578325e-06, "loss": 0.07625809, "memory(GiB)": 13.7, "step": 81965, "train_speed(iter/s)": 1.528355 }, { "acc": 0.99375, "epoch": 38.420435903445046, "grad_norm": 2.4495320320129395, "learning_rate": 1.3948717696005618e-06, "loss": 0.05408648, "memory(GiB)": 13.7, "step": 81970, "train_speed(iter/s)": 1.528359 }, { "acc": 0.98338757, "epoch": 38.42277947035388, "grad_norm": 3.6834847927093506, "learning_rate": 1.3943348706107495e-06, "loss": 0.05326096, "memory(GiB)": 13.7, "step": 81975, "train_speed(iter/s)": 1.528362 }, { "acc": 0.9874053, "epoch": 38.425123037262715, "grad_norm": 3.2936673164367676, "learning_rate": 1.3937980583012981e-06, "loss": 0.05697483, "memory(GiB)": 13.7, "step": 81980, "train_speed(iter/s)": 1.528362 }, { "acc": 0.98395834, "epoch": 38.42746660417155, "grad_norm": 5.340664863586426, "learning_rate": 1.393261332685113e-06, "loss": 0.0333288, "memory(GiB)": 13.7, "step": 81985, "train_speed(iter/s)": 1.528366 }, { "acc": 0.9744792, "epoch": 38.42981017108038, "grad_norm": 4.447049617767334, "learning_rate": 1.392724693775096e-06, "loss": 0.05276399, "memory(GiB)": 13.7, "step": 81990, "train_speed(iter/s)": 1.528369 }, { "acc": 0.96592264, "epoch": 38.43215373798922, "grad_norm": 0.002060886938124895, "learning_rate": 1.3921881415841434e-06, "loss": 0.1172369, "memory(GiB)": 13.7, "step": 81995, "train_speed(iter/s)": 1.528372 }, { "acc": 0.98872471, "epoch": 38.43449730489805, "grad_norm": 2.304450035095215, "learning_rate": 1.3916516761251536e-06, "loss": 0.05343262, "memory(GiB)": 13.7, "step": 82000, "train_speed(iter/s)": 1.528376 }, { "acc": 0.996875, "epoch": 38.43684087180689, "grad_norm": 1.3836193084716797, "learning_rate": 1.3911152974110234e-06, "loss": 0.02743516, "memory(GiB)": 13.7, "step": 82005, "train_speed(iter/s)": 1.528381 }, { "acc": 0.99196434, "epoch": 38.43918443871573, "grad_norm": 3.8891894817352295, "learning_rate": 1.3905790054546437e-06, "loss": 0.04891923, "memory(GiB)": 13.7, "step": 82010, "train_speed(iter/s)": 1.528385 }, { "acc": 0.98071423, "epoch": 38.44152800562456, "grad_norm": 0.12481389194726944, "learning_rate": 1.3900428002689032e-06, "loss": 0.03320178, "memory(GiB)": 13.7, "step": 82015, "train_speed(iter/s)": 1.528387 }, { "acc": 0.98249998, "epoch": 38.443871572533396, "grad_norm": 4.557942867279053, "learning_rate": 1.3895066818666928e-06, "loss": 0.04487769, "memory(GiB)": 13.7, "step": 82020, "train_speed(iter/s)": 1.528392 }, { "acc": 0.98625002, "epoch": 38.44621513944223, "grad_norm": 4.4964070320129395, "learning_rate": 1.3889706502609e-06, "loss": 0.03160291, "memory(GiB)": 13.7, "step": 82025, "train_speed(iter/s)": 1.528394 }, { "acc": 0.9833333, "epoch": 38.448558706351065, "grad_norm": 0.07944728434085846, "learning_rate": 1.3884347054644067e-06, "loss": 0.05866126, "memory(GiB)": 13.7, "step": 82030, "train_speed(iter/s)": 1.528396 }, { "acc": 0.98101549, "epoch": 38.4509022732599, "grad_norm": 2.793994188308716, "learning_rate": 1.3878988474900965e-06, "loss": 0.05688047, "memory(GiB)": 13.7, "step": 82035, "train_speed(iter/s)": 1.528399 }, { "acc": 0.97135963, "epoch": 38.453245840168734, "grad_norm": 4.145923614501953, "learning_rate": 1.3873630763508495e-06, "loss": 0.06235715, "memory(GiB)": 13.7, "step": 82040, "train_speed(iter/s)": 1.528401 }, { "acc": 0.96744795, "epoch": 38.455589407077575, "grad_norm": 5.459146022796631, "learning_rate": 1.3868273920595454e-06, "loss": 0.0881773, "memory(GiB)": 13.7, "step": 82045, "train_speed(iter/s)": 1.528406 }, { "acc": 0.98690472, "epoch": 38.45793297398641, "grad_norm": 2.9042065143585205, "learning_rate": 1.3862917946290578e-06, "loss": 0.0292477, "memory(GiB)": 13.7, "step": 82050, "train_speed(iter/s)": 1.528414 }, { "acc": 0.9927083, "epoch": 38.460276540895244, "grad_norm": 2.853480100631714, "learning_rate": 1.3857562840722637e-06, "loss": 0.03064034, "memory(GiB)": 13.7, "step": 82055, "train_speed(iter/s)": 1.528417 }, { "acc": 0.98458328, "epoch": 38.46262010780408, "grad_norm": 1.0011656284332275, "learning_rate": 1.3852208604020312e-06, "loss": 0.04816249, "memory(GiB)": 13.7, "step": 82060, "train_speed(iter/s)": 1.528423 }, { "acc": 0.9788393, "epoch": 38.46496367471291, "grad_norm": 3.4814677238464355, "learning_rate": 1.3846855236312339e-06, "loss": 0.08724207, "memory(GiB)": 13.7, "step": 82065, "train_speed(iter/s)": 1.528425 }, { "acc": 0.97229166, "epoch": 38.46730724162175, "grad_norm": 1.6494842767715454, "learning_rate": 1.384150273772736e-06, "loss": 0.06696413, "memory(GiB)": 13.7, "step": 82070, "train_speed(iter/s)": 1.528429 }, { "acc": 0.9927083, "epoch": 38.46965080853058, "grad_norm": 0.009502805769443512, "learning_rate": 1.383615110839405e-06, "loss": 0.01951654, "memory(GiB)": 13.7, "step": 82075, "train_speed(iter/s)": 1.528439 }, { "acc": 0.98113976, "epoch": 38.47199437543942, "grad_norm": 5.6542887687683105, "learning_rate": 1.3830800348441055e-06, "loss": 0.05065851, "memory(GiB)": 13.7, "step": 82080, "train_speed(iter/s)": 1.528446 }, { "acc": 0.98624992, "epoch": 38.47433794234826, "grad_norm": 1.854024052619934, "learning_rate": 1.3825450457996966e-06, "loss": 0.02370788, "memory(GiB)": 13.7, "step": 82085, "train_speed(iter/s)": 1.528448 }, { "acc": 0.99375, "epoch": 38.47668150925709, "grad_norm": 0.01167316734790802, "learning_rate": 1.3820101437190392e-06, "loss": 0.03295985, "memory(GiB)": 13.7, "step": 82090, "train_speed(iter/s)": 1.528449 }, { "acc": 0.98291664, "epoch": 38.479025076165925, "grad_norm": 3.6428472995758057, "learning_rate": 1.3814753286149916e-06, "loss": 0.0301968, "memory(GiB)": 13.7, "step": 82095, "train_speed(iter/s)": 1.528448 }, { "acc": 0.9864583, "epoch": 38.48136864307476, "grad_norm": 2.529383420944214, "learning_rate": 1.380940600500407e-06, "loss": 0.03065749, "memory(GiB)": 13.7, "step": 82100, "train_speed(iter/s)": 1.528447 }, { "acc": 0.98245049, "epoch": 38.483712209983594, "grad_norm": 4.0214738845825195, "learning_rate": 1.3804059593881383e-06, "loss": 0.04700234, "memory(GiB)": 13.7, "step": 82105, "train_speed(iter/s)": 1.528453 }, { "acc": 0.9833334, "epoch": 38.48605577689243, "grad_norm": 2.4604711532592773, "learning_rate": 1.3798714052910376e-06, "loss": 0.02128453, "memory(GiB)": 13.7, "step": 82110, "train_speed(iter/s)": 1.528458 }, { "acc": 0.9874053, "epoch": 38.48839934380126, "grad_norm": 5.466770172119141, "learning_rate": 1.379336938221953e-06, "loss": 0.03381542, "memory(GiB)": 13.7, "step": 82115, "train_speed(iter/s)": 1.528462 }, { "acc": 0.990625, "epoch": 38.490742910710104, "grad_norm": 1.336670994758606, "learning_rate": 1.378802558193734e-06, "loss": 0.02548412, "memory(GiB)": 13.7, "step": 82120, "train_speed(iter/s)": 1.528462 }, { "acc": 0.97995186, "epoch": 38.49308647761894, "grad_norm": 3.958289384841919, "learning_rate": 1.378268265219222e-06, "loss": 0.05083936, "memory(GiB)": 13.7, "step": 82125, "train_speed(iter/s)": 1.52846 }, { "acc": 0.9979167, "epoch": 38.49543004452777, "grad_norm": 3.4985544681549072, "learning_rate": 1.37773405931126e-06, "loss": 0.0389933, "memory(GiB)": 13.7, "step": 82130, "train_speed(iter/s)": 1.528457 }, { "acc": 0.98916664, "epoch": 38.49777361143661, "grad_norm": 2.1745681762695312, "learning_rate": 1.3771999404826916e-06, "loss": 0.03842627, "memory(GiB)": 13.7, "step": 82135, "train_speed(iter/s)": 1.528461 }, { "acc": 0.98249998, "epoch": 38.50011717834544, "grad_norm": 4.023695468902588, "learning_rate": 1.3766659087463534e-06, "loss": 0.03358012, "memory(GiB)": 13.7, "step": 82140, "train_speed(iter/s)": 1.528467 }, { "acc": 0.98458328, "epoch": 38.502460745254275, "grad_norm": 4.4344024658203125, "learning_rate": 1.3761319641150802e-06, "loss": 0.04807087, "memory(GiB)": 13.7, "step": 82145, "train_speed(iter/s)": 1.528472 }, { "acc": 0.98000002, "epoch": 38.50480431216311, "grad_norm": 4.1798319816589355, "learning_rate": 1.375598106601707e-06, "loss": 0.0488335, "memory(GiB)": 13.7, "step": 82150, "train_speed(iter/s)": 1.528473 }, { "acc": 0.98102684, "epoch": 38.50714787907195, "grad_norm": 2.6309030055999756, "learning_rate": 1.3750643362190685e-06, "loss": 0.03464478, "memory(GiB)": 13.7, "step": 82155, "train_speed(iter/s)": 1.528473 }, { "acc": 0.99125004, "epoch": 38.509491445980785, "grad_norm": 3.1095495223999023, "learning_rate": 1.3745306529799906e-06, "loss": 0.04307396, "memory(GiB)": 13.7, "step": 82160, "train_speed(iter/s)": 1.528477 }, { "acc": 0.97104168, "epoch": 38.51183501288962, "grad_norm": 2.6328413486480713, "learning_rate": 1.3739970568973046e-06, "loss": 0.06506181, "memory(GiB)": 13.7, "step": 82165, "train_speed(iter/s)": 1.528477 }, { "acc": 0.9947773, "epoch": 38.514178579798454, "grad_norm": 0.7592701315879822, "learning_rate": 1.3734635479838365e-06, "loss": 0.01978191, "memory(GiB)": 13.7, "step": 82170, "train_speed(iter/s)": 1.528482 }, { "acc": 0.96930561, "epoch": 38.51652214670729, "grad_norm": 5.297979354858398, "learning_rate": 1.3729301262524072e-06, "loss": 0.06112364, "memory(GiB)": 13.7, "step": 82175, "train_speed(iter/s)": 1.528481 }, { "acc": 0.9916667, "epoch": 38.51886571361612, "grad_norm": 2.712984085083008, "learning_rate": 1.372396791715842e-06, "loss": 0.04017841, "memory(GiB)": 13.7, "step": 82180, "train_speed(iter/s)": 1.528491 }, { "acc": 0.97562504, "epoch": 38.52120928052496, "grad_norm": 2.3210020065307617, "learning_rate": 1.371863544386957e-06, "loss": 0.05051064, "memory(GiB)": 13.7, "step": 82185, "train_speed(iter/s)": 1.52849 }, { "acc": 0.98640881, "epoch": 38.52355284743379, "grad_norm": 1.9503237009048462, "learning_rate": 1.3713303842785714e-06, "loss": 0.02775739, "memory(GiB)": 13.7, "step": 82190, "train_speed(iter/s)": 1.528494 }, { "acc": 0.97800598, "epoch": 38.52589641434263, "grad_norm": 8.228500366210938, "learning_rate": 1.3707973114035023e-06, "loss": 0.07889855, "memory(GiB)": 13.7, "step": 82195, "train_speed(iter/s)": 1.528497 }, { "acc": 0.98708324, "epoch": 38.52823998125147, "grad_norm": 2.158250093460083, "learning_rate": 1.3702643257745592e-06, "loss": 0.02353634, "memory(GiB)": 13.7, "step": 82200, "train_speed(iter/s)": 1.528494 }, { "acc": 0.97967262, "epoch": 38.5305835481603, "grad_norm": 1.9402532577514648, "learning_rate": 1.3697314274045562e-06, "loss": 0.06366511, "memory(GiB)": 13.7, "step": 82205, "train_speed(iter/s)": 1.528502 }, { "acc": 0.98812504, "epoch": 38.532927115069135, "grad_norm": 3.9557979106903076, "learning_rate": 1.3691986163063034e-06, "loss": 0.04067304, "memory(GiB)": 13.7, "step": 82210, "train_speed(iter/s)": 1.528509 }, { "acc": 0.99187498, "epoch": 38.53527068197797, "grad_norm": 4.155774116516113, "learning_rate": 1.3686658924926035e-06, "loss": 0.02373497, "memory(GiB)": 13.7, "step": 82215, "train_speed(iter/s)": 1.528516 }, { "acc": 0.96948862, "epoch": 38.537614248886804, "grad_norm": 5.440205097198486, "learning_rate": 1.3681332559762664e-06, "loss": 0.06731202, "memory(GiB)": 13.7, "step": 82220, "train_speed(iter/s)": 1.528517 }, { "acc": 0.98458328, "epoch": 38.53995781579564, "grad_norm": 5.602909088134766, "learning_rate": 1.3676007067700909e-06, "loss": 0.05622293, "memory(GiB)": 13.7, "step": 82225, "train_speed(iter/s)": 1.528519 }, { "acc": 0.9822916, "epoch": 38.54230138270448, "grad_norm": 3.453277826309204, "learning_rate": 1.3670682448868808e-06, "loss": 0.048545, "memory(GiB)": 13.7, "step": 82230, "train_speed(iter/s)": 1.528522 }, { "acc": 0.96962795, "epoch": 38.544644949613314, "grad_norm": 5.2546491622924805, "learning_rate": 1.3665358703394318e-06, "loss": 0.05556952, "memory(GiB)": 13.7, "step": 82235, "train_speed(iter/s)": 1.528523 }, { "acc": 0.9947917, "epoch": 38.54698851652215, "grad_norm": 2.543274402618408, "learning_rate": 1.3660035831405424e-06, "loss": 0.00973684, "memory(GiB)": 13.7, "step": 82240, "train_speed(iter/s)": 1.528527 }, { "acc": 0.98249998, "epoch": 38.54933208343098, "grad_norm": 5.907589912414551, "learning_rate": 1.3654713833030078e-06, "loss": 0.05905172, "memory(GiB)": 13.7, "step": 82245, "train_speed(iter/s)": 1.528527 }, { "acc": 0.98145828, "epoch": 38.55167565033982, "grad_norm": 1.40354585647583, "learning_rate": 1.3649392708396176e-06, "loss": 0.02618329, "memory(GiB)": 13.7, "step": 82250, "train_speed(iter/s)": 1.528528 }, { "acc": 0.97811012, "epoch": 38.55401921724865, "grad_norm": 2.689409017562866, "learning_rate": 1.3644072457631637e-06, "loss": 0.03803481, "memory(GiB)": 13.7, "step": 82255, "train_speed(iter/s)": 1.528533 }, { "acc": 0.98999996, "epoch": 38.556362784157486, "grad_norm": 0.806061863899231, "learning_rate": 1.3638753080864357e-06, "loss": 0.0274003, "memory(GiB)": 13.7, "step": 82260, "train_speed(iter/s)": 1.528531 }, { "acc": 0.98625002, "epoch": 38.55870635106632, "grad_norm": 0.13627925515174866, "learning_rate": 1.3633434578222163e-06, "loss": 0.03157014, "memory(GiB)": 13.7, "step": 82265, "train_speed(iter/s)": 1.528531 }, { "acc": 0.9875, "epoch": 38.56104991797516, "grad_norm": 2.9419994354248047, "learning_rate": 1.3628116949832926e-06, "loss": 0.07098141, "memory(GiB)": 13.7, "step": 82270, "train_speed(iter/s)": 1.528533 }, { "acc": 0.98008928, "epoch": 38.563393484883996, "grad_norm": 3.788396120071411, "learning_rate": 1.3622800195824437e-06, "loss": 0.05812027, "memory(GiB)": 13.7, "step": 82275, "train_speed(iter/s)": 1.528534 }, { "acc": 0.98490524, "epoch": 38.56573705179283, "grad_norm": 3.6932361125946045, "learning_rate": 1.3617484316324508e-06, "loss": 0.03889033, "memory(GiB)": 13.7, "step": 82280, "train_speed(iter/s)": 1.528535 }, { "acc": 0.9875, "epoch": 38.568080618701664, "grad_norm": 3.1578290462493896, "learning_rate": 1.361216931146093e-06, "loss": 0.02879067, "memory(GiB)": 13.7, "step": 82285, "train_speed(iter/s)": 1.528533 }, { "acc": 0.9801136, "epoch": 38.5704241856105, "grad_norm": 2.759880781173706, "learning_rate": 1.3606855181361423e-06, "loss": 0.05000793, "memory(GiB)": 13.7, "step": 82290, "train_speed(iter/s)": 1.52853 }, { "acc": 0.96541672, "epoch": 38.57276775251933, "grad_norm": 3.7944414615631104, "learning_rate": 1.3601541926153745e-06, "loss": 0.07089097, "memory(GiB)": 13.7, "step": 82295, "train_speed(iter/s)": 1.528529 }, { "acc": 0.98497028, "epoch": 38.57511131942817, "grad_norm": 4.394069194793701, "learning_rate": 1.3596229545965614e-06, "loss": 0.03550733, "memory(GiB)": 13.7, "step": 82300, "train_speed(iter/s)": 1.528532 }, { "acc": 0.98173609, "epoch": 38.577454886337, "grad_norm": 2.914774179458618, "learning_rate": 1.3590918040924714e-06, "loss": 0.03207264, "memory(GiB)": 13.7, "step": 82305, "train_speed(iter/s)": 1.528534 }, { "acc": 0.98500004, "epoch": 38.57979845324584, "grad_norm": 4.26762056350708, "learning_rate": 1.3585607411158703e-06, "loss": 0.03359188, "memory(GiB)": 13.7, "step": 82310, "train_speed(iter/s)": 1.528537 }, { "acc": 0.98395834, "epoch": 38.58214202015468, "grad_norm": 4.400474548339844, "learning_rate": 1.3580297656795242e-06, "loss": 0.02197853, "memory(GiB)": 13.7, "step": 82315, "train_speed(iter/s)": 1.528538 }, { "acc": 0.99375, "epoch": 38.58448558706351, "grad_norm": 0.951537549495697, "learning_rate": 1.3574988777961972e-06, "loss": 0.03340909, "memory(GiB)": 13.7, "step": 82320, "train_speed(iter/s)": 1.528542 }, { "acc": 0.99236107, "epoch": 38.586829153972346, "grad_norm": 0.0008907992742024362, "learning_rate": 1.3569680774786478e-06, "loss": 0.0238743, "memory(GiB)": 13.7, "step": 82325, "train_speed(iter/s)": 1.528541 }, { "acc": 0.99229164, "epoch": 38.58917272088118, "grad_norm": 1.116405963897705, "learning_rate": 1.3564373647396358e-06, "loss": 0.03982691, "memory(GiB)": 13.7, "step": 82330, "train_speed(iter/s)": 1.528546 }, { "acc": 0.99291668, "epoch": 38.591516287790014, "grad_norm": 3.056100606918335, "learning_rate": 1.3559067395919178e-06, "loss": 0.02140564, "memory(GiB)": 13.7, "step": 82335, "train_speed(iter/s)": 1.528551 }, { "acc": 0.97508926, "epoch": 38.59385985469885, "grad_norm": 4.485779285430908, "learning_rate": 1.3553762020482502e-06, "loss": 0.05538381, "memory(GiB)": 13.7, "step": 82340, "train_speed(iter/s)": 1.528554 }, { "acc": 0.98999996, "epoch": 38.59620342160769, "grad_norm": 1.4362633228302002, "learning_rate": 1.354845752121383e-06, "loss": 0.02168613, "memory(GiB)": 13.7, "step": 82345, "train_speed(iter/s)": 1.528558 }, { "acc": 0.99048615, "epoch": 38.598546988516524, "grad_norm": 2.5059802532196045, "learning_rate": 1.3543153898240658e-06, "loss": 0.02381159, "memory(GiB)": 13.7, "step": 82350, "train_speed(iter/s)": 1.52856 }, { "acc": 0.97451468, "epoch": 38.60089055542536, "grad_norm": 3.6371283531188965, "learning_rate": 1.3537851151690478e-06, "loss": 0.04848357, "memory(GiB)": 13.7, "step": 82355, "train_speed(iter/s)": 1.528564 }, { "acc": 0.97624998, "epoch": 38.60323412233419, "grad_norm": 1.5466725826263428, "learning_rate": 1.353254928169077e-06, "loss": 0.05081712, "memory(GiB)": 13.7, "step": 82360, "train_speed(iter/s)": 1.528563 }, { "acc": 0.990625, "epoch": 38.60557768924303, "grad_norm": 4.649562835693359, "learning_rate": 1.352724828836894e-06, "loss": 0.01838172, "memory(GiB)": 13.7, "step": 82365, "train_speed(iter/s)": 1.528566 }, { "acc": 0.98601189, "epoch": 38.60792125615186, "grad_norm": 0.01972159743309021, "learning_rate": 1.3521948171852426e-06, "loss": 0.03762934, "memory(GiB)": 13.7, "step": 82370, "train_speed(iter/s)": 1.528565 }, { "acc": 0.9875, "epoch": 38.610264823060696, "grad_norm": 1.2295852899551392, "learning_rate": 1.3516648932268635e-06, "loss": 0.02665873, "memory(GiB)": 13.7, "step": 82375, "train_speed(iter/s)": 1.528566 }, { "acc": 0.98047085, "epoch": 38.61260838996953, "grad_norm": 2.2037978172302246, "learning_rate": 1.3511350569744913e-06, "loss": 0.04802778, "memory(GiB)": 13.7, "step": 82380, "train_speed(iter/s)": 1.528559 }, { "acc": 0.9848959, "epoch": 38.61495195687837, "grad_norm": 0.008492483757436275, "learning_rate": 1.3506053084408633e-06, "loss": 0.0254758, "memory(GiB)": 13.7, "step": 82385, "train_speed(iter/s)": 1.528558 }, { "acc": 0.98373508, "epoch": 38.617295523787206, "grad_norm": 3.2506000995635986, "learning_rate": 1.3500756476387144e-06, "loss": 0.03919734, "memory(GiB)": 13.7, "step": 82390, "train_speed(iter/s)": 1.528559 }, { "acc": 0.99115534, "epoch": 38.61963909069604, "grad_norm": 1.0703495740890503, "learning_rate": 1.3495460745807726e-06, "loss": 0.02361161, "memory(GiB)": 13.7, "step": 82395, "train_speed(iter/s)": 1.528561 }, { "acc": 0.98036709, "epoch": 38.621982657604875, "grad_norm": 0.14764273166656494, "learning_rate": 1.349016589279771e-06, "loss": 0.03933566, "memory(GiB)": 13.7, "step": 82400, "train_speed(iter/s)": 1.528562 }, { "acc": 0.98270836, "epoch": 38.62432622451371, "grad_norm": 0.9385817050933838, "learning_rate": 1.3484871917484325e-06, "loss": 0.05108683, "memory(GiB)": 13.7, "step": 82405, "train_speed(iter/s)": 1.528563 }, { "acc": 0.98937502, "epoch": 38.62666979142254, "grad_norm": 2.4024789333343506, "learning_rate": 1.3479578819994843e-06, "loss": 0.01284394, "memory(GiB)": 13.7, "step": 82410, "train_speed(iter/s)": 1.528571 }, { "acc": 0.98895836, "epoch": 38.62901335833138, "grad_norm": 0.02644130028784275, "learning_rate": 1.3474286600456506e-06, "loss": 0.04285644, "memory(GiB)": 13.7, "step": 82415, "train_speed(iter/s)": 1.528573 }, { "acc": 0.9916667, "epoch": 38.63135692524022, "grad_norm": 3.051933526992798, "learning_rate": 1.3468995258996488e-06, "loss": 0.03316273, "memory(GiB)": 13.7, "step": 82420, "train_speed(iter/s)": 1.52858 }, { "acc": 0.98855114, "epoch": 38.63370049214905, "grad_norm": 1.4793481826782227, "learning_rate": 1.3463704795742e-06, "loss": 0.06173778, "memory(GiB)": 13.7, "step": 82425, "train_speed(iter/s)": 1.528583 }, { "acc": 0.9729166, "epoch": 38.63604405905789, "grad_norm": 4.20890474319458, "learning_rate": 1.345841521082021e-06, "loss": 0.0651988, "memory(GiB)": 13.7, "step": 82430, "train_speed(iter/s)": 1.528587 }, { "acc": 0.98402786, "epoch": 38.63838762596672, "grad_norm": 0.0011537341633811593, "learning_rate": 1.3453126504358258e-06, "loss": 0.05694556, "memory(GiB)": 13.7, "step": 82435, "train_speed(iter/s)": 1.528591 }, { "acc": 0.98279762, "epoch": 38.640731192875556, "grad_norm": 1.1291754245758057, "learning_rate": 1.3447838676483246e-06, "loss": 0.04249589, "memory(GiB)": 13.7, "step": 82440, "train_speed(iter/s)": 1.528598 }, { "acc": 0.98351192, "epoch": 38.64307475978439, "grad_norm": 5.135313987731934, "learning_rate": 1.3442551727322295e-06, "loss": 0.07765449, "memory(GiB)": 13.7, "step": 82445, "train_speed(iter/s)": 1.528602 }, { "acc": 0.97654762, "epoch": 38.645418326693225, "grad_norm": 2.4704954624176025, "learning_rate": 1.3437265657002494e-06, "loss": 0.06414936, "memory(GiB)": 13.7, "step": 82450, "train_speed(iter/s)": 1.528611 }, { "acc": 0.98687506, "epoch": 38.64776189360206, "grad_norm": 0.6087062358856201, "learning_rate": 1.3431980465650877e-06, "loss": 0.02927693, "memory(GiB)": 13.7, "step": 82455, "train_speed(iter/s)": 1.528612 }, { "acc": 0.98145828, "epoch": 38.6501054605109, "grad_norm": 4.354190826416016, "learning_rate": 1.3426696153394502e-06, "loss": 0.06054151, "memory(GiB)": 13.7, "step": 82460, "train_speed(iter/s)": 1.528615 }, { "acc": 0.98458328, "epoch": 38.652449027419735, "grad_norm": 1.6143351793289185, "learning_rate": 1.3421412720360383e-06, "loss": 0.03375767, "memory(GiB)": 13.7, "step": 82465, "train_speed(iter/s)": 1.528623 }, { "acc": 0.98202381, "epoch": 38.65479259432857, "grad_norm": 2.6379361152648926, "learning_rate": 1.3416130166675524e-06, "loss": 0.03392087, "memory(GiB)": 13.7, "step": 82470, "train_speed(iter/s)": 1.528626 }, { "acc": 0.98445511, "epoch": 38.6571361612374, "grad_norm": 6.198790550231934, "learning_rate": 1.3410848492466896e-06, "loss": 0.03745994, "memory(GiB)": 13.7, "step": 82475, "train_speed(iter/s)": 1.528625 }, { "acc": 0.9864584, "epoch": 38.65947972814624, "grad_norm": 4.471189022064209, "learning_rate": 1.3405567697861439e-06, "loss": 0.05342391, "memory(GiB)": 13.7, "step": 82480, "train_speed(iter/s)": 1.528624 }, { "acc": 0.98684206, "epoch": 38.66182329505507, "grad_norm": 5.778988838195801, "learning_rate": 1.3400287782986088e-06, "loss": 0.04477174, "memory(GiB)": 13.7, "step": 82485, "train_speed(iter/s)": 1.528622 }, { "acc": 0.98923607, "epoch": 38.664166861963906, "grad_norm": 1.9850409030914307, "learning_rate": 1.3395008747967783e-06, "loss": 0.03029082, "memory(GiB)": 13.7, "step": 82490, "train_speed(iter/s)": 1.528627 }, { "acc": 0.97319441, "epoch": 38.66651042887275, "grad_norm": 4.242669582366943, "learning_rate": 1.3389730592933381e-06, "loss": 0.06571655, "memory(GiB)": 13.7, "step": 82495, "train_speed(iter/s)": 1.528629 }, { "acc": 0.96029768, "epoch": 38.66885399578158, "grad_norm": 4.423799514770508, "learning_rate": 1.338445331800976e-06, "loss": 0.05876222, "memory(GiB)": 13.7, "step": 82500, "train_speed(iter/s)": 1.528637 }, { "acc": 0.97717257, "epoch": 38.671197562690416, "grad_norm": 4.117405891418457, "learning_rate": 1.3379176923323788e-06, "loss": 0.04395943, "memory(GiB)": 13.7, "step": 82505, "train_speed(iter/s)": 1.528638 }, { "acc": 0.98214121, "epoch": 38.67354112959925, "grad_norm": 2.5574488639831543, "learning_rate": 1.3373901409002267e-06, "loss": 0.03359697, "memory(GiB)": 13.7, "step": 82510, "train_speed(iter/s)": 1.528642 }, { "acc": 0.9895833, "epoch": 38.675884696508085, "grad_norm": 1.803928017616272, "learning_rate": 1.336862677517203e-06, "loss": 0.02555314, "memory(GiB)": 13.7, "step": 82515, "train_speed(iter/s)": 1.528644 }, { "acc": 0.9802084, "epoch": 38.67822826341692, "grad_norm": 5.898472309112549, "learning_rate": 1.3363353021959826e-06, "loss": 0.0420715, "memory(GiB)": 13.7, "step": 82520, "train_speed(iter/s)": 1.528648 }, { "acc": 0.98946428, "epoch": 38.68057183032575, "grad_norm": 2.6863863468170166, "learning_rate": 1.3358080149492453e-06, "loss": 0.02771561, "memory(GiB)": 13.7, "step": 82525, "train_speed(iter/s)": 1.528649 }, { "acc": 0.97375002, "epoch": 38.68291539723459, "grad_norm": 4.998347282409668, "learning_rate": 1.3352808157896625e-06, "loss": 0.08300391, "memory(GiB)": 13.7, "step": 82530, "train_speed(iter/s)": 1.528652 }, { "acc": 0.98312492, "epoch": 38.68525896414343, "grad_norm": 2.4530582427978516, "learning_rate": 1.3347537047299079e-06, "loss": 0.04889428, "memory(GiB)": 13.7, "step": 82535, "train_speed(iter/s)": 1.528657 }, { "acc": 0.9916667, "epoch": 38.68760253105226, "grad_norm": 0.5618948340415955, "learning_rate": 1.334226681782651e-06, "loss": 0.02490315, "memory(GiB)": 13.7, "step": 82540, "train_speed(iter/s)": 1.528665 }, { "acc": 0.990625, "epoch": 38.6899460979611, "grad_norm": 1.2572206258773804, "learning_rate": 1.333699746960562e-06, "loss": 0.05306781, "memory(GiB)": 13.7, "step": 82545, "train_speed(iter/s)": 1.528668 }, { "acc": 0.990625, "epoch": 38.69228966486993, "grad_norm": 0.00485074520111084, "learning_rate": 1.333172900276303e-06, "loss": 0.03529921, "memory(GiB)": 13.7, "step": 82550, "train_speed(iter/s)": 1.528668 }, { "acc": 0.99541664, "epoch": 38.69463323177877, "grad_norm": 1.0815036296844482, "learning_rate": 1.3326461417425408e-06, "loss": 0.02515069, "memory(GiB)": 13.7, "step": 82555, "train_speed(iter/s)": 1.528666 }, { "acc": 0.9895833, "epoch": 38.6969767986876, "grad_norm": 0.577903687953949, "learning_rate": 1.3321194713719338e-06, "loss": 0.03320241, "memory(GiB)": 13.7, "step": 82560, "train_speed(iter/s)": 1.528668 }, { "acc": 0.97550602, "epoch": 38.699320365596435, "grad_norm": 5.024813652038574, "learning_rate": 1.3315928891771445e-06, "loss": 0.07857411, "memory(GiB)": 13.7, "step": 82565, "train_speed(iter/s)": 1.528676 }, { "acc": 0.990625, "epoch": 38.70166393250528, "grad_norm": 0.0076965875923633575, "learning_rate": 1.331066395170828e-06, "loss": 0.02183868, "memory(GiB)": 13.7, "step": 82570, "train_speed(iter/s)": 1.52868 }, { "acc": 0.99020834, "epoch": 38.70400749941411, "grad_norm": 2.333561658859253, "learning_rate": 1.33053998936564e-06, "loss": 0.04703014, "memory(GiB)": 13.7, "step": 82575, "train_speed(iter/s)": 1.528681 }, { "acc": 0.98406248, "epoch": 38.706351066322945, "grad_norm": 2.108485460281372, "learning_rate": 1.3300136717742357e-06, "loss": 0.02453536, "memory(GiB)": 13.7, "step": 82580, "train_speed(iter/s)": 1.528682 }, { "acc": 0.98566923, "epoch": 38.70869463323178, "grad_norm": 3.695206880569458, "learning_rate": 1.329487442409262e-06, "loss": 0.04415333, "memory(GiB)": 13.7, "step": 82585, "train_speed(iter/s)": 1.528684 }, { "acc": 0.97369051, "epoch": 38.711038200140614, "grad_norm": 2.7655932903289795, "learning_rate": 1.3289613012833704e-06, "loss": 0.04124835, "memory(GiB)": 13.7, "step": 82590, "train_speed(iter/s)": 1.528691 }, { "acc": 0.99048615, "epoch": 38.71338176704945, "grad_norm": 2.0898516178131104, "learning_rate": 1.3284352484092088e-06, "loss": 0.02518309, "memory(GiB)": 13.7, "step": 82595, "train_speed(iter/s)": 1.528693 }, { "acc": 0.98809528, "epoch": 38.71572533395828, "grad_norm": 1.2751833200454712, "learning_rate": 1.32790928379942e-06, "loss": 0.03280249, "memory(GiB)": 13.7, "step": 82600, "train_speed(iter/s)": 1.528694 }, { "acc": 0.9947917, "epoch": 38.71806890086712, "grad_norm": 2.101029396057129, "learning_rate": 1.3273834074666455e-06, "loss": 0.04360506, "memory(GiB)": 13.7, "step": 82605, "train_speed(iter/s)": 1.528696 }, { "acc": 0.98988094, "epoch": 38.72041246777596, "grad_norm": 4.4522294998168945, "learning_rate": 1.3268576194235263e-06, "loss": 0.05023926, "memory(GiB)": 13.7, "step": 82610, "train_speed(iter/s)": 1.528697 }, { "acc": 0.9916666, "epoch": 38.72275603468479, "grad_norm": 5.390146732330322, "learning_rate": 1.3263319196827013e-06, "loss": 0.01769782, "memory(GiB)": 13.7, "step": 82615, "train_speed(iter/s)": 1.528693 }, { "acc": 0.98174677, "epoch": 38.72509960159363, "grad_norm": 6.052328586578369, "learning_rate": 1.3258063082568082e-06, "loss": 0.02950527, "memory(GiB)": 13.7, "step": 82620, "train_speed(iter/s)": 1.528698 }, { "acc": 0.98812504, "epoch": 38.72744316850246, "grad_norm": 4.143552303314209, "learning_rate": 1.3252807851584768e-06, "loss": 0.0406232, "memory(GiB)": 13.7, "step": 82625, "train_speed(iter/s)": 1.528701 }, { "acc": 0.98550053, "epoch": 38.729786735411295, "grad_norm": 2.7545382976531982, "learning_rate": 1.324755350400342e-06, "loss": 0.04577406, "memory(GiB)": 13.7, "step": 82630, "train_speed(iter/s)": 1.528708 }, { "acc": 0.9770833, "epoch": 38.73213030232013, "grad_norm": 6.369962215423584, "learning_rate": 1.324230003995034e-06, "loss": 0.05452682, "memory(GiB)": 13.7, "step": 82635, "train_speed(iter/s)": 1.528709 }, { "acc": 0.98041668, "epoch": 38.734473869228964, "grad_norm": 3.1685574054718018, "learning_rate": 1.323704745955179e-06, "loss": 0.03862802, "memory(GiB)": 13.7, "step": 82640, "train_speed(iter/s)": 1.528713 }, { "acc": 0.98482141, "epoch": 38.7368174361378, "grad_norm": 3.306358575820923, "learning_rate": 1.3231795762934015e-06, "loss": 0.04102492, "memory(GiB)": 13.7, "step": 82645, "train_speed(iter/s)": 1.528716 }, { "acc": 0.97458324, "epoch": 38.73916100304664, "grad_norm": 5.08676290512085, "learning_rate": 1.3226544950223266e-06, "loss": 0.11564116, "memory(GiB)": 13.7, "step": 82650, "train_speed(iter/s)": 1.528723 }, { "acc": 0.98032198, "epoch": 38.741504569955474, "grad_norm": 5.072739124298096, "learning_rate": 1.3221295021545756e-06, "loss": 0.07481995, "memory(GiB)": 13.7, "step": 82655, "train_speed(iter/s)": 1.528725 }, { "acc": 0.97416668, "epoch": 38.74384813686431, "grad_norm": 3.8594772815704346, "learning_rate": 1.3216045977027661e-06, "loss": 0.06477955, "memory(GiB)": 13.7, "step": 82660, "train_speed(iter/s)": 1.528729 }, { "acc": 0.97312498, "epoch": 38.74619170377314, "grad_norm": 3.15809965133667, "learning_rate": 1.3210797816795154e-06, "loss": 0.04569879, "memory(GiB)": 13.7, "step": 82665, "train_speed(iter/s)": 1.528733 }, { "acc": 0.990625, "epoch": 38.74853527068198, "grad_norm": 0.013567842543125153, "learning_rate": 1.320555054097441e-06, "loss": 0.02282664, "memory(GiB)": 13.7, "step": 82670, "train_speed(iter/s)": 1.528737 }, { "acc": 0.9793993, "epoch": 38.75087883759081, "grad_norm": 4.138471603393555, "learning_rate": 1.3200304149691514e-06, "loss": 0.08183435, "memory(GiB)": 13.7, "step": 82675, "train_speed(iter/s)": 1.528735 }, { "acc": 0.99375, "epoch": 38.753222404499645, "grad_norm": 4.311711311340332, "learning_rate": 1.3195058643072613e-06, "loss": 0.01921461, "memory(GiB)": 13.7, "step": 82680, "train_speed(iter/s)": 1.528735 }, { "acc": 0.98125, "epoch": 38.75556597140849, "grad_norm": 5.156027317047119, "learning_rate": 1.3189814021243759e-06, "loss": 0.04083314, "memory(GiB)": 13.7, "step": 82685, "train_speed(iter/s)": 1.528736 }, { "acc": 0.99750004, "epoch": 38.75790953831732, "grad_norm": 1.455239176750183, "learning_rate": 1.3184570284331025e-06, "loss": 0.0251886, "memory(GiB)": 13.7, "step": 82690, "train_speed(iter/s)": 1.528744 }, { "acc": 0.97520838, "epoch": 38.760253105226155, "grad_norm": 5.091747283935547, "learning_rate": 1.317932743246047e-06, "loss": 0.0692048, "memory(GiB)": 13.7, "step": 82695, "train_speed(iter/s)": 1.528749 }, { "acc": 0.99020834, "epoch": 38.76259667213499, "grad_norm": 0.9668415784835815, "learning_rate": 1.3174085465758085e-06, "loss": 0.0187315, "memory(GiB)": 13.7, "step": 82700, "train_speed(iter/s)": 1.528755 }, { "acc": 0.98708334, "epoch": 38.764940239043824, "grad_norm": 2.871066093444824, "learning_rate": 1.316884438434989e-06, "loss": 0.02561024, "memory(GiB)": 13.7, "step": 82705, "train_speed(iter/s)": 1.528758 }, { "acc": 0.9723959, "epoch": 38.76728380595266, "grad_norm": 8.23238468170166, "learning_rate": 1.3163604188361882e-06, "loss": 0.04532005, "memory(GiB)": 13.7, "step": 82710, "train_speed(iter/s)": 1.528764 }, { "acc": 0.96486111, "epoch": 38.76962737286149, "grad_norm": 6.083672523498535, "learning_rate": 1.315836487791997e-06, "loss": 0.05477163, "memory(GiB)": 13.7, "step": 82715, "train_speed(iter/s)": 1.52877 }, { "acc": 0.99571428, "epoch": 38.77197093977033, "grad_norm": 1.7736537456512451, "learning_rate": 1.3153126453150121e-06, "loss": 0.01245664, "memory(GiB)": 13.7, "step": 82720, "train_speed(iter/s)": 1.528769 }, { "acc": 0.97336311, "epoch": 38.77431450667917, "grad_norm": 3.0400595664978027, "learning_rate": 1.3147888914178258e-06, "loss": 0.08054497, "memory(GiB)": 13.7, "step": 82725, "train_speed(iter/s)": 1.528769 }, { "acc": 0.98779764, "epoch": 38.776658073588, "grad_norm": 4.730258464813232, "learning_rate": 1.3142652261130265e-06, "loss": 0.03668678, "memory(GiB)": 13.7, "step": 82730, "train_speed(iter/s)": 1.52877 }, { "acc": 0.98240528, "epoch": 38.77900164049684, "grad_norm": 0.11677023768424988, "learning_rate": 1.3137416494131992e-06, "loss": 0.03721306, "memory(GiB)": 13.7, "step": 82735, "train_speed(iter/s)": 1.528776 }, { "acc": 0.98145828, "epoch": 38.78134520740567, "grad_norm": 3.925583839416504, "learning_rate": 1.3132181613309304e-06, "loss": 0.03924202, "memory(GiB)": 13.7, "step": 82740, "train_speed(iter/s)": 1.528779 }, { "acc": 1.0, "epoch": 38.783688774314506, "grad_norm": 1.5392905473709106, "learning_rate": 1.312694761878805e-06, "loss": 0.00894959, "memory(GiB)": 13.7, "step": 82745, "train_speed(iter/s)": 1.528778 }, { "acc": 0.974473, "epoch": 38.78603234122334, "grad_norm": 5.964343547821045, "learning_rate": 1.3121714510694004e-06, "loss": 0.0329615, "memory(GiB)": 13.7, "step": 82750, "train_speed(iter/s)": 1.528783 }, { "acc": 0.98113098, "epoch": 38.788375908132174, "grad_norm": 4.333489894866943, "learning_rate": 1.3116482289152971e-06, "loss": 0.06528432, "memory(GiB)": 13.7, "step": 82755, "train_speed(iter/s)": 1.528789 }, { "acc": 0.99821434, "epoch": 38.790719475041016, "grad_norm": 0.7090784907341003, "learning_rate": 1.3111250954290723e-06, "loss": 0.00888559, "memory(GiB)": 13.7, "step": 82760, "train_speed(iter/s)": 1.528792 }, { "acc": 0.98663692, "epoch": 38.79306304194985, "grad_norm": 3.387407064437866, "learning_rate": 1.3106020506233002e-06, "loss": 0.04087491, "memory(GiB)": 13.7, "step": 82765, "train_speed(iter/s)": 1.528792 }, { "acc": 0.9864583, "epoch": 38.795406608858684, "grad_norm": 4.130530834197998, "learning_rate": 1.3100790945105526e-06, "loss": 0.02051593, "memory(GiB)": 13.7, "step": 82770, "train_speed(iter/s)": 1.528792 }, { "acc": 0.98562756, "epoch": 38.79775017576752, "grad_norm": 4.949271202087402, "learning_rate": 1.3095562271033984e-06, "loss": 0.0373419, "memory(GiB)": 13.7, "step": 82775, "train_speed(iter/s)": 1.528798 }, { "acc": 0.98514881, "epoch": 38.80009374267635, "grad_norm": 5.2339396476745605, "learning_rate": 1.3090334484144074e-06, "loss": 0.03832356, "memory(GiB)": 13.7, "step": 82780, "train_speed(iter/s)": 1.528805 }, { "acc": 0.9697916, "epoch": 38.80243730958519, "grad_norm": 4.000398635864258, "learning_rate": 1.3085107584561458e-06, "loss": 0.07769105, "memory(GiB)": 13.7, "step": 82785, "train_speed(iter/s)": 1.528809 }, { "acc": 0.98045454, "epoch": 38.80478087649402, "grad_norm": 3.1550402641296387, "learning_rate": 1.3079881572411748e-06, "loss": 0.06456395, "memory(GiB)": 13.7, "step": 82790, "train_speed(iter/s)": 1.528816 }, { "acc": 0.9901042, "epoch": 38.807124443402856, "grad_norm": 0.16117563843727112, "learning_rate": 1.3074656447820578e-06, "loss": 0.02278903, "memory(GiB)": 13.7, "step": 82795, "train_speed(iter/s)": 1.528817 }, { "acc": 0.9895834, "epoch": 38.8094680103117, "grad_norm": 3.7629506587982178, "learning_rate": 1.3069432210913554e-06, "loss": 0.02537162, "memory(GiB)": 13.7, "step": 82800, "train_speed(iter/s)": 1.528818 }, { "acc": 0.98383923, "epoch": 38.81181157722053, "grad_norm": 0.5678854584693909, "learning_rate": 1.3064208861816245e-06, "loss": 0.05607855, "memory(GiB)": 13.7, "step": 82805, "train_speed(iter/s)": 1.528824 }, { "acc": 0.99077387, "epoch": 38.814155144129366, "grad_norm": 2.0715458393096924, "learning_rate": 1.3058986400654172e-06, "loss": 0.02600891, "memory(GiB)": 13.7, "step": 82810, "train_speed(iter/s)": 1.528822 }, { "acc": 0.9822917, "epoch": 38.8164987110382, "grad_norm": 4.578573703765869, "learning_rate": 1.3053764827552896e-06, "loss": 0.03540484, "memory(GiB)": 13.7, "step": 82815, "train_speed(iter/s)": 1.528824 }, { "acc": 0.98111115, "epoch": 38.818842277947034, "grad_norm": 1.1017122268676758, "learning_rate": 1.3048544142637908e-06, "loss": 0.05112734, "memory(GiB)": 13.7, "step": 82820, "train_speed(iter/s)": 1.528823 }, { "acc": 0.990625, "epoch": 38.82118584485587, "grad_norm": 3.1462361812591553, "learning_rate": 1.304332434603473e-06, "loss": 0.03066412, "memory(GiB)": 13.7, "step": 82825, "train_speed(iter/s)": 1.528826 }, { "acc": 0.97946272, "epoch": 38.8235294117647, "grad_norm": 0.00015168932441156358, "learning_rate": 1.3038105437868791e-06, "loss": 0.02492875, "memory(GiB)": 13.7, "step": 82830, "train_speed(iter/s)": 1.528829 }, { "acc": 0.98942709, "epoch": 38.825872978673544, "grad_norm": 3.648836374282837, "learning_rate": 1.3032887418265553e-06, "loss": 0.04902095, "memory(GiB)": 13.7, "step": 82835, "train_speed(iter/s)": 1.528837 }, { "acc": 0.99541664, "epoch": 38.82821654558238, "grad_norm": 0.0021325452253222466, "learning_rate": 1.302767028735045e-06, "loss": 0.00932838, "memory(GiB)": 13.7, "step": 82840, "train_speed(iter/s)": 1.52884 }, { "acc": 0.98309679, "epoch": 38.83056011249121, "grad_norm": 1.7407855987548828, "learning_rate": 1.3022454045248862e-06, "loss": 0.05337582, "memory(GiB)": 13.7, "step": 82845, "train_speed(iter/s)": 1.528847 }, { "acc": 0.98479166, "epoch": 38.83290367940005, "grad_norm": 7.671607971191406, "learning_rate": 1.3017238692086196e-06, "loss": 0.03780042, "memory(GiB)": 13.7, "step": 82850, "train_speed(iter/s)": 1.528847 }, { "acc": 0.97710476, "epoch": 38.83524724630888, "grad_norm": 3.6170146465301514, "learning_rate": 1.301202422798778e-06, "loss": 0.07755668, "memory(GiB)": 13.7, "step": 82855, "train_speed(iter/s)": 1.528852 }, { "acc": 0.98299828, "epoch": 38.837590813217716, "grad_norm": 5.52244234085083, "learning_rate": 1.300681065307899e-06, "loss": 0.04574937, "memory(GiB)": 13.7, "step": 82860, "train_speed(iter/s)": 1.52886 }, { "acc": 0.9902976, "epoch": 38.83993438012655, "grad_norm": 4.479433536529541, "learning_rate": 1.3001597967485102e-06, "loss": 0.02541606, "memory(GiB)": 13.7, "step": 82865, "train_speed(iter/s)": 1.528865 }, { "acc": 0.97488098, "epoch": 38.842277947035384, "grad_norm": 4.636397838592529, "learning_rate": 1.2996386171331434e-06, "loss": 0.04128581, "memory(GiB)": 13.7, "step": 82870, "train_speed(iter/s)": 1.528865 }, { "acc": 0.97250004, "epoch": 38.844621513944226, "grad_norm": 3.097980499267578, "learning_rate": 1.299117526474328e-06, "loss": 0.04032969, "memory(GiB)": 13.7, "step": 82875, "train_speed(iter/s)": 1.528861 }, { "acc": 0.9833334, "epoch": 38.84696508085306, "grad_norm": 1.102385401725769, "learning_rate": 1.2985965247845853e-06, "loss": 0.019221, "memory(GiB)": 13.7, "step": 82880, "train_speed(iter/s)": 1.528863 }, { "acc": 0.99375, "epoch": 38.849308647761895, "grad_norm": 3.592963457107544, "learning_rate": 1.2980756120764407e-06, "loss": 0.01632639, "memory(GiB)": 13.7, "step": 82885, "train_speed(iter/s)": 1.528866 }, { "acc": 0.9927084, "epoch": 38.85165221467073, "grad_norm": 4.752372741699219, "learning_rate": 1.2975547883624161e-06, "loss": 0.04605232, "memory(GiB)": 13.7, "step": 82890, "train_speed(iter/s)": 1.528867 }, { "acc": 0.9723732, "epoch": 38.85399578157956, "grad_norm": 4.1222076416015625, "learning_rate": 1.297034053655028e-06, "loss": 0.03862895, "memory(GiB)": 13.7, "step": 82895, "train_speed(iter/s)": 1.528873 }, { "acc": 0.98354168, "epoch": 38.8563393484884, "grad_norm": 5.532166004180908, "learning_rate": 1.2965134079667964e-06, "loss": 0.02900467, "memory(GiB)": 13.7, "step": 82900, "train_speed(iter/s)": 1.528872 }, { "acc": 0.97333336, "epoch": 38.85868291539723, "grad_norm": 0.7999940514564514, "learning_rate": 1.295992851310232e-06, "loss": 0.04179368, "memory(GiB)": 13.7, "step": 82905, "train_speed(iter/s)": 1.528864 }, { "acc": 0.9833004, "epoch": 38.86102648230607, "grad_norm": 0.0007247129105962813, "learning_rate": 1.2954723836978489e-06, "loss": 0.06175957, "memory(GiB)": 13.7, "step": 82910, "train_speed(iter/s)": 1.528864 }, { "acc": 0.9875, "epoch": 38.86337004921491, "grad_norm": 0.5747624635696411, "learning_rate": 1.2949520051421598e-06, "loss": 0.03732451, "memory(GiB)": 13.7, "step": 82915, "train_speed(iter/s)": 1.528873 }, { "acc": 0.97705193, "epoch": 38.86571361612374, "grad_norm": 0.00042328302515670657, "learning_rate": 1.2944317156556692e-06, "loss": 0.03106646, "memory(GiB)": 13.7, "step": 82920, "train_speed(iter/s)": 1.528879 }, { "acc": 0.9916667, "epoch": 38.868057183032576, "grad_norm": 1.8966220617294312, "learning_rate": 1.2939115152508846e-06, "loss": 0.0207442, "memory(GiB)": 13.7, "step": 82925, "train_speed(iter/s)": 1.528884 }, { "acc": 0.99404764, "epoch": 38.87040074994141, "grad_norm": 3.856959819793701, "learning_rate": 1.2933914039403114e-06, "loss": 0.0314777, "memory(GiB)": 13.7, "step": 82930, "train_speed(iter/s)": 1.52889 }, { "acc": 0.98519344, "epoch": 38.872744316850245, "grad_norm": 4.286073207855225, "learning_rate": 1.2928713817364505e-06, "loss": 0.04326388, "memory(GiB)": 13.7, "step": 82935, "train_speed(iter/s)": 1.528889 }, { "acc": 0.98319454, "epoch": 38.87508788375908, "grad_norm": 3.326869487762451, "learning_rate": 1.2923514486517996e-06, "loss": 0.0452937, "memory(GiB)": 13.7, "step": 82940, "train_speed(iter/s)": 1.528891 }, { "acc": 0.97217274, "epoch": 38.87743145066791, "grad_norm": 2.4192094802856445, "learning_rate": 1.291831604698857e-06, "loss": 0.04998189, "memory(GiB)": 13.7, "step": 82945, "train_speed(iter/s)": 1.528894 }, { "acc": 0.98946428, "epoch": 38.879775017576755, "grad_norm": 1.557502269744873, "learning_rate": 1.2913118498901206e-06, "loss": 0.04109316, "memory(GiB)": 13.7, "step": 82950, "train_speed(iter/s)": 1.528893 }, { "acc": 0.99073868, "epoch": 38.88211858448559, "grad_norm": 4.203758239746094, "learning_rate": 1.2907921842380804e-06, "loss": 0.02022704, "memory(GiB)": 13.7, "step": 82955, "train_speed(iter/s)": 1.528891 }, { "acc": 0.96885414, "epoch": 38.88446215139442, "grad_norm": 2.50998592376709, "learning_rate": 1.2902726077552286e-06, "loss": 0.06883458, "memory(GiB)": 13.7, "step": 82960, "train_speed(iter/s)": 1.528893 }, { "acc": 0.99229164, "epoch": 38.88680571830326, "grad_norm": 4.127778053283691, "learning_rate": 1.2897531204540542e-06, "loss": 0.03804869, "memory(GiB)": 13.7, "step": 82965, "train_speed(iter/s)": 1.528897 }, { "acc": 0.98488102, "epoch": 38.88914928521209, "grad_norm": 2.091845750808716, "learning_rate": 1.2892337223470461e-06, "loss": 0.02458177, "memory(GiB)": 13.7, "step": 82970, "train_speed(iter/s)": 1.528903 }, { "acc": 0.98916664, "epoch": 38.891492852120926, "grad_norm": 4.48699426651001, "learning_rate": 1.2887144134466861e-06, "loss": 0.02561789, "memory(GiB)": 13.7, "step": 82975, "train_speed(iter/s)": 1.528909 }, { "acc": 0.996875, "epoch": 38.89383641902976, "grad_norm": 4.560625076293945, "learning_rate": 1.2881951937654563e-06, "loss": 0.01269036, "memory(GiB)": 13.7, "step": 82980, "train_speed(iter/s)": 1.528915 }, { "acc": 0.97979164, "epoch": 38.8961799859386, "grad_norm": 7.5800886154174805, "learning_rate": 1.2876760633158377e-06, "loss": 0.11240311, "memory(GiB)": 13.7, "step": 82985, "train_speed(iter/s)": 1.528916 }, { "acc": 0.98604164, "epoch": 38.898523552847436, "grad_norm": 3.3341262340545654, "learning_rate": 1.2871570221103109e-06, "loss": 0.03026345, "memory(GiB)": 13.7, "step": 82990, "train_speed(iter/s)": 1.52892 }, { "acc": 0.9879261, "epoch": 38.90086711975627, "grad_norm": 3.5022330284118652, "learning_rate": 1.2866380701613485e-06, "loss": 0.03371729, "memory(GiB)": 13.7, "step": 82995, "train_speed(iter/s)": 1.528919 }, { "acc": 0.98249998, "epoch": 38.903210686665105, "grad_norm": 3.205662250518799, "learning_rate": 1.2861192074814257e-06, "loss": 0.02107458, "memory(GiB)": 13.7, "step": 83000, "train_speed(iter/s)": 1.528922 }, { "acc": 0.99387493, "epoch": 38.90555425357394, "grad_norm": 1.9695695638656616, "learning_rate": 1.2856004340830168e-06, "loss": 0.01586395, "memory(GiB)": 13.7, "step": 83005, "train_speed(iter/s)": 1.528924 }, { "acc": 0.96668015, "epoch": 38.90789782048277, "grad_norm": 4.836306571960449, "learning_rate": 1.2850817499785864e-06, "loss": 0.07627832, "memory(GiB)": 13.7, "step": 83010, "train_speed(iter/s)": 1.528929 }, { "acc": 0.9882143, "epoch": 38.91024138739161, "grad_norm": 3.0002331733703613, "learning_rate": 1.2845631551806065e-06, "loss": 0.04226391, "memory(GiB)": 13.7, "step": 83015, "train_speed(iter/s)": 1.52893 }, { "acc": 1.0, "epoch": 38.91258495430044, "grad_norm": 0.3740876019001007, "learning_rate": 1.2840446497015396e-06, "loss": 0.01467081, "memory(GiB)": 13.7, "step": 83020, "train_speed(iter/s)": 1.528931 }, { "acc": 0.98363094, "epoch": 38.91492852120928, "grad_norm": 4.138069152832031, "learning_rate": 1.2835262335538503e-06, "loss": 0.04560738, "memory(GiB)": 13.7, "step": 83025, "train_speed(iter/s)": 1.528931 }, { "acc": 0.99229164, "epoch": 38.91727208811812, "grad_norm": 4.1812567710876465, "learning_rate": 1.2830079067499985e-06, "loss": 0.02508385, "memory(GiB)": 13.7, "step": 83030, "train_speed(iter/s)": 1.528931 }, { "acc": 0.98646326, "epoch": 38.91961565502695, "grad_norm": 2.8230435848236084, "learning_rate": 1.2824896693024432e-06, "loss": 0.04445448, "memory(GiB)": 13.7, "step": 83035, "train_speed(iter/s)": 1.528936 }, { "acc": 0.98145828, "epoch": 38.921959221935786, "grad_norm": 2.670710802078247, "learning_rate": 1.2819715212236424e-06, "loss": 0.03921602, "memory(GiB)": 13.7, "step": 83040, "train_speed(iter/s)": 1.528938 }, { "acc": 0.97562504, "epoch": 38.92430278884462, "grad_norm": 5.213634967803955, "learning_rate": 1.2814534625260508e-06, "loss": 0.03675128, "memory(GiB)": 13.7, "step": 83045, "train_speed(iter/s)": 1.528942 }, { "acc": 0.98446426, "epoch": 38.926646355753455, "grad_norm": 3.0535080432891846, "learning_rate": 1.2809354932221183e-06, "loss": 0.0495353, "memory(GiB)": 13.7, "step": 83050, "train_speed(iter/s)": 1.528951 }, { "acc": 0.98298607, "epoch": 38.92898992266229, "grad_norm": 7.863483905792236, "learning_rate": 1.280417613324297e-06, "loss": 0.06718355, "memory(GiB)": 13.7, "step": 83055, "train_speed(iter/s)": 1.528952 }, { "acc": 0.97663689, "epoch": 38.93133348957113, "grad_norm": 3.5788824558258057, "learning_rate": 1.279899822845037e-06, "loss": 0.06108456, "memory(GiB)": 13.7, "step": 83060, "train_speed(iter/s)": 1.528949 }, { "acc": 0.99333324, "epoch": 38.933677056479965, "grad_norm": 0.0032133159693330526, "learning_rate": 1.2793821217967815e-06, "loss": 0.01127599, "memory(GiB)": 13.7, "step": 83065, "train_speed(iter/s)": 1.528952 }, { "acc": 0.97479172, "epoch": 38.9360206233888, "grad_norm": 4.187417030334473, "learning_rate": 1.2788645101919734e-06, "loss": 0.05002768, "memory(GiB)": 13.7, "step": 83070, "train_speed(iter/s)": 1.528957 }, { "acc": 0.98299675, "epoch": 38.938364190297634, "grad_norm": 5.198214054107666, "learning_rate": 1.278346988043056e-06, "loss": 0.05217507, "memory(GiB)": 13.7, "step": 83075, "train_speed(iter/s)": 1.528958 }, { "acc": 0.98458328, "epoch": 38.94070775720647, "grad_norm": 1.9069929122924805, "learning_rate": 1.2778295553624706e-06, "loss": 0.03312488, "memory(GiB)": 13.7, "step": 83080, "train_speed(iter/s)": 1.528962 }, { "acc": 0.9822917, "epoch": 38.9430513241153, "grad_norm": 4.123312473297119, "learning_rate": 1.2773122121626512e-06, "loss": 0.04275649, "memory(GiB)": 13.7, "step": 83085, "train_speed(iter/s)": 1.528962 }, { "acc": 0.9932292, "epoch": 38.94539489102414, "grad_norm": 2.403374433517456, "learning_rate": 1.2767949584560344e-06, "loss": 0.02155147, "memory(GiB)": 13.7, "step": 83090, "train_speed(iter/s)": 1.528967 }, { "acc": 0.98552074, "epoch": 38.94773845793297, "grad_norm": 1.1224011182785034, "learning_rate": 1.2762777942550546e-06, "loss": 0.02527657, "memory(GiB)": 13.7, "step": 83095, "train_speed(iter/s)": 1.528967 }, { "acc": 0.98937502, "epoch": 38.95008202484181, "grad_norm": 0.022990455850958824, "learning_rate": 1.2757607195721407e-06, "loss": 0.02611658, "memory(GiB)": 13.7, "step": 83100, "train_speed(iter/s)": 1.528967 }, { "acc": 0.99437504, "epoch": 38.95242559175065, "grad_norm": 1.865954041481018, "learning_rate": 1.275243734419724e-06, "loss": 0.03142058, "memory(GiB)": 13.7, "step": 83105, "train_speed(iter/s)": 1.528969 }, { "acc": 0.9770834, "epoch": 38.95476915865948, "grad_norm": 0.004042398184537888, "learning_rate": 1.2747268388102275e-06, "loss": 0.03970477, "memory(GiB)": 13.7, "step": 83110, "train_speed(iter/s)": 1.528972 }, { "acc": 0.98573322, "epoch": 38.957112725568315, "grad_norm": 2.0198795795440674, "learning_rate": 1.2742100327560781e-06, "loss": 0.06306944, "memory(GiB)": 13.7, "step": 83115, "train_speed(iter/s)": 1.528969 }, { "acc": 0.97819338, "epoch": 38.95945629247715, "grad_norm": 5.2000861167907715, "learning_rate": 1.273693316269699e-06, "loss": 0.06669084, "memory(GiB)": 13.7, "step": 83120, "train_speed(iter/s)": 1.52897 }, { "acc": 0.98318901, "epoch": 38.961799859385984, "grad_norm": 2.234004497528076, "learning_rate": 1.2731766893635076e-06, "loss": 0.05339448, "memory(GiB)": 13.7, "step": 83125, "train_speed(iter/s)": 1.528968 }, { "acc": 0.99875002, "epoch": 38.96414342629482, "grad_norm": 3.572967052459717, "learning_rate": 1.2726601520499228e-06, "loss": 0.01446485, "memory(GiB)": 13.7, "step": 83130, "train_speed(iter/s)": 1.528968 }, { "acc": 0.97619047, "epoch": 38.96648699320366, "grad_norm": 5.112947463989258, "learning_rate": 1.2721437043413634e-06, "loss": 0.04180888, "memory(GiB)": 13.7, "step": 83135, "train_speed(iter/s)": 1.52897 }, { "acc": 0.98604164, "epoch": 38.968830560112494, "grad_norm": 0.011127174831926823, "learning_rate": 1.271627346250239e-06, "loss": 0.04908389, "memory(GiB)": 13.7, "step": 83140, "train_speed(iter/s)": 1.528976 }, { "acc": 0.9958333, "epoch": 38.97117412702133, "grad_norm": 3.9811315536499023, "learning_rate": 1.2711110777889642e-06, "loss": 0.01824154, "memory(GiB)": 13.7, "step": 83145, "train_speed(iter/s)": 1.528976 }, { "acc": 0.98812504, "epoch": 38.97351769393016, "grad_norm": 0.01126487273722887, "learning_rate": 1.270594898969946e-06, "loss": 0.03200564, "memory(GiB)": 13.7, "step": 83150, "train_speed(iter/s)": 1.528982 }, { "acc": 0.99341345, "epoch": 38.975861260839, "grad_norm": 2.5386977195739746, "learning_rate": 1.2700788098055942e-06, "loss": 0.02810332, "memory(GiB)": 13.7, "step": 83155, "train_speed(iter/s)": 1.528984 }, { "acc": 0.99363098, "epoch": 38.97820482774783, "grad_norm": 2.447495937347412, "learning_rate": 1.2695628103083105e-06, "loss": 0.03119009, "memory(GiB)": 13.7, "step": 83160, "train_speed(iter/s)": 1.528987 }, { "acc": 0.9833334, "epoch": 38.980548394656665, "grad_norm": 7.088695049285889, "learning_rate": 1.2690469004904998e-06, "loss": 0.03055802, "memory(GiB)": 13.7, "step": 83165, "train_speed(iter/s)": 1.528994 }, { "acc": 0.96682549, "epoch": 38.9828919615655, "grad_norm": 3.938251256942749, "learning_rate": 1.2685310803645646e-06, "loss": 0.06656765, "memory(GiB)": 13.7, "step": 83170, "train_speed(iter/s)": 1.528997 }, { "acc": 0.98511372, "epoch": 38.98523552847434, "grad_norm": 2.8084378242492676, "learning_rate": 1.2680153499429004e-06, "loss": 0.04003775, "memory(GiB)": 13.7, "step": 83175, "train_speed(iter/s)": 1.528999 }, { "acc": 0.97666664, "epoch": 38.987579095383175, "grad_norm": 0.009840769693255424, "learning_rate": 1.2674997092379047e-06, "loss": 0.03914691, "memory(GiB)": 13.7, "step": 83180, "train_speed(iter/s)": 1.529003 }, { "acc": 0.98477678, "epoch": 38.98992266229201, "grad_norm": 4.14410924911499, "learning_rate": 1.2669841582619736e-06, "loss": 0.0421809, "memory(GiB)": 13.7, "step": 83185, "train_speed(iter/s)": 1.528999 }, { "acc": 0.9864584, "epoch": 38.992266229200844, "grad_norm": 5.076549053192139, "learning_rate": 1.2664686970274967e-06, "loss": 0.02152938, "memory(GiB)": 13.7, "step": 83190, "train_speed(iter/s)": 1.529002 }, { "acc": 0.98708334, "epoch": 38.99460979610968, "grad_norm": 3.414463996887207, "learning_rate": 1.2659533255468665e-06, "loss": 0.01951944, "memory(GiB)": 13.7, "step": 83195, "train_speed(iter/s)": 1.529002 }, { "acc": 0.98728628, "epoch": 38.99695336301851, "grad_norm": 4.504132270812988, "learning_rate": 1.2654380438324674e-06, "loss": 0.02642738, "memory(GiB)": 13.7, "step": 83200, "train_speed(iter/s)": 1.529006 }, { "acc": 0.97770834, "epoch": 38.99929692992735, "grad_norm": 5.598057270050049, "learning_rate": 1.2649228518966872e-06, "loss": 0.05705059, "memory(GiB)": 13.7, "step": 83205, "train_speed(iter/s)": 1.529009 }, { "acc": 0.98291664, "epoch": 39.00164049683618, "grad_norm": 6.6743011474609375, "learning_rate": 1.264407749751911e-06, "loss": 0.04671097, "memory(GiB)": 13.7, "step": 83210, "train_speed(iter/s)": 1.529002 }, { "acc": 0.98676472, "epoch": 39.00398406374502, "grad_norm": 3.991412401199341, "learning_rate": 1.2638927374105172e-06, "loss": 0.04920821, "memory(GiB)": 13.7, "step": 83215, "train_speed(iter/s)": 1.529005 }, { "acc": 0.996875, "epoch": 39.00632763065386, "grad_norm": 3.083285331726074, "learning_rate": 1.2633778148848863e-06, "loss": 0.02105032, "memory(GiB)": 13.7, "step": 83220, "train_speed(iter/s)": 1.529009 }, { "acc": 0.9875, "epoch": 39.00867119756269, "grad_norm": 4.303433895111084, "learning_rate": 1.262862982187396e-06, "loss": 0.04364133, "memory(GiB)": 13.7, "step": 83225, "train_speed(iter/s)": 1.529011 }, { "acc": 0.98051472, "epoch": 39.011014764471525, "grad_norm": 1.1790146827697754, "learning_rate": 1.2623482393304218e-06, "loss": 0.03800959, "memory(GiB)": 13.7, "step": 83230, "train_speed(iter/s)": 1.529015 }, { "acc": 0.97800598, "epoch": 39.01335833138036, "grad_norm": 0.8996616005897522, "learning_rate": 1.2618335863263328e-06, "loss": 0.03253812, "memory(GiB)": 13.7, "step": 83235, "train_speed(iter/s)": 1.529021 }, { "acc": 0.97806683, "epoch": 39.015701898289194, "grad_norm": 3.264364004135132, "learning_rate": 1.261319023187502e-06, "loss": 0.05696738, "memory(GiB)": 13.7, "step": 83240, "train_speed(iter/s)": 1.52902 }, { "acc": 0.99499998, "epoch": 39.01804546519803, "grad_norm": 3.3192923069000244, "learning_rate": 1.2608045499262978e-06, "loss": 0.03675511, "memory(GiB)": 13.7, "step": 83245, "train_speed(iter/s)": 1.529023 }, { "acc": 0.98851194, "epoch": 39.02038903210687, "grad_norm": 1.3353228569030762, "learning_rate": 1.260290166555088e-06, "loss": 0.04782169, "memory(GiB)": 13.7, "step": 83250, "train_speed(iter/s)": 1.529028 }, { "acc": 0.99437504, "epoch": 39.022732599015704, "grad_norm": 1.749495029449463, "learning_rate": 1.2597758730862337e-06, "loss": 0.04190959, "memory(GiB)": 13.7, "step": 83255, "train_speed(iter/s)": 1.529027 }, { "acc": 0.99125004, "epoch": 39.02507616592454, "grad_norm": 1.8105380535125732, "learning_rate": 1.2592616695320978e-06, "loss": 0.01611813, "memory(GiB)": 13.7, "step": 83260, "train_speed(iter/s)": 1.529032 }, { "acc": 0.97687502, "epoch": 39.02741973283337, "grad_norm": 3.6940340995788574, "learning_rate": 1.258747555905042e-06, "loss": 0.0455583, "memory(GiB)": 13.7, "step": 83265, "train_speed(iter/s)": 1.529036 }, { "acc": 0.98249998, "epoch": 39.02976329974221, "grad_norm": 0.0010285453172400594, "learning_rate": 1.2582335322174225e-06, "loss": 0.03168133, "memory(GiB)": 13.7, "step": 83270, "train_speed(iter/s)": 1.529042 }, { "acc": 0.9875, "epoch": 39.03210686665104, "grad_norm": 5.696890830993652, "learning_rate": 1.2577195984815926e-06, "loss": 0.03312485, "memory(GiB)": 13.7, "step": 83275, "train_speed(iter/s)": 1.529045 }, { "acc": 0.98874998, "epoch": 39.034450433559876, "grad_norm": 1.7099995613098145, "learning_rate": 1.2572057547099084e-06, "loss": 0.02348114, "memory(GiB)": 13.7, "step": 83280, "train_speed(iter/s)": 1.529044 }, { "acc": 0.98604164, "epoch": 39.03679400046871, "grad_norm": 1.3258605003356934, "learning_rate": 1.256692000914721e-06, "loss": 0.05983056, "memory(GiB)": 13.7, "step": 83285, "train_speed(iter/s)": 1.529044 }, { "acc": 0.98279762, "epoch": 39.03913756737755, "grad_norm": 3.6786460876464844, "learning_rate": 1.2561783371083772e-06, "loss": 0.04798922, "memory(GiB)": 13.7, "step": 83290, "train_speed(iter/s)": 1.529047 }, { "acc": 0.97145834, "epoch": 39.041481134286386, "grad_norm": 1.9364627599716187, "learning_rate": 1.2556647633032254e-06, "loss": 0.06212204, "memory(GiB)": 13.7, "step": 83295, "train_speed(iter/s)": 1.529044 }, { "acc": 0.9895834, "epoch": 39.04382470119522, "grad_norm": 1.835073709487915, "learning_rate": 1.255151279511611e-06, "loss": 0.02700088, "memory(GiB)": 13.7, "step": 83300, "train_speed(iter/s)": 1.529044 }, { "acc": 0.9802084, "epoch": 39.046168268104054, "grad_norm": 4.420957088470459, "learning_rate": 1.254637885745874e-06, "loss": 0.04965414, "memory(GiB)": 13.7, "step": 83305, "train_speed(iter/s)": 1.52904 }, { "acc": 0.98778839, "epoch": 39.04851183501289, "grad_norm": 1.0763373374938965, "learning_rate": 1.2541245820183582e-06, "loss": 0.02201102, "memory(GiB)": 13.7, "step": 83310, "train_speed(iter/s)": 1.529039 }, { "acc": 1.0, "epoch": 39.05085540192172, "grad_norm": 0.011837571859359741, "learning_rate": 1.2536113683413976e-06, "loss": 0.01705279, "memory(GiB)": 13.7, "step": 83315, "train_speed(iter/s)": 1.529044 }, { "acc": 0.98666668, "epoch": 39.05319896883056, "grad_norm": 1.2063530683517456, "learning_rate": 1.2530982447273304e-06, "loss": 0.03892195, "memory(GiB)": 13.7, "step": 83320, "train_speed(iter/s)": 1.529046 }, { "acc": 0.98139877, "epoch": 39.0555425357394, "grad_norm": 0.004268990363925695, "learning_rate": 1.2525852111884918e-06, "loss": 0.02779279, "memory(GiB)": 13.7, "step": 83325, "train_speed(iter/s)": 1.529049 }, { "acc": 0.99375, "epoch": 39.05788610264823, "grad_norm": 3.5861144065856934, "learning_rate": 1.25207226773721e-06, "loss": 0.03675115, "memory(GiB)": 13.7, "step": 83330, "train_speed(iter/s)": 1.529051 }, { "acc": 0.98312492, "epoch": 39.06022966955707, "grad_norm": 1.4700977802276611, "learning_rate": 1.2515594143858167e-06, "loss": 0.03794721, "memory(GiB)": 13.7, "step": 83335, "train_speed(iter/s)": 1.529049 }, { "acc": 0.9822916, "epoch": 39.0625732364659, "grad_norm": 8.155858039855957, "learning_rate": 1.2510466511466404e-06, "loss": 0.0438981, "memory(GiB)": 13.7, "step": 83340, "train_speed(iter/s)": 1.52905 }, { "acc": 0.98812504, "epoch": 39.064916803374736, "grad_norm": 2.3983023166656494, "learning_rate": 1.250533978032003e-06, "loss": 0.01954475, "memory(GiB)": 13.7, "step": 83345, "train_speed(iter/s)": 1.529049 }, { "acc": 0.98395834, "epoch": 39.06726037028357, "grad_norm": 3.8030993938446045, "learning_rate": 1.2500213950542305e-06, "loss": 0.02355475, "memory(GiB)": 13.7, "step": 83350, "train_speed(iter/s)": 1.529049 }, { "acc": 0.98373508, "epoch": 39.069603937192404, "grad_norm": 2.5799708366394043, "learning_rate": 1.249508902225641e-06, "loss": 0.04680409, "memory(GiB)": 13.7, "step": 83355, "train_speed(iter/s)": 1.529051 }, { "acc": 0.99333334, "epoch": 39.07194750410124, "grad_norm": 3.8808844089508057, "learning_rate": 1.2489964995585567e-06, "loss": 0.02318501, "memory(GiB)": 13.7, "step": 83360, "train_speed(iter/s)": 1.529056 }, { "acc": 0.9875, "epoch": 39.07429107101008, "grad_norm": 4.5444655418396, "learning_rate": 1.2484841870652896e-06, "loss": 0.07589307, "memory(GiB)": 13.7, "step": 83365, "train_speed(iter/s)": 1.529056 }, { "acc": 0.984375, "epoch": 39.076634637918914, "grad_norm": 5.158576965332031, "learning_rate": 1.2479719647581566e-06, "loss": 0.04630677, "memory(GiB)": 13.7, "step": 83370, "train_speed(iter/s)": 1.529057 }, { "acc": 0.98354168, "epoch": 39.07897820482775, "grad_norm": 4.175910949707031, "learning_rate": 1.247459832649471e-06, "loss": 0.06041987, "memory(GiB)": 13.7, "step": 83375, "train_speed(iter/s)": 1.529063 }, { "acc": 0.98240528, "epoch": 39.08132177173658, "grad_norm": 1.1208136081695557, "learning_rate": 1.2469477907515395e-06, "loss": 0.05062214, "memory(GiB)": 13.7, "step": 83380, "train_speed(iter/s)": 1.529068 }, { "acc": 0.98800602, "epoch": 39.08366533864542, "grad_norm": 3.3689403533935547, "learning_rate": 1.246435839076672e-06, "loss": 0.02610882, "memory(GiB)": 13.7, "step": 83385, "train_speed(iter/s)": 1.529068 }, { "acc": 0.98779764, "epoch": 39.08600890555425, "grad_norm": 0.43717536330223083, "learning_rate": 1.2459239776371744e-06, "loss": 0.04496233, "memory(GiB)": 13.7, "step": 83390, "train_speed(iter/s)": 1.529074 }, { "acc": 0.984375, "epoch": 39.088352472463086, "grad_norm": 3.858306646347046, "learning_rate": 1.2454122064453506e-06, "loss": 0.04156239, "memory(GiB)": 13.7, "step": 83395, "train_speed(iter/s)": 1.529076 }, { "acc": 0.98819447, "epoch": 39.09069603937193, "grad_norm": 1.7016711235046387, "learning_rate": 1.244900525513501e-06, "loss": 0.04530302, "memory(GiB)": 13.7, "step": 83400, "train_speed(iter/s)": 1.529084 }, { "acc": 0.9854167, "epoch": 39.09303960628076, "grad_norm": 5.877875328063965, "learning_rate": 1.2443889348539233e-06, "loss": 0.03322901, "memory(GiB)": 13.7, "step": 83405, "train_speed(iter/s)": 1.52909 }, { "acc": 0.97659721, "epoch": 39.095383173189596, "grad_norm": 3.668931484222412, "learning_rate": 1.2438774344789158e-06, "loss": 0.06176625, "memory(GiB)": 13.7, "step": 83410, "train_speed(iter/s)": 1.529092 }, { "acc": 0.98510418, "epoch": 39.09772674009843, "grad_norm": 2.713865041732788, "learning_rate": 1.2433660244007747e-06, "loss": 0.03122301, "memory(GiB)": 13.7, "step": 83415, "train_speed(iter/s)": 1.52909 }, { "acc": 0.99258928, "epoch": 39.100070307007265, "grad_norm": 2.5241260528564453, "learning_rate": 1.2428547046317898e-06, "loss": 0.02889149, "memory(GiB)": 13.7, "step": 83420, "train_speed(iter/s)": 1.529093 }, { "acc": 0.97562504, "epoch": 39.1024138739161, "grad_norm": 4.1501240730285645, "learning_rate": 1.2423434751842527e-06, "loss": 0.04238903, "memory(GiB)": 13.7, "step": 83425, "train_speed(iter/s)": 1.529091 }, { "acc": 0.97321434, "epoch": 39.10475744082493, "grad_norm": 3.628925323486328, "learning_rate": 1.2418323360704538e-06, "loss": 0.05758541, "memory(GiB)": 13.7, "step": 83430, "train_speed(iter/s)": 1.529095 }, { "acc": 0.98916664, "epoch": 39.10710100773377, "grad_norm": 2.1920697689056396, "learning_rate": 1.241321287302677e-06, "loss": 0.03700348, "memory(GiB)": 13.7, "step": 83435, "train_speed(iter/s)": 1.529095 }, { "acc": 0.9825695, "epoch": 39.10944457464261, "grad_norm": 3.0301434993743896, "learning_rate": 1.2408103288932054e-06, "loss": 0.04953771, "memory(GiB)": 13.7, "step": 83440, "train_speed(iter/s)": 1.529096 }, { "acc": 0.98819447, "epoch": 39.11178814155144, "grad_norm": 3.6514952182769775, "learning_rate": 1.2402994608543218e-06, "loss": 0.03015256, "memory(GiB)": 13.7, "step": 83445, "train_speed(iter/s)": 1.529097 }, { "acc": 0.98739443, "epoch": 39.11413170846028, "grad_norm": 0.36667555570602417, "learning_rate": 1.2397886831983079e-06, "loss": 0.02798648, "memory(GiB)": 13.7, "step": 83450, "train_speed(iter/s)": 1.529101 }, { "acc": 0.98666668, "epoch": 39.11647527536911, "grad_norm": 2.3121113777160645, "learning_rate": 1.2392779959374372e-06, "loss": 0.03187335, "memory(GiB)": 13.7, "step": 83455, "train_speed(iter/s)": 1.529109 }, { "acc": 0.990625, "epoch": 39.118818842277946, "grad_norm": 3.5012614727020264, "learning_rate": 1.238767399083987e-06, "loss": 0.02331087, "memory(GiB)": 13.7, "step": 83460, "train_speed(iter/s)": 1.529119 }, { "acc": 0.98520298, "epoch": 39.12116240918678, "grad_norm": 5.516933917999268, "learning_rate": 1.2382568926502301e-06, "loss": 0.04177634, "memory(GiB)": 13.7, "step": 83465, "train_speed(iter/s)": 1.529118 }, { "acc": 0.97406254, "epoch": 39.123505976095615, "grad_norm": 4.654835224151611, "learning_rate": 1.2377464766484391e-06, "loss": 0.05013291, "memory(GiB)": 13.7, "step": 83470, "train_speed(iter/s)": 1.529119 }, { "acc": 0.9895833, "epoch": 39.125849543004456, "grad_norm": 3.5586273670196533, "learning_rate": 1.23723615109088e-06, "loss": 0.04398383, "memory(GiB)": 13.7, "step": 83475, "train_speed(iter/s)": 1.529123 }, { "acc": 0.97997475, "epoch": 39.12819310991329, "grad_norm": 2.0700066089630127, "learning_rate": 1.2367259159898218e-06, "loss": 0.07710909, "memory(GiB)": 13.7, "step": 83480, "train_speed(iter/s)": 1.529128 }, { "acc": 0.98888893, "epoch": 39.130536676822125, "grad_norm": 1.8400342464447021, "learning_rate": 1.2362157713575265e-06, "loss": 0.03097714, "memory(GiB)": 13.7, "step": 83485, "train_speed(iter/s)": 1.529131 }, { "acc": 0.97279758, "epoch": 39.13288024373096, "grad_norm": 6.396342754364014, "learning_rate": 1.2357057172062586e-06, "loss": 0.0507081, "memory(GiB)": 13.7, "step": 83490, "train_speed(iter/s)": 1.529137 }, { "acc": 0.98187504, "epoch": 39.13522381063979, "grad_norm": 1.869038462638855, "learning_rate": 1.2351957535482757e-06, "loss": 0.04427458, "memory(GiB)": 13.7, "step": 83495, "train_speed(iter/s)": 1.529139 }, { "acc": 0.98395824, "epoch": 39.13756737754863, "grad_norm": 0.17540942132472992, "learning_rate": 1.2346858803958365e-06, "loss": 0.04723165, "memory(GiB)": 13.7, "step": 83500, "train_speed(iter/s)": 1.529142 }, { "acc": 0.9832386, "epoch": 39.13991094445746, "grad_norm": 6.2942962646484375, "learning_rate": 1.2341760977611986e-06, "loss": 0.05493674, "memory(GiB)": 13.7, "step": 83505, "train_speed(iter/s)": 1.529144 }, { "acc": 0.975, "epoch": 39.142254511366296, "grad_norm": 4.1065263748168945, "learning_rate": 1.2336664056566129e-06, "loss": 0.04545171, "memory(GiB)": 13.7, "step": 83510, "train_speed(iter/s)": 1.529146 }, { "acc": 0.984375, "epoch": 39.14459807827514, "grad_norm": 2.749417543411255, "learning_rate": 1.2331568040943318e-06, "loss": 0.04258563, "memory(GiB)": 13.7, "step": 83515, "train_speed(iter/s)": 1.529151 }, { "acc": 0.99092264, "epoch": 39.14694164518397, "grad_norm": 0.0521024651825428, "learning_rate": 1.2326472930866053e-06, "loss": 0.02694811, "memory(GiB)": 13.7, "step": 83520, "train_speed(iter/s)": 1.529155 }, { "acc": 0.97614574, "epoch": 39.149285212092806, "grad_norm": 4.491205215454102, "learning_rate": 1.2321378726456799e-06, "loss": 0.04039996, "memory(GiB)": 13.7, "step": 83525, "train_speed(iter/s)": 1.529158 }, { "acc": 0.97946434, "epoch": 39.15162877900164, "grad_norm": 2.4136321544647217, "learning_rate": 1.2316285427837985e-06, "loss": 0.03967211, "memory(GiB)": 13.7, "step": 83530, "train_speed(iter/s)": 1.529168 }, { "acc": 0.99861107, "epoch": 39.153972345910475, "grad_norm": 0.018674276769161224, "learning_rate": 1.231119303513205e-06, "loss": 0.03354391, "memory(GiB)": 13.7, "step": 83535, "train_speed(iter/s)": 1.529173 }, { "acc": 0.96958332, "epoch": 39.15631591281931, "grad_norm": 6.091753959655762, "learning_rate": 1.2306101548461405e-06, "loss": 0.05109189, "memory(GiB)": 13.7, "step": 83540, "train_speed(iter/s)": 1.529175 }, { "acc": 0.99525557, "epoch": 39.15865947972814, "grad_norm": 0.6051084995269775, "learning_rate": 1.2301010967948445e-06, "loss": 0.02559521, "memory(GiB)": 13.7, "step": 83545, "train_speed(iter/s)": 1.529173 }, { "acc": 0.9749054, "epoch": 39.161003046636985, "grad_norm": 4.8684306144714355, "learning_rate": 1.2295921293715493e-06, "loss": 0.06897522, "memory(GiB)": 13.7, "step": 83550, "train_speed(iter/s)": 1.529174 }, { "acc": 0.99750004, "epoch": 39.16334661354582, "grad_norm": 2.6157925128936768, "learning_rate": 1.229083252588491e-06, "loss": 0.01755209, "memory(GiB)": 13.7, "step": 83555, "train_speed(iter/s)": 1.529175 }, { "acc": 0.99331846, "epoch": 39.165690180454654, "grad_norm": 5.2743330001831055, "learning_rate": 1.2285744664579034e-06, "loss": 0.03733232, "memory(GiB)": 13.7, "step": 83560, "train_speed(iter/s)": 1.529179 }, { "acc": 0.97309523, "epoch": 39.16803374736349, "grad_norm": 3.526867628097534, "learning_rate": 1.2280657709920133e-06, "loss": 0.0911563, "memory(GiB)": 13.7, "step": 83565, "train_speed(iter/s)": 1.529186 }, { "acc": 0.98374996, "epoch": 39.17037731427232, "grad_norm": 5.079575061798096, "learning_rate": 1.2275571662030469e-06, "loss": 0.05530396, "memory(GiB)": 13.7, "step": 83570, "train_speed(iter/s)": 1.529187 }, { "acc": 0.98708334, "epoch": 39.17272088118116, "grad_norm": 0.0007806509383954108, "learning_rate": 1.2270486521032319e-06, "loss": 0.0207525, "memory(GiB)": 13.7, "step": 83575, "train_speed(iter/s)": 1.529193 }, { "acc": 0.9854166, "epoch": 39.17506444808999, "grad_norm": 4.403561592102051, "learning_rate": 1.2265402287047914e-06, "loss": 0.03246433, "memory(GiB)": 13.7, "step": 83580, "train_speed(iter/s)": 1.529194 }, { "acc": 0.978125, "epoch": 39.177408014998825, "grad_norm": 0.0032305466011166573, "learning_rate": 1.2260318960199443e-06, "loss": 0.04684609, "memory(GiB)": 13.7, "step": 83585, "train_speed(iter/s)": 1.529199 }, { "acc": 0.98520832, "epoch": 39.17975158190767, "grad_norm": 6.35222864151001, "learning_rate": 1.2255236540609103e-06, "loss": 0.0540942, "memory(GiB)": 13.7, "step": 83590, "train_speed(iter/s)": 1.529203 }, { "acc": 0.97999992, "epoch": 39.1820951488165, "grad_norm": 3.4713332653045654, "learning_rate": 1.2250155028399071e-06, "loss": 0.04946561, "memory(GiB)": 13.7, "step": 83595, "train_speed(iter/s)": 1.529201 }, { "acc": 0.97071438, "epoch": 39.184438715725335, "grad_norm": 3.198437213897705, "learning_rate": 1.2245074423691465e-06, "loss": 0.05352325, "memory(GiB)": 13.7, "step": 83600, "train_speed(iter/s)": 1.529206 }, { "acc": 0.98083334, "epoch": 39.18678228263417, "grad_norm": 2.9825570583343506, "learning_rate": 1.2239994726608435e-06, "loss": 0.03505177, "memory(GiB)": 13.7, "step": 83605, "train_speed(iter/s)": 1.529206 }, { "acc": 0.99348211, "epoch": 39.189125849543004, "grad_norm": 0.7501269578933716, "learning_rate": 1.2234915937272048e-06, "loss": 0.02428082, "memory(GiB)": 13.7, "step": 83610, "train_speed(iter/s)": 1.529208 }, { "acc": 0.98279762, "epoch": 39.19146941645184, "grad_norm": 2.013242244720459, "learning_rate": 1.2229838055804395e-06, "loss": 0.05143265, "memory(GiB)": 13.7, "step": 83615, "train_speed(iter/s)": 1.529214 }, { "acc": 0.99513884, "epoch": 39.19381298336067, "grad_norm": 0.6291802525520325, "learning_rate": 1.2224761082327552e-06, "loss": 0.01704736, "memory(GiB)": 13.7, "step": 83620, "train_speed(iter/s)": 1.529213 }, { "acc": 0.98791037, "epoch": 39.196156550269514, "grad_norm": 2.891183853149414, "learning_rate": 1.221968501696352e-06, "loss": 0.05138868, "memory(GiB)": 13.7, "step": 83625, "train_speed(iter/s)": 1.529218 }, { "acc": 0.99444447, "epoch": 39.19850011717835, "grad_norm": 6.693319797515869, "learning_rate": 1.2214609859834325e-06, "loss": 0.02248701, "memory(GiB)": 13.7, "step": 83630, "train_speed(iter/s)": 1.529218 }, { "acc": 0.99383011, "epoch": 39.20084368408718, "grad_norm": 0.0026262945029884577, "learning_rate": 1.220953561106197e-06, "loss": 0.02727905, "memory(GiB)": 13.7, "step": 83635, "train_speed(iter/s)": 1.529223 }, { "acc": 0.98903847, "epoch": 39.20318725099602, "grad_norm": 3.8222343921661377, "learning_rate": 1.22044622707684e-06, "loss": 0.03167679, "memory(GiB)": 13.7, "step": 83640, "train_speed(iter/s)": 1.529224 }, { "acc": 0.97875004, "epoch": 39.20553081790485, "grad_norm": 4.596555233001709, "learning_rate": 1.2199389839075588e-06, "loss": 0.04062884, "memory(GiB)": 13.7, "step": 83645, "train_speed(iter/s)": 1.529224 }, { "acc": 0.97124996, "epoch": 39.207874384813685, "grad_norm": 4.8260698318481445, "learning_rate": 1.2194318316105423e-06, "loss": 0.0644289, "memory(GiB)": 13.7, "step": 83650, "train_speed(iter/s)": 1.529223 }, { "acc": 0.98198128, "epoch": 39.21021795172252, "grad_norm": 1.3863893747329712, "learning_rate": 1.218924770197985e-06, "loss": 0.06672066, "memory(GiB)": 13.7, "step": 83655, "train_speed(iter/s)": 1.52923 }, { "acc": 0.99020834, "epoch": 39.212561518631354, "grad_norm": 4.5042219161987305, "learning_rate": 1.2184177996820709e-06, "loss": 0.02888547, "memory(GiB)": 13.7, "step": 83660, "train_speed(iter/s)": 1.529235 }, { "acc": 0.96312504, "epoch": 39.214905085540195, "grad_norm": 0.016331499442458153, "learning_rate": 1.2179109200749879e-06, "loss": 0.06419171, "memory(GiB)": 13.7, "step": 83665, "train_speed(iter/s)": 1.52924 }, { "acc": 0.9840477, "epoch": 39.21724865244903, "grad_norm": 4.124118328094482, "learning_rate": 1.2174041313889195e-06, "loss": 0.04756401, "memory(GiB)": 13.7, "step": 83670, "train_speed(iter/s)": 1.529243 }, { "acc": 0.9916667, "epoch": 39.219592219357864, "grad_norm": 0.0027208044193685055, "learning_rate": 1.216897433636049e-06, "loss": 0.01590284, "memory(GiB)": 13.7, "step": 83675, "train_speed(iter/s)": 1.529246 }, { "acc": 0.99375, "epoch": 39.2219357862667, "grad_norm": 1.839775800704956, "learning_rate": 1.2163908268285525e-06, "loss": 0.01542297, "memory(GiB)": 13.7, "step": 83680, "train_speed(iter/s)": 1.529244 }, { "acc": 0.98520832, "epoch": 39.22427935317553, "grad_norm": 0.3544658422470093, "learning_rate": 1.2158843109786086e-06, "loss": 0.03312343, "memory(GiB)": 13.7, "step": 83685, "train_speed(iter/s)": 1.529249 }, { "acc": 0.98497477, "epoch": 39.22662292008437, "grad_norm": 0.992627739906311, "learning_rate": 1.2153778860983942e-06, "loss": 0.03783281, "memory(GiB)": 13.7, "step": 83690, "train_speed(iter/s)": 1.529251 }, { "acc": 0.97800598, "epoch": 39.2289664869932, "grad_norm": 3.5162854194641113, "learning_rate": 1.2148715522000803e-06, "loss": 0.04684317, "memory(GiB)": 13.7, "step": 83695, "train_speed(iter/s)": 1.52925 }, { "acc": 0.98452377, "epoch": 39.231310053902035, "grad_norm": 3.0282557010650635, "learning_rate": 1.2143653092958362e-06, "loss": 0.02166411, "memory(GiB)": 13.7, "step": 83700, "train_speed(iter/s)": 1.529253 }, { "acc": 0.98973217, "epoch": 39.23365362081088, "grad_norm": 3.94640851020813, "learning_rate": 1.2138591573978312e-06, "loss": 0.02356494, "memory(GiB)": 13.7, "step": 83705, "train_speed(iter/s)": 1.529256 }, { "acc": 0.99437504, "epoch": 39.23599718771971, "grad_norm": 4.084083080291748, "learning_rate": 1.2133530965182336e-06, "loss": 0.02627458, "memory(GiB)": 13.7, "step": 83710, "train_speed(iter/s)": 1.529259 }, { "acc": 0.9958334, "epoch": 39.238340754628545, "grad_norm": 3.331195116043091, "learning_rate": 1.2128471266692044e-06, "loss": 0.03005642, "memory(GiB)": 13.7, "step": 83715, "train_speed(iter/s)": 1.529261 }, { "acc": 0.97778845, "epoch": 39.24068432153738, "grad_norm": 3.963684558868408, "learning_rate": 1.2123412478629066e-06, "loss": 0.03297568, "memory(GiB)": 13.7, "step": 83720, "train_speed(iter/s)": 1.529255 }, { "acc": 0.9828125, "epoch": 39.243027888446214, "grad_norm": 3.834028720855713, "learning_rate": 1.2118354601115022e-06, "loss": 0.03970912, "memory(GiB)": 13.7, "step": 83725, "train_speed(iter/s)": 1.52926 }, { "acc": 0.98321428, "epoch": 39.24537145535505, "grad_norm": 3.070150136947632, "learning_rate": 1.2113297634271462e-06, "loss": 0.08651724, "memory(GiB)": 13.7, "step": 83730, "train_speed(iter/s)": 1.529262 }, { "acc": 0.98416672, "epoch": 39.24771502226388, "grad_norm": 2.472136974334717, "learning_rate": 1.2108241578219931e-06, "loss": 0.03121444, "memory(GiB)": 13.7, "step": 83735, "train_speed(iter/s)": 1.529264 }, { "acc": 0.99416666, "epoch": 39.250058589172724, "grad_norm": 1.0275940895080566, "learning_rate": 1.2103186433081968e-06, "loss": 0.03226197, "memory(GiB)": 13.7, "step": 83740, "train_speed(iter/s)": 1.529261 }, { "acc": 0.99125004, "epoch": 39.25240215608156, "grad_norm": 0.0021822291892021894, "learning_rate": 1.209813219897909e-06, "loss": 0.02620327, "memory(GiB)": 13.7, "step": 83745, "train_speed(iter/s)": 1.529265 }, { "acc": 0.98455353, "epoch": 39.25474572299039, "grad_norm": 2.1154134273529053, "learning_rate": 1.2093078876032794e-06, "loss": 0.0346158, "memory(GiB)": 13.7, "step": 83750, "train_speed(iter/s)": 1.529272 }, { "acc": 0.984375, "epoch": 39.25708928989923, "grad_norm": 3.4864718914031982, "learning_rate": 1.2088026464364516e-06, "loss": 0.02658481, "memory(GiB)": 13.7, "step": 83755, "train_speed(iter/s)": 1.529271 }, { "acc": 0.97875004, "epoch": 39.25943285680806, "grad_norm": 10.1607666015625, "learning_rate": 1.2082974964095715e-06, "loss": 0.04746373, "memory(GiB)": 13.7, "step": 83760, "train_speed(iter/s)": 1.529275 }, { "acc": 0.9802084, "epoch": 39.261776423716896, "grad_norm": 3.166811943054199, "learning_rate": 1.2077924375347824e-06, "loss": 0.06660401, "memory(GiB)": 13.7, "step": 83765, "train_speed(iter/s)": 1.529281 }, { "acc": 0.9979167, "epoch": 39.26411999062573, "grad_norm": 2.9121673107147217, "learning_rate": 1.2072874698242237e-06, "loss": 0.03040383, "memory(GiB)": 13.7, "step": 83770, "train_speed(iter/s)": 1.529287 }, { "acc": 0.99125004, "epoch": 39.266463557534564, "grad_norm": 2.809495687484741, "learning_rate": 1.2067825932900303e-06, "loss": 0.02572835, "memory(GiB)": 13.7, "step": 83775, "train_speed(iter/s)": 1.529291 }, { "acc": 0.99213715, "epoch": 39.268807124443406, "grad_norm": 2.9029269218444824, "learning_rate": 1.2062778079443408e-06, "loss": 0.04626991, "memory(GiB)": 13.7, "step": 83780, "train_speed(iter/s)": 1.529292 }, { "acc": 0.98839741, "epoch": 39.27115069135224, "grad_norm": 5.239238262176514, "learning_rate": 1.2057731137992888e-06, "loss": 0.06932756, "memory(GiB)": 13.7, "step": 83785, "train_speed(iter/s)": 1.529298 }, { "acc": 0.9875, "epoch": 39.273494258261074, "grad_norm": 0.6369444727897644, "learning_rate": 1.205268510867003e-06, "loss": 0.03224089, "memory(GiB)": 13.7, "step": 83790, "train_speed(iter/s)": 1.529303 }, { "acc": 0.99750004, "epoch": 39.27583782516991, "grad_norm": 1.4675918817520142, "learning_rate": 1.2047639991596143e-06, "loss": 0.0310795, "memory(GiB)": 13.7, "step": 83795, "train_speed(iter/s)": 1.529301 }, { "acc": 0.99437504, "epoch": 39.27818139207874, "grad_norm": 3.2085790634155273, "learning_rate": 1.2042595786892504e-06, "loss": 0.01551529, "memory(GiB)": 13.7, "step": 83800, "train_speed(iter/s)": 1.529303 }, { "acc": 0.98374996, "epoch": 39.28052495898758, "grad_norm": 1.1709259748458862, "learning_rate": 1.2037552494680331e-06, "loss": 0.03805972, "memory(GiB)": 13.7, "step": 83805, "train_speed(iter/s)": 1.529306 }, { "acc": 0.984375, "epoch": 39.28286852589641, "grad_norm": 3.9104416370391846, "learning_rate": 1.2032510115080864e-06, "loss": 0.05319662, "memory(GiB)": 13.7, "step": 83810, "train_speed(iter/s)": 1.529312 }, { "acc": 0.97770824, "epoch": 39.28521209280525, "grad_norm": 1.7708367109298706, "learning_rate": 1.2027468648215319e-06, "loss": 0.06193092, "memory(GiB)": 13.7, "step": 83815, "train_speed(iter/s)": 1.529317 }, { "acc": 0.97416668, "epoch": 39.28755565971409, "grad_norm": 5.737313747406006, "learning_rate": 1.2022428094204846e-06, "loss": 0.06192244, "memory(GiB)": 13.7, "step": 83820, "train_speed(iter/s)": 1.529317 }, { "acc": 0.98874998, "epoch": 39.28989922662292, "grad_norm": 0.5901355743408203, "learning_rate": 1.2017388453170638e-06, "loss": 0.01990119, "memory(GiB)": 13.7, "step": 83825, "train_speed(iter/s)": 1.529324 }, { "acc": 0.99187498, "epoch": 39.292242793531756, "grad_norm": 1.7856377363204956, "learning_rate": 1.2012349725233792e-06, "loss": 0.01964019, "memory(GiB)": 13.7, "step": 83830, "train_speed(iter/s)": 1.529321 }, { "acc": 0.9958333, "epoch": 39.29458636044059, "grad_norm": 0.08192114531993866, "learning_rate": 1.2007311910515449e-06, "loss": 0.025466, "memory(GiB)": 13.7, "step": 83835, "train_speed(iter/s)": 1.529323 }, { "acc": 0.98654766, "epoch": 39.296929927349424, "grad_norm": 0.00407683290541172, "learning_rate": 1.2002275009136703e-06, "loss": 0.06559535, "memory(GiB)": 13.7, "step": 83840, "train_speed(iter/s)": 1.529325 }, { "acc": 0.99520836, "epoch": 39.29927349425826, "grad_norm": 2.6696064472198486, "learning_rate": 1.199723902121861e-06, "loss": 0.0170371, "memory(GiB)": 13.7, "step": 83845, "train_speed(iter/s)": 1.529325 }, { "acc": 0.990625, "epoch": 39.30161706116709, "grad_norm": 4.694144248962402, "learning_rate": 1.1992203946882219e-06, "loss": 0.02271318, "memory(GiB)": 13.7, "step": 83850, "train_speed(iter/s)": 1.529326 }, { "acc": 0.98425598, "epoch": 39.303960628075934, "grad_norm": 1.9586149454116821, "learning_rate": 1.1987169786248583e-06, "loss": 0.03662931, "memory(GiB)": 13.7, "step": 83855, "train_speed(iter/s)": 1.529332 }, { "acc": 0.9885416, "epoch": 39.30630419498477, "grad_norm": 3.6118571758270264, "learning_rate": 1.1982136539438682e-06, "loss": 0.04411019, "memory(GiB)": 13.7, "step": 83860, "train_speed(iter/s)": 1.529332 }, { "acc": 0.9880209, "epoch": 39.3086477618936, "grad_norm": 5.611499309539795, "learning_rate": 1.1977104206573493e-06, "loss": 0.04155309, "memory(GiB)": 13.7, "step": 83865, "train_speed(iter/s)": 1.529337 }, { "acc": 0.9864584, "epoch": 39.31099132880244, "grad_norm": 0.8205856084823608, "learning_rate": 1.1972072787773987e-06, "loss": 0.03155615, "memory(GiB)": 13.7, "step": 83870, "train_speed(iter/s)": 1.529335 }, { "acc": 0.98812504, "epoch": 39.31333489571127, "grad_norm": 0.009938642382621765, "learning_rate": 1.1967042283161118e-06, "loss": 0.01995956, "memory(GiB)": 13.7, "step": 83875, "train_speed(iter/s)": 1.529338 }, { "acc": 0.99258928, "epoch": 39.315678462620106, "grad_norm": 3.599555492401123, "learning_rate": 1.1962012692855777e-06, "loss": 0.02235786, "memory(GiB)": 13.7, "step": 83880, "train_speed(iter/s)": 1.52934 }, { "acc": 0.97459688, "epoch": 39.31802202952894, "grad_norm": 2.0150530338287354, "learning_rate": 1.1956984016978864e-06, "loss": 0.07173583, "memory(GiB)": 13.7, "step": 83885, "train_speed(iter/s)": 1.529342 }, { "acc": 0.9895833, "epoch": 39.32036559643778, "grad_norm": 2.9602978229522705, "learning_rate": 1.1951956255651263e-06, "loss": 0.03806477, "memory(GiB)": 13.7, "step": 83890, "train_speed(iter/s)": 1.529345 }, { "acc": 0.99375, "epoch": 39.322709163346616, "grad_norm": 0.8265938758850098, "learning_rate": 1.1946929408993832e-06, "loss": 0.01824137, "memory(GiB)": 13.7, "step": 83895, "train_speed(iter/s)": 1.529346 }, { "acc": 0.96859207, "epoch": 39.32505273025545, "grad_norm": 6.671138286590576, "learning_rate": 1.1941903477127384e-06, "loss": 0.06309744, "memory(GiB)": 13.7, "step": 83900, "train_speed(iter/s)": 1.52935 }, { "acc": 0.98500004, "epoch": 39.327396297164285, "grad_norm": 5.560908317565918, "learning_rate": 1.1936878460172717e-06, "loss": 0.02827293, "memory(GiB)": 13.7, "step": 83905, "train_speed(iter/s)": 1.529352 }, { "acc": 0.98624992, "epoch": 39.32973986407312, "grad_norm": 1.053328275680542, "learning_rate": 1.1931854358250629e-06, "loss": 0.03191837, "memory(GiB)": 13.7, "step": 83910, "train_speed(iter/s)": 1.52935 }, { "acc": 0.98559532, "epoch": 39.33208343098195, "grad_norm": 0.8212877511978149, "learning_rate": 1.1926831171481898e-06, "loss": 0.04632397, "memory(GiB)": 13.7, "step": 83915, "train_speed(iter/s)": 1.529354 }, { "acc": 0.99258928, "epoch": 39.33442699789079, "grad_norm": 1.603723406791687, "learning_rate": 1.1921808899987234e-06, "loss": 0.02754992, "memory(GiB)": 13.7, "step": 83920, "train_speed(iter/s)": 1.529357 }, { "acc": 0.97614584, "epoch": 39.33677056479962, "grad_norm": 2.6297554969787598, "learning_rate": 1.1916787543887373e-06, "loss": 0.04596536, "memory(GiB)": 13.7, "step": 83925, "train_speed(iter/s)": 1.529358 }, { "acc": 0.97557535, "epoch": 39.33911413170846, "grad_norm": 0.001232267008163035, "learning_rate": 1.1911767103303021e-06, "loss": 0.06527894, "memory(GiB)": 13.7, "step": 83930, "train_speed(iter/s)": 1.52936 }, { "acc": 0.99281254, "epoch": 39.3414576986173, "grad_norm": 0.004294826183468103, "learning_rate": 1.190674757835483e-06, "loss": 0.04744616, "memory(GiB)": 13.7, "step": 83935, "train_speed(iter/s)": 1.529363 }, { "acc": 0.97218752, "epoch": 39.34380126552613, "grad_norm": 3.081894874572754, "learning_rate": 1.1901728969163473e-06, "loss": 0.07713149, "memory(GiB)": 13.7, "step": 83940, "train_speed(iter/s)": 1.529364 }, { "acc": 0.98984375, "epoch": 39.346144832434966, "grad_norm": 8.544727325439453, "learning_rate": 1.1896711275849565e-06, "loss": 0.02436406, "memory(GiB)": 13.7, "step": 83945, "train_speed(iter/s)": 1.529364 }, { "acc": 0.99020834, "epoch": 39.3484883993438, "grad_norm": 2.2053301334381104, "learning_rate": 1.1891694498533736e-06, "loss": 0.0270508, "memory(GiB)": 13.7, "step": 83950, "train_speed(iter/s)": 1.529364 }, { "acc": 0.98968754, "epoch": 39.350831966252635, "grad_norm": 2.0549824237823486, "learning_rate": 1.1886678637336541e-06, "loss": 0.03871991, "memory(GiB)": 13.7, "step": 83955, "train_speed(iter/s)": 1.529366 }, { "acc": 0.99197302, "epoch": 39.35317553316147, "grad_norm": 1.565953016281128, "learning_rate": 1.1881663692378566e-06, "loss": 0.02914961, "memory(GiB)": 13.7, "step": 83960, "train_speed(iter/s)": 1.529371 }, { "acc": 0.98723211, "epoch": 39.35551910007031, "grad_norm": 1.9190853834152222, "learning_rate": 1.1876649663780355e-06, "loss": 0.03692422, "memory(GiB)": 13.7, "step": 83965, "train_speed(iter/s)": 1.529369 }, { "acc": 0.979072, "epoch": 39.357862666979145, "grad_norm": 3.4829540252685547, "learning_rate": 1.1871636551662432e-06, "loss": 0.04673841, "memory(GiB)": 13.7, "step": 83970, "train_speed(iter/s)": 1.52937 }, { "acc": 0.9947916, "epoch": 39.36020623388798, "grad_norm": 2.8179209232330322, "learning_rate": 1.1866624356145277e-06, "loss": 0.01741499, "memory(GiB)": 13.7, "step": 83975, "train_speed(iter/s)": 1.529371 }, { "acc": 0.98187504, "epoch": 39.36254980079681, "grad_norm": 2.547170877456665, "learning_rate": 1.1861613077349397e-06, "loss": 0.0492279, "memory(GiB)": 13.7, "step": 83980, "train_speed(iter/s)": 1.529374 }, { "acc": 0.97718754, "epoch": 39.36489336770565, "grad_norm": 4.38234806060791, "learning_rate": 1.1856602715395205e-06, "loss": 0.05156482, "memory(GiB)": 13.7, "step": 83985, "train_speed(iter/s)": 1.529374 }, { "acc": 1.0, "epoch": 39.36723693461448, "grad_norm": 1.3582589626312256, "learning_rate": 1.1851593270403176e-06, "loss": 0.02248726, "memory(GiB)": 13.7, "step": 83990, "train_speed(iter/s)": 1.529375 }, { "acc": 0.98780642, "epoch": 39.369580501523316, "grad_norm": 4.036299705505371, "learning_rate": 1.1846584742493688e-06, "loss": 0.02722923, "memory(GiB)": 13.7, "step": 83995, "train_speed(iter/s)": 1.529376 }, { "acc": 0.98908329, "epoch": 39.37192406843215, "grad_norm": 4.664803981781006, "learning_rate": 1.1841577131787138e-06, "loss": 0.03549876, "memory(GiB)": 13.7, "step": 84000, "train_speed(iter/s)": 1.529379 }, { "acc": 0.98633928, "epoch": 39.37426763534099, "grad_norm": 3.35516095161438, "learning_rate": 1.1836570438403913e-06, "loss": 0.03527191, "memory(GiB)": 13.7, "step": 84005, "train_speed(iter/s)": 1.529381 }, { "acc": 0.98519344, "epoch": 39.376611202249826, "grad_norm": 6.668174743652344, "learning_rate": 1.1831564662464326e-06, "loss": 0.04893144, "memory(GiB)": 13.7, "step": 84010, "train_speed(iter/s)": 1.52938 }, { "acc": 0.9864584, "epoch": 39.37895476915866, "grad_norm": 4.216591835021973, "learning_rate": 1.182655980408872e-06, "loss": 0.03578181, "memory(GiB)": 13.7, "step": 84015, "train_speed(iter/s)": 1.529383 }, { "acc": 0.996875, "epoch": 39.381298336067495, "grad_norm": 1.5462207794189453, "learning_rate": 1.1821555863397402e-06, "loss": 0.02317737, "memory(GiB)": 13.7, "step": 84020, "train_speed(iter/s)": 1.529383 }, { "acc": 0.99229164, "epoch": 39.38364190297633, "grad_norm": 2.2389631271362305, "learning_rate": 1.1816552840510628e-06, "loss": 0.01931664, "memory(GiB)": 13.7, "step": 84025, "train_speed(iter/s)": 1.52939 }, { "acc": 0.9854166, "epoch": 39.38598546988516, "grad_norm": 5.633175373077393, "learning_rate": 1.1811550735548673e-06, "loss": 0.04374563, "memory(GiB)": 13.7, "step": 84030, "train_speed(iter/s)": 1.529393 }, { "acc": 0.98916664, "epoch": 39.388329036794, "grad_norm": 3.754288673400879, "learning_rate": 1.180654954863175e-06, "loss": 0.02877677, "memory(GiB)": 13.7, "step": 84035, "train_speed(iter/s)": 1.529394 }, { "acc": 0.9833333, "epoch": 39.39067260370284, "grad_norm": 4.368152141571045, "learning_rate": 1.180154927988009e-06, "loss": 0.03588694, "memory(GiB)": 13.7, "step": 84040, "train_speed(iter/s)": 1.529394 }, { "acc": 0.99125004, "epoch": 39.39301617061167, "grad_norm": 2.726755380630493, "learning_rate": 1.1796549929413885e-06, "loss": 0.01593392, "memory(GiB)": 13.7, "step": 84045, "train_speed(iter/s)": 1.529397 }, { "acc": 0.99036932, "epoch": 39.39535973752051, "grad_norm": 0.4719356298446655, "learning_rate": 1.1791551497353283e-06, "loss": 0.03887749, "memory(GiB)": 13.7, "step": 84050, "train_speed(iter/s)": 1.529401 }, { "acc": 0.98341351, "epoch": 39.39770330442934, "grad_norm": 3.997598648071289, "learning_rate": 1.1786553983818444e-06, "loss": 0.0341311, "memory(GiB)": 13.7, "step": 84055, "train_speed(iter/s)": 1.529406 }, { "acc": 0.98979168, "epoch": 39.400046871338176, "grad_norm": 1.2885491847991943, "learning_rate": 1.1781557388929501e-06, "loss": 0.03029982, "memory(GiB)": 13.7, "step": 84060, "train_speed(iter/s)": 1.52941 }, { "acc": 0.97166672, "epoch": 39.40239043824701, "grad_norm": 5.719494819641113, "learning_rate": 1.1776561712806545e-06, "loss": 0.06144115, "memory(GiB)": 13.7, "step": 84065, "train_speed(iter/s)": 1.529414 }, { "acc": 0.96957788, "epoch": 39.404734005155845, "grad_norm": 3.264526844024658, "learning_rate": 1.1771566955569643e-06, "loss": 0.06144106, "memory(GiB)": 13.7, "step": 84070, "train_speed(iter/s)": 1.529414 }, { "acc": 0.98347759, "epoch": 39.40707757206468, "grad_norm": 0.009923305362462997, "learning_rate": 1.176657311733886e-06, "loss": 0.05009488, "memory(GiB)": 13.7, "step": 84075, "train_speed(iter/s)": 1.529415 }, { "acc": 0.99196434, "epoch": 39.40942113897352, "grad_norm": 3.749594211578369, "learning_rate": 1.1761580198234256e-06, "loss": 0.01579385, "memory(GiB)": 13.7, "step": 84080, "train_speed(iter/s)": 1.52941 }, { "acc": 0.98354168, "epoch": 39.411764705882355, "grad_norm": 4.1302995681762695, "learning_rate": 1.175658819837581e-06, "loss": 0.04255707, "memory(GiB)": 13.7, "step": 84085, "train_speed(iter/s)": 1.529414 }, { "acc": 0.99459324, "epoch": 39.41410827279119, "grad_norm": 3.391465425491333, "learning_rate": 1.1751597117883527e-06, "loss": 0.03667885, "memory(GiB)": 13.7, "step": 84090, "train_speed(iter/s)": 1.529415 }, { "acc": 0.98395834, "epoch": 39.416451839700024, "grad_norm": 4.749828338623047, "learning_rate": 1.1746606956877376e-06, "loss": 0.05734009, "memory(GiB)": 13.7, "step": 84095, "train_speed(iter/s)": 1.529426 }, { "acc": 0.98779755, "epoch": 39.41879540660886, "grad_norm": 3.530055522918701, "learning_rate": 1.1741617715477318e-06, "loss": 0.02925067, "memory(GiB)": 13.7, "step": 84100, "train_speed(iter/s)": 1.529429 }, { "acc": 0.98415184, "epoch": 39.42113897351769, "grad_norm": 1.6145774126052856, "learning_rate": 1.1736629393803267e-06, "loss": 0.02381386, "memory(GiB)": 13.7, "step": 84105, "train_speed(iter/s)": 1.529429 }, { "acc": 0.9885417, "epoch": 39.42348254042653, "grad_norm": 2.9286341667175293, "learning_rate": 1.1731641991975114e-06, "loss": 0.01996529, "memory(GiB)": 13.7, "step": 84110, "train_speed(iter/s)": 1.529428 }, { "acc": 1.0, "epoch": 39.42582610733537, "grad_norm": 3.629993200302124, "learning_rate": 1.1726655510112743e-06, "loss": 0.01559146, "memory(GiB)": 13.7, "step": 84115, "train_speed(iter/s)": 1.529431 }, { "acc": 0.98916664, "epoch": 39.4281696742442, "grad_norm": 3.3145928382873535, "learning_rate": 1.1721669948336038e-06, "loss": 0.02429685, "memory(GiB)": 13.7, "step": 84120, "train_speed(iter/s)": 1.529434 }, { "acc": 0.9916667, "epoch": 39.43051324115304, "grad_norm": 3.825497627258301, "learning_rate": 1.17166853067648e-06, "loss": 0.02998459, "memory(GiB)": 13.7, "step": 84125, "train_speed(iter/s)": 1.52944 }, { "acc": 0.9791667, "epoch": 39.43285680806187, "grad_norm": 4.007588863372803, "learning_rate": 1.1711701585518865e-06, "loss": 0.03570469, "memory(GiB)": 13.7, "step": 84130, "train_speed(iter/s)": 1.529445 }, { "acc": 0.97101192, "epoch": 39.435200374970705, "grad_norm": 4.8950276374816895, "learning_rate": 1.1706718784718032e-06, "loss": 0.04627058, "memory(GiB)": 13.7, "step": 84135, "train_speed(iter/s)": 1.52945 }, { "acc": 0.99263401, "epoch": 39.43754394187954, "grad_norm": 0.6089186668395996, "learning_rate": 1.1701736904482046e-06, "loss": 0.02094556, "memory(GiB)": 13.7, "step": 84140, "train_speed(iter/s)": 1.529456 }, { "acc": 0.99020834, "epoch": 39.439887508788374, "grad_norm": 1.5057705640792847, "learning_rate": 1.1696755944930663e-06, "loss": 0.02536395, "memory(GiB)": 13.7, "step": 84145, "train_speed(iter/s)": 1.529456 }, { "acc": 0.98289146, "epoch": 39.44223107569721, "grad_norm": 0.8453614711761475, "learning_rate": 1.1691775906183637e-06, "loss": 0.06149902, "memory(GiB)": 13.7, "step": 84150, "train_speed(iter/s)": 1.529459 }, { "acc": 0.98633928, "epoch": 39.44457464260605, "grad_norm": 5.446205139160156, "learning_rate": 1.1686796788360648e-06, "loss": 0.04460371, "memory(GiB)": 13.7, "step": 84155, "train_speed(iter/s)": 1.529459 }, { "acc": 0.98708334, "epoch": 39.446918209514884, "grad_norm": 0.003961476963013411, "learning_rate": 1.1681818591581364e-06, "loss": 0.03196738, "memory(GiB)": 13.7, "step": 84160, "train_speed(iter/s)": 1.52946 }, { "acc": 0.9885417, "epoch": 39.44926177642372, "grad_norm": 4.983081817626953, "learning_rate": 1.1676841315965458e-06, "loss": 0.02476908, "memory(GiB)": 13.7, "step": 84165, "train_speed(iter/s)": 1.529459 }, { "acc": 0.9875, "epoch": 39.45160534333255, "grad_norm": 4.461102485656738, "learning_rate": 1.1671864961632569e-06, "loss": 0.02030553, "memory(GiB)": 13.7, "step": 84170, "train_speed(iter/s)": 1.529459 }, { "acc": 0.99591351, "epoch": 39.45394891024139, "grad_norm": 4.216170310974121, "learning_rate": 1.166688952870233e-06, "loss": 0.03898087, "memory(GiB)": 13.7, "step": 84175, "train_speed(iter/s)": 1.52946 }, { "acc": 0.98249998, "epoch": 39.45629247715022, "grad_norm": 4.202066898345947, "learning_rate": 1.1661915017294293e-06, "loss": 0.06304026, "memory(GiB)": 13.7, "step": 84180, "train_speed(iter/s)": 1.529465 }, { "acc": 0.99079857, "epoch": 39.458636044059055, "grad_norm": 2.8166592121124268, "learning_rate": 1.1656941427528065e-06, "loss": 0.03534737, "memory(GiB)": 13.7, "step": 84185, "train_speed(iter/s)": 1.529468 }, { "acc": 0.99093208, "epoch": 39.46097961096789, "grad_norm": 4.176192760467529, "learning_rate": 1.1651968759523188e-06, "loss": 0.04440585, "memory(GiB)": 13.7, "step": 84190, "train_speed(iter/s)": 1.529473 }, { "acc": 0.98270226, "epoch": 39.46332317787673, "grad_norm": 2.637087821960449, "learning_rate": 1.1646997013399183e-06, "loss": 0.03483489, "memory(GiB)": 13.7, "step": 84195, "train_speed(iter/s)": 1.529472 }, { "acc": 0.99125004, "epoch": 39.465666744785565, "grad_norm": 1.436199426651001, "learning_rate": 1.1642026189275544e-06, "loss": 0.02014738, "memory(GiB)": 13.7, "step": 84200, "train_speed(iter/s)": 1.529477 }, { "acc": 0.97875004, "epoch": 39.4680103116944, "grad_norm": 3.703808069229126, "learning_rate": 1.163705628727176e-06, "loss": 0.0496841, "memory(GiB)": 13.7, "step": 84205, "train_speed(iter/s)": 1.529475 }, { "acc": 0.97458334, "epoch": 39.470353878603234, "grad_norm": 3.5999197959899902, "learning_rate": 1.163208730750731e-06, "loss": 0.05171237, "memory(GiB)": 13.7, "step": 84210, "train_speed(iter/s)": 1.529474 }, { "acc": 0.98208332, "epoch": 39.47269744551207, "grad_norm": 1.377181053161621, "learning_rate": 1.16271192501016e-06, "loss": 0.07137059, "memory(GiB)": 13.7, "step": 84215, "train_speed(iter/s)": 1.529475 }, { "acc": 0.97215271, "epoch": 39.4750410124209, "grad_norm": 8.270277976989746, "learning_rate": 1.1622152115174063e-06, "loss": 0.0645066, "memory(GiB)": 13.7, "step": 84220, "train_speed(iter/s)": 1.529476 }, { "acc": 0.99278851, "epoch": 39.47738457932974, "grad_norm": 0.0038689214270561934, "learning_rate": 1.1617185902844105e-06, "loss": 0.04010236, "memory(GiB)": 13.7, "step": 84225, "train_speed(iter/s)": 1.529481 }, { "acc": 0.97989578, "epoch": 39.47972814623858, "grad_norm": 5.163641929626465, "learning_rate": 1.1612220613231072e-06, "loss": 0.04327074, "memory(GiB)": 13.7, "step": 84230, "train_speed(iter/s)": 1.529487 }, { "acc": 0.98904762, "epoch": 39.48207171314741, "grad_norm": 0.12510767579078674, "learning_rate": 1.1607256246454335e-06, "loss": 0.03669739, "memory(GiB)": 13.7, "step": 84235, "train_speed(iter/s)": 1.529487 }, { "acc": 0.99541664, "epoch": 39.48441528005625, "grad_norm": 2.709928512573242, "learning_rate": 1.1602292802633204e-06, "loss": 0.03664264, "memory(GiB)": 13.7, "step": 84240, "train_speed(iter/s)": 1.529492 }, { "acc": 0.98208332, "epoch": 39.48675884696508, "grad_norm": 4.7999796867370605, "learning_rate": 1.1597330281886989e-06, "loss": 0.03539139, "memory(GiB)": 13.7, "step": 84245, "train_speed(iter/s)": 1.5295 }, { "acc": 0.99495192, "epoch": 39.489102413873916, "grad_norm": 0.376861035823822, "learning_rate": 1.1592368684334985e-06, "loss": 0.03242596, "memory(GiB)": 13.7, "step": 84250, "train_speed(iter/s)": 1.529502 }, { "acc": 0.9760417, "epoch": 39.49144598078275, "grad_norm": 0.004338941071182489, "learning_rate": 1.1587408010096432e-06, "loss": 0.0475593, "memory(GiB)": 13.7, "step": 84255, "train_speed(iter/s)": 1.529505 }, { "acc": 0.97979164, "epoch": 39.493789547691584, "grad_norm": 1.0261433124542236, "learning_rate": 1.1582448259290584e-06, "loss": 0.04693092, "memory(GiB)": 13.7, "step": 84260, "train_speed(iter/s)": 1.529506 }, { "acc": 0.98833332, "epoch": 39.49613311460042, "grad_norm": 3.4641172885894775, "learning_rate": 1.157748943203666e-06, "loss": 0.03354054, "memory(GiB)": 13.7, "step": 84265, "train_speed(iter/s)": 1.529505 }, { "acc": 0.9802084, "epoch": 39.49847668150926, "grad_norm": 0.0007065945537760854, "learning_rate": 1.157253152845383e-06, "loss": 0.04034901, "memory(GiB)": 13.7, "step": 84270, "train_speed(iter/s)": 1.529504 }, { "acc": 0.97800598, "epoch": 39.500820248418094, "grad_norm": 4.239940166473389, "learning_rate": 1.1567574548661299e-06, "loss": 0.05414221, "memory(GiB)": 13.7, "step": 84275, "train_speed(iter/s)": 1.529506 }, { "acc": 0.98125, "epoch": 39.50316381532693, "grad_norm": 3.544196128845215, "learning_rate": 1.1562618492778186e-06, "loss": 0.03906886, "memory(GiB)": 13.7, "step": 84280, "train_speed(iter/s)": 1.52951 }, { "acc": 0.98674679, "epoch": 39.50550738223576, "grad_norm": 2.1817855834960938, "learning_rate": 1.1557663360923642e-06, "loss": 0.02580201, "memory(GiB)": 13.7, "step": 84285, "train_speed(iter/s)": 1.529511 }, { "acc": 0.9919445, "epoch": 39.5078509491446, "grad_norm": 1.6611794233322144, "learning_rate": 1.1552709153216748e-06, "loss": 0.03080176, "memory(GiB)": 13.7, "step": 84290, "train_speed(iter/s)": 1.52951 }, { "acc": 0.990625, "epoch": 39.51019451605343, "grad_norm": 2.014552354812622, "learning_rate": 1.1547755869776606e-06, "loss": 0.03483413, "memory(GiB)": 13.7, "step": 84295, "train_speed(iter/s)": 1.529511 }, { "acc": 0.975, "epoch": 39.512538082962266, "grad_norm": 3.2148311138153076, "learning_rate": 1.1542803510722279e-06, "loss": 0.05513675, "memory(GiB)": 13.7, "step": 84300, "train_speed(iter/s)": 1.52951 }, { "acc": 0.9875, "epoch": 39.51488164987111, "grad_norm": 0.11788658797740936, "learning_rate": 1.1537852076172783e-06, "loss": 0.02062382, "memory(GiB)": 13.7, "step": 84305, "train_speed(iter/s)": 1.529506 }, { "acc": 0.99154758, "epoch": 39.51722521677994, "grad_norm": 4.375561714172363, "learning_rate": 1.1532901566247153e-06, "loss": 0.06267833, "memory(GiB)": 13.7, "step": 84310, "train_speed(iter/s)": 1.529509 }, { "acc": 0.99026203, "epoch": 39.519568783688776, "grad_norm": 3.9429171085357666, "learning_rate": 1.1527951981064392e-06, "loss": 0.03538662, "memory(GiB)": 13.7, "step": 84315, "train_speed(iter/s)": 1.52951 }, { "acc": 0.99072914, "epoch": 39.52191235059761, "grad_norm": 1.4924185276031494, "learning_rate": 1.1523003320743444e-06, "loss": 0.02328912, "memory(GiB)": 13.7, "step": 84320, "train_speed(iter/s)": 1.529507 }, { "acc": 0.97595234, "epoch": 39.524255917506444, "grad_norm": 0.000264666072325781, "learning_rate": 1.1518055585403282e-06, "loss": 0.05424251, "memory(GiB)": 13.7, "step": 84325, "train_speed(iter/s)": 1.529512 }, { "acc": 0.99229164, "epoch": 39.52659948441528, "grad_norm": 3.3070571422576904, "learning_rate": 1.1513108775162815e-06, "loss": 0.02297759, "memory(GiB)": 13.7, "step": 84330, "train_speed(iter/s)": 1.529517 }, { "acc": 0.97133923, "epoch": 39.52894305132411, "grad_norm": 0.012359865941107273, "learning_rate": 1.1508162890140956e-06, "loss": 0.06814336, "memory(GiB)": 13.7, "step": 84335, "train_speed(iter/s)": 1.529519 }, { "acc": 0.99125004, "epoch": 39.53128661823295, "grad_norm": 0.29788729548454285, "learning_rate": 1.1503217930456608e-06, "loss": 0.03215946, "memory(GiB)": 13.7, "step": 84340, "train_speed(iter/s)": 1.529516 }, { "acc": 0.96863098, "epoch": 39.53363018514179, "grad_norm": 3.5005340576171875, "learning_rate": 1.1498273896228592e-06, "loss": 0.04690258, "memory(GiB)": 13.7, "step": 84345, "train_speed(iter/s)": 1.529519 }, { "acc": 0.9770833, "epoch": 39.53597375205062, "grad_norm": 2.8912487030029297, "learning_rate": 1.1493330787575768e-06, "loss": 0.05183178, "memory(GiB)": 13.7, "step": 84350, "train_speed(iter/s)": 1.529519 }, { "acc": 0.99541664, "epoch": 39.53831731895946, "grad_norm": 0.0023025982081890106, "learning_rate": 1.1488388604616972e-06, "loss": 0.03637995, "memory(GiB)": 13.7, "step": 84355, "train_speed(iter/s)": 1.529521 }, { "acc": 0.98571434, "epoch": 39.54066088586829, "grad_norm": 0.44984713196754456, "learning_rate": 1.1483447347470972e-06, "loss": 0.0325123, "memory(GiB)": 13.7, "step": 84360, "train_speed(iter/s)": 1.529523 }, { "acc": 0.99279766, "epoch": 39.543004452777126, "grad_norm": 2.6052258014678955, "learning_rate": 1.1478507016256529e-06, "loss": 0.04255542, "memory(GiB)": 13.7, "step": 84365, "train_speed(iter/s)": 1.529525 }, { "acc": 0.99125004, "epoch": 39.54534801968596, "grad_norm": 3.56838059425354, "learning_rate": 1.1473567611092412e-06, "loss": 0.03536593, "memory(GiB)": 13.7, "step": 84370, "train_speed(iter/s)": 1.529529 }, { "acc": 0.9885417, "epoch": 39.547691586594794, "grad_norm": 2.847827196121216, "learning_rate": 1.1468629132097351e-06, "loss": 0.02465685, "memory(GiB)": 13.7, "step": 84375, "train_speed(iter/s)": 1.529533 }, { "acc": 0.98291664, "epoch": 39.550035153503636, "grad_norm": 3.248128890991211, "learning_rate": 1.1463691579390038e-06, "loss": 0.02801873, "memory(GiB)": 13.7, "step": 84380, "train_speed(iter/s)": 1.529536 }, { "acc": 0.98395834, "epoch": 39.55237872041247, "grad_norm": 7.598519325256348, "learning_rate": 1.1458754953089155e-06, "loss": 0.0291796, "memory(GiB)": 13.7, "step": 84385, "train_speed(iter/s)": 1.52954 }, { "acc": 0.98624992, "epoch": 39.554722287321304, "grad_norm": 3.8042843341827393, "learning_rate": 1.1453819253313373e-06, "loss": 0.03929062, "memory(GiB)": 13.7, "step": 84390, "train_speed(iter/s)": 1.529541 }, { "acc": 0.9916667, "epoch": 39.55706585423014, "grad_norm": 0.1026989072561264, "learning_rate": 1.144888448018134e-06, "loss": 0.0231681, "memory(GiB)": 13.7, "step": 84395, "train_speed(iter/s)": 1.529545 }, { "acc": 0.97624998, "epoch": 39.55940942113897, "grad_norm": 0.007787580601871014, "learning_rate": 1.1443950633811654e-06, "loss": 0.03658712, "memory(GiB)": 13.7, "step": 84400, "train_speed(iter/s)": 1.529548 }, { "acc": 0.9947917, "epoch": 39.56175298804781, "grad_norm": 0.004736871458590031, "learning_rate": 1.14390177143229e-06, "loss": 0.03966605, "memory(GiB)": 13.7, "step": 84405, "train_speed(iter/s)": 1.529551 }, { "acc": 0.98937492, "epoch": 39.56409655495664, "grad_norm": 0.18881015479564667, "learning_rate": 1.1434085721833662e-06, "loss": 0.01855359, "memory(GiB)": 13.7, "step": 84410, "train_speed(iter/s)": 1.529553 }, { "acc": 0.97800598, "epoch": 39.566440121865476, "grad_norm": 1.6699378490447998, "learning_rate": 1.1429154656462503e-06, "loss": 0.06508018, "memory(GiB)": 13.7, "step": 84415, "train_speed(iter/s)": 1.529555 }, { "acc": 0.9890625, "epoch": 39.56878368877432, "grad_norm": 4.7609992027282715, "learning_rate": 1.1424224518327914e-06, "loss": 0.02224395, "memory(GiB)": 13.7, "step": 84420, "train_speed(iter/s)": 1.529562 }, { "acc": 0.97863102, "epoch": 39.57112725568315, "grad_norm": 1.270076870918274, "learning_rate": 1.1419295307548427e-06, "loss": 0.03244861, "memory(GiB)": 13.7, "step": 84425, "train_speed(iter/s)": 1.529573 }, { "acc": 0.98103628, "epoch": 39.573470822591986, "grad_norm": 3.3620314598083496, "learning_rate": 1.141436702424253e-06, "loss": 0.04250593, "memory(GiB)": 13.7, "step": 84430, "train_speed(iter/s)": 1.529577 }, { "acc": 0.99458332, "epoch": 39.57581438950082, "grad_norm": 0.003624927718192339, "learning_rate": 1.1409439668528654e-06, "loss": 0.04529868, "memory(GiB)": 13.7, "step": 84435, "train_speed(iter/s)": 1.529578 }, { "acc": 0.9725893, "epoch": 39.578157956409655, "grad_norm": 4.423375129699707, "learning_rate": 1.1404513240525266e-06, "loss": 0.06808816, "memory(GiB)": 13.7, "step": 84440, "train_speed(iter/s)": 1.529587 }, { "acc": 0.9791667, "epoch": 39.58050152331849, "grad_norm": 2.226491928100586, "learning_rate": 1.1399587740350753e-06, "loss": 0.03696088, "memory(GiB)": 13.7, "step": 84445, "train_speed(iter/s)": 1.52959 }, { "acc": 0.9739584, "epoch": 39.58284509022732, "grad_norm": 3.044832706451416, "learning_rate": 1.1394663168123523e-06, "loss": 0.04185084, "memory(GiB)": 13.7, "step": 84450, "train_speed(iter/s)": 1.529595 }, { "acc": 0.97952347, "epoch": 39.585188657136165, "grad_norm": 5.356571197509766, "learning_rate": 1.1389739523961956e-06, "loss": 0.07316831, "memory(GiB)": 13.7, "step": 84455, "train_speed(iter/s)": 1.529596 }, { "acc": 0.98395834, "epoch": 39.587532224045, "grad_norm": 5.147161960601807, "learning_rate": 1.1384816807984378e-06, "loss": 0.04755363, "memory(GiB)": 13.7, "step": 84460, "train_speed(iter/s)": 1.529599 }, { "acc": 0.98673611, "epoch": 39.58987579095383, "grad_norm": 0.024987714365124702, "learning_rate": 1.1379895020309129e-06, "loss": 0.066626, "memory(GiB)": 13.7, "step": 84465, "train_speed(iter/s)": 1.529599 }, { "acc": 0.98312502, "epoch": 39.59221935786267, "grad_norm": 3.562126636505127, "learning_rate": 1.1374974161054527e-06, "loss": 0.03817941, "memory(GiB)": 13.7, "step": 84470, "train_speed(iter/s)": 1.529598 }, { "acc": 0.97573872, "epoch": 39.5945629247715, "grad_norm": 2.9429287910461426, "learning_rate": 1.1370054230338815e-06, "loss": 0.05756766, "memory(GiB)": 13.7, "step": 84475, "train_speed(iter/s)": 1.529604 }, { "acc": 0.9902977, "epoch": 39.596906491680336, "grad_norm": 0.8398695588111877, "learning_rate": 1.1365135228280278e-06, "loss": 0.04368052, "memory(GiB)": 13.7, "step": 84480, "train_speed(iter/s)": 1.529608 }, { "acc": 0.99458332, "epoch": 39.59925005858917, "grad_norm": 1.3547754287719727, "learning_rate": 1.1360217154997163e-06, "loss": 0.04311896, "memory(GiB)": 13.7, "step": 84485, "train_speed(iter/s)": 1.52961 }, { "acc": 0.98988094, "epoch": 39.601593625498005, "grad_norm": 4.929678440093994, "learning_rate": 1.1355300010607666e-06, "loss": 0.03528761, "memory(GiB)": 13.7, "step": 84490, "train_speed(iter/s)": 1.529614 }, { "acc": 0.98969002, "epoch": 39.603937192406846, "grad_norm": 1.653235912322998, "learning_rate": 1.1350383795229977e-06, "loss": 0.05130538, "memory(GiB)": 13.7, "step": 84495, "train_speed(iter/s)": 1.52962 }, { "acc": 0.984375, "epoch": 39.60628075931568, "grad_norm": 3.8937137126922607, "learning_rate": 1.1345468508982264e-06, "loss": 0.05610707, "memory(GiB)": 13.7, "step": 84500, "train_speed(iter/s)": 1.529619 }, { "acc": 0.98604164, "epoch": 39.608624326224515, "grad_norm": 7.255494594573975, "learning_rate": 1.1340554151982697e-06, "loss": 0.03583511, "memory(GiB)": 13.7, "step": 84505, "train_speed(iter/s)": 1.529625 }, { "acc": 0.98113098, "epoch": 39.61096789313335, "grad_norm": 2.8574585914611816, "learning_rate": 1.1335640724349372e-06, "loss": 0.0779868, "memory(GiB)": 13.7, "step": 84510, "train_speed(iter/s)": 1.529626 }, { "acc": 0.97166672, "epoch": 39.61331146004218, "grad_norm": 2.1141793727874756, "learning_rate": 1.1330728226200403e-06, "loss": 0.06974336, "memory(GiB)": 13.7, "step": 84515, "train_speed(iter/s)": 1.529628 }, { "acc": 0.9885417, "epoch": 39.61565502695102, "grad_norm": 7.6742658615112305, "learning_rate": 1.132581665765389e-06, "loss": 0.03338115, "memory(GiB)": 13.7, "step": 84520, "train_speed(iter/s)": 1.529624 }, { "acc": 0.97946434, "epoch": 39.61799859385985, "grad_norm": 0.19212207198143005, "learning_rate": 1.1320906018827856e-06, "loss": 0.04022644, "memory(GiB)": 13.7, "step": 84525, "train_speed(iter/s)": 1.529628 }, { "acc": 0.9786459, "epoch": 39.62034216076869, "grad_norm": 3.930198907852173, "learning_rate": 1.131599630984037e-06, "loss": 0.0553931, "memory(GiB)": 13.7, "step": 84530, "train_speed(iter/s)": 1.52963 }, { "acc": 0.97659187, "epoch": 39.62268572767753, "grad_norm": 0.0006468815263360739, "learning_rate": 1.1311087530809418e-06, "loss": 0.07013887, "memory(GiB)": 13.7, "step": 84535, "train_speed(iter/s)": 1.52963 }, { "acc": 0.97520828, "epoch": 39.62502929458636, "grad_norm": 2.8542234897613525, "learning_rate": 1.1306179681852999e-06, "loss": 0.04455427, "memory(GiB)": 13.7, "step": 84540, "train_speed(iter/s)": 1.529634 }, { "acc": 0.996875, "epoch": 39.627372861495196, "grad_norm": 2.7777962684631348, "learning_rate": 1.1301272763089098e-06, "loss": 0.023905, "memory(GiB)": 13.7, "step": 84545, "train_speed(iter/s)": 1.529635 }, { "acc": 0.97768431, "epoch": 39.62971642840403, "grad_norm": 6.199610233306885, "learning_rate": 1.1296366774635639e-06, "loss": 0.04076183, "memory(GiB)": 13.7, "step": 84550, "train_speed(iter/s)": 1.529631 }, { "acc": 0.99231157, "epoch": 39.632059995312865, "grad_norm": 2.2870278358459473, "learning_rate": 1.129146171661055e-06, "loss": 0.03031738, "memory(GiB)": 13.7, "step": 84555, "train_speed(iter/s)": 1.529634 }, { "acc": 0.9895833, "epoch": 39.6344035622217, "grad_norm": 0.0019631420727819204, "learning_rate": 1.1286557589131758e-06, "loss": 0.02517687, "memory(GiB)": 13.7, "step": 84560, "train_speed(iter/s)": 1.529634 }, { "acc": 0.97875004, "epoch": 39.63674712913053, "grad_norm": 5.821356296539307, "learning_rate": 1.1281654392317103e-06, "loss": 0.06166128, "memory(GiB)": 13.7, "step": 84565, "train_speed(iter/s)": 1.529635 }, { "acc": 0.975, "epoch": 39.639090696039375, "grad_norm": 1.0561938285827637, "learning_rate": 1.1276752126284473e-06, "loss": 0.06235362, "memory(GiB)": 13.7, "step": 84570, "train_speed(iter/s)": 1.529636 }, { "acc": 0.97506943, "epoch": 39.64143426294821, "grad_norm": 2.091002941131592, "learning_rate": 1.127185079115168e-06, "loss": 0.05076154, "memory(GiB)": 13.7, "step": 84575, "train_speed(iter/s)": 1.529641 }, { "acc": 0.97520828, "epoch": 39.643777829857044, "grad_norm": 1.9403042793273926, "learning_rate": 1.1266950387036556e-06, "loss": 0.03677602, "memory(GiB)": 13.7, "step": 84580, "train_speed(iter/s)": 1.529644 }, { "acc": 0.97770834, "epoch": 39.64612139676588, "grad_norm": 5.204001426696777, "learning_rate": 1.1262050914056867e-06, "loss": 0.06433159, "memory(GiB)": 13.7, "step": 84585, "train_speed(iter/s)": 1.529646 }, { "acc": 0.98874998, "epoch": 39.64846496367471, "grad_norm": 0.025910429656505585, "learning_rate": 1.1257152372330397e-06, "loss": 0.0318653, "memory(GiB)": 13.7, "step": 84590, "train_speed(iter/s)": 1.529647 }, { "acc": 0.97923613, "epoch": 39.65080853058355, "grad_norm": 1.8602066040039062, "learning_rate": 1.1252254761974884e-06, "loss": 0.03439459, "memory(GiB)": 13.7, "step": 84595, "train_speed(iter/s)": 1.529651 }, { "acc": 0.98145828, "epoch": 39.65315209749238, "grad_norm": 5.187864780426025, "learning_rate": 1.124735808310807e-06, "loss": 0.038112, "memory(GiB)": 13.7, "step": 84600, "train_speed(iter/s)": 1.529653 }, { "acc": 0.98145828, "epoch": 39.655495664401215, "grad_norm": 2.5885062217712402, "learning_rate": 1.124246233584762e-06, "loss": 0.05919186, "memory(GiB)": 13.7, "step": 84605, "train_speed(iter/s)": 1.529652 }, { "acc": 0.98500004, "epoch": 39.65783923131006, "grad_norm": 4.80801248550415, "learning_rate": 1.1237567520311248e-06, "loss": 0.02702786, "memory(GiB)": 13.7, "step": 84610, "train_speed(iter/s)": 1.529655 }, { "acc": 0.99125004, "epoch": 39.66018279821889, "grad_norm": 0.16623032093048096, "learning_rate": 1.1232673636616577e-06, "loss": 0.01187066, "memory(GiB)": 13.7, "step": 84615, "train_speed(iter/s)": 1.529655 }, { "acc": 0.9864583, "epoch": 39.662526365127725, "grad_norm": 0.04209086671471596, "learning_rate": 1.1227780684881272e-06, "loss": 0.03056814, "memory(GiB)": 13.7, "step": 84620, "train_speed(iter/s)": 1.529657 }, { "acc": 0.97175598, "epoch": 39.66486993203656, "grad_norm": 3.179037094116211, "learning_rate": 1.1222888665222908e-06, "loss": 0.04447348, "memory(GiB)": 13.7, "step": 84625, "train_speed(iter/s)": 1.529657 }, { "acc": 0.97987175, "epoch": 39.667213498945394, "grad_norm": 4.63743257522583, "learning_rate": 1.1217997577759096e-06, "loss": 0.0403214, "memory(GiB)": 13.7, "step": 84630, "train_speed(iter/s)": 1.529661 }, { "acc": 0.97458334, "epoch": 39.66955706585423, "grad_norm": 3.906625509262085, "learning_rate": 1.1213107422607406e-06, "loss": 0.08273286, "memory(GiB)": 13.7, "step": 84635, "train_speed(iter/s)": 1.529665 }, { "acc": 0.9734375, "epoch": 39.67190063276306, "grad_norm": 3.865260124206543, "learning_rate": 1.120821819988536e-06, "loss": 0.05920719, "memory(GiB)": 13.7, "step": 84640, "train_speed(iter/s)": 1.529661 }, { "acc": 0.98670635, "epoch": 39.674244199671904, "grad_norm": 1.57587730884552, "learning_rate": 1.1203329909710495e-06, "loss": 0.02594758, "memory(GiB)": 13.7, "step": 84645, "train_speed(iter/s)": 1.529659 }, { "acc": 0.9895834, "epoch": 39.67658776658074, "grad_norm": 2.2769417762756348, "learning_rate": 1.1198442552200314e-06, "loss": 0.02281559, "memory(GiB)": 13.7, "step": 84650, "train_speed(iter/s)": 1.529657 }, { "acc": 0.99375, "epoch": 39.67893133348957, "grad_norm": 4.503674507141113, "learning_rate": 1.119355612747229e-06, "loss": 0.00735604, "memory(GiB)": 13.7, "step": 84655, "train_speed(iter/s)": 1.529661 }, { "acc": 0.97163696, "epoch": 39.68127490039841, "grad_norm": 6.504838943481445, "learning_rate": 1.1188670635643855e-06, "loss": 0.03695309, "memory(GiB)": 13.7, "step": 84660, "train_speed(iter/s)": 1.529667 }, { "acc": 0.99020824, "epoch": 39.68361846730724, "grad_norm": 3.235837697982788, "learning_rate": 1.1183786076832453e-06, "loss": 0.03235198, "memory(GiB)": 13.7, "step": 84665, "train_speed(iter/s)": 1.52967 }, { "acc": 0.99300594, "epoch": 39.685962034216075, "grad_norm": 4.588690280914307, "learning_rate": 1.1178902451155497e-06, "loss": 0.04611402, "memory(GiB)": 13.7, "step": 84670, "train_speed(iter/s)": 1.529672 }, { "acc": 0.9927084, "epoch": 39.68830560112491, "grad_norm": 3.1105053424835205, "learning_rate": 1.1174019758730387e-06, "loss": 0.01767633, "memory(GiB)": 13.7, "step": 84675, "train_speed(iter/s)": 1.529676 }, { "acc": 0.98527775, "epoch": 39.690649168033744, "grad_norm": 0.0621923990547657, "learning_rate": 1.1169137999674465e-06, "loss": 0.03020931, "memory(GiB)": 13.7, "step": 84680, "train_speed(iter/s)": 1.529681 }, { "acc": 0.98069439, "epoch": 39.692992734942585, "grad_norm": 5.139593601226807, "learning_rate": 1.1164257174105073e-06, "loss": 0.04998233, "memory(GiB)": 13.7, "step": 84685, "train_speed(iter/s)": 1.529692 }, { "acc": 0.98478622, "epoch": 39.69533630185142, "grad_norm": 4.710871696472168, "learning_rate": 1.115937728213955e-06, "loss": 0.0388008, "memory(GiB)": 13.7, "step": 84690, "train_speed(iter/s)": 1.52969 }, { "acc": 0.97865524, "epoch": 39.697679868760254, "grad_norm": 2.6575961112976074, "learning_rate": 1.1154498323895181e-06, "loss": 0.04408937, "memory(GiB)": 13.7, "step": 84695, "train_speed(iter/s)": 1.529691 }, { "acc": 0.98562498, "epoch": 39.70002343566909, "grad_norm": 4.404780387878418, "learning_rate": 1.1149620299489227e-06, "loss": 0.03232311, "memory(GiB)": 13.7, "step": 84700, "train_speed(iter/s)": 1.529696 }, { "acc": 0.98588543, "epoch": 39.70236700257792, "grad_norm": 2.183485746383667, "learning_rate": 1.1144743209038948e-06, "loss": 0.05759035, "memory(GiB)": 13.7, "step": 84705, "train_speed(iter/s)": 1.5297 }, { "acc": 0.98673611, "epoch": 39.70471056948676, "grad_norm": 3.2914488315582275, "learning_rate": 1.1139867052661596e-06, "loss": 0.02344481, "memory(GiB)": 13.7, "step": 84710, "train_speed(iter/s)": 1.529699 }, { "acc": 0.98640881, "epoch": 39.70705413639559, "grad_norm": 5.090548038482666, "learning_rate": 1.1134991830474338e-06, "loss": 0.04043895, "memory(GiB)": 13.7, "step": 84715, "train_speed(iter/s)": 1.529702 }, { "acc": 0.98104162, "epoch": 39.70939770330443, "grad_norm": 4.52528190612793, "learning_rate": 1.113011754259438e-06, "loss": 0.0366734, "memory(GiB)": 13.7, "step": 84720, "train_speed(iter/s)": 1.529704 }, { "acc": 0.9890399, "epoch": 39.71174127021327, "grad_norm": 2.2276580333709717, "learning_rate": 1.1125244189138899e-06, "loss": 0.02345938, "memory(GiB)": 13.7, "step": 84725, "train_speed(iter/s)": 1.52971 }, { "acc": 0.9958334, "epoch": 39.7140848371221, "grad_norm": 6.792342662811279, "learning_rate": 1.1120371770224995e-06, "loss": 0.0599941, "memory(GiB)": 13.7, "step": 84730, "train_speed(iter/s)": 1.529714 }, { "acc": 0.99035721, "epoch": 39.716428404030935, "grad_norm": 1.3204342126846313, "learning_rate": 1.1115500285969832e-06, "loss": 0.03626142, "memory(GiB)": 13.7, "step": 84735, "train_speed(iter/s)": 1.529717 }, { "acc": 0.97250004, "epoch": 39.71877197093977, "grad_norm": 7.732120037078857, "learning_rate": 1.1110629736490465e-06, "loss": 0.04304379, "memory(GiB)": 13.7, "step": 84740, "train_speed(iter/s)": 1.529718 }, { "acc": 0.98038692, "epoch": 39.721115537848604, "grad_norm": 3.41654634475708, "learning_rate": 1.1105760121903976e-06, "loss": 0.06037685, "memory(GiB)": 13.7, "step": 84745, "train_speed(iter/s)": 1.529721 }, { "acc": 0.97875004, "epoch": 39.72345910475744, "grad_norm": 4.02518892288208, "learning_rate": 1.1100891442327437e-06, "loss": 0.03784528, "memory(GiB)": 13.7, "step": 84750, "train_speed(iter/s)": 1.529722 }, { "acc": 0.9875, "epoch": 39.72580267166627, "grad_norm": 6.639333248138428, "learning_rate": 1.1096023697877844e-06, "loss": 0.02948892, "memory(GiB)": 13.7, "step": 84755, "train_speed(iter/s)": 1.529729 }, { "acc": 0.9895834, "epoch": 39.728146238575114, "grad_norm": 3.2128257751464844, "learning_rate": 1.1091156888672214e-06, "loss": 0.04927107, "memory(GiB)": 13.7, "step": 84760, "train_speed(iter/s)": 1.529734 }, { "acc": 0.996875, "epoch": 39.73048980548395, "grad_norm": 0.00020837498595938087, "learning_rate": 1.1086291014827546e-06, "loss": 0.01440305, "memory(GiB)": 13.7, "step": 84765, "train_speed(iter/s)": 1.529736 }, { "acc": 0.98978624, "epoch": 39.73283337239278, "grad_norm": 3.5125668048858643, "learning_rate": 1.1081426076460764e-06, "loss": 0.07145507, "memory(GiB)": 13.7, "step": 84770, "train_speed(iter/s)": 1.529734 }, { "acc": 0.98348217, "epoch": 39.73517693930162, "grad_norm": 3.1649787425994873, "learning_rate": 1.1076562073688835e-06, "loss": 0.03919154, "memory(GiB)": 13.7, "step": 84775, "train_speed(iter/s)": 1.529739 }, { "acc": 0.97488098, "epoch": 39.73752050621045, "grad_norm": 3.8014771938323975, "learning_rate": 1.1071699006628675e-06, "loss": 0.05478799, "memory(GiB)": 13.7, "step": 84780, "train_speed(iter/s)": 1.529742 }, { "acc": 0.99187498, "epoch": 39.739864073119286, "grad_norm": 0.004549297969788313, "learning_rate": 1.1066836875397164e-06, "loss": 0.01972279, "memory(GiB)": 13.7, "step": 84785, "train_speed(iter/s)": 1.529748 }, { "acc": 0.98458328, "epoch": 39.74220764002812, "grad_norm": 2.852128028869629, "learning_rate": 1.1061975680111154e-06, "loss": 0.05320203, "memory(GiB)": 13.7, "step": 84790, "train_speed(iter/s)": 1.529754 }, { "acc": 0.97956848, "epoch": 39.74455120693696, "grad_norm": 5.195476055145264, "learning_rate": 1.1057115420887515e-06, "loss": 0.05742087, "memory(GiB)": 13.7, "step": 84795, "train_speed(iter/s)": 1.529759 }, { "acc": 0.99125004, "epoch": 39.746894773845796, "grad_norm": 2.4385712146759033, "learning_rate": 1.1052256097843087e-06, "loss": 0.02703922, "memory(GiB)": 13.7, "step": 84800, "train_speed(iter/s)": 1.52976 }, { "acc": 0.9958334, "epoch": 39.74923834075463, "grad_norm": 0.8805896639823914, "learning_rate": 1.1047397711094629e-06, "loss": 0.01982458, "memory(GiB)": 13.7, "step": 84805, "train_speed(iter/s)": 1.529757 }, { "acc": 0.99125004, "epoch": 39.751581907663464, "grad_norm": 2.527606725692749, "learning_rate": 1.1042540260758948e-06, "loss": 0.03265896, "memory(GiB)": 13.7, "step": 84810, "train_speed(iter/s)": 1.529759 }, { "acc": 0.98902779, "epoch": 39.7539254745723, "grad_norm": 1.5915164947509766, "learning_rate": 1.1037683746952802e-06, "loss": 0.02334961, "memory(GiB)": 13.7, "step": 84815, "train_speed(iter/s)": 1.529759 }, { "acc": 0.98874998, "epoch": 39.75626904148113, "grad_norm": 1.685624122619629, "learning_rate": 1.1032828169792931e-06, "loss": 0.03607588, "memory(GiB)": 13.7, "step": 84820, "train_speed(iter/s)": 1.529761 }, { "acc": 0.9885417, "epoch": 39.75861260838997, "grad_norm": 2.2462308406829834, "learning_rate": 1.1027973529396037e-06, "loss": 0.0280995, "memory(GiB)": 13.7, "step": 84825, "train_speed(iter/s)": 1.529758 }, { "acc": 0.9864584, "epoch": 39.7609561752988, "grad_norm": 3.3217880725860596, "learning_rate": 1.10231198258788e-06, "loss": 0.04876444, "memory(GiB)": 13.7, "step": 84830, "train_speed(iter/s)": 1.529763 }, { "acc": 0.96845961, "epoch": 39.76329974220764, "grad_norm": 4.646328449249268, "learning_rate": 1.101826705935789e-06, "loss": 0.07324427, "memory(GiB)": 13.7, "step": 84835, "train_speed(iter/s)": 1.529768 }, { "acc": 0.97666664, "epoch": 39.76564330911648, "grad_norm": 3.252094268798828, "learning_rate": 1.1013415229949977e-06, "loss": 0.05893971, "memory(GiB)": 13.7, "step": 84840, "train_speed(iter/s)": 1.529774 }, { "acc": 0.99090271, "epoch": 39.76798687602531, "grad_norm": 0.9766108393669128, "learning_rate": 1.1008564337771647e-06, "loss": 0.0282874, "memory(GiB)": 13.7, "step": 84845, "train_speed(iter/s)": 1.52978 }, { "acc": 0.97529764, "epoch": 39.770330442934146, "grad_norm": 1.484277606010437, "learning_rate": 1.1003714382939522e-06, "loss": 0.04592935, "memory(GiB)": 13.7, "step": 84850, "train_speed(iter/s)": 1.529784 }, { "acc": 0.98888893, "epoch": 39.77267400984298, "grad_norm": 2.6700448989868164, "learning_rate": 1.0998865365570187e-06, "loss": 0.02503172, "memory(GiB)": 13.7, "step": 84855, "train_speed(iter/s)": 1.529785 }, { "acc": 0.9864584, "epoch": 39.775017576751814, "grad_norm": 2.897219181060791, "learning_rate": 1.0994017285780182e-06, "loss": 0.0170896, "memory(GiB)": 13.7, "step": 84860, "train_speed(iter/s)": 1.529784 }, { "acc": 0.9875, "epoch": 39.77736114366065, "grad_norm": 4.559964656829834, "learning_rate": 1.0989170143686028e-06, "loss": 0.01576222, "memory(GiB)": 13.7, "step": 84865, "train_speed(iter/s)": 1.52978 }, { "acc": 0.9796875, "epoch": 39.77970471056949, "grad_norm": 5.017217636108398, "learning_rate": 1.0984323939404245e-06, "loss": 0.09030679, "memory(GiB)": 13.7, "step": 84870, "train_speed(iter/s)": 1.529785 }, { "acc": 0.9932291, "epoch": 39.782048277478324, "grad_norm": 2.729818105697632, "learning_rate": 1.097947867305132e-06, "loss": 0.02213765, "memory(GiB)": 13.7, "step": 84875, "train_speed(iter/s)": 1.52979 }, { "acc": 0.97654762, "epoch": 39.78439184438716, "grad_norm": 4.823822498321533, "learning_rate": 1.0974634344743738e-06, "loss": 0.04552034, "memory(GiB)": 13.7, "step": 84880, "train_speed(iter/s)": 1.529791 }, { "acc": 0.99154758, "epoch": 39.78673541129599, "grad_norm": 2.3423495292663574, "learning_rate": 1.0969790954597901e-06, "loss": 0.03023842, "memory(GiB)": 13.7, "step": 84885, "train_speed(iter/s)": 1.529793 }, { "acc": 0.975, "epoch": 39.78907897820483, "grad_norm": 3.32232928276062, "learning_rate": 1.0964948502730254e-06, "loss": 0.06347145, "memory(GiB)": 13.7, "step": 84890, "train_speed(iter/s)": 1.529794 }, { "acc": 0.96904764, "epoch": 39.79142254511366, "grad_norm": 5.994945526123047, "learning_rate": 1.0960106989257204e-06, "loss": 0.07364095, "memory(GiB)": 13.7, "step": 84895, "train_speed(iter/s)": 1.529797 }, { "acc": 0.98458338, "epoch": 39.793766112022496, "grad_norm": 3.2834439277648926, "learning_rate": 1.095526641429509e-06, "loss": 0.02992492, "memory(GiB)": 13.7, "step": 84900, "train_speed(iter/s)": 1.529799 }, { "acc": 0.98482141, "epoch": 39.79610967893133, "grad_norm": 0.7711249589920044, "learning_rate": 1.0950426777960301e-06, "loss": 0.0376044, "memory(GiB)": 13.7, "step": 84905, "train_speed(iter/s)": 1.529801 }, { "acc": 0.97510414, "epoch": 39.79845324584017, "grad_norm": 2.0872116088867188, "learning_rate": 1.0945588080369132e-06, "loss": 0.06338472, "memory(GiB)": 13.7, "step": 84910, "train_speed(iter/s)": 1.5298 }, { "acc": 0.96855116, "epoch": 39.800796812749006, "grad_norm": 3.41290020942688, "learning_rate": 1.0940750321637921e-06, "loss": 0.04814846, "memory(GiB)": 13.7, "step": 84915, "train_speed(iter/s)": 1.529801 }, { "acc": 0.97538691, "epoch": 39.80314037965784, "grad_norm": 1.06732177734375, "learning_rate": 1.093591350188292e-06, "loss": 0.05356195, "memory(GiB)": 13.7, "step": 84920, "train_speed(iter/s)": 1.529804 }, { "acc": 0.9864584, "epoch": 39.805483946566675, "grad_norm": 1.4251372814178467, "learning_rate": 1.0931077621220413e-06, "loss": 0.04645087, "memory(GiB)": 13.7, "step": 84925, "train_speed(iter/s)": 1.529804 }, { "acc": 0.97601194, "epoch": 39.80782751347551, "grad_norm": 2.929621458053589, "learning_rate": 1.092624267976664e-06, "loss": 0.05443258, "memory(GiB)": 13.7, "step": 84930, "train_speed(iter/s)": 1.529808 }, { "acc": 0.97700214, "epoch": 39.81017108038434, "grad_norm": 4.093540668487549, "learning_rate": 1.0921408677637798e-06, "loss": 0.08528009, "memory(GiB)": 13.7, "step": 84935, "train_speed(iter/s)": 1.529811 }, { "acc": 0.996875, "epoch": 39.81251464729318, "grad_norm": 0.8872769474983215, "learning_rate": 1.0916575614950086e-06, "loss": 0.0145916, "memory(GiB)": 13.7, "step": 84940, "train_speed(iter/s)": 1.529814 }, { "acc": 0.9895834, "epoch": 39.81485821420202, "grad_norm": 2.991240978240967, "learning_rate": 1.09117434918197e-06, "loss": 0.01969252, "memory(GiB)": 13.7, "step": 84945, "train_speed(iter/s)": 1.529816 }, { "acc": 0.98383923, "epoch": 39.81720178111085, "grad_norm": 0.9001157879829407, "learning_rate": 1.0906912308362756e-06, "loss": 0.05849934, "memory(GiB)": 13.7, "step": 84950, "train_speed(iter/s)": 1.529814 }, { "acc": 0.98703365, "epoch": 39.81954534801969, "grad_norm": 4.007331848144531, "learning_rate": 1.0902082064695405e-06, "loss": 0.05322188, "memory(GiB)": 13.7, "step": 84955, "train_speed(iter/s)": 1.529814 }, { "acc": 0.98104162, "epoch": 39.82188891492852, "grad_norm": 0.3096064627170563, "learning_rate": 1.0897252760933724e-06, "loss": 0.02504024, "memory(GiB)": 13.7, "step": 84960, "train_speed(iter/s)": 1.529816 }, { "acc": 0.9739583, "epoch": 39.824232481837356, "grad_norm": 3.337775230407715, "learning_rate": 1.0892424397193808e-06, "loss": 0.04684271, "memory(GiB)": 13.7, "step": 84965, "train_speed(iter/s)": 1.529817 }, { "acc": 0.98140869, "epoch": 39.82657604874619, "grad_norm": 5.866923809051514, "learning_rate": 1.088759697359173e-06, "loss": 0.04575459, "memory(GiB)": 13.7, "step": 84970, "train_speed(iter/s)": 1.529817 }, { "acc": 0.9895834, "epoch": 39.828919615655025, "grad_norm": 4.894499778747559, "learning_rate": 1.0882770490243493e-06, "loss": 0.0251021, "memory(GiB)": 13.7, "step": 84975, "train_speed(iter/s)": 1.529817 }, { "acc": 0.98145838, "epoch": 39.83126318256386, "grad_norm": 5.588651657104492, "learning_rate": 1.0877944947265128e-06, "loss": 0.02836789, "memory(GiB)": 13.7, "step": 84980, "train_speed(iter/s)": 1.529818 }, { "acc": 0.98081875, "epoch": 39.8336067494727, "grad_norm": 1.382725477218628, "learning_rate": 1.0873120344772643e-06, "loss": 0.03015327, "memory(GiB)": 13.7, "step": 84985, "train_speed(iter/s)": 1.529822 }, { "acc": 0.97453899, "epoch": 39.835950316381535, "grad_norm": 2.4271297454833984, "learning_rate": 1.0868296682881978e-06, "loss": 0.06178042, "memory(GiB)": 13.7, "step": 84990, "train_speed(iter/s)": 1.529824 }, { "acc": 0.98520832, "epoch": 39.83829388329037, "grad_norm": 3.270279884338379, "learning_rate": 1.086347396170908e-06, "loss": 0.02482768, "memory(GiB)": 13.7, "step": 84995, "train_speed(iter/s)": 1.529819 }, { "acc": 0.9822917, "epoch": 39.8406374501992, "grad_norm": 3.963263511657715, "learning_rate": 1.0858652181369876e-06, "loss": 0.05421752, "memory(GiB)": 13.7, "step": 85000, "train_speed(iter/s)": 1.529818 }, { "acc": 0.99083328, "epoch": 39.84298101710804, "grad_norm": 5.595737457275391, "learning_rate": 1.0853831341980279e-06, "loss": 0.04191084, "memory(GiB)": 13.7, "step": 85005, "train_speed(iter/s)": 1.52981 }, { "acc": 0.98187504, "epoch": 39.84532458401687, "grad_norm": 5.1521172523498535, "learning_rate": 1.0849011443656147e-06, "loss": 0.06284866, "memory(GiB)": 13.7, "step": 85010, "train_speed(iter/s)": 1.529813 }, { "acc": 0.98154764, "epoch": 39.847668150925706, "grad_norm": 3.5903661251068115, "learning_rate": 1.0844192486513337e-06, "loss": 0.04940662, "memory(GiB)": 13.7, "step": 85015, "train_speed(iter/s)": 1.529814 }, { "acc": 0.98967266, "epoch": 39.85001171783455, "grad_norm": 3.267784833908081, "learning_rate": 1.0839374470667688e-06, "loss": 0.02515209, "memory(GiB)": 13.7, "step": 85020, "train_speed(iter/s)": 1.529819 }, { "acc": 0.98895836, "epoch": 39.85235528474338, "grad_norm": 5.46204137802124, "learning_rate": 1.0834557396235022e-06, "loss": 0.05398222, "memory(GiB)": 13.7, "step": 85025, "train_speed(iter/s)": 1.529819 }, { "acc": 0.98291664, "epoch": 39.854698851652216, "grad_norm": 1.9294812679290771, "learning_rate": 1.082974126333111e-06, "loss": 0.02735826, "memory(GiB)": 13.7, "step": 85030, "train_speed(iter/s)": 1.529825 }, { "acc": 0.98812504, "epoch": 39.85704241856105, "grad_norm": 1.1939061880111694, "learning_rate": 1.0824926072071707e-06, "loss": 0.03466288, "memory(GiB)": 13.7, "step": 85035, "train_speed(iter/s)": 1.529828 }, { "acc": 0.98458328, "epoch": 39.859385985469885, "grad_norm": 4.536528587341309, "learning_rate": 1.0820111822572556e-06, "loss": 0.02640181, "memory(GiB)": 13.7, "step": 85040, "train_speed(iter/s)": 1.52983 }, { "acc": 0.98876982, "epoch": 39.86172955237872, "grad_norm": 1.6365619897842407, "learning_rate": 1.0815298514949407e-06, "loss": 0.04147495, "memory(GiB)": 13.7, "step": 85045, "train_speed(iter/s)": 1.52983 }, { "acc": 0.98500004, "epoch": 39.86407311928755, "grad_norm": 1.7006773948669434, "learning_rate": 1.0810486149317918e-06, "loss": 0.0336164, "memory(GiB)": 13.7, "step": 85050, "train_speed(iter/s)": 1.529832 }, { "acc": 0.97113094, "epoch": 39.86641668619639, "grad_norm": 2.294283390045166, "learning_rate": 1.0805674725793778e-06, "loss": 0.06125764, "memory(GiB)": 13.7, "step": 85055, "train_speed(iter/s)": 1.529838 }, { "acc": 0.98502979, "epoch": 39.86876025310523, "grad_norm": 3.3081114292144775, "learning_rate": 1.080086424449265e-06, "loss": 0.04833985, "memory(GiB)": 13.7, "step": 85060, "train_speed(iter/s)": 1.529842 }, { "acc": 0.98419647, "epoch": 39.87110382001406, "grad_norm": 3.821429491043091, "learning_rate": 1.0796054705530138e-06, "loss": 0.07073635, "memory(GiB)": 13.7, "step": 85065, "train_speed(iter/s)": 1.529844 }, { "acc": 0.9927083, "epoch": 39.8734473869229, "grad_norm": 2.270665407180786, "learning_rate": 1.0791246109021874e-06, "loss": 0.01140801, "memory(GiB)": 13.7, "step": 85070, "train_speed(iter/s)": 1.529849 }, { "acc": 0.98002062, "epoch": 39.87579095383173, "grad_norm": 0.0025039748288691044, "learning_rate": 1.078643845508341e-06, "loss": 0.0543471, "memory(GiB)": 13.7, "step": 85075, "train_speed(iter/s)": 1.529854 }, { "acc": 0.98145828, "epoch": 39.878134520740566, "grad_norm": 2.875504732131958, "learning_rate": 1.0781631743830335e-06, "loss": 0.05608425, "memory(GiB)": 13.7, "step": 85080, "train_speed(iter/s)": 1.529853 }, { "acc": 0.99499998, "epoch": 39.8804780876494, "grad_norm": 3.6205930709838867, "learning_rate": 1.0776825975378165e-06, "loss": 0.03764731, "memory(GiB)": 13.7, "step": 85085, "train_speed(iter/s)": 1.52985 }, { "acc": 0.9947916, "epoch": 39.882821654558235, "grad_norm": 2.1004457473754883, "learning_rate": 1.077202114984242e-06, "loss": 0.01053533, "memory(GiB)": 13.7, "step": 85090, "train_speed(iter/s)": 1.529855 }, { "acc": 0.98931541, "epoch": 39.885165221467076, "grad_norm": 3.233553647994995, "learning_rate": 1.07672172673386e-06, "loss": 0.02014701, "memory(GiB)": 13.7, "step": 85095, "train_speed(iter/s)": 1.52985 }, { "acc": 0.98874998, "epoch": 39.88750878837591, "grad_norm": 2.3470847606658936, "learning_rate": 1.076241432798218e-06, "loss": 0.02358364, "memory(GiB)": 13.7, "step": 85100, "train_speed(iter/s)": 1.529851 }, { "acc": 0.9779995, "epoch": 39.889852355284745, "grad_norm": 5.004857540130615, "learning_rate": 1.0757612331888587e-06, "loss": 0.05784378, "memory(GiB)": 13.7, "step": 85105, "train_speed(iter/s)": 1.529859 }, { "acc": 0.98630953, "epoch": 39.89219592219358, "grad_norm": 3.0496890544891357, "learning_rate": 1.0752811279173254e-06, "loss": 0.04375286, "memory(GiB)": 13.7, "step": 85110, "train_speed(iter/s)": 1.52986 }, { "acc": 0.98708324, "epoch": 39.894539489102414, "grad_norm": 2.5432114601135254, "learning_rate": 1.0748011169951598e-06, "loss": 0.02969066, "memory(GiB)": 13.7, "step": 85115, "train_speed(iter/s)": 1.529864 }, { "acc": 0.97743778, "epoch": 39.89688305601125, "grad_norm": 0.00016499859339091927, "learning_rate": 1.0743212004338986e-06, "loss": 0.03951631, "memory(GiB)": 13.7, "step": 85120, "train_speed(iter/s)": 1.529868 }, { "acc": 0.98083324, "epoch": 39.89922662292008, "grad_norm": 3.2728075981140137, "learning_rate": 1.073841378245076e-06, "loss": 0.03123606, "memory(GiB)": 13.7, "step": 85125, "train_speed(iter/s)": 1.529873 }, { "acc": 0.97738094, "epoch": 39.90157018982892, "grad_norm": 0.0018964243354275823, "learning_rate": 1.0733616504402267e-06, "loss": 0.04480977, "memory(GiB)": 13.7, "step": 85130, "train_speed(iter/s)": 1.529871 }, { "acc": 0.984375, "epoch": 39.90391375673776, "grad_norm": 4.0704665184021, "learning_rate": 1.0728820170308824e-06, "loss": 0.03772902, "memory(GiB)": 13.7, "step": 85135, "train_speed(iter/s)": 1.529875 }, { "acc": 0.9770504, "epoch": 39.90625732364659, "grad_norm": 4.1649489402771, "learning_rate": 1.0724024780285704e-06, "loss": 0.05114613, "memory(GiB)": 13.7, "step": 85140, "train_speed(iter/s)": 1.529878 }, { "acc": 0.97099571, "epoch": 39.90860089055543, "grad_norm": 4.561554431915283, "learning_rate": 1.0719230334448177e-06, "loss": 0.09334846, "memory(GiB)": 13.7, "step": 85145, "train_speed(iter/s)": 1.529881 }, { "acc": 0.98571434, "epoch": 39.91094445746426, "grad_norm": 3.9357173442840576, "learning_rate": 1.0714436832911502e-06, "loss": 0.05224978, "memory(GiB)": 13.7, "step": 85150, "train_speed(iter/s)": 1.529887 }, { "acc": 0.98041668, "epoch": 39.913288024373095, "grad_norm": 4.400291919708252, "learning_rate": 1.070964427579089e-06, "loss": 0.03068341, "memory(GiB)": 13.7, "step": 85155, "train_speed(iter/s)": 1.529891 }, { "acc": 0.99645834, "epoch": 39.91563159128193, "grad_norm": 0.0014357009204104543, "learning_rate": 1.0704852663201519e-06, "loss": 0.02584158, "memory(GiB)": 13.7, "step": 85160, "train_speed(iter/s)": 1.529893 }, { "acc": 0.98041668, "epoch": 39.917975158190764, "grad_norm": 6.235838413238525, "learning_rate": 1.0700061995258576e-06, "loss": 0.08306861, "memory(GiB)": 13.7, "step": 85165, "train_speed(iter/s)": 1.529895 }, { "acc": 0.988447, "epoch": 39.9203187250996, "grad_norm": 1.5700148344039917, "learning_rate": 1.069527227207722e-06, "loss": 0.04280646, "memory(GiB)": 13.7, "step": 85170, "train_speed(iter/s)": 1.529899 }, { "acc": 0.98474197, "epoch": 39.92266229200844, "grad_norm": 3.1635050773620605, "learning_rate": 1.069048349377259e-06, "loss": 0.04317987, "memory(GiB)": 13.7, "step": 85175, "train_speed(iter/s)": 1.529903 }, { "acc": 0.97666664, "epoch": 39.925005858917274, "grad_norm": 1.1039823293685913, "learning_rate": 1.0685695660459758e-06, "loss": 0.04794907, "memory(GiB)": 13.7, "step": 85180, "train_speed(iter/s)": 1.529904 }, { "acc": 0.98050594, "epoch": 39.92734942582611, "grad_norm": 2.3301265239715576, "learning_rate": 1.0680908772253839e-06, "loss": 0.0337303, "memory(GiB)": 13.7, "step": 85185, "train_speed(iter/s)": 1.529912 }, { "acc": 0.98803024, "epoch": 39.92969299273494, "grad_norm": 3.7093024253845215, "learning_rate": 1.0676122829269887e-06, "loss": 0.02372209, "memory(GiB)": 13.7, "step": 85190, "train_speed(iter/s)": 1.529913 }, { "acc": 0.9916666, "epoch": 39.93203655964378, "grad_norm": 1.0831741094589233, "learning_rate": 1.0671337831622944e-06, "loss": 0.02447495, "memory(GiB)": 13.7, "step": 85195, "train_speed(iter/s)": 1.529911 }, { "acc": 0.97624998, "epoch": 39.93438012655261, "grad_norm": 2.780839204788208, "learning_rate": 1.0666553779427997e-06, "loss": 0.04166402, "memory(GiB)": 13.7, "step": 85200, "train_speed(iter/s)": 1.529914 }, { "acc": 0.98946428, "epoch": 39.936723693461445, "grad_norm": 3.6207778453826904, "learning_rate": 1.0661770672800068e-06, "loss": 0.03036569, "memory(GiB)": 13.7, "step": 85205, "train_speed(iter/s)": 1.529918 }, { "acc": 0.99682541, "epoch": 39.93906726037029, "grad_norm": 1.2437946796417236, "learning_rate": 1.0656988511854134e-06, "loss": 0.01192211, "memory(GiB)": 13.7, "step": 85210, "train_speed(iter/s)": 1.529924 }, { "acc": 0.9859375, "epoch": 39.94141082727912, "grad_norm": 1.0297136306762695, "learning_rate": 1.065220729670511e-06, "loss": 0.04476994, "memory(GiB)": 13.7, "step": 85215, "train_speed(iter/s)": 1.529925 }, { "acc": 0.98708334, "epoch": 39.943754394187955, "grad_norm": 2.723315715789795, "learning_rate": 1.0647427027467936e-06, "loss": 0.02114685, "memory(GiB)": 13.7, "step": 85220, "train_speed(iter/s)": 1.529926 }, { "acc": 0.98154764, "epoch": 39.94609796109679, "grad_norm": 3.541269063949585, "learning_rate": 1.0642647704257535e-06, "loss": 0.08531352, "memory(GiB)": 13.7, "step": 85225, "train_speed(iter/s)": 1.529925 }, { "acc": 0.98812504, "epoch": 39.948441528005624, "grad_norm": 0.653243899345398, "learning_rate": 1.063786932718875e-06, "loss": 0.01784955, "memory(GiB)": 13.7, "step": 85230, "train_speed(iter/s)": 1.52993 }, { "acc": 0.9895834, "epoch": 39.95078509491446, "grad_norm": 2.4774091243743896, "learning_rate": 1.0633091896376464e-06, "loss": 0.02240139, "memory(GiB)": 13.7, "step": 85235, "train_speed(iter/s)": 1.529935 }, { "acc": 0.99125004, "epoch": 39.95312866182329, "grad_norm": 2.0288045406341553, "learning_rate": 1.0628315411935506e-06, "loss": 0.01504292, "memory(GiB)": 13.7, "step": 85240, "train_speed(iter/s)": 1.529931 }, { "acc": 0.98154755, "epoch": 39.95547222873213, "grad_norm": 6.542726039886475, "learning_rate": 1.0623539873980678e-06, "loss": 0.06264747, "memory(GiB)": 13.7, "step": 85245, "train_speed(iter/s)": 1.529937 }, { "acc": 0.96429062, "epoch": 39.95781579564097, "grad_norm": 5.262312412261963, "learning_rate": 1.061876528262678e-06, "loss": 0.09382801, "memory(GiB)": 13.7, "step": 85250, "train_speed(iter/s)": 1.529937 }, { "acc": 0.98916664, "epoch": 39.9601593625498, "grad_norm": 3.351031541824341, "learning_rate": 1.0613991637988556e-06, "loss": 0.06741471, "memory(GiB)": 13.7, "step": 85255, "train_speed(iter/s)": 1.529936 }, { "acc": 0.98187504, "epoch": 39.96250292945864, "grad_norm": 1.9049278497695923, "learning_rate": 1.060921894018077e-06, "loss": 0.0344298, "memory(GiB)": 13.7, "step": 85260, "train_speed(iter/s)": 1.529937 }, { "acc": 0.97792616, "epoch": 39.96484649636747, "grad_norm": 5.5062689781188965, "learning_rate": 1.0604447189318146e-06, "loss": 0.05457058, "memory(GiB)": 13.7, "step": 85265, "train_speed(iter/s)": 1.529938 }, { "acc": 0.98455353, "epoch": 39.967190063276306, "grad_norm": 0.8924139738082886, "learning_rate": 1.0599676385515357e-06, "loss": 0.0384528, "memory(GiB)": 13.7, "step": 85270, "train_speed(iter/s)": 1.529939 }, { "acc": 0.98666668, "epoch": 39.96953363018514, "grad_norm": 2.7528645992279053, "learning_rate": 1.0594906528887092e-06, "loss": 0.02971636, "memory(GiB)": 13.7, "step": 85275, "train_speed(iter/s)": 1.529941 }, { "acc": 0.9864584, "epoch": 39.971877197093974, "grad_norm": 4.246582984924316, "learning_rate": 1.0590137619548014e-06, "loss": 0.03774972, "memory(GiB)": 13.7, "step": 85280, "train_speed(iter/s)": 1.529941 }, { "acc": 0.996875, "epoch": 39.974220764002816, "grad_norm": 3.0242114067077637, "learning_rate": 1.0585369657612739e-06, "loss": 0.026635, "memory(GiB)": 13.7, "step": 85285, "train_speed(iter/s)": 1.529941 }, { "acc": 0.98050594, "epoch": 39.97656433091165, "grad_norm": 4.071816921234131, "learning_rate": 1.0580602643195863e-06, "loss": 0.03729057, "memory(GiB)": 13.7, "step": 85290, "train_speed(iter/s)": 1.529946 }, { "acc": 0.97770824, "epoch": 39.978907897820484, "grad_norm": 3.0655972957611084, "learning_rate": 1.057583657641198e-06, "loss": 0.02989616, "memory(GiB)": 13.7, "step": 85295, "train_speed(iter/s)": 1.529949 }, { "acc": 0.98654766, "epoch": 39.98125146472932, "grad_norm": 0.0028039601165801287, "learning_rate": 1.057107145737565e-06, "loss": 0.02533298, "memory(GiB)": 13.7, "step": 85300, "train_speed(iter/s)": 1.529948 }, { "acc": 0.9895834, "epoch": 39.98359503163815, "grad_norm": 2.257262706756592, "learning_rate": 1.0566307286201425e-06, "loss": 0.02411869, "memory(GiB)": 13.7, "step": 85305, "train_speed(iter/s)": 1.529948 }, { "acc": 0.9895834, "epoch": 39.98593859854699, "grad_norm": 0.054135117679834366, "learning_rate": 1.0561544063003792e-06, "loss": 0.02806337, "memory(GiB)": 13.7, "step": 85310, "train_speed(iter/s)": 1.529953 }, { "acc": 0.971875, "epoch": 39.98828216545582, "grad_norm": 2.1861672401428223, "learning_rate": 1.055678178789726e-06, "loss": 0.06364749, "memory(GiB)": 13.7, "step": 85315, "train_speed(iter/s)": 1.529954 }, { "acc": 0.98125, "epoch": 39.990625732364656, "grad_norm": 8.265966415405273, "learning_rate": 1.0552020460996309e-06, "loss": 0.03907624, "memory(GiB)": 13.7, "step": 85320, "train_speed(iter/s)": 1.529958 }, { "acc": 0.97250004, "epoch": 39.9929692992735, "grad_norm": 6.410221099853516, "learning_rate": 1.0547260082415373e-06, "loss": 0.07064247, "memory(GiB)": 13.7, "step": 85325, "train_speed(iter/s)": 1.529961 }, { "acc": 0.965625, "epoch": 39.99531286618233, "grad_norm": 3.716005802154541, "learning_rate": 1.054250065226886e-06, "loss": 0.05020505, "memory(GiB)": 13.7, "step": 85330, "train_speed(iter/s)": 1.529966 }, { "acc": 0.99412775, "epoch": 39.997656433091166, "grad_norm": 4.130914688110352, "learning_rate": 1.0537742170671193e-06, "loss": 0.03074849, "memory(GiB)": 13.7, "step": 85335, "train_speed(iter/s)": 1.529968 }, { "acc": 0.9927084, "epoch": 40.0, "grad_norm": 2.0702788829803467, "learning_rate": 1.0532984637736756e-06, "loss": 0.04949084, "memory(GiB)": 13.7, "step": 85340, "train_speed(iter/s)": 1.529957 }, { "acc": 0.9927083, "epoch": 40.002343566908834, "grad_norm": 0.6527349948883057, "learning_rate": 1.0528228053579876e-06, "loss": 0.01814568, "memory(GiB)": 13.7, "step": 85345, "train_speed(iter/s)": 1.529956 }, { "acc": 0.98803034, "epoch": 40.00468713381767, "grad_norm": 4.338909149169922, "learning_rate": 1.0523472418314901e-06, "loss": 0.05910673, "memory(GiB)": 13.7, "step": 85350, "train_speed(iter/s)": 1.529964 }, { "acc": 0.9926136, "epoch": 40.0070307007265, "grad_norm": 1.6429029703140259, "learning_rate": 1.0518717732056162e-06, "loss": 0.04742438, "memory(GiB)": 13.7, "step": 85355, "train_speed(iter/s)": 1.529966 }, { "acc": 0.98397179, "epoch": 40.009374267635344, "grad_norm": 2.5979650020599365, "learning_rate": 1.0513963994917907e-06, "loss": 0.07096184, "memory(GiB)": 13.7, "step": 85360, "train_speed(iter/s)": 1.529965 }, { "acc": 0.98125, "epoch": 40.01171783454418, "grad_norm": 2.3762409687042236, "learning_rate": 1.0509211207014434e-06, "loss": 0.03212194, "memory(GiB)": 13.7, "step": 85365, "train_speed(iter/s)": 1.529967 }, { "acc": 0.98046875, "epoch": 40.01406140145301, "grad_norm": 0.002841578098013997, "learning_rate": 1.0504459368459959e-06, "loss": 0.07714596, "memory(GiB)": 13.7, "step": 85370, "train_speed(iter/s)": 1.529966 }, { "acc": 0.96684532, "epoch": 40.01640496836185, "grad_norm": 4.456126689910889, "learning_rate": 1.0499708479368709e-06, "loss": 0.04716669, "memory(GiB)": 13.7, "step": 85375, "train_speed(iter/s)": 1.529965 }, { "acc": 0.9895834, "epoch": 40.01874853527068, "grad_norm": 4.165079593658447, "learning_rate": 1.0494958539854898e-06, "loss": 0.03599879, "memory(GiB)": 13.7, "step": 85380, "train_speed(iter/s)": 1.529968 }, { "acc": 0.97895832, "epoch": 40.021092102179516, "grad_norm": 5.333343982696533, "learning_rate": 1.049020955003267e-06, "loss": 0.03122746, "memory(GiB)": 13.7, "step": 85385, "train_speed(iter/s)": 1.529969 }, { "acc": 0.99154758, "epoch": 40.02343566908835, "grad_norm": 1.8134853839874268, "learning_rate": 1.0485461510016187e-06, "loss": 0.03656135, "memory(GiB)": 13.7, "step": 85390, "train_speed(iter/s)": 1.529972 }, { "acc": 0.9746727, "epoch": 40.025779235997184, "grad_norm": 2.603682279586792, "learning_rate": 1.0480714419919586e-06, "loss": 0.04902488, "memory(GiB)": 13.7, "step": 85395, "train_speed(iter/s)": 1.529966 }, { "acc": 0.9895834, "epoch": 40.028122802906026, "grad_norm": 0.00403979979455471, "learning_rate": 1.0475968279856954e-06, "loss": 0.02273076, "memory(GiB)": 13.7, "step": 85400, "train_speed(iter/s)": 1.529969 }, { "acc": 0.98389606, "epoch": 40.03046636981486, "grad_norm": 1.7779349088668823, "learning_rate": 1.0471223089942398e-06, "loss": 0.04943961, "memory(GiB)": 13.7, "step": 85405, "train_speed(iter/s)": 1.529973 }, { "acc": 0.99508934, "epoch": 40.032809936723694, "grad_norm": 2.3965132236480713, "learning_rate": 1.0466478850289944e-06, "loss": 0.01370401, "memory(GiB)": 13.7, "step": 85410, "train_speed(iter/s)": 1.529973 }, { "acc": 0.98571434, "epoch": 40.03515350363253, "grad_norm": 3.399961471557617, "learning_rate": 1.0461735561013655e-06, "loss": 0.03161682, "memory(GiB)": 13.7, "step": 85415, "train_speed(iter/s)": 1.529976 }, { "acc": 0.98395834, "epoch": 40.03749707054136, "grad_norm": 6.335257530212402, "learning_rate": 1.0456993222227526e-06, "loss": 0.04995631, "memory(GiB)": 13.7, "step": 85420, "train_speed(iter/s)": 1.529978 }, { "acc": 0.98875008, "epoch": 40.0398406374502, "grad_norm": 6.615300178527832, "learning_rate": 1.045225183404555e-06, "loss": 0.03183857, "memory(GiB)": 13.7, "step": 85425, "train_speed(iter/s)": 1.529981 }, { "acc": 0.984375, "epoch": 40.04218420435903, "grad_norm": 4.01812219619751, "learning_rate": 1.0447511396581713e-06, "loss": 0.02209741, "memory(GiB)": 13.7, "step": 85430, "train_speed(iter/s)": 1.529982 }, { "acc": 0.97673607, "epoch": 40.04452777126787, "grad_norm": 5.340738296508789, "learning_rate": 1.0442771909949935e-06, "loss": 0.05597351, "memory(GiB)": 13.7, "step": 85435, "train_speed(iter/s)": 1.529986 }, { "acc": 0.98446426, "epoch": 40.04687133817671, "grad_norm": 1.9470164775848389, "learning_rate": 1.0438033374264148e-06, "loss": 0.04682041, "memory(GiB)": 13.7, "step": 85440, "train_speed(iter/s)": 1.529988 }, { "acc": 0.97465286, "epoch": 40.04921490508554, "grad_norm": 6.150070667266846, "learning_rate": 1.043329578963825e-06, "loss": 0.04287563, "memory(GiB)": 13.7, "step": 85445, "train_speed(iter/s)": 1.529994 }, { "acc": 0.9864583, "epoch": 40.051558471994376, "grad_norm": 2.097864866256714, "learning_rate": 1.0428559156186128e-06, "loss": 0.031568, "memory(GiB)": 13.7, "step": 85450, "train_speed(iter/s)": 1.529997 }, { "acc": 0.99161701, "epoch": 40.05390203890321, "grad_norm": 0.4720090925693512, "learning_rate": 1.0423823474021628e-06, "loss": 0.02744501, "memory(GiB)": 13.7, "step": 85455, "train_speed(iter/s)": 1.530002 }, { "acc": 0.9838542, "epoch": 40.056245605812045, "grad_norm": 3.763148307800293, "learning_rate": 1.0419088743258565e-06, "loss": 0.03720801, "memory(GiB)": 13.7, "step": 85460, "train_speed(iter/s)": 1.530004 }, { "acc": 0.97718754, "epoch": 40.05858917272088, "grad_norm": 6.7756733894348145, "learning_rate": 1.0414354964010755e-06, "loss": 0.04277384, "memory(GiB)": 13.7, "step": 85465, "train_speed(iter/s)": 1.530004 }, { "acc": 0.97919636, "epoch": 40.06093273962971, "grad_norm": 2.918245553970337, "learning_rate": 1.0409622136392006e-06, "loss": 0.05896943, "memory(GiB)": 13.7, "step": 85470, "train_speed(iter/s)": 1.530001 }, { "acc": 0.9864584, "epoch": 40.063276306538555, "grad_norm": 3.5911731719970703, "learning_rate": 1.0404890260516042e-06, "loss": 0.05927145, "memory(GiB)": 13.7, "step": 85475, "train_speed(iter/s)": 1.530006 }, { "acc": 0.99125004, "epoch": 40.06561987344739, "grad_norm": 4.923883898300119e-05, "learning_rate": 1.040015933649662e-06, "loss": 0.02196697, "memory(GiB)": 13.7, "step": 85480, "train_speed(iter/s)": 1.530008 }, { "acc": 0.98891926, "epoch": 40.06796344035622, "grad_norm": 2.453216075897217, "learning_rate": 1.0395429364447472e-06, "loss": 0.02328267, "memory(GiB)": 13.7, "step": 85485, "train_speed(iter/s)": 1.53001 }, { "acc": 0.9704464, "epoch": 40.07030700726506, "grad_norm": 4.055116653442383, "learning_rate": 1.0390700344482272e-06, "loss": 0.08199194, "memory(GiB)": 13.7, "step": 85490, "train_speed(iter/s)": 1.530014 }, { "acc": 0.97986116, "epoch": 40.07265057417389, "grad_norm": 4.138338565826416, "learning_rate": 1.038597227671468e-06, "loss": 0.06799802, "memory(GiB)": 13.7, "step": 85495, "train_speed(iter/s)": 1.530014 }, { "acc": 0.98350697, "epoch": 40.074994141082726, "grad_norm": 2.5893301963806152, "learning_rate": 1.0381245161258357e-06, "loss": 0.05474304, "memory(GiB)": 13.7, "step": 85500, "train_speed(iter/s)": 1.530019 }, { "acc": 0.98187504, "epoch": 40.07733770799156, "grad_norm": 4.0623698234558105, "learning_rate": 1.0376518998226936e-06, "loss": 0.0424911, "memory(GiB)": 13.7, "step": 85505, "train_speed(iter/s)": 1.530018 }, { "acc": 0.98217258, "epoch": 40.0796812749004, "grad_norm": 2.5416972637176514, "learning_rate": 1.0371793787733997e-06, "loss": 0.03681349, "memory(GiB)": 13.7, "step": 85510, "train_speed(iter/s)": 1.530024 }, { "acc": 0.99125004, "epoch": 40.082024841809236, "grad_norm": 0.2485649734735489, "learning_rate": 1.0367069529893129e-06, "loss": 0.02747371, "memory(GiB)": 13.7, "step": 85515, "train_speed(iter/s)": 1.530026 }, { "acc": 0.98916664, "epoch": 40.08436840871807, "grad_norm": 0.0028886401560157537, "learning_rate": 1.0362346224817895e-06, "loss": 0.01845181, "memory(GiB)": 13.7, "step": 85520, "train_speed(iter/s)": 1.530028 }, { "acc": 0.9819643, "epoch": 40.086711975626905, "grad_norm": 3.469360589981079, "learning_rate": 1.0357623872621827e-06, "loss": 0.06524122, "memory(GiB)": 13.7, "step": 85525, "train_speed(iter/s)": 1.530033 }, { "acc": 0.9864584, "epoch": 40.08905554253574, "grad_norm": 2.0128161907196045, "learning_rate": 1.0352902473418432e-06, "loss": 0.02936535, "memory(GiB)": 13.7, "step": 85530, "train_speed(iter/s)": 1.530033 }, { "acc": 0.99070511, "epoch": 40.09139910944457, "grad_norm": 0.5900789499282837, "learning_rate": 1.0348182027321185e-06, "loss": 0.03894507, "memory(GiB)": 13.7, "step": 85535, "train_speed(iter/s)": 1.530035 }, { "acc": 0.97243423, "epoch": 40.09374267635341, "grad_norm": 2.3438663482666016, "learning_rate": 1.0343462534443556e-06, "loss": 0.08345968, "memory(GiB)": 13.7, "step": 85540, "train_speed(iter/s)": 1.530037 }, { "acc": 0.99229164, "epoch": 40.09608624326224, "grad_norm": 0.28831833600997925, "learning_rate": 1.0338743994899e-06, "loss": 0.02761694, "memory(GiB)": 13.7, "step": 85545, "train_speed(iter/s)": 1.530042 }, { "acc": 0.9885417, "epoch": 40.09842981017108, "grad_norm": 0.00019401741155888885, "learning_rate": 1.0334026408800911e-06, "loss": 0.02518781, "memory(GiB)": 13.7, "step": 85550, "train_speed(iter/s)": 1.530042 }, { "acc": 0.97975378, "epoch": 40.10077337707992, "grad_norm": 4.80750846862793, "learning_rate": 1.0329309776262707e-06, "loss": 0.05200693, "memory(GiB)": 13.7, "step": 85555, "train_speed(iter/s)": 1.53005 }, { "acc": 0.98765869, "epoch": 40.10311694398875, "grad_norm": 0.7726391553878784, "learning_rate": 1.0324594097397756e-06, "loss": 0.03685613, "memory(GiB)": 13.7, "step": 85560, "train_speed(iter/s)": 1.53005 }, { "acc": 0.9885416, "epoch": 40.105460510897586, "grad_norm": 3.079533338546753, "learning_rate": 1.0319879372319394e-06, "loss": 0.02944754, "memory(GiB)": 13.7, "step": 85565, "train_speed(iter/s)": 1.530053 }, { "acc": 0.98571281, "epoch": 40.10780407780642, "grad_norm": 3.4697885513305664, "learning_rate": 1.031516560114095e-06, "loss": 0.06912958, "memory(GiB)": 13.7, "step": 85570, "train_speed(iter/s)": 1.530055 }, { "acc": 0.97998505, "epoch": 40.110147644715255, "grad_norm": 5.590324878692627, "learning_rate": 1.0310452783975753e-06, "loss": 0.06243654, "memory(GiB)": 13.7, "step": 85575, "train_speed(iter/s)": 1.53006 }, { "acc": 0.98500004, "epoch": 40.11249121162409, "grad_norm": 5.577317237854004, "learning_rate": 1.0305740920937064e-06, "loss": 0.06639134, "memory(GiB)": 13.7, "step": 85580, "train_speed(iter/s)": 1.530058 }, { "acc": 0.97738094, "epoch": 40.114834778532924, "grad_norm": 1.1958839893341064, "learning_rate": 1.0301030012138124e-06, "loss": 0.04245638, "memory(GiB)": 13.7, "step": 85585, "train_speed(iter/s)": 1.530057 }, { "acc": 0.994697, "epoch": 40.117178345441765, "grad_norm": 0.06740720570087433, "learning_rate": 1.0296320057692188e-06, "loss": 0.02764975, "memory(GiB)": 13.7, "step": 85590, "train_speed(iter/s)": 1.530058 }, { "acc": 0.98041668, "epoch": 40.1195219123506, "grad_norm": 3.1522910594940186, "learning_rate": 1.0291611057712464e-06, "loss": 0.03762977, "memory(GiB)": 13.7, "step": 85595, "train_speed(iter/s)": 1.530066 }, { "acc": 0.97020836, "epoch": 40.121865479259434, "grad_norm": 2.6028330326080322, "learning_rate": 1.0286903012312155e-06, "loss": 0.06347504, "memory(GiB)": 13.7, "step": 85600, "train_speed(iter/s)": 1.530067 }, { "acc": 0.99020834, "epoch": 40.12420904616827, "grad_norm": 0.010100198909640312, "learning_rate": 1.02821959216044e-06, "loss": 0.01847173, "memory(GiB)": 13.7, "step": 85605, "train_speed(iter/s)": 1.530069 }, { "acc": 0.99073868, "epoch": 40.1265526130771, "grad_norm": 0.010891621001064777, "learning_rate": 1.0277489785702352e-06, "loss": 0.02440712, "memory(GiB)": 13.7, "step": 85610, "train_speed(iter/s)": 1.53007 }, { "acc": 0.95559025, "epoch": 40.12889617998594, "grad_norm": 5.264876365661621, "learning_rate": 1.0272784604719154e-06, "loss": 0.07303956, "memory(GiB)": 13.7, "step": 85615, "train_speed(iter/s)": 1.530072 }, { "acc": 0.99174681, "epoch": 40.13123974689477, "grad_norm": 1.1516005992889404, "learning_rate": 1.026808037876788e-06, "loss": 0.02555742, "memory(GiB)": 13.7, "step": 85620, "train_speed(iter/s)": 1.530075 }, { "acc": 0.96916666, "epoch": 40.13358331380361, "grad_norm": 3.7918922901153564, "learning_rate": 1.0263377107961591e-06, "loss": 0.06670717, "memory(GiB)": 13.7, "step": 85625, "train_speed(iter/s)": 1.530076 }, { "acc": 0.99020824, "epoch": 40.13592688071245, "grad_norm": 2.053497552871704, "learning_rate": 1.0258674792413359e-06, "loss": 0.03409075, "memory(GiB)": 13.7, "step": 85630, "train_speed(iter/s)": 1.530077 }, { "acc": 0.98968754, "epoch": 40.13827044762128, "grad_norm": 0.0018467354821041226, "learning_rate": 1.0253973432236224e-06, "loss": 0.01877933, "memory(GiB)": 13.7, "step": 85635, "train_speed(iter/s)": 1.530078 }, { "acc": 0.98371525, "epoch": 40.140614014530115, "grad_norm": 4.872386932373047, "learning_rate": 1.0249273027543157e-06, "loss": 0.06584195, "memory(GiB)": 13.7, "step": 85640, "train_speed(iter/s)": 1.530085 }, { "acc": 0.99229164, "epoch": 40.14295758143895, "grad_norm": 0.03642340004444122, "learning_rate": 1.0244573578447164e-06, "loss": 0.01219445, "memory(GiB)": 13.7, "step": 85645, "train_speed(iter/s)": 1.530091 }, { "acc": 0.9822917, "epoch": 40.145301148347784, "grad_norm": 3.4532580375671387, "learning_rate": 1.0239875085061207e-06, "loss": 0.04261864, "memory(GiB)": 13.7, "step": 85650, "train_speed(iter/s)": 1.530098 }, { "acc": 0.9791666, "epoch": 40.14764471525662, "grad_norm": 2.693969249725342, "learning_rate": 1.023517754749821e-06, "loss": 0.06691973, "memory(GiB)": 13.7, "step": 85655, "train_speed(iter/s)": 1.5301 }, { "acc": 0.98625002, "epoch": 40.14998828216545, "grad_norm": 3.404616117477417, "learning_rate": 1.0230480965871094e-06, "loss": 0.03271007, "memory(GiB)": 13.7, "step": 85660, "train_speed(iter/s)": 1.530102 }, { "acc": 0.9739584, "epoch": 40.152331849074294, "grad_norm": 5.2563090324401855, "learning_rate": 1.0225785340292735e-06, "loss": 0.0520696, "memory(GiB)": 13.7, "step": 85665, "train_speed(iter/s)": 1.530107 }, { "acc": 0.996875, "epoch": 40.15467541598313, "grad_norm": 1.315553069114685, "learning_rate": 1.0221090670876017e-06, "loss": 0.02213239, "memory(GiB)": 13.7, "step": 85670, "train_speed(iter/s)": 1.530108 }, { "acc": 0.97738094, "epoch": 40.15701898289196, "grad_norm": 2.2388174533843994, "learning_rate": 1.0216396957733785e-06, "loss": 0.06963107, "memory(GiB)": 13.7, "step": 85675, "train_speed(iter/s)": 1.530109 }, { "acc": 0.98125, "epoch": 40.1593625498008, "grad_norm": 3.3313705921173096, "learning_rate": 1.0211704200978847e-06, "loss": 0.05801863, "memory(GiB)": 13.7, "step": 85680, "train_speed(iter/s)": 1.530116 }, { "acc": 0.97198868, "epoch": 40.16170611670963, "grad_norm": 1.3815754652023315, "learning_rate": 1.0207012400724009e-06, "loss": 0.03367574, "memory(GiB)": 13.7, "step": 85685, "train_speed(iter/s)": 1.530116 }, { "acc": 0.98699408, "epoch": 40.164049683618465, "grad_norm": 0.9515658020973206, "learning_rate": 1.0202321557082056e-06, "loss": 0.04831924, "memory(GiB)": 13.7, "step": 85690, "train_speed(iter/s)": 1.53011 }, { "acc": 0.97998505, "epoch": 40.1663932505273, "grad_norm": 1.4500385522842407, "learning_rate": 1.019763167016572e-06, "loss": 0.03462905, "memory(GiB)": 13.7, "step": 85695, "train_speed(iter/s)": 1.530112 }, { "acc": 0.98344698, "epoch": 40.16873681743614, "grad_norm": 3.941051483154297, "learning_rate": 1.0192942740087752e-06, "loss": 0.02640644, "memory(GiB)": 13.7, "step": 85700, "train_speed(iter/s)": 1.530112 }, { "acc": 0.99258928, "epoch": 40.171080384344975, "grad_norm": 3.140540361404419, "learning_rate": 1.0188254766960836e-06, "loss": 0.02139338, "memory(GiB)": 13.7, "step": 85705, "train_speed(iter/s)": 1.530111 }, { "acc": 0.98729172, "epoch": 40.17342395125381, "grad_norm": 4.35662841796875, "learning_rate": 1.0183567750897678e-06, "loss": 0.04499737, "memory(GiB)": 13.7, "step": 85710, "train_speed(iter/s)": 1.53011 }, { "acc": 0.99065475, "epoch": 40.175767518162644, "grad_norm": 3.749270439147949, "learning_rate": 1.0178881692010922e-06, "loss": 0.03802274, "memory(GiB)": 13.7, "step": 85715, "train_speed(iter/s)": 1.530114 }, { "acc": 0.98645287, "epoch": 40.17811108507148, "grad_norm": 3.544466733932495, "learning_rate": 1.0174196590413202e-06, "loss": 0.04599934, "memory(GiB)": 13.7, "step": 85720, "train_speed(iter/s)": 1.530119 }, { "acc": 0.97624454, "epoch": 40.18045465198031, "grad_norm": 6.8883466720581055, "learning_rate": 1.0169512446217162e-06, "loss": 0.04560467, "memory(GiB)": 13.7, "step": 85725, "train_speed(iter/s)": 1.530122 }, { "acc": 0.98529758, "epoch": 40.18279821888915, "grad_norm": 3.8595025539398193, "learning_rate": 1.0164829259535356e-06, "loss": 0.05137389, "memory(GiB)": 13.7, "step": 85730, "train_speed(iter/s)": 1.530121 }, { "acc": 0.9875, "epoch": 40.18514178579798, "grad_norm": 0.004517391789704561, "learning_rate": 1.0160147030480367e-06, "loss": 0.0252644, "memory(GiB)": 13.7, "step": 85735, "train_speed(iter/s)": 1.530123 }, { "acc": 0.99437504, "epoch": 40.18748535270682, "grad_norm": 4.083315849304199, "learning_rate": 1.0155465759164764e-06, "loss": 0.01740319, "memory(GiB)": 13.7, "step": 85740, "train_speed(iter/s)": 1.530124 }, { "acc": 0.98279762, "epoch": 40.18982891961566, "grad_norm": 0.004784675780683756, "learning_rate": 1.0150785445701029e-06, "loss": 0.05085539, "memory(GiB)": 13.7, "step": 85745, "train_speed(iter/s)": 1.530133 }, { "acc": 0.98250008, "epoch": 40.19217248652449, "grad_norm": 1.7015453577041626, "learning_rate": 1.0146106090201694e-06, "loss": 0.04026163, "memory(GiB)": 13.7, "step": 85750, "train_speed(iter/s)": 1.530139 }, { "acc": 0.98979168, "epoch": 40.194516053433325, "grad_norm": 2.304800033569336, "learning_rate": 1.0141427692779208e-06, "loss": 0.03579685, "memory(GiB)": 13.7, "step": 85755, "train_speed(iter/s)": 1.530144 }, { "acc": 0.97053032, "epoch": 40.19685962034216, "grad_norm": 4.811864376068115, "learning_rate": 1.013675025354604e-06, "loss": 0.0896826, "memory(GiB)": 13.7, "step": 85760, "train_speed(iter/s)": 1.530146 }, { "acc": 0.98500004, "epoch": 40.199203187250994, "grad_norm": 0.004312083590775728, "learning_rate": 1.0132073772614633e-06, "loss": 0.03410317, "memory(GiB)": 13.7, "step": 85765, "train_speed(iter/s)": 1.530152 }, { "acc": 0.99017859, "epoch": 40.20154675415983, "grad_norm": 1.716400384902954, "learning_rate": 1.0127398250097369e-06, "loss": 0.0164289, "memory(GiB)": 13.7, "step": 85770, "train_speed(iter/s)": 1.530156 }, { "acc": 0.99513893, "epoch": 40.20389032106867, "grad_norm": 3.366211414337158, "learning_rate": 1.012272368610664e-06, "loss": 0.02116372, "memory(GiB)": 13.7, "step": 85775, "train_speed(iter/s)": 1.530157 }, { "acc": 0.98884802, "epoch": 40.206233887977504, "grad_norm": 2.9645462036132812, "learning_rate": 1.0118050080754824e-06, "loss": 0.04469086, "memory(GiB)": 13.7, "step": 85780, "train_speed(iter/s)": 1.530161 }, { "acc": 0.97715282, "epoch": 40.20857745488634, "grad_norm": 3.8336074352264404, "learning_rate": 1.011337743415424e-06, "loss": 0.08529547, "memory(GiB)": 13.7, "step": 85785, "train_speed(iter/s)": 1.530165 }, { "acc": 0.99333334, "epoch": 40.21092102179517, "grad_norm": 0.006755146197974682, "learning_rate": 1.0108705746417203e-06, "loss": 0.0531976, "memory(GiB)": 13.7, "step": 85790, "train_speed(iter/s)": 1.530169 }, { "acc": 0.99092264, "epoch": 40.21326458870401, "grad_norm": 1.0381076335906982, "learning_rate": 1.0104035017656005e-06, "loss": 0.02248822, "memory(GiB)": 13.7, "step": 85795, "train_speed(iter/s)": 1.530172 }, { "acc": 0.98458328, "epoch": 40.21560815561284, "grad_norm": 6.781489372253418, "learning_rate": 1.0099365247982923e-06, "loss": 0.02320307, "memory(GiB)": 13.7, "step": 85800, "train_speed(iter/s)": 1.530171 }, { "acc": 0.9875, "epoch": 40.217951722521676, "grad_norm": 3.4953670501708984, "learning_rate": 1.009469643751021e-06, "loss": 0.02549663, "memory(GiB)": 13.7, "step": 85805, "train_speed(iter/s)": 1.530171 }, { "acc": 0.9864583, "epoch": 40.22029528943051, "grad_norm": 0.007729271426796913, "learning_rate": 1.0090028586350068e-06, "loss": 0.02360039, "memory(GiB)": 13.7, "step": 85810, "train_speed(iter/s)": 1.53017 }, { "acc": 0.98791666, "epoch": 40.22263885633935, "grad_norm": 6.3821210861206055, "learning_rate": 1.008536169461471e-06, "loss": 0.05100397, "memory(GiB)": 13.7, "step": 85815, "train_speed(iter/s)": 1.530169 }, { "acc": 0.97791672, "epoch": 40.224982423248186, "grad_norm": 3.228689670562744, "learning_rate": 1.0080695762416321e-06, "loss": 0.04746811, "memory(GiB)": 13.7, "step": 85820, "train_speed(iter/s)": 1.530176 }, { "acc": 0.9927084, "epoch": 40.22732599015702, "grad_norm": 0.2905116677284241, "learning_rate": 1.007603078986705e-06, "loss": 0.03109378, "memory(GiB)": 13.7, "step": 85825, "train_speed(iter/s)": 1.530177 }, { "acc": 0.99125004, "epoch": 40.229669557065854, "grad_norm": 2.1610920429229736, "learning_rate": 1.0071366777079002e-06, "loss": 0.02165983, "memory(GiB)": 13.7, "step": 85830, "train_speed(iter/s)": 1.530177 }, { "acc": 0.98559523, "epoch": 40.23201312397469, "grad_norm": 2.8999598026275635, "learning_rate": 1.0066703724164305e-06, "loss": 0.03227318, "memory(GiB)": 13.7, "step": 85835, "train_speed(iter/s)": 1.530182 }, { "acc": 0.97946434, "epoch": 40.23435669088352, "grad_norm": 0.0029439758509397507, "learning_rate": 1.0062041631235058e-06, "loss": 0.03778872, "memory(GiB)": 13.7, "step": 85840, "train_speed(iter/s)": 1.53019 }, { "acc": 0.98696423, "epoch": 40.23670025779236, "grad_norm": 4.952949047088623, "learning_rate": 1.0057380498403295e-06, "loss": 0.0302366, "memory(GiB)": 13.7, "step": 85845, "train_speed(iter/s)": 1.530191 }, { "acc": 0.98624992, "epoch": 40.2390438247012, "grad_norm": 4.938126087188721, "learning_rate": 1.0052720325781065e-06, "loss": 0.02862488, "memory(GiB)": 13.7, "step": 85850, "train_speed(iter/s)": 1.530192 }, { "acc": 0.98326387, "epoch": 40.24138739161003, "grad_norm": 2.3953371047973633, "learning_rate": 1.0048061113480398e-06, "loss": 0.03136639, "memory(GiB)": 13.7, "step": 85855, "train_speed(iter/s)": 1.530195 }, { "acc": 0.9802084, "epoch": 40.24373095851887, "grad_norm": 3.694030284881592, "learning_rate": 1.0043402861613264e-06, "loss": 0.02862179, "memory(GiB)": 13.7, "step": 85860, "train_speed(iter/s)": 1.530197 }, { "acc": 0.984375, "epoch": 40.2460745254277, "grad_norm": 0.0005112708313390613, "learning_rate": 1.0038745570291635e-06, "loss": 0.02853668, "memory(GiB)": 13.7, "step": 85865, "train_speed(iter/s)": 1.5302 }, { "acc": 0.99125004, "epoch": 40.248418092336536, "grad_norm": 2.062354326248169, "learning_rate": 1.0034089239627483e-06, "loss": 0.08345681, "memory(GiB)": 13.7, "step": 85870, "train_speed(iter/s)": 1.530204 }, { "acc": 0.99312496, "epoch": 40.25076165924537, "grad_norm": 0.8050000667572021, "learning_rate": 1.0029433869732697e-06, "loss": 0.01634678, "memory(GiB)": 13.7, "step": 85875, "train_speed(iter/s)": 1.530205 }, { "acc": 0.98376989, "epoch": 40.253105226154204, "grad_norm": 3.8189337253570557, "learning_rate": 1.0024779460719198e-06, "loss": 0.05313924, "memory(GiB)": 13.7, "step": 85880, "train_speed(iter/s)": 1.530211 }, { "acc": 0.98395834, "epoch": 40.25544879306304, "grad_norm": 0.004547142889350653, "learning_rate": 1.0020126012698844e-06, "loss": 0.0364319, "memory(GiB)": 13.7, "step": 85885, "train_speed(iter/s)": 1.530218 }, { "acc": 0.98015881, "epoch": 40.25779235997188, "grad_norm": 2.76181697845459, "learning_rate": 1.0015473525783506e-06, "loss": 0.05493532, "memory(GiB)": 13.7, "step": 85890, "train_speed(iter/s)": 1.53022 }, { "acc": 0.98583755, "epoch": 40.260135926880714, "grad_norm": 3.146857976913452, "learning_rate": 1.0010822000085015e-06, "loss": 0.0364223, "memory(GiB)": 13.7, "step": 85895, "train_speed(iter/s)": 1.530226 }, { "acc": 0.98779764, "epoch": 40.26247949378955, "grad_norm": 1.9032050371170044, "learning_rate": 1.0006171435715167e-06, "loss": 0.0270869, "memory(GiB)": 13.7, "step": 85900, "train_speed(iter/s)": 1.53023 }, { "acc": 0.98883934, "epoch": 40.26482306069838, "grad_norm": 0.9201704263687134, "learning_rate": 1.0001521832785742e-06, "loss": 0.02009965, "memory(GiB)": 13.7, "step": 85905, "train_speed(iter/s)": 1.53024 }, { "acc": 0.97541666, "epoch": 40.26716662760722, "grad_norm": 0.001840127632021904, "learning_rate": 9.996873191408529e-07, "loss": 0.06074919, "memory(GiB)": 13.7, "step": 85910, "train_speed(iter/s)": 1.530241 }, { "acc": 0.98708334, "epoch": 40.26951019451605, "grad_norm": 2.18179988861084, "learning_rate": 9.992225511695246e-07, "loss": 0.04666135, "memory(GiB)": 13.7, "step": 85915, "train_speed(iter/s)": 1.530242 }, { "acc": 0.99249992, "epoch": 40.271853761424886, "grad_norm": 0.32362183928489685, "learning_rate": 9.987578793757598e-07, "loss": 0.02452856, "memory(GiB)": 13.7, "step": 85920, "train_speed(iter/s)": 1.530243 }, { "acc": 0.98778439, "epoch": 40.27419732833373, "grad_norm": 3.953336477279663, "learning_rate": 9.982933037707288e-07, "loss": 0.03347685, "memory(GiB)": 13.7, "step": 85925, "train_speed(iter/s)": 1.530244 }, { "acc": 0.98738098, "epoch": 40.27654089524256, "grad_norm": 0.2767001986503601, "learning_rate": 9.978288243655993e-07, "loss": 0.03072193, "memory(GiB)": 13.7, "step": 85930, "train_speed(iter/s)": 1.530243 }, { "acc": 0.98501987, "epoch": 40.278884462151396, "grad_norm": 3.169511318206787, "learning_rate": 9.973644411715342e-07, "loss": 0.03561149, "memory(GiB)": 13.7, "step": 85935, "train_speed(iter/s)": 1.530244 }, { "acc": 0.97532196, "epoch": 40.28122802906023, "grad_norm": 1.392713189125061, "learning_rate": 9.969001541996968e-07, "loss": 0.04944774, "memory(GiB)": 13.7, "step": 85940, "train_speed(iter/s)": 1.53025 }, { "acc": 0.9822917, "epoch": 40.283571595969065, "grad_norm": 5.933489799499512, "learning_rate": 9.964359634612465e-07, "loss": 0.0405381, "memory(GiB)": 13.7, "step": 85945, "train_speed(iter/s)": 1.530257 }, { "acc": 0.98258018, "epoch": 40.2859151628779, "grad_norm": 2.9607186317443848, "learning_rate": 9.95971868967343e-07, "loss": 0.05185013, "memory(GiB)": 13.7, "step": 85950, "train_speed(iter/s)": 1.530262 }, { "acc": 0.98490162, "epoch": 40.28825872978673, "grad_norm": 0.004540992900729179, "learning_rate": 9.955078707291395e-07, "loss": 0.04843302, "memory(GiB)": 13.7, "step": 85955, "train_speed(iter/s)": 1.530262 }, { "acc": 0.9953125, "epoch": 40.29060229669557, "grad_norm": 0.8463523983955383, "learning_rate": 9.950439687577883e-07, "loss": 0.01164337, "memory(GiB)": 13.7, "step": 85960, "train_speed(iter/s)": 1.530262 }, { "acc": 0.9963542, "epoch": 40.29294586360441, "grad_norm": 2.0120646953582764, "learning_rate": 9.945801630644405e-07, "loss": 0.02481978, "memory(GiB)": 13.7, "step": 85965, "train_speed(iter/s)": 1.530263 }, { "acc": 0.99125004, "epoch": 40.29528943051324, "grad_norm": 1.2855027914047241, "learning_rate": 9.941164536602472e-07, "loss": 0.03039331, "memory(GiB)": 13.7, "step": 85970, "train_speed(iter/s)": 1.530263 }, { "acc": 0.990625, "epoch": 40.29763299742208, "grad_norm": 4.1094651222229, "learning_rate": 9.936528405563509e-07, "loss": 0.02663699, "memory(GiB)": 13.7, "step": 85975, "train_speed(iter/s)": 1.530265 }, { "acc": 0.97986107, "epoch": 40.29997656433091, "grad_norm": 3.9993436336517334, "learning_rate": 9.931893237638964e-07, "loss": 0.06173148, "memory(GiB)": 13.7, "step": 85980, "train_speed(iter/s)": 1.530267 }, { "acc": 0.98916664, "epoch": 40.302320131239746, "grad_norm": 2.185112953186035, "learning_rate": 9.927259032940272e-07, "loss": 0.02501892, "memory(GiB)": 13.7, "step": 85985, "train_speed(iter/s)": 1.53027 }, { "acc": 0.97729168, "epoch": 40.30466369814858, "grad_norm": 3.407545804977417, "learning_rate": 9.922625791578796e-07, "loss": 0.0436302, "memory(GiB)": 13.7, "step": 85990, "train_speed(iter/s)": 1.530273 }, { "acc": 0.98625002, "epoch": 40.307007265057415, "grad_norm": 2.6150588989257812, "learning_rate": 9.917993513665925e-07, "loss": 0.02548771, "memory(GiB)": 13.7, "step": 85995, "train_speed(iter/s)": 1.530275 }, { "acc": 0.98604164, "epoch": 40.309350831966256, "grad_norm": 12.173192024230957, "learning_rate": 9.913362199312978e-07, "loss": 0.10434313, "memory(GiB)": 13.7, "step": 86000, "train_speed(iter/s)": 1.530276 }, { "acc": 0.99821434, "epoch": 40.31169439887509, "grad_norm": 0.000764329161029309, "learning_rate": 9.908731848631308e-07, "loss": 0.01853839, "memory(GiB)": 13.7, "step": 86005, "train_speed(iter/s)": 1.53028 }, { "acc": 0.96809521, "epoch": 40.314037965783925, "grad_norm": 0.8393809199333191, "learning_rate": 9.90410246173219e-07, "loss": 0.05126132, "memory(GiB)": 13.7, "step": 86010, "train_speed(iter/s)": 1.530284 }, { "acc": 0.98708334, "epoch": 40.31638153269276, "grad_norm": 2.3821403980255127, "learning_rate": 9.899474038726904e-07, "loss": 0.03569446, "memory(GiB)": 13.7, "step": 86015, "train_speed(iter/s)": 1.530289 }, { "acc": 0.9927084, "epoch": 40.31872509960159, "grad_norm": 4.030065536499023, "learning_rate": 9.8948465797267e-07, "loss": 0.01805539, "memory(GiB)": 13.7, "step": 86020, "train_speed(iter/s)": 1.530287 }, { "acc": 0.96883926, "epoch": 40.32106866651043, "grad_norm": 5.154892444610596, "learning_rate": 9.890220084842834e-07, "loss": 0.04599251, "memory(GiB)": 13.7, "step": 86025, "train_speed(iter/s)": 1.530284 }, { "acc": 0.98812504, "epoch": 40.32341223341926, "grad_norm": 4.9272918701171875, "learning_rate": 9.885594554186468e-07, "loss": 0.02873695, "memory(GiB)": 13.7, "step": 86030, "train_speed(iter/s)": 1.530286 }, { "acc": 0.98258934, "epoch": 40.325755800328096, "grad_norm": 2.9457342624664307, "learning_rate": 9.880969987868824e-07, "loss": 0.05062423, "memory(GiB)": 13.7, "step": 86035, "train_speed(iter/s)": 1.530287 }, { "acc": 0.98708324, "epoch": 40.32809936723694, "grad_norm": 2.778094530105591, "learning_rate": 9.87634638600103e-07, "loss": 0.02580096, "memory(GiB)": 13.7, "step": 86040, "train_speed(iter/s)": 1.530287 }, { "acc": 0.9895833, "epoch": 40.33044293414577, "grad_norm": 2.466353416442871, "learning_rate": 9.871723748694251e-07, "loss": 0.03622704, "memory(GiB)": 13.7, "step": 86045, "train_speed(iter/s)": 1.53029 }, { "acc": 0.97928028, "epoch": 40.332786501054606, "grad_norm": 2.5930368900299072, "learning_rate": 9.86710207605957e-07, "loss": 0.04107301, "memory(GiB)": 13.7, "step": 86050, "train_speed(iter/s)": 1.530292 }, { "acc": 0.9947916, "epoch": 40.33513006796344, "grad_norm": 2.664789915084839, "learning_rate": 9.862481368208095e-07, "loss": 0.02136163, "memory(GiB)": 13.7, "step": 86055, "train_speed(iter/s)": 1.530294 }, { "acc": 0.97562504, "epoch": 40.337473634872275, "grad_norm": 4.510686874389648, "learning_rate": 9.857861625250898e-07, "loss": 0.05786941, "memory(GiB)": 13.7, "step": 86060, "train_speed(iter/s)": 1.530302 }, { "acc": 0.97104168, "epoch": 40.33981720178111, "grad_norm": 3.826277494430542, "learning_rate": 9.853242847299005e-07, "loss": 0.06832533, "memory(GiB)": 13.7, "step": 86065, "train_speed(iter/s)": 1.53031 }, { "acc": 0.98696423, "epoch": 40.34216076868994, "grad_norm": 3.653301954269409, "learning_rate": 9.84862503446344e-07, "loss": 0.03342638, "memory(GiB)": 13.7, "step": 86070, "train_speed(iter/s)": 1.530308 }, { "acc": 0.9788393, "epoch": 40.344504335598785, "grad_norm": 3.2882072925567627, "learning_rate": 9.844008186855225e-07, "loss": 0.06946266, "memory(GiB)": 13.7, "step": 86075, "train_speed(iter/s)": 1.530313 }, { "acc": 0.96821432, "epoch": 40.34684790250762, "grad_norm": 2.834188222885132, "learning_rate": 9.839392304585297e-07, "loss": 0.05158027, "memory(GiB)": 13.7, "step": 86080, "train_speed(iter/s)": 1.530316 }, { "acc": 0.98500004, "epoch": 40.34919146941645, "grad_norm": 5.027300834655762, "learning_rate": 9.834777387764636e-07, "loss": 0.02351446, "memory(GiB)": 13.7, "step": 86085, "train_speed(iter/s)": 1.530314 }, { "acc": 0.98812504, "epoch": 40.35153503632529, "grad_norm": 4.403726577758789, "learning_rate": 9.830163436504146e-07, "loss": 0.03161544, "memory(GiB)": 13.7, "step": 86090, "train_speed(iter/s)": 1.530317 }, { "acc": 0.99020834, "epoch": 40.35387860323412, "grad_norm": 5.2696638107299805, "learning_rate": 9.825550450914743e-07, "loss": 0.01757076, "memory(GiB)": 13.7, "step": 86095, "train_speed(iter/s)": 1.530319 }, { "acc": 0.9725893, "epoch": 40.356222170142956, "grad_norm": 6.700953960418701, "learning_rate": 9.820938431107314e-07, "loss": 0.08001874, "memory(GiB)": 13.7, "step": 86100, "train_speed(iter/s)": 1.530323 }, { "acc": 0.98071423, "epoch": 40.35856573705179, "grad_norm": 4.134095668792725, "learning_rate": 9.816327377192706e-07, "loss": 0.03614418, "memory(GiB)": 13.7, "step": 86105, "train_speed(iter/s)": 1.530319 }, { "acc": 0.96464739, "epoch": 40.360909303960625, "grad_norm": 3.9837214946746826, "learning_rate": 9.81171728928175e-07, "loss": 0.05833008, "memory(GiB)": 13.7, "step": 86110, "train_speed(iter/s)": 1.530323 }, { "acc": 0.9916666, "epoch": 40.36325287086947, "grad_norm": 2.4913318157196045, "learning_rate": 9.807108167485288e-07, "loss": 0.02650208, "memory(GiB)": 13.7, "step": 86115, "train_speed(iter/s)": 1.530323 }, { "acc": 0.98490524, "epoch": 40.3655964377783, "grad_norm": 0.6931099891662598, "learning_rate": 9.802500011914076e-07, "loss": 0.03247498, "memory(GiB)": 13.7, "step": 86120, "train_speed(iter/s)": 1.530326 }, { "acc": 0.9697916, "epoch": 40.367940004687135, "grad_norm": 3.5471248626708984, "learning_rate": 9.797892822678881e-07, "loss": 0.06430953, "memory(GiB)": 13.7, "step": 86125, "train_speed(iter/s)": 1.53033 }, { "acc": 0.98125, "epoch": 40.37028357159597, "grad_norm": 0.8585143685340881, "learning_rate": 9.793286599890448e-07, "loss": 0.03713619, "memory(GiB)": 13.7, "step": 86130, "train_speed(iter/s)": 1.530333 }, { "acc": 0.98413696, "epoch": 40.372627138504804, "grad_norm": 3.114466428756714, "learning_rate": 9.788681343659514e-07, "loss": 0.03271165, "memory(GiB)": 13.7, "step": 86135, "train_speed(iter/s)": 1.530333 }, { "acc": 0.98708334, "epoch": 40.37497070541364, "grad_norm": 0.00048558510025031865, "learning_rate": 9.78407705409675e-07, "loss": 0.05253242, "memory(GiB)": 13.7, "step": 86140, "train_speed(iter/s)": 1.530336 }, { "acc": 0.98152771, "epoch": 40.37731427232247, "grad_norm": 3.9625327587127686, "learning_rate": 9.779473731312833e-07, "loss": 0.05134946, "memory(GiB)": 13.7, "step": 86145, "train_speed(iter/s)": 1.53034 }, { "acc": 0.97928028, "epoch": 40.37965783923131, "grad_norm": 3.288045644760132, "learning_rate": 9.774871375418432e-07, "loss": 0.06704257, "memory(GiB)": 13.7, "step": 86150, "train_speed(iter/s)": 1.530346 }, { "acc": 0.98812504, "epoch": 40.38200140614015, "grad_norm": 5.360978603363037, "learning_rate": 9.77026998652414e-07, "loss": 0.02554603, "memory(GiB)": 13.7, "step": 86155, "train_speed(iter/s)": 1.530354 }, { "acc": 0.99354172, "epoch": 40.38434497304898, "grad_norm": 2.5642600059509277, "learning_rate": 9.765669564740597e-07, "loss": 0.01922553, "memory(GiB)": 13.7, "step": 86160, "train_speed(iter/s)": 1.530357 }, { "acc": 0.97770834, "epoch": 40.38668853995782, "grad_norm": 2.984311103820801, "learning_rate": 9.761070110178343e-07, "loss": 0.04554014, "memory(GiB)": 13.7, "step": 86165, "train_speed(iter/s)": 1.53036 }, { "acc": 0.98458328, "epoch": 40.38903210686665, "grad_norm": 4.079858779907227, "learning_rate": 9.756471622947948e-07, "loss": 0.0348814, "memory(GiB)": 13.7, "step": 86170, "train_speed(iter/s)": 1.530362 }, { "acc": 0.98113098, "epoch": 40.391375673775485, "grad_norm": 1.7181963920593262, "learning_rate": 9.751874103159967e-07, "loss": 0.04301831, "memory(GiB)": 13.7, "step": 86175, "train_speed(iter/s)": 1.530368 }, { "acc": 0.96652775, "epoch": 40.39371924068432, "grad_norm": 3.7484774589538574, "learning_rate": 9.74727755092488e-07, "loss": 0.05959125, "memory(GiB)": 13.7, "step": 86180, "train_speed(iter/s)": 1.530367 }, { "acc": 0.98173609, "epoch": 40.396062807593154, "grad_norm": 4.667767524719238, "learning_rate": 9.742681966353177e-07, "loss": 0.05361773, "memory(GiB)": 13.7, "step": 86185, "train_speed(iter/s)": 1.53037 }, { "acc": 0.98125, "epoch": 40.398406374501995, "grad_norm": 6.436596393585205, "learning_rate": 9.738087349555345e-07, "loss": 0.03431538, "memory(GiB)": 13.7, "step": 86190, "train_speed(iter/s)": 1.530374 }, { "acc": 0.96713543, "epoch": 40.40074994141083, "grad_norm": 7.124527931213379, "learning_rate": 9.73349370064179e-07, "loss": 0.06789743, "memory(GiB)": 13.7, "step": 86195, "train_speed(iter/s)": 1.53038 }, { "acc": 0.97578373, "epoch": 40.403093508319664, "grad_norm": 4.544458866119385, "learning_rate": 9.72890101972295e-07, "loss": 0.04007507, "memory(GiB)": 13.7, "step": 86200, "train_speed(iter/s)": 1.530385 }, { "acc": 0.9927084, "epoch": 40.4054370752285, "grad_norm": 0.0017896965146064758, "learning_rate": 9.72430930690922e-07, "loss": 0.01820799, "memory(GiB)": 13.7, "step": 86205, "train_speed(iter/s)": 1.530387 }, { "acc": 0.98579578, "epoch": 40.40778064213733, "grad_norm": 5.862546920776367, "learning_rate": 9.719718562310962e-07, "loss": 0.04573638, "memory(GiB)": 13.7, "step": 86210, "train_speed(iter/s)": 1.53039 }, { "acc": 0.99375, "epoch": 40.41012420904617, "grad_norm": 2.8430027961730957, "learning_rate": 9.715128786038511e-07, "loss": 0.02279523, "memory(GiB)": 13.7, "step": 86215, "train_speed(iter/s)": 1.530395 }, { "acc": 0.9888195, "epoch": 40.412467775955, "grad_norm": 4.0347442626953125, "learning_rate": 9.710539978202203e-07, "loss": 0.02995148, "memory(GiB)": 13.7, "step": 86220, "train_speed(iter/s)": 1.530395 }, { "acc": 0.98187504, "epoch": 40.414811342863835, "grad_norm": 0.8852903842926025, "learning_rate": 9.705952138912336e-07, "loss": 0.04858967, "memory(GiB)": 13.7, "step": 86225, "train_speed(iter/s)": 1.530397 }, { "acc": 0.98530636, "epoch": 40.41715490977268, "grad_norm": 1.9723832607269287, "learning_rate": 9.701365268279194e-07, "loss": 0.06366879, "memory(GiB)": 13.7, "step": 86230, "train_speed(iter/s)": 1.530396 }, { "acc": 0.97113094, "epoch": 40.41949847668151, "grad_norm": 3.346254825592041, "learning_rate": 9.696779366413016e-07, "loss": 0.0995625, "memory(GiB)": 13.7, "step": 86235, "train_speed(iter/s)": 1.530398 }, { "acc": 0.98031254, "epoch": 40.421842043590345, "grad_norm": 0.05454736948013306, "learning_rate": 9.69219443342404e-07, "loss": 0.057108, "memory(GiB)": 13.7, "step": 86240, "train_speed(iter/s)": 1.530405 }, { "acc": 0.990625, "epoch": 40.42418561049918, "grad_norm": 1.848124623298645, "learning_rate": 9.68761046942248e-07, "loss": 0.02006015, "memory(GiB)": 13.7, "step": 86245, "train_speed(iter/s)": 1.530411 }, { "acc": 0.98770828, "epoch": 40.426529177408014, "grad_norm": 4.626776695251465, "learning_rate": 9.683027474518514e-07, "loss": 0.02191219, "memory(GiB)": 13.7, "step": 86250, "train_speed(iter/s)": 1.530415 }, { "acc": 0.97416668, "epoch": 40.42887274431685, "grad_norm": 3.33333420753479, "learning_rate": 9.67844544882229e-07, "loss": 0.06244564, "memory(GiB)": 13.7, "step": 86255, "train_speed(iter/s)": 1.53042 }, { "acc": 0.98761368, "epoch": 40.43121631122568, "grad_norm": 0.007593915332108736, "learning_rate": 9.673864392443954e-07, "loss": 0.02477426, "memory(GiB)": 13.7, "step": 86260, "train_speed(iter/s)": 1.530422 }, { "acc": 0.98819447, "epoch": 40.433559878134524, "grad_norm": 0.0010373821714892983, "learning_rate": 9.66928430549363e-07, "loss": 0.02550908, "memory(GiB)": 13.7, "step": 86265, "train_speed(iter/s)": 1.530426 }, { "acc": 0.97937498, "epoch": 40.43590344504336, "grad_norm": 0.03343632072210312, "learning_rate": 9.664705188081382e-07, "loss": 0.04239109, "memory(GiB)": 13.7, "step": 86270, "train_speed(iter/s)": 1.530432 }, { "acc": 0.99541664, "epoch": 40.43824701195219, "grad_norm": 3.554563522338867, "learning_rate": 9.660127040317301e-07, "loss": 0.02921608, "memory(GiB)": 13.7, "step": 86275, "train_speed(iter/s)": 1.530438 }, { "acc": 0.99122028, "epoch": 40.44059057886103, "grad_norm": 0.005518294405192137, "learning_rate": 9.65554986231143e-07, "loss": 0.03846383, "memory(GiB)": 13.7, "step": 86280, "train_speed(iter/s)": 1.530437 }, { "acc": 0.98883934, "epoch": 40.44293414576986, "grad_norm": 0.0033859258983284235, "learning_rate": 9.650973654173782e-07, "loss": 0.02384049, "memory(GiB)": 13.7, "step": 86285, "train_speed(iter/s)": 1.530441 }, { "acc": 0.98000002, "epoch": 40.445277712678696, "grad_norm": 3.5528879165649414, "learning_rate": 9.646398416014345e-07, "loss": 0.02266308, "memory(GiB)": 13.7, "step": 86290, "train_speed(iter/s)": 1.530443 }, { "acc": 0.99187498, "epoch": 40.44762127958753, "grad_norm": 3.265566825866699, "learning_rate": 9.641824147943096e-07, "loss": 0.02768671, "memory(GiB)": 13.7, "step": 86295, "train_speed(iter/s)": 1.530447 }, { "acc": 0.99281254, "epoch": 40.449964846496364, "grad_norm": 0.27582433819770813, "learning_rate": 9.63725085007e-07, "loss": 0.01869833, "memory(GiB)": 13.7, "step": 86300, "train_speed(iter/s)": 1.530449 }, { "acc": 0.98831844, "epoch": 40.452308413405206, "grad_norm": 1.3532750606536865, "learning_rate": 9.63267852250498e-07, "loss": 0.0438782, "memory(GiB)": 13.7, "step": 86305, "train_speed(iter/s)": 1.530451 }, { "acc": 0.97633934, "epoch": 40.45465198031404, "grad_norm": 5.163564682006836, "learning_rate": 9.628107165357926e-07, "loss": 0.03550943, "memory(GiB)": 13.7, "step": 86310, "train_speed(iter/s)": 1.530452 }, { "acc": 0.98425598, "epoch": 40.456995547222874, "grad_norm": 2.8325750827789307, "learning_rate": 9.623536778738725e-07, "loss": 0.05261872, "memory(GiB)": 13.7, "step": 86315, "train_speed(iter/s)": 1.530451 }, { "acc": 0.98387766, "epoch": 40.45933911413171, "grad_norm": 0.7993256449699402, "learning_rate": 9.618967362757248e-07, "loss": 0.04390587, "memory(GiB)": 13.7, "step": 86320, "train_speed(iter/s)": 1.530451 }, { "acc": 0.98604164, "epoch": 40.46168268104054, "grad_norm": 1.7199593782424927, "learning_rate": 9.614398917523308e-07, "loss": 0.02524288, "memory(GiB)": 13.7, "step": 86325, "train_speed(iter/s)": 1.530452 }, { "acc": 0.98631935, "epoch": 40.46402624794938, "grad_norm": 0.0009587822714820504, "learning_rate": 9.609831443146738e-07, "loss": 0.02026289, "memory(GiB)": 13.7, "step": 86330, "train_speed(iter/s)": 1.530454 }, { "acc": 0.9875, "epoch": 40.46636981485821, "grad_norm": 1.8917464017868042, "learning_rate": 9.605264939737294e-07, "loss": 0.04475433, "memory(GiB)": 13.7, "step": 86335, "train_speed(iter/s)": 1.530454 }, { "acc": 0.98842258, "epoch": 40.46871338176705, "grad_norm": 5.719561576843262, "learning_rate": 9.600699407404778e-07, "loss": 0.05060531, "memory(GiB)": 13.7, "step": 86340, "train_speed(iter/s)": 1.530453 }, { "acc": 0.98614578, "epoch": 40.47105694867589, "grad_norm": 3.7251596450805664, "learning_rate": 9.59613484625889e-07, "loss": 0.04891709, "memory(GiB)": 13.7, "step": 86345, "train_speed(iter/s)": 1.530453 }, { "acc": 0.98062496, "epoch": 40.47340051558472, "grad_norm": 4.130741596221924, "learning_rate": 9.59157125640937e-07, "loss": 0.03781569, "memory(GiB)": 13.7, "step": 86350, "train_speed(iter/s)": 1.530456 }, { "acc": 0.98438988, "epoch": 40.475744082493556, "grad_norm": 5.900184631347656, "learning_rate": 9.58700863796592e-07, "loss": 0.05124951, "memory(GiB)": 13.7, "step": 86355, "train_speed(iter/s)": 1.530456 }, { "acc": 0.9854166, "epoch": 40.47808764940239, "grad_norm": 5.70954704284668, "learning_rate": 9.582446991038183e-07, "loss": 0.06110154, "memory(GiB)": 13.7, "step": 86360, "train_speed(iter/s)": 1.530455 }, { "acc": 0.97550592, "epoch": 40.480431216311224, "grad_norm": 3.44352650642395, "learning_rate": 9.577886315735829e-07, "loss": 0.04149696, "memory(GiB)": 13.7, "step": 86365, "train_speed(iter/s)": 1.530463 }, { "acc": 0.98142853, "epoch": 40.48277478322006, "grad_norm": 0.21583078801631927, "learning_rate": 9.573326612168486e-07, "loss": 0.03848685, "memory(GiB)": 13.7, "step": 86370, "train_speed(iter/s)": 1.530471 }, { "acc": 0.98326836, "epoch": 40.48511835012889, "grad_norm": 5.08351993560791, "learning_rate": 9.568767880445728e-07, "loss": 0.05938576, "memory(GiB)": 13.7, "step": 86375, "train_speed(iter/s)": 1.530473 }, { "acc": 0.97383928, "epoch": 40.487461917037734, "grad_norm": 3.2801661491394043, "learning_rate": 9.564210120677159e-07, "loss": 0.04063021, "memory(GiB)": 13.7, "step": 86380, "train_speed(iter/s)": 1.530476 }, { "acc": 0.9895834, "epoch": 40.48980548394657, "grad_norm": 0.33298370242118835, "learning_rate": 9.559653332972308e-07, "loss": 0.03135753, "memory(GiB)": 13.7, "step": 86385, "train_speed(iter/s)": 1.530477 }, { "acc": 0.98395834, "epoch": 40.4921490508554, "grad_norm": 5.444918632507324, "learning_rate": 9.555097517440718e-07, "loss": 0.04829159, "memory(GiB)": 13.7, "step": 86390, "train_speed(iter/s)": 1.530479 }, { "acc": 0.97613096, "epoch": 40.49449261776424, "grad_norm": 3.878795623779297, "learning_rate": 9.550542674191909e-07, "loss": 0.05868744, "memory(GiB)": 13.7, "step": 86395, "train_speed(iter/s)": 1.530483 }, { "acc": 0.971875, "epoch": 40.49683618467307, "grad_norm": 3.8113582134246826, "learning_rate": 9.545988803335337e-07, "loss": 0.03604476, "memory(GiB)": 13.7, "step": 86400, "train_speed(iter/s)": 1.530483 }, { "acc": 0.98055058, "epoch": 40.499179751581906, "grad_norm": 5.817171096801758, "learning_rate": 9.541435904980478e-07, "loss": 0.05485073, "memory(GiB)": 13.7, "step": 86405, "train_speed(iter/s)": 1.530485 }, { "acc": 0.98083344, "epoch": 40.50152331849074, "grad_norm": 2.893181324005127, "learning_rate": 9.536883979236776e-07, "loss": 0.03443357, "memory(GiB)": 13.7, "step": 86410, "train_speed(iter/s)": 1.530488 }, { "acc": 0.99187498, "epoch": 40.50386688539958, "grad_norm": 0.5962198972702026, "learning_rate": 9.532333026213637e-07, "loss": 0.01971902, "memory(GiB)": 13.7, "step": 86415, "train_speed(iter/s)": 1.530486 }, { "acc": 0.9958333, "epoch": 40.506210452308416, "grad_norm": 3.384324550628662, "learning_rate": 9.527783046020439e-07, "loss": 0.03597522, "memory(GiB)": 13.7, "step": 86420, "train_speed(iter/s)": 1.530484 }, { "acc": 0.9957386, "epoch": 40.50855401921725, "grad_norm": 1.7651269435882568, "learning_rate": 9.523234038766555e-07, "loss": 0.02978446, "memory(GiB)": 13.7, "step": 86425, "train_speed(iter/s)": 1.530483 }, { "acc": 0.99154758, "epoch": 40.510897586126084, "grad_norm": 5.131189823150635, "learning_rate": 9.518686004561342e-07, "loss": 0.02761336, "memory(GiB)": 13.7, "step": 86430, "train_speed(iter/s)": 1.530485 }, { "acc": 0.97937498, "epoch": 40.51324115303492, "grad_norm": 3.4256999492645264, "learning_rate": 9.514138943514104e-07, "loss": 0.02947041, "memory(GiB)": 13.7, "step": 86435, "train_speed(iter/s)": 1.530488 }, { "acc": 0.9885417, "epoch": 40.51558471994375, "grad_norm": 2.957197904586792, "learning_rate": 9.509592855734139e-07, "loss": 0.04967306, "memory(GiB)": 13.7, "step": 86440, "train_speed(iter/s)": 1.530489 }, { "acc": 0.9885417, "epoch": 40.51792828685259, "grad_norm": 2.624624729156494, "learning_rate": 9.505047741330721e-07, "loss": 0.03111747, "memory(GiB)": 13.7, "step": 86445, "train_speed(iter/s)": 1.530489 }, { "acc": 0.996875, "epoch": 40.52027185376142, "grad_norm": 1.9907633066177368, "learning_rate": 9.500503600413122e-07, "loss": 0.01108403, "memory(GiB)": 13.7, "step": 86450, "train_speed(iter/s)": 1.530492 }, { "acc": 0.9854167, "epoch": 40.52261542067026, "grad_norm": 3.31545352935791, "learning_rate": 9.495960433090547e-07, "loss": 0.04272241, "memory(GiB)": 13.7, "step": 86455, "train_speed(iter/s)": 1.530496 }, { "acc": 0.98311014, "epoch": 40.5249589875791, "grad_norm": 3.428684711456299, "learning_rate": 9.491418239472186e-07, "loss": 0.05077375, "memory(GiB)": 13.7, "step": 86460, "train_speed(iter/s)": 1.530498 }, { "acc": 0.98631945, "epoch": 40.52730255448793, "grad_norm": 0.00012874329695478082, "learning_rate": 9.48687701966723e-07, "loss": 0.04370367, "memory(GiB)": 13.7, "step": 86465, "train_speed(iter/s)": 1.530496 }, { "acc": 0.98354168, "epoch": 40.529646121396766, "grad_norm": 1.3812366724014282, "learning_rate": 9.482336773784854e-07, "loss": 0.04266137, "memory(GiB)": 13.7, "step": 86470, "train_speed(iter/s)": 1.530497 }, { "acc": 0.9911397, "epoch": 40.5319896883056, "grad_norm": 0.0028124803211539984, "learning_rate": 9.477797501934167e-07, "loss": 0.03936595, "memory(GiB)": 13.7, "step": 86475, "train_speed(iter/s)": 1.530499 }, { "acc": 0.990625, "epoch": 40.534333255214435, "grad_norm": 3.877727746963501, "learning_rate": 9.473259204224276e-07, "loss": 0.0242636, "memory(GiB)": 13.7, "step": 86480, "train_speed(iter/s)": 1.530503 }, { "acc": 0.98425598, "epoch": 40.53667682212327, "grad_norm": 3.8558075428009033, "learning_rate": 9.468721880764299e-07, "loss": 0.03372836, "memory(GiB)": 13.7, "step": 86485, "train_speed(iter/s)": 1.530507 }, { "acc": 0.99074821, "epoch": 40.53902038903211, "grad_norm": 0.6037847995758057, "learning_rate": 9.464185531663258e-07, "loss": 0.05071958, "memory(GiB)": 13.7, "step": 86490, "train_speed(iter/s)": 1.530508 }, { "acc": 0.99354172, "epoch": 40.541363955940945, "grad_norm": 1.629086971282959, "learning_rate": 9.459650157030223e-07, "loss": 0.01924211, "memory(GiB)": 13.7, "step": 86495, "train_speed(iter/s)": 1.530509 }, { "acc": 0.99236107, "epoch": 40.54370752284978, "grad_norm": 0.0008306517265737057, "learning_rate": 9.455115756974184e-07, "loss": 0.02381352, "memory(GiB)": 13.7, "step": 86500, "train_speed(iter/s)": 1.53051 }, { "acc": 0.990625, "epoch": 40.54605108975861, "grad_norm": 0.00029117826488800347, "learning_rate": 9.450582331604147e-07, "loss": 0.04201263, "memory(GiB)": 13.7, "step": 86505, "train_speed(iter/s)": 1.530513 }, { "acc": 0.98291664, "epoch": 40.54839465666745, "grad_norm": 6.513331413269043, "learning_rate": 9.446049881029089e-07, "loss": 0.02951609, "memory(GiB)": 13.7, "step": 86510, "train_speed(iter/s)": 1.530514 }, { "acc": 0.98479166, "epoch": 40.55073822357628, "grad_norm": 1.084511160850525, "learning_rate": 9.441518405357931e-07, "loss": 0.03430718, "memory(GiB)": 13.7, "step": 86515, "train_speed(iter/s)": 1.530523 }, { "acc": 0.98395834, "epoch": 40.553081790485116, "grad_norm": 3.4943947792053223, "learning_rate": 9.436987904699611e-07, "loss": 0.03557774, "memory(GiB)": 13.7, "step": 86520, "train_speed(iter/s)": 1.530529 }, { "acc": 0.9916667, "epoch": 40.55542535739395, "grad_norm": 0.210295632481575, "learning_rate": 9.432458379163042e-07, "loss": 0.02699107, "memory(GiB)": 13.7, "step": 86525, "train_speed(iter/s)": 1.530528 }, { "acc": 1.0, "epoch": 40.55776892430279, "grad_norm": 1.317352533340454, "learning_rate": 9.427929828857068e-07, "loss": 0.00811565, "memory(GiB)": 13.7, "step": 86530, "train_speed(iter/s)": 1.530531 }, { "acc": 0.98562498, "epoch": 40.560112491211626, "grad_norm": 0.05501860752701759, "learning_rate": 9.423402253890555e-07, "loss": 0.04219526, "memory(GiB)": 13.7, "step": 86535, "train_speed(iter/s)": 1.530533 }, { "acc": 0.9869791, "epoch": 40.56245605812046, "grad_norm": 3.2811169624328613, "learning_rate": 9.418875654372342e-07, "loss": 0.02258047, "memory(GiB)": 13.7, "step": 86540, "train_speed(iter/s)": 1.530541 }, { "acc": 0.98973217, "epoch": 40.564799625029295, "grad_norm": 3.2189886569976807, "learning_rate": 9.414350030411226e-07, "loss": 0.0341116, "memory(GiB)": 13.7, "step": 86545, "train_speed(iter/s)": 1.530544 }, { "acc": 0.98406248, "epoch": 40.56714319193813, "grad_norm": 0.0011685565114021301, "learning_rate": 9.409825382115965e-07, "loss": 0.03175076, "memory(GiB)": 13.7, "step": 86550, "train_speed(iter/s)": 1.530544 }, { "acc": 0.97358637, "epoch": 40.56948675884696, "grad_norm": 0.022832266986370087, "learning_rate": 9.405301709595345e-07, "loss": 0.04370421, "memory(GiB)": 13.7, "step": 86555, "train_speed(iter/s)": 1.530541 }, { "acc": 0.9817709, "epoch": 40.5718303257558, "grad_norm": 0.11266210675239563, "learning_rate": 9.4007790129581e-07, "loss": 0.03146686, "memory(GiB)": 13.7, "step": 86560, "train_speed(iter/s)": 1.530544 }, { "acc": 0.98695889, "epoch": 40.57417389266463, "grad_norm": 1.5794594287872314, "learning_rate": 9.396257292312916e-07, "loss": 0.03678986, "memory(GiB)": 13.7, "step": 86565, "train_speed(iter/s)": 1.530544 }, { "acc": 0.98708334, "epoch": 40.57651745957347, "grad_norm": 1.0447267293930054, "learning_rate": 9.391736547768501e-07, "loss": 0.02201316, "memory(GiB)": 13.7, "step": 86570, "train_speed(iter/s)": 1.530544 }, { "acc": 0.99229164, "epoch": 40.57886102648231, "grad_norm": 3.779843330383301, "learning_rate": 9.387216779433523e-07, "loss": 0.05085869, "memory(GiB)": 13.7, "step": 86575, "train_speed(iter/s)": 1.530545 }, { "acc": 0.9833334, "epoch": 40.58120459339114, "grad_norm": 2.4574222564697266, "learning_rate": 9.38269798741661e-07, "loss": 0.02850182, "memory(GiB)": 13.7, "step": 86580, "train_speed(iter/s)": 1.530547 }, { "acc": 0.9825695, "epoch": 40.583548160299976, "grad_norm": 3.2273623943328857, "learning_rate": 9.378180171826387e-07, "loss": 0.03307056, "memory(GiB)": 13.7, "step": 86585, "train_speed(iter/s)": 1.530555 }, { "acc": 0.9885416, "epoch": 40.58589172720881, "grad_norm": 3.986790657043457, "learning_rate": 9.373663332771431e-07, "loss": 0.03055014, "memory(GiB)": 13.7, "step": 86590, "train_speed(iter/s)": 1.530558 }, { "acc": 0.98809528, "epoch": 40.588235294117645, "grad_norm": 1.3380614519119263, "learning_rate": 9.369147470360328e-07, "loss": 0.04046464, "memory(GiB)": 13.7, "step": 86595, "train_speed(iter/s)": 1.530559 }, { "acc": 0.97674675, "epoch": 40.59057886102648, "grad_norm": 1.3683375120162964, "learning_rate": 9.364632584701636e-07, "loss": 0.0508467, "memory(GiB)": 13.7, "step": 86600, "train_speed(iter/s)": 1.530564 }, { "acc": 0.98615532, "epoch": 40.59292242793532, "grad_norm": 1.5592451095581055, "learning_rate": 9.360118675903844e-07, "loss": 0.02940143, "memory(GiB)": 13.7, "step": 86605, "train_speed(iter/s)": 1.53057 }, { "acc": 0.98458328, "epoch": 40.595265994844155, "grad_norm": 0.001329141086898744, "learning_rate": 9.355605744075473e-07, "loss": 0.02634315, "memory(GiB)": 13.7, "step": 86610, "train_speed(iter/s)": 1.530571 }, { "acc": 0.98402786, "epoch": 40.59760956175299, "grad_norm": 0.026457546278834343, "learning_rate": 9.351093789325008e-07, "loss": 0.04517524, "memory(GiB)": 13.7, "step": 86615, "train_speed(iter/s)": 1.530574 }, { "acc": 0.99375, "epoch": 40.599953128661824, "grad_norm": 0.029594365507364273, "learning_rate": 9.34658281176089e-07, "loss": 0.01432052, "memory(GiB)": 13.7, "step": 86620, "train_speed(iter/s)": 1.530578 }, { "acc": 0.99107141, "epoch": 40.60229669557066, "grad_norm": 2.4750735759735107, "learning_rate": 9.342072811491534e-07, "loss": 0.03848978, "memory(GiB)": 13.7, "step": 86625, "train_speed(iter/s)": 1.530574 }, { "acc": 0.99258928, "epoch": 40.60464026247949, "grad_norm": 0.002647014334797859, "learning_rate": 9.337563788625363e-07, "loss": 0.02769082, "memory(GiB)": 13.7, "step": 86630, "train_speed(iter/s)": 1.530578 }, { "acc": 0.99508934, "epoch": 40.60698382938833, "grad_norm": 2.0169198513031006, "learning_rate": 9.333055743270764e-07, "loss": 0.01926796, "memory(GiB)": 13.7, "step": 86635, "train_speed(iter/s)": 1.53058 }, { "acc": 0.99008923, "epoch": 40.60932739629716, "grad_norm": 2.7440388202667236, "learning_rate": 9.328548675536077e-07, "loss": 0.03736969, "memory(GiB)": 13.7, "step": 86640, "train_speed(iter/s)": 1.530583 }, { "acc": 0.98708334, "epoch": 40.611670963206, "grad_norm": 1.7223504781723022, "learning_rate": 9.324042585529645e-07, "loss": 0.03646163, "memory(GiB)": 13.7, "step": 86645, "train_speed(iter/s)": 1.530583 }, { "acc": 0.98133926, "epoch": 40.61401453011484, "grad_norm": 0.9849684238433838, "learning_rate": 9.319537473359787e-07, "loss": 0.0447297, "memory(GiB)": 13.7, "step": 86650, "train_speed(iter/s)": 1.530582 }, { "acc": 0.99154758, "epoch": 40.61635809702367, "grad_norm": 3.3413589000701904, "learning_rate": 9.315033339134792e-07, "loss": 0.04121944, "memory(GiB)": 13.7, "step": 86655, "train_speed(iter/s)": 1.530586 }, { "acc": 0.97166672, "epoch": 40.618701663932505, "grad_norm": 5.241790294647217, "learning_rate": 9.31053018296291e-07, "loss": 0.0669588, "memory(GiB)": 13.7, "step": 86660, "train_speed(iter/s)": 1.530591 }, { "acc": 0.9885416, "epoch": 40.62104523084134, "grad_norm": 0.8783512115478516, "learning_rate": 9.306028004952404e-07, "loss": 0.04630696, "memory(GiB)": 13.7, "step": 86665, "train_speed(iter/s)": 1.530592 }, { "acc": 0.97208328, "epoch": 40.623388797750174, "grad_norm": 2.6491873264312744, "learning_rate": 9.301526805211464e-07, "loss": 0.06324649, "memory(GiB)": 13.7, "step": 86670, "train_speed(iter/s)": 1.530596 }, { "acc": 0.96541672, "epoch": 40.62573236465901, "grad_norm": 4.6117987632751465, "learning_rate": 9.297026583848311e-07, "loss": 0.05814381, "memory(GiB)": 13.7, "step": 86675, "train_speed(iter/s)": 1.530599 }, { "acc": 0.9947917, "epoch": 40.62807593156785, "grad_norm": 2.790430784225464, "learning_rate": 9.29252734097109e-07, "loss": 0.03023928, "memory(GiB)": 13.7, "step": 86680, "train_speed(iter/s)": 1.530603 }, { "acc": 0.98375006, "epoch": 40.630419498476684, "grad_norm": 3.7746012210845947, "learning_rate": 9.288029076687963e-07, "loss": 0.04863758, "memory(GiB)": 13.7, "step": 86685, "train_speed(iter/s)": 1.530611 }, { "acc": 0.98311958, "epoch": 40.63276306538552, "grad_norm": 2.6114020347595215, "learning_rate": 9.283531791107061e-07, "loss": 0.02161843, "memory(GiB)": 13.7, "step": 86690, "train_speed(iter/s)": 1.530614 }, { "acc": 0.98916664, "epoch": 40.63510663229435, "grad_norm": 2.954949378967285, "learning_rate": 9.279035484336462e-07, "loss": 0.03795813, "memory(GiB)": 13.7, "step": 86695, "train_speed(iter/s)": 1.530619 }, { "acc": 0.98120041, "epoch": 40.63745019920319, "grad_norm": 5.468132495880127, "learning_rate": 9.274540156484259e-07, "loss": 0.03218935, "memory(GiB)": 13.7, "step": 86700, "train_speed(iter/s)": 1.530625 }, { "acc": 0.990625, "epoch": 40.63979376611202, "grad_norm": 4.010076999664307, "learning_rate": 9.270045807658514e-07, "loss": 0.01741542, "memory(GiB)": 13.7, "step": 86705, "train_speed(iter/s)": 1.530626 }, { "acc": 0.9802084, "epoch": 40.642137333020855, "grad_norm": 2.812983989715576, "learning_rate": 9.265552437967239e-07, "loss": 0.04261971, "memory(GiB)": 13.7, "step": 86710, "train_speed(iter/s)": 1.530627 }, { "acc": 0.99370041, "epoch": 40.64448089992969, "grad_norm": 1.4833711385726929, "learning_rate": 9.261060047518429e-07, "loss": 0.03161477, "memory(GiB)": 13.7, "step": 86715, "train_speed(iter/s)": 1.530631 }, { "acc": 0.98599205, "epoch": 40.64682446683853, "grad_norm": 4.046581268310547, "learning_rate": 9.256568636420085e-07, "loss": 0.0385305, "memory(GiB)": 13.7, "step": 86720, "train_speed(iter/s)": 1.530636 }, { "acc": 0.9822916, "epoch": 40.649168033747365, "grad_norm": 4.499868392944336, "learning_rate": 9.25207820478016e-07, "loss": 0.0294522, "memory(GiB)": 13.7, "step": 86725, "train_speed(iter/s)": 1.530643 }, { "acc": 0.99196434, "epoch": 40.6515116006562, "grad_norm": 3.279191493988037, "learning_rate": 9.247588752706607e-07, "loss": 0.00916318, "memory(GiB)": 13.7, "step": 86730, "train_speed(iter/s)": 1.530643 }, { "acc": 0.97907248, "epoch": 40.653855167565034, "grad_norm": 5.0774617195129395, "learning_rate": 9.243100280307307e-07, "loss": 0.04621719, "memory(GiB)": 13.7, "step": 86735, "train_speed(iter/s)": 1.530644 }, { "acc": 0.99276209, "epoch": 40.65619873447387, "grad_norm": 2.3037960529327393, "learning_rate": 9.238612787690164e-07, "loss": 0.04660578, "memory(GiB)": 13.7, "step": 86740, "train_speed(iter/s)": 1.530648 }, { "acc": 0.98279762, "epoch": 40.6585423013827, "grad_norm": 0.960456371307373, "learning_rate": 9.23412627496305e-07, "loss": 0.03192295, "memory(GiB)": 13.7, "step": 86745, "train_speed(iter/s)": 1.530648 }, { "acc": 0.97446432, "epoch": 40.66088586829154, "grad_norm": 4.69618558883667, "learning_rate": 9.229640742233805e-07, "loss": 0.04625129, "memory(GiB)": 13.7, "step": 86750, "train_speed(iter/s)": 1.53065 }, { "acc": 0.98895836, "epoch": 40.66322943520038, "grad_norm": 5.4700493812561035, "learning_rate": 9.225156189610223e-07, "loss": 0.02494591, "memory(GiB)": 13.7, "step": 86755, "train_speed(iter/s)": 1.530649 }, { "acc": 0.990625, "epoch": 40.66557300210921, "grad_norm": 0.5813988447189331, "learning_rate": 9.220672617200113e-07, "loss": 0.03597188, "memory(GiB)": 13.7, "step": 86760, "train_speed(iter/s)": 1.530649 }, { "acc": 0.9833334, "epoch": 40.66791656901805, "grad_norm": 2.2723803520202637, "learning_rate": 9.216190025111258e-07, "loss": 0.03277053, "memory(GiB)": 13.7, "step": 86765, "train_speed(iter/s)": 1.53065 }, { "acc": 0.98812504, "epoch": 40.67026013592688, "grad_norm": 2.88853120803833, "learning_rate": 9.211708413451383e-07, "loss": 0.04355453, "memory(GiB)": 13.7, "step": 86770, "train_speed(iter/s)": 1.530648 }, { "acc": 0.99258928, "epoch": 40.672603702835715, "grad_norm": 4.0171027183532715, "learning_rate": 9.207227782328217e-07, "loss": 0.04085706, "memory(GiB)": 13.7, "step": 86775, "train_speed(iter/s)": 1.530652 }, { "acc": 0.98123684, "epoch": 40.67494726974455, "grad_norm": 0.9331150650978088, "learning_rate": 9.202748131849481e-07, "loss": 0.05032496, "memory(GiB)": 13.7, "step": 86780, "train_speed(iter/s)": 1.530657 }, { "acc": 0.98520832, "epoch": 40.677290836653384, "grad_norm": 0.7888403534889221, "learning_rate": 9.198269462122822e-07, "loss": 0.03278521, "memory(GiB)": 13.7, "step": 86785, "train_speed(iter/s)": 1.530658 }, { "acc": 0.9777915, "epoch": 40.67963440356222, "grad_norm": 2.843472719192505, "learning_rate": 9.193791773255915e-07, "loss": 0.04470981, "memory(GiB)": 13.7, "step": 86790, "train_speed(iter/s)": 1.53066 }, { "acc": 0.97458334, "epoch": 40.68197797047106, "grad_norm": 6.964271545410156, "learning_rate": 9.189315065356369e-07, "loss": 0.04810123, "memory(GiB)": 13.7, "step": 86795, "train_speed(iter/s)": 1.530664 }, { "acc": 0.98383923, "epoch": 40.684321537379894, "grad_norm": 3.450685977935791, "learning_rate": 9.184839338531794e-07, "loss": 0.02841583, "memory(GiB)": 13.7, "step": 86800, "train_speed(iter/s)": 1.530665 }, { "acc": 0.98779764, "epoch": 40.68666510428873, "grad_norm": 3.7970147132873535, "learning_rate": 9.180364592889791e-07, "loss": 0.0417167, "memory(GiB)": 13.7, "step": 86805, "train_speed(iter/s)": 1.530664 }, { "acc": 0.98500004, "epoch": 40.68900867119756, "grad_norm": 1.3168543577194214, "learning_rate": 9.175890828537894e-07, "loss": 0.037199, "memory(GiB)": 13.7, "step": 86810, "train_speed(iter/s)": 1.530667 }, { "acc": 0.99404764, "epoch": 40.6913522381064, "grad_norm": 4.7954816818237305, "learning_rate": 9.171418045583646e-07, "loss": 0.01416062, "memory(GiB)": 13.7, "step": 86815, "train_speed(iter/s)": 1.530672 }, { "acc": 0.97979164, "epoch": 40.69369580501523, "grad_norm": 2.633782386779785, "learning_rate": 9.166946244134572e-07, "loss": 0.04338371, "memory(GiB)": 13.7, "step": 86820, "train_speed(iter/s)": 1.530675 }, { "acc": 0.97875004, "epoch": 40.696039371924066, "grad_norm": 7.049816131591797, "learning_rate": 9.162475424298133e-07, "loss": 0.05426999, "memory(GiB)": 13.7, "step": 86825, "train_speed(iter/s)": 1.530677 }, { "acc": 0.9874053, "epoch": 40.69838293883291, "grad_norm": 3.4092648029327393, "learning_rate": 9.158005586181823e-07, "loss": 0.02876295, "memory(GiB)": 13.7, "step": 86830, "train_speed(iter/s)": 1.530683 }, { "acc": 0.9895834, "epoch": 40.70072650574174, "grad_norm": 3.6911375522613525, "learning_rate": 9.153536729893049e-07, "loss": 0.0878054, "memory(GiB)": 13.7, "step": 86835, "train_speed(iter/s)": 1.530687 }, { "acc": 0.98395834, "epoch": 40.703070072650576, "grad_norm": 0.04192817583680153, "learning_rate": 9.14906885553926e-07, "loss": 0.03176861, "memory(GiB)": 13.7, "step": 86840, "train_speed(iter/s)": 1.530691 }, { "acc": 0.98764877, "epoch": 40.70541363955941, "grad_norm": 1.7118017673492432, "learning_rate": 9.14460196322782e-07, "loss": 0.03001783, "memory(GiB)": 13.7, "step": 86845, "train_speed(iter/s)": 1.530694 }, { "acc": 0.99333334, "epoch": 40.707757206468244, "grad_norm": 4.926340579986572, "learning_rate": 9.140136053066111e-07, "loss": 0.03152732, "memory(GiB)": 13.7, "step": 86850, "train_speed(iter/s)": 1.530697 }, { "acc": 0.98979168, "epoch": 40.71010077337708, "grad_norm": 3.5827994346618652, "learning_rate": 9.135671125161496e-07, "loss": 0.05766662, "memory(GiB)": 13.7, "step": 86855, "train_speed(iter/s)": 1.530699 }, { "acc": 0.98944445, "epoch": 40.71244434028591, "grad_norm": 5.876652240753174, "learning_rate": 9.131207179621264e-07, "loss": 0.05113133, "memory(GiB)": 13.7, "step": 86860, "train_speed(iter/s)": 1.530704 }, { "acc": 0.9697916, "epoch": 40.71478790719475, "grad_norm": 3.8550209999084473, "learning_rate": 9.126744216552733e-07, "loss": 0.04988737, "memory(GiB)": 13.7, "step": 86865, "train_speed(iter/s)": 1.530706 }, { "acc": 0.97562504, "epoch": 40.71713147410359, "grad_norm": 4.516834735870361, "learning_rate": 9.122282236063173e-07, "loss": 0.07624067, "memory(GiB)": 13.7, "step": 86870, "train_speed(iter/s)": 1.530712 }, { "acc": 0.97703381, "epoch": 40.71947504101242, "grad_norm": 3.271294355392456, "learning_rate": 9.11782123825985e-07, "loss": 0.05618787, "memory(GiB)": 13.7, "step": 86875, "train_speed(iter/s)": 1.530712 }, { "acc": 0.98842258, "epoch": 40.72181860792126, "grad_norm": 0.0033461677376180887, "learning_rate": 9.113361223249983e-07, "loss": 0.04759844, "memory(GiB)": 13.7, "step": 86880, "train_speed(iter/s)": 1.530711 }, { "acc": 0.98842258, "epoch": 40.72416217483009, "grad_norm": 4.316753387451172, "learning_rate": 9.108902191140762e-07, "loss": 0.02290509, "memory(GiB)": 13.7, "step": 86885, "train_speed(iter/s)": 1.530711 }, { "acc": 0.98770828, "epoch": 40.726505741738926, "grad_norm": 0.6955735087394714, "learning_rate": 9.104444142039374e-07, "loss": 0.02507843, "memory(GiB)": 13.7, "step": 86890, "train_speed(iter/s)": 1.530709 }, { "acc": 0.98779755, "epoch": 40.72884930864776, "grad_norm": 0.0005810661241412163, "learning_rate": 9.099987076052993e-07, "loss": 0.02009187, "memory(GiB)": 13.7, "step": 86895, "train_speed(iter/s)": 1.530713 }, { "acc": 0.98467255, "epoch": 40.731192875556594, "grad_norm": 0.7247150540351868, "learning_rate": 9.095530993288729e-07, "loss": 0.04263292, "memory(GiB)": 13.7, "step": 86900, "train_speed(iter/s)": 1.530716 }, { "acc": 0.97895832, "epoch": 40.733536442465436, "grad_norm": 2.242520809173584, "learning_rate": 9.091075893853698e-07, "loss": 0.04257125, "memory(GiB)": 13.7, "step": 86905, "train_speed(iter/s)": 1.530716 }, { "acc": 0.98968754, "epoch": 40.73588000937427, "grad_norm": 2.294234275817871, "learning_rate": 9.086621777855008e-07, "loss": 0.03489385, "memory(GiB)": 13.7, "step": 86910, "train_speed(iter/s)": 1.530716 }, { "acc": 0.978125, "epoch": 40.738223576283104, "grad_norm": 4.266419410705566, "learning_rate": 9.082168645399699e-07, "loss": 0.0876845, "memory(GiB)": 13.7, "step": 86915, "train_speed(iter/s)": 1.530718 }, { "acc": 0.9875, "epoch": 40.74056714319194, "grad_norm": 2.1769938468933105, "learning_rate": 9.077716496594801e-07, "loss": 0.02488616, "memory(GiB)": 13.7, "step": 86920, "train_speed(iter/s)": 1.530726 }, { "acc": 0.9791666, "epoch": 40.74291071010077, "grad_norm": 0.006697171367704868, "learning_rate": 9.073265331547346e-07, "loss": 0.04523762, "memory(GiB)": 13.7, "step": 86925, "train_speed(iter/s)": 1.530728 }, { "acc": 0.9802084, "epoch": 40.74525427700961, "grad_norm": 5.473260879516602, "learning_rate": 9.068815150364331e-07, "loss": 0.02904832, "memory(GiB)": 13.7, "step": 86930, "train_speed(iter/s)": 1.530735 }, { "acc": 0.98239584, "epoch": 40.74759784391844, "grad_norm": 1.773693561553955, "learning_rate": 9.064365953152701e-07, "loss": 0.03041585, "memory(GiB)": 13.7, "step": 86935, "train_speed(iter/s)": 1.530736 }, { "acc": 0.98083324, "epoch": 40.749941410827276, "grad_norm": 4.405097961425781, "learning_rate": 9.059917740019411e-07, "loss": 0.06458366, "memory(GiB)": 13.7, "step": 86940, "train_speed(iter/s)": 1.530737 }, { "acc": 0.99571428, "epoch": 40.75228497773612, "grad_norm": 2.4594242572784424, "learning_rate": 9.055470511071386e-07, "loss": 0.06843151, "memory(GiB)": 13.7, "step": 86945, "train_speed(iter/s)": 1.530739 }, { "acc": 0.97999992, "epoch": 40.75462854464495, "grad_norm": 1.5252147912979126, "learning_rate": 9.051024266415537e-07, "loss": 0.06092255, "memory(GiB)": 13.7, "step": 86950, "train_speed(iter/s)": 1.53074 }, { "acc": 0.98187504, "epoch": 40.756972111553786, "grad_norm": 3.5969395637512207, "learning_rate": 9.046579006158702e-07, "loss": 0.04088735, "memory(GiB)": 13.7, "step": 86955, "train_speed(iter/s)": 1.530746 }, { "acc": 0.98966351, "epoch": 40.75931567846262, "grad_norm": 0.0018680692883208394, "learning_rate": 9.042134730407763e-07, "loss": 0.03783596, "memory(GiB)": 13.7, "step": 86960, "train_speed(iter/s)": 1.530751 }, { "acc": 0.98759918, "epoch": 40.761659245371455, "grad_norm": 1.2664448022842407, "learning_rate": 9.037691439269519e-07, "loss": 0.05539464, "memory(GiB)": 13.7, "step": 86965, "train_speed(iter/s)": 1.530753 }, { "acc": 0.97562504, "epoch": 40.76400281228029, "grad_norm": 6.396681308746338, "learning_rate": 9.033249132850794e-07, "loss": 0.04179488, "memory(GiB)": 13.7, "step": 86970, "train_speed(iter/s)": 1.530757 }, { "acc": 0.9875, "epoch": 40.76634637918912, "grad_norm": 5.745617866516113, "learning_rate": 9.028807811258351e-07, "loss": 0.06095606, "memory(GiB)": 13.7, "step": 86975, "train_speed(iter/s)": 1.530761 }, { "acc": 0.97937498, "epoch": 40.768689946097965, "grad_norm": 5.756217002868652, "learning_rate": 9.024367474598945e-07, "loss": 0.03884534, "memory(GiB)": 13.7, "step": 86980, "train_speed(iter/s)": 1.530765 }, { "acc": 0.978125, "epoch": 40.7710335130068, "grad_norm": 4.391547203063965, "learning_rate": 9.019928122979328e-07, "loss": 0.11007988, "memory(GiB)": 13.7, "step": 86985, "train_speed(iter/s)": 1.530768 }, { "acc": 0.99562502, "epoch": 40.77337707991563, "grad_norm": 1.8843621015548706, "learning_rate": 9.015489756506185e-07, "loss": 0.03689109, "memory(GiB)": 13.7, "step": 86990, "train_speed(iter/s)": 1.530773 }, { "acc": 0.9927083, "epoch": 40.77572064682447, "grad_norm": 2.908996105194092, "learning_rate": 9.0110523752862e-07, "loss": 0.01826651, "memory(GiB)": 13.7, "step": 86995, "train_speed(iter/s)": 1.530779 }, { "acc": 0.99208336, "epoch": 40.7780642137333, "grad_norm": 2.1127240657806396, "learning_rate": 9.006615979426057e-07, "loss": 0.04815532, "memory(GiB)": 13.7, "step": 87000, "train_speed(iter/s)": 1.530779 }, { "acc": 0.99333334, "epoch": 40.780407780642136, "grad_norm": 2.1928064823150635, "learning_rate": 9.002180569032365e-07, "loss": 0.0223199, "memory(GiB)": 13.7, "step": 87005, "train_speed(iter/s)": 1.530779 }, { "acc": 0.99213057, "epoch": 40.78275134755097, "grad_norm": 2.4572596549987793, "learning_rate": 8.997746144211756e-07, "loss": 0.01951638, "memory(GiB)": 13.7, "step": 87010, "train_speed(iter/s)": 1.530781 }, { "acc": 0.9833334, "epoch": 40.785094914459805, "grad_norm": 4.6647629737854, "learning_rate": 8.9933127050708e-07, "loss": 0.03546391, "memory(GiB)": 13.7, "step": 87015, "train_speed(iter/s)": 1.530783 }, { "acc": 0.97633934, "epoch": 40.787438481368646, "grad_norm": 5.238368511199951, "learning_rate": 8.988880251716067e-07, "loss": 0.05556704, "memory(GiB)": 13.7, "step": 87020, "train_speed(iter/s)": 1.530788 }, { "acc": 0.99466343, "epoch": 40.78978204827748, "grad_norm": 3.6420176029205322, "learning_rate": 8.984448784254126e-07, "loss": 0.03852299, "memory(GiB)": 13.7, "step": 87025, "train_speed(iter/s)": 1.530795 }, { "acc": 0.98666668, "epoch": 40.792125615186315, "grad_norm": 5.5254340171813965, "learning_rate": 8.980018302791454e-07, "loss": 0.03843826, "memory(GiB)": 13.7, "step": 87030, "train_speed(iter/s)": 1.530795 }, { "acc": 0.98696432, "epoch": 40.79446918209515, "grad_norm": 1.9742281436920166, "learning_rate": 8.975588807434564e-07, "loss": 0.0332679, "memory(GiB)": 13.7, "step": 87035, "train_speed(iter/s)": 1.530795 }, { "acc": 0.98604164, "epoch": 40.79681274900398, "grad_norm": 2.4421136379241943, "learning_rate": 8.971160298289942e-07, "loss": 0.03246114, "memory(GiB)": 13.7, "step": 87040, "train_speed(iter/s)": 1.530794 }, { "acc": 0.99187498, "epoch": 40.79915631591282, "grad_norm": 2.6477181911468506, "learning_rate": 8.966732775464021e-07, "loss": 0.01454957, "memory(GiB)": 13.7, "step": 87045, "train_speed(iter/s)": 1.5308 }, { "acc": 0.97979164, "epoch": 40.80149988282165, "grad_norm": 4.1178741455078125, "learning_rate": 8.96230623906321e-07, "loss": 0.0286914, "memory(GiB)": 13.7, "step": 87050, "train_speed(iter/s)": 1.530802 }, { "acc": 0.98738098, "epoch": 40.80384344973049, "grad_norm": 3.2668025493621826, "learning_rate": 8.957880689193921e-07, "loss": 0.0275014, "memory(GiB)": 13.7, "step": 87055, "train_speed(iter/s)": 1.530805 }, { "acc": 0.96229172, "epoch": 40.80618701663933, "grad_norm": 6.323875904083252, "learning_rate": 8.953456125962542e-07, "loss": 0.06520735, "memory(GiB)": 13.7, "step": 87060, "train_speed(iter/s)": 1.530805 }, { "acc": 0.98779764, "epoch": 40.80853058354816, "grad_norm": 0.07922521978616714, "learning_rate": 8.949032549475399e-07, "loss": 0.02489225, "memory(GiB)": 13.7, "step": 87065, "train_speed(iter/s)": 1.530804 }, { "acc": 0.97553034, "epoch": 40.810874150456996, "grad_norm": 2.280595541000366, "learning_rate": 8.944609959838839e-07, "loss": 0.04708579, "memory(GiB)": 13.7, "step": 87070, "train_speed(iter/s)": 1.530806 }, { "acc": 0.996875, "epoch": 40.81321771736583, "grad_norm": 1.3418152332305908, "learning_rate": 8.940188357159155e-07, "loss": 0.02136053, "memory(GiB)": 13.7, "step": 87075, "train_speed(iter/s)": 1.530806 }, { "acc": 0.99402781, "epoch": 40.815561284274665, "grad_norm": 1.0603009462356567, "learning_rate": 8.935767741542652e-07, "loss": 0.03035516, "memory(GiB)": 13.7, "step": 87080, "train_speed(iter/s)": 1.53081 }, { "acc": 0.9885417, "epoch": 40.8179048511835, "grad_norm": 2.263366937637329, "learning_rate": 8.931348113095571e-07, "loss": 0.0201128, "memory(GiB)": 13.7, "step": 87085, "train_speed(iter/s)": 1.530811 }, { "acc": 0.98715782, "epoch": 40.82024841809233, "grad_norm": 2.998687744140625, "learning_rate": 8.926929471924125e-07, "loss": 0.02734032, "memory(GiB)": 13.7, "step": 87090, "train_speed(iter/s)": 1.530815 }, { "acc": 0.98175602, "epoch": 40.822591985001175, "grad_norm": 3.1559078693389893, "learning_rate": 8.92251181813455e-07, "loss": 0.04910341, "memory(GiB)": 13.7, "step": 87095, "train_speed(iter/s)": 1.530816 }, { "acc": 0.99020834, "epoch": 40.82493555191001, "grad_norm": 3.7889974117279053, "learning_rate": 8.91809515183303e-07, "loss": 0.021655, "memory(GiB)": 13.7, "step": 87100, "train_speed(iter/s)": 1.530815 }, { "acc": 0.97458334, "epoch": 40.82727911881884, "grad_norm": 2.955268144607544, "learning_rate": 8.913679473125711e-07, "loss": 0.05533288, "memory(GiB)": 13.7, "step": 87105, "train_speed(iter/s)": 1.530814 }, { "acc": 0.97297115, "epoch": 40.82962268572768, "grad_norm": 4.024335861206055, "learning_rate": 8.909264782118744e-07, "loss": 0.06519161, "memory(GiB)": 13.7, "step": 87110, "train_speed(iter/s)": 1.530815 }, { "acc": 0.98329449, "epoch": 40.83196625263651, "grad_norm": 4.518609046936035, "learning_rate": 8.904851078918256e-07, "loss": 0.03174815, "memory(GiB)": 13.7, "step": 87115, "train_speed(iter/s)": 1.530821 }, { "acc": 0.97736111, "epoch": 40.834309819545346, "grad_norm": 1.5783599615097046, "learning_rate": 8.900438363630311e-07, "loss": 0.0510239, "memory(GiB)": 13.7, "step": 87120, "train_speed(iter/s)": 1.530826 }, { "acc": 0.98673611, "epoch": 40.83665338645418, "grad_norm": 1.3607232570648193, "learning_rate": 8.896026636360997e-07, "loss": 0.03529376, "memory(GiB)": 13.7, "step": 87125, "train_speed(iter/s)": 1.530827 }, { "acc": 0.98800602, "epoch": 40.838996953363015, "grad_norm": 2.1643788814544678, "learning_rate": 8.891615897216336e-07, "loss": 0.02851313, "memory(GiB)": 13.7, "step": 87130, "train_speed(iter/s)": 1.530832 }, { "acc": 0.98386364, "epoch": 40.84134052027186, "grad_norm": 3.244274854660034, "learning_rate": 8.887206146302378e-07, "loss": 0.03812557, "memory(GiB)": 13.7, "step": 87135, "train_speed(iter/s)": 1.530837 }, { "acc": 0.98917618, "epoch": 40.84368408718069, "grad_norm": 2.4515793323516846, "learning_rate": 8.882797383725086e-07, "loss": 0.04076945, "memory(GiB)": 13.7, "step": 87140, "train_speed(iter/s)": 1.530838 }, { "acc": 0.9864584, "epoch": 40.846027654089525, "grad_norm": 3.6032092571258545, "learning_rate": 8.878389609590452e-07, "loss": 0.04916717, "memory(GiB)": 13.7, "step": 87145, "train_speed(iter/s)": 1.530839 }, { "acc": 0.99265881, "epoch": 40.84837122099836, "grad_norm": 4.236832141876221, "learning_rate": 8.873982824004415e-07, "loss": 0.03951482, "memory(GiB)": 13.7, "step": 87150, "train_speed(iter/s)": 1.530838 }, { "acc": 0.99354172, "epoch": 40.850714787907194, "grad_norm": 0.1306922882795334, "learning_rate": 8.869577027072921e-07, "loss": 0.01910491, "memory(GiB)": 13.7, "step": 87155, "train_speed(iter/s)": 1.530841 }, { "acc": 0.97020836, "epoch": 40.85305835481603, "grad_norm": 5.838029861450195, "learning_rate": 8.865172218901843e-07, "loss": 0.04760425, "memory(GiB)": 13.7, "step": 87160, "train_speed(iter/s)": 1.530841 }, { "acc": 0.980019, "epoch": 40.85540192172486, "grad_norm": 1.920944094657898, "learning_rate": 8.860768399597076e-07, "loss": 0.03740392, "memory(GiB)": 13.7, "step": 87165, "train_speed(iter/s)": 1.530843 }, { "acc": 1.0, "epoch": 40.857745488633704, "grad_norm": 0.07164549082517624, "learning_rate": 8.856365569264462e-07, "loss": 0.01026983, "memory(GiB)": 13.7, "step": 87170, "train_speed(iter/s)": 1.530851 }, { "acc": 0.98604164, "epoch": 40.86008905554254, "grad_norm": 2.0777699947357178, "learning_rate": 8.851963728009848e-07, "loss": 0.04917167, "memory(GiB)": 13.7, "step": 87175, "train_speed(iter/s)": 1.530852 }, { "acc": 0.97354164, "epoch": 40.86243262245137, "grad_norm": 4.810001373291016, "learning_rate": 8.84756287593901e-07, "loss": 0.07341213, "memory(GiB)": 13.7, "step": 87180, "train_speed(iter/s)": 1.530853 }, { "acc": 0.98415184, "epoch": 40.86477618936021, "grad_norm": 3.6550891399383545, "learning_rate": 8.843163013157756e-07, "loss": 0.03571446, "memory(GiB)": 13.7, "step": 87185, "train_speed(iter/s)": 1.530861 }, { "acc": 0.9885417, "epoch": 40.86711975626904, "grad_norm": 4.153494834899902, "learning_rate": 8.838764139771843e-07, "loss": 0.03607975, "memory(GiB)": 13.7, "step": 87190, "train_speed(iter/s)": 1.530865 }, { "acc": 0.98847218, "epoch": 40.869463323177875, "grad_norm": 3.3312816619873047, "learning_rate": 8.834366255886984e-07, "loss": 0.0474063, "memory(GiB)": 13.7, "step": 87195, "train_speed(iter/s)": 1.530861 }, { "acc": 0.99154758, "epoch": 40.87180689008671, "grad_norm": 0.9979879260063171, "learning_rate": 8.829969361608911e-07, "loss": 0.01405493, "memory(GiB)": 13.7, "step": 87200, "train_speed(iter/s)": 1.530866 }, { "acc": 0.98947306, "epoch": 40.874150456995544, "grad_norm": 2.328718423843384, "learning_rate": 8.825573457043313e-07, "loss": 0.04572507, "memory(GiB)": 13.7, "step": 87205, "train_speed(iter/s)": 1.530865 }, { "acc": 0.99231358, "epoch": 40.876494023904385, "grad_norm": 3.090524673461914, "learning_rate": 8.821178542295841e-07, "loss": 0.02107986, "memory(GiB)": 13.7, "step": 87210, "train_speed(iter/s)": 1.530866 }, { "acc": 0.98406248, "epoch": 40.87883759081322, "grad_norm": 1.4624574184417725, "learning_rate": 8.816784617472133e-07, "loss": 0.05101178, "memory(GiB)": 13.7, "step": 87215, "train_speed(iter/s)": 1.530864 }, { "acc": 0.98291674, "epoch": 40.881181157722054, "grad_norm": 4.754756450653076, "learning_rate": 8.812391682677801e-07, "loss": 0.03744252, "memory(GiB)": 13.7, "step": 87220, "train_speed(iter/s)": 1.530862 }, { "acc": 0.9864584, "epoch": 40.88352472463089, "grad_norm": 1.354231595993042, "learning_rate": 8.807999738018447e-07, "loss": 0.0274826, "memory(GiB)": 13.7, "step": 87225, "train_speed(iter/s)": 1.530868 }, { "acc": 0.97305813, "epoch": 40.88586829153972, "grad_norm": 2.251762628555298, "learning_rate": 8.803608783599651e-07, "loss": 0.07072216, "memory(GiB)": 13.7, "step": 87230, "train_speed(iter/s)": 1.530873 }, { "acc": 0.97875004, "epoch": 40.88821185844856, "grad_norm": 1.2717750072479248, "learning_rate": 8.799218819526927e-07, "loss": 0.07162273, "memory(GiB)": 13.7, "step": 87235, "train_speed(iter/s)": 1.530873 }, { "acc": 0.9775095, "epoch": 40.89055542535739, "grad_norm": 5.261366844177246, "learning_rate": 8.794829845905816e-07, "loss": 0.04490845, "memory(GiB)": 13.7, "step": 87240, "train_speed(iter/s)": 1.530886 }, { "acc": 0.9875, "epoch": 40.89289899226623, "grad_norm": 0.005676163826137781, "learning_rate": 8.790441862841819e-07, "loss": 0.03496574, "memory(GiB)": 13.7, "step": 87245, "train_speed(iter/s)": 1.530891 }, { "acc": 0.98288689, "epoch": 40.89524255917507, "grad_norm": 0.013446981087327003, "learning_rate": 8.786054870440397e-07, "loss": 0.02411944, "memory(GiB)": 13.7, "step": 87250, "train_speed(iter/s)": 1.530894 }, { "acc": 0.99333334, "epoch": 40.8975861260839, "grad_norm": 3.5055558681488037, "learning_rate": 8.781668868806993e-07, "loss": 0.02989335, "memory(GiB)": 13.7, "step": 87255, "train_speed(iter/s)": 1.530899 }, { "acc": 0.98019524, "epoch": 40.899929692992735, "grad_norm": 4.322841644287109, "learning_rate": 8.777283858047039e-07, "loss": 0.03972687, "memory(GiB)": 13.7, "step": 87260, "train_speed(iter/s)": 1.530899 }, { "acc": 0.9749855, "epoch": 40.90227325990157, "grad_norm": 1.9900583028793335, "learning_rate": 8.772899838265948e-07, "loss": 0.03545873, "memory(GiB)": 13.7, "step": 87265, "train_speed(iter/s)": 1.530909 }, { "acc": 0.98738098, "epoch": 40.904616826810404, "grad_norm": 2.6888484954833984, "learning_rate": 8.768516809569075e-07, "loss": 0.03533028, "memory(GiB)": 13.7, "step": 87270, "train_speed(iter/s)": 1.53091 }, { "acc": 0.9795929, "epoch": 40.90696039371924, "grad_norm": 1.5153495073318481, "learning_rate": 8.76413477206179e-07, "loss": 0.03914428, "memory(GiB)": 13.7, "step": 87275, "train_speed(iter/s)": 1.530913 }, { "acc": 0.98291664, "epoch": 40.90930396062807, "grad_norm": 1.12969172000885, "learning_rate": 8.759753725849426e-07, "loss": 0.03799006, "memory(GiB)": 13.7, "step": 87280, "train_speed(iter/s)": 1.530915 }, { "acc": 0.98291664, "epoch": 40.911647527536914, "grad_norm": 3.522465705871582, "learning_rate": 8.75537367103727e-07, "loss": 0.02872905, "memory(GiB)": 13.7, "step": 87285, "train_speed(iter/s)": 1.530917 }, { "acc": 0.98015881, "epoch": 40.91399109444575, "grad_norm": 2.627829074859619, "learning_rate": 8.750994607730615e-07, "loss": 0.04100191, "memory(GiB)": 13.7, "step": 87290, "train_speed(iter/s)": 1.530916 }, { "acc": 0.98080368, "epoch": 40.91633466135458, "grad_norm": 2.928647518157959, "learning_rate": 8.746616536034733e-07, "loss": 0.06711596, "memory(GiB)": 13.7, "step": 87295, "train_speed(iter/s)": 1.530917 }, { "acc": 0.97276039, "epoch": 40.91867822826342, "grad_norm": 4.117509365081787, "learning_rate": 8.742239456054832e-07, "loss": 0.05822507, "memory(GiB)": 13.7, "step": 87300, "train_speed(iter/s)": 1.530922 }, { "acc": 0.97904758, "epoch": 40.92102179517225, "grad_norm": 4.472449779510498, "learning_rate": 8.73786336789615e-07, "loss": 0.04823256, "memory(GiB)": 13.7, "step": 87305, "train_speed(iter/s)": 1.530927 }, { "acc": 0.98604164, "epoch": 40.923365362081086, "grad_norm": 4.6184821128845215, "learning_rate": 8.733488271663846e-07, "loss": 0.04752398, "memory(GiB)": 13.7, "step": 87310, "train_speed(iter/s)": 1.530924 }, { "acc": 0.98458328, "epoch": 40.92570892898992, "grad_norm": 2.5455729961395264, "learning_rate": 8.729114167463095e-07, "loss": 0.03658721, "memory(GiB)": 13.7, "step": 87315, "train_speed(iter/s)": 1.530928 }, { "acc": 0.990625, "epoch": 40.92805249589876, "grad_norm": 3.387197494506836, "learning_rate": 8.724741055399053e-07, "loss": 0.02577203, "memory(GiB)": 13.7, "step": 87320, "train_speed(iter/s)": 1.530931 }, { "acc": 0.97946434, "epoch": 40.930396062807596, "grad_norm": 4.311326503753662, "learning_rate": 8.720368935576807e-07, "loss": 0.05384405, "memory(GiB)": 13.7, "step": 87325, "train_speed(iter/s)": 1.530933 }, { "acc": 0.98931541, "epoch": 40.93273962971643, "grad_norm": 4.14035701751709, "learning_rate": 8.715997808101459e-07, "loss": 0.0373589, "memory(GiB)": 13.7, "step": 87330, "train_speed(iter/s)": 1.530936 }, { "acc": 0.9854167, "epoch": 40.935083196625264, "grad_norm": 0.048082221299409866, "learning_rate": 8.711627673078092e-07, "loss": 0.03509182, "memory(GiB)": 13.7, "step": 87335, "train_speed(iter/s)": 1.53094 }, { "acc": 0.98135414, "epoch": 40.9374267635341, "grad_norm": 5.615346908569336, "learning_rate": 8.707258530611738e-07, "loss": 0.04537061, "memory(GiB)": 13.7, "step": 87340, "train_speed(iter/s)": 1.530942 }, { "acc": 0.98361111, "epoch": 40.93977033044293, "grad_norm": 3.9253013134002686, "learning_rate": 8.702890380807399e-07, "loss": 0.03875647, "memory(GiB)": 13.7, "step": 87345, "train_speed(iter/s)": 1.530944 }, { "acc": 0.995644, "epoch": 40.94211389735177, "grad_norm": 3.25046706199646, "learning_rate": 8.698523223770089e-07, "loss": 0.04732516, "memory(GiB)": 13.7, "step": 87350, "train_speed(iter/s)": 1.530942 }, { "acc": 0.98968754, "epoch": 40.9444574642606, "grad_norm": 2.9107229709625244, "learning_rate": 8.694157059604793e-07, "loss": 0.03314494, "memory(GiB)": 13.7, "step": 87355, "train_speed(iter/s)": 1.530941 }, { "acc": 0.9875, "epoch": 40.94680103116944, "grad_norm": 1.5833399295806885, "learning_rate": 8.689791888416433e-07, "loss": 0.04453387, "memory(GiB)": 13.7, "step": 87360, "train_speed(iter/s)": 1.530942 }, { "acc": 0.99125004, "epoch": 40.94914459807828, "grad_norm": 2.317776679992676, "learning_rate": 8.685427710309942e-07, "loss": 0.02541937, "memory(GiB)": 13.7, "step": 87365, "train_speed(iter/s)": 1.530941 }, { "acc": 0.98249998, "epoch": 40.95148816498711, "grad_norm": 2.9142987728118896, "learning_rate": 8.681064525390225e-07, "loss": 0.07264814, "memory(GiB)": 13.7, "step": 87370, "train_speed(iter/s)": 1.530939 }, { "acc": 0.97633934, "epoch": 40.953831731895946, "grad_norm": 2.829676628112793, "learning_rate": 8.676702333762169e-07, "loss": 0.06605688, "memory(GiB)": 13.7, "step": 87375, "train_speed(iter/s)": 1.530941 }, { "acc": 0.99031248, "epoch": 40.95617529880478, "grad_norm": 3.7852001190185547, "learning_rate": 8.672341135530613e-07, "loss": 0.01915202, "memory(GiB)": 13.7, "step": 87380, "train_speed(iter/s)": 1.530943 }, { "acc": 0.98708334, "epoch": 40.958518865713614, "grad_norm": 4.173061847686768, "learning_rate": 8.667980930800377e-07, "loss": 0.03634669, "memory(GiB)": 13.7, "step": 87385, "train_speed(iter/s)": 1.530944 }, { "acc": 0.98023472, "epoch": 40.96086243262245, "grad_norm": 0.0005239786696620286, "learning_rate": 8.663621719676279e-07, "loss": 0.06183493, "memory(GiB)": 13.7, "step": 87390, "train_speed(iter/s)": 1.530943 }, { "acc": 0.98249998, "epoch": 40.96320599953129, "grad_norm": 3.4391398429870605, "learning_rate": 8.659263502263108e-07, "loss": 0.05920637, "memory(GiB)": 13.7, "step": 87395, "train_speed(iter/s)": 1.530944 }, { "acc": 0.98291664, "epoch": 40.965549566440124, "grad_norm": 5.6349592208862305, "learning_rate": 8.654906278665598e-07, "loss": 0.03797519, "memory(GiB)": 13.7, "step": 87400, "train_speed(iter/s)": 1.530952 }, { "acc": 0.97875004, "epoch": 40.96789313334896, "grad_norm": 2.57608699798584, "learning_rate": 8.650550048988497e-07, "loss": 0.04226737, "memory(GiB)": 13.7, "step": 87405, "train_speed(iter/s)": 1.530957 }, { "acc": 0.990625, "epoch": 40.97023670025779, "grad_norm": 2.4336423873901367, "learning_rate": 8.646194813336528e-07, "loss": 0.058993, "memory(GiB)": 13.7, "step": 87410, "train_speed(iter/s)": 1.530956 }, { "acc": 0.996875, "epoch": 40.97258026716663, "grad_norm": 2.961153984069824, "learning_rate": 8.641840571814345e-07, "loss": 0.03989556, "memory(GiB)": 13.7, "step": 87415, "train_speed(iter/s)": 1.530958 }, { "acc": 0.97937508, "epoch": 40.97492383407546, "grad_norm": 4.9626617431640625, "learning_rate": 8.637487324526639e-07, "loss": 0.05768621, "memory(GiB)": 13.7, "step": 87420, "train_speed(iter/s)": 1.530961 }, { "acc": 0.97821426, "epoch": 40.977267400984296, "grad_norm": 4.785940170288086, "learning_rate": 8.633135071578025e-07, "loss": 0.0296781, "memory(GiB)": 13.7, "step": 87425, "train_speed(iter/s)": 1.530961 }, { "acc": 0.98738098, "epoch": 40.97961096789313, "grad_norm": 2.1214964389801025, "learning_rate": 8.628783813073122e-07, "loss": 0.04147218, "memory(GiB)": 13.7, "step": 87430, "train_speed(iter/s)": 1.530958 }, { "acc": 0.98708334, "epoch": 40.98195453480197, "grad_norm": 3.108795642852783, "learning_rate": 8.624433549116541e-07, "loss": 0.04190959, "memory(GiB)": 13.7, "step": 87435, "train_speed(iter/s)": 1.530959 }, { "acc": 0.98395824, "epoch": 40.984298101710806, "grad_norm": 0.0013301685685291886, "learning_rate": 8.62008427981282e-07, "loss": 0.0316954, "memory(GiB)": 13.7, "step": 87440, "train_speed(iter/s)": 1.530961 }, { "acc": 0.98755178, "epoch": 40.98664166861964, "grad_norm": 6.624988555908203, "learning_rate": 8.615736005266511e-07, "loss": 0.05939505, "memory(GiB)": 13.7, "step": 87445, "train_speed(iter/s)": 1.530962 }, { "acc": 0.98467264, "epoch": 40.988985235528475, "grad_norm": 5.062816143035889, "learning_rate": 8.611388725582146e-07, "loss": 0.03620395, "memory(GiB)": 13.7, "step": 87450, "train_speed(iter/s)": 1.530964 }, { "acc": 0.98222218, "epoch": 40.99132880243731, "grad_norm": 5.1222052574157715, "learning_rate": 8.607042440864194e-07, "loss": 0.03047458, "memory(GiB)": 13.7, "step": 87455, "train_speed(iter/s)": 1.530966 }, { "acc": 0.98125, "epoch": 40.99367236934614, "grad_norm": 4.094875335693359, "learning_rate": 8.602697151217152e-07, "loss": 0.04353876, "memory(GiB)": 13.7, "step": 87460, "train_speed(iter/s)": 1.530973 }, { "acc": 0.99126987, "epoch": 40.99601593625498, "grad_norm": 0.001631330233067274, "learning_rate": 8.598352856745448e-07, "loss": 0.07361594, "memory(GiB)": 13.7, "step": 87465, "train_speed(iter/s)": 1.530977 }, { "acc": 0.97895832, "epoch": 40.99835950316381, "grad_norm": 4.941854953765869, "learning_rate": 8.594009557553512e-07, "loss": 0.05353755, "memory(GiB)": 13.7, "step": 87470, "train_speed(iter/s)": 1.530981 }, { "acc": 0.98696432, "epoch": 41.00070307007265, "grad_norm": 3.665192127227783, "learning_rate": 8.589667253745733e-07, "loss": 0.04690406, "memory(GiB)": 13.7, "step": 87475, "train_speed(iter/s)": 1.530972 }, { "acc": 0.99125004, "epoch": 41.00304663698149, "grad_norm": 8.002337455749512, "learning_rate": 8.585325945426496e-07, "loss": 0.05038138, "memory(GiB)": 13.7, "step": 87480, "train_speed(iter/s)": 1.530973 }, { "acc": 0.9942482, "epoch": 41.00539020389032, "grad_norm": 2.379206657409668, "learning_rate": 8.580985632700159e-07, "loss": 0.01497665, "memory(GiB)": 13.7, "step": 87485, "train_speed(iter/s)": 1.530977 }, { "acc": 0.98599205, "epoch": 41.007733770799156, "grad_norm": 3.3077752590179443, "learning_rate": 8.576646315671025e-07, "loss": 0.04668095, "memory(GiB)": 13.7, "step": 87490, "train_speed(iter/s)": 1.530977 }, { "acc": 0.98500004, "epoch": 41.01007733770799, "grad_norm": 4.60191011428833, "learning_rate": 8.572307994443412e-07, "loss": 0.02062105, "memory(GiB)": 13.7, "step": 87495, "train_speed(iter/s)": 1.530978 }, { "acc": 0.97875004, "epoch": 41.012420904616825, "grad_norm": 6.4262847900390625, "learning_rate": 8.567970669121611e-07, "loss": 0.06173567, "memory(GiB)": 13.7, "step": 87500, "train_speed(iter/s)": 1.530983 }, { "acc": 0.98071423, "epoch": 41.01476447152566, "grad_norm": 0.008618430234491825, "learning_rate": 8.563634339809849e-07, "loss": 0.03598907, "memory(GiB)": 13.7, "step": 87505, "train_speed(iter/s)": 1.530989 }, { "acc": 0.98916664, "epoch": 41.0171080384345, "grad_norm": 3.4464683532714844, "learning_rate": 8.559299006612389e-07, "loss": 0.05019328, "memory(GiB)": 13.7, "step": 87510, "train_speed(iter/s)": 1.530991 }, { "acc": 0.99040184, "epoch": 41.019451605343335, "grad_norm": 1.7978029251098633, "learning_rate": 8.554964669633402e-07, "loss": 0.02283818, "memory(GiB)": 13.7, "step": 87515, "train_speed(iter/s)": 1.530993 }, { "acc": 0.98708334, "epoch": 41.02179517225217, "grad_norm": 0.5244471430778503, "learning_rate": 8.550631328977095e-07, "loss": 0.04102032, "memory(GiB)": 13.7, "step": 87520, "train_speed(iter/s)": 1.530996 }, { "acc": 0.9927084, "epoch": 41.024138739161, "grad_norm": 2.7335221767425537, "learning_rate": 8.546298984747635e-07, "loss": 0.01970652, "memory(GiB)": 13.7, "step": 87525, "train_speed(iter/s)": 1.531002 }, { "acc": 0.9910511, "epoch": 41.02648230606984, "grad_norm": 1.0320578813552856, "learning_rate": 8.54196763704913e-07, "loss": 0.03110749, "memory(GiB)": 13.7, "step": 87530, "train_speed(iter/s)": 1.531005 }, { "acc": 0.99258928, "epoch": 41.02882587297867, "grad_norm": 3.2094838619232178, "learning_rate": 8.537637285985708e-07, "loss": 0.04938869, "memory(GiB)": 13.7, "step": 87535, "train_speed(iter/s)": 1.531007 }, { "acc": 0.99020834, "epoch": 41.031169439887506, "grad_norm": 0.8617555499076843, "learning_rate": 8.533307931661468e-07, "loss": 0.02217973, "memory(GiB)": 13.7, "step": 87540, "train_speed(iter/s)": 1.531009 }, { "acc": 0.99499998, "epoch": 41.03351300679634, "grad_norm": 3.203545570373535, "learning_rate": 8.528979574180461e-07, "loss": 0.04185545, "memory(GiB)": 13.7, "step": 87545, "train_speed(iter/s)": 1.531007 }, { "acc": 0.98321428, "epoch": 41.03585657370518, "grad_norm": 1.0546578168869019, "learning_rate": 8.524652213646712e-07, "loss": 0.02366095, "memory(GiB)": 13.7, "step": 87550, "train_speed(iter/s)": 1.531008 }, { "acc": 0.98708334, "epoch": 41.038200140614016, "grad_norm": 2.660802125930786, "learning_rate": 8.520325850164249e-07, "loss": 0.03168533, "memory(GiB)": 13.7, "step": 87555, "train_speed(iter/s)": 1.53101 }, { "acc": 0.98883934, "epoch": 41.04054370752285, "grad_norm": 4.184354305267334, "learning_rate": 8.516000483837084e-07, "loss": 0.04458408, "memory(GiB)": 13.7, "step": 87560, "train_speed(iter/s)": 1.531011 }, { "acc": 0.98681545, "epoch": 41.042887274431685, "grad_norm": 1.6340571641921997, "learning_rate": 8.511676114769143e-07, "loss": 0.02945527, "memory(GiB)": 13.7, "step": 87565, "train_speed(iter/s)": 1.531016 }, { "acc": 0.99125004, "epoch": 41.04523084134052, "grad_norm": 1.80570387840271, "learning_rate": 8.507352743064397e-07, "loss": 0.02436866, "memory(GiB)": 13.7, "step": 87570, "train_speed(iter/s)": 1.531016 }, { "acc": 0.99125004, "epoch": 41.04757440824935, "grad_norm": 0.9968839883804321, "learning_rate": 8.503030368826761e-07, "loss": 0.0342404, "memory(GiB)": 13.7, "step": 87575, "train_speed(iter/s)": 1.531022 }, { "acc": 0.9794445, "epoch": 41.04991797515819, "grad_norm": 1.5951555967330933, "learning_rate": 8.49870899216014e-07, "loss": 0.03417935, "memory(GiB)": 13.7, "step": 87580, "train_speed(iter/s)": 1.531024 }, { "acc": 0.994697, "epoch": 41.05226154206703, "grad_norm": 1.3524243831634521, "learning_rate": 8.494388613168403e-07, "loss": 0.01985273, "memory(GiB)": 13.7, "step": 87585, "train_speed(iter/s)": 1.531023 }, { "acc": 0.99334679, "epoch": 41.05460510897586, "grad_norm": 2.221632242202759, "learning_rate": 8.49006923195537e-07, "loss": 0.02192875, "memory(GiB)": 13.7, "step": 87590, "train_speed(iter/s)": 1.531024 }, { "acc": 0.9791667, "epoch": 41.0569486758847, "grad_norm": 6.511501789093018, "learning_rate": 8.485750848624889e-07, "loss": 0.05822179, "memory(GiB)": 13.7, "step": 87595, "train_speed(iter/s)": 1.531028 }, { "acc": 0.98932533, "epoch": 41.05929224279353, "grad_norm": 5.8206000328063965, "learning_rate": 8.481433463280769e-07, "loss": 0.03575118, "memory(GiB)": 13.7, "step": 87600, "train_speed(iter/s)": 1.531027 }, { "acc": 0.98588715, "epoch": 41.061635809702366, "grad_norm": 0.7718902826309204, "learning_rate": 8.477117076026763e-07, "loss": 0.04369335, "memory(GiB)": 13.7, "step": 87605, "train_speed(iter/s)": 1.53103 }, { "acc": 0.98253851, "epoch": 41.0639793766112, "grad_norm": 4.779210090637207, "learning_rate": 8.47280168696663e-07, "loss": 0.04117589, "memory(GiB)": 13.7, "step": 87610, "train_speed(iter/s)": 1.531036 }, { "acc": 0.96312504, "epoch": 41.066322943520035, "grad_norm": 8.318618774414062, "learning_rate": 8.468487296204114e-07, "loss": 0.08177953, "memory(GiB)": 13.7, "step": 87615, "train_speed(iter/s)": 1.531034 }, { "acc": 0.9885416, "epoch": 41.06866651042887, "grad_norm": 1.641242265701294, "learning_rate": 8.464173903842893e-07, "loss": 0.041168, "memory(GiB)": 13.7, "step": 87620, "train_speed(iter/s)": 1.531033 }, { "acc": 0.99011364, "epoch": 41.07101007733771, "grad_norm": 3.379300594329834, "learning_rate": 8.459861509986654e-07, "loss": 0.02569451, "memory(GiB)": 13.7, "step": 87625, "train_speed(iter/s)": 1.531035 }, { "acc": 0.97749996, "epoch": 41.073353644246545, "grad_norm": 3.8557448387145996, "learning_rate": 8.455550114739078e-07, "loss": 0.03977733, "memory(GiB)": 13.7, "step": 87630, "train_speed(iter/s)": 1.531036 }, { "acc": 0.99065475, "epoch": 41.07569721115538, "grad_norm": 0.005167963448911905, "learning_rate": 8.451239718203774e-07, "loss": 0.02642405, "memory(GiB)": 13.7, "step": 87635, "train_speed(iter/s)": 1.531037 }, { "acc": 0.98755684, "epoch": 41.078040778064214, "grad_norm": 1.3069398403167725, "learning_rate": 8.44693032048434e-07, "loss": 0.03833982, "memory(GiB)": 13.7, "step": 87640, "train_speed(iter/s)": 1.531031 }, { "acc": 0.98132439, "epoch": 41.08038434497305, "grad_norm": 3.5503969192504883, "learning_rate": 8.442621921684372e-07, "loss": 0.0437214, "memory(GiB)": 13.7, "step": 87645, "train_speed(iter/s)": 1.531029 }, { "acc": 0.99437504, "epoch": 41.08272791188188, "grad_norm": 2.9047040939331055, "learning_rate": 8.438314521907427e-07, "loss": 0.02331439, "memory(GiB)": 13.7, "step": 87650, "train_speed(iter/s)": 1.531024 }, { "acc": 0.98812504, "epoch": 41.08507147879072, "grad_norm": 4.8446574211120605, "learning_rate": 8.434008121257061e-07, "loss": 0.02664644, "memory(GiB)": 13.7, "step": 87655, "train_speed(iter/s)": 1.531018 }, { "acc": 0.99100876, "epoch": 41.08741504569956, "grad_norm": 2.2324039936065674, "learning_rate": 8.429702719836756e-07, "loss": 0.02384677, "memory(GiB)": 13.7, "step": 87660, "train_speed(iter/s)": 1.531019 }, { "acc": 0.98500004, "epoch": 41.08975861260839, "grad_norm": 2.4784748554229736, "learning_rate": 8.425398317750007e-07, "loss": 0.03821645, "memory(GiB)": 13.7, "step": 87665, "train_speed(iter/s)": 1.531017 }, { "acc": 0.97238102, "epoch": 41.09210217951723, "grad_norm": 4.61226224899292, "learning_rate": 8.421094915100302e-07, "loss": 0.04437422, "memory(GiB)": 13.7, "step": 87670, "train_speed(iter/s)": 1.531019 }, { "acc": 0.99090271, "epoch": 41.09444574642606, "grad_norm": 3.427591323852539, "learning_rate": 8.416792511991056e-07, "loss": 0.02737788, "memory(GiB)": 13.7, "step": 87675, "train_speed(iter/s)": 1.531017 }, { "acc": 0.99653845, "epoch": 41.096789313334895, "grad_norm": 0.00038313938421197236, "learning_rate": 8.412491108525681e-07, "loss": 0.02166238, "memory(GiB)": 13.7, "step": 87680, "train_speed(iter/s)": 1.531015 }, { "acc": 0.98154764, "epoch": 41.09913288024373, "grad_norm": 1.1793136596679688, "learning_rate": 8.408190704807577e-07, "loss": 0.03548627, "memory(GiB)": 13.7, "step": 87685, "train_speed(iter/s)": 1.531016 }, { "acc": 0.975, "epoch": 41.101476447152564, "grad_norm": 4.657283306121826, "learning_rate": 8.403891300940122e-07, "loss": 0.0309295, "memory(GiB)": 13.7, "step": 87690, "train_speed(iter/s)": 1.531015 }, { "acc": 0.98770294, "epoch": 41.1038200140614, "grad_norm": 3.1787936687469482, "learning_rate": 8.39959289702664e-07, "loss": 0.03394671, "memory(GiB)": 13.7, "step": 87695, "train_speed(iter/s)": 1.531019 }, { "acc": 0.99437504, "epoch": 41.10616358097024, "grad_norm": 0.06358414143323898, "learning_rate": 8.395295493170459e-07, "loss": 0.04408849, "memory(GiB)": 13.7, "step": 87700, "train_speed(iter/s)": 1.531025 }, { "acc": 0.9770834, "epoch": 41.108507147879074, "grad_norm": 3.4548733234405518, "learning_rate": 8.39099908947489e-07, "loss": 0.04901687, "memory(GiB)": 13.7, "step": 87705, "train_speed(iter/s)": 1.531025 }, { "acc": 0.9802083, "epoch": 41.11085071478791, "grad_norm": 3.400562047958374, "learning_rate": 8.38670368604318e-07, "loss": 0.03327314, "memory(GiB)": 13.7, "step": 87710, "train_speed(iter/s)": 1.53103 }, { "acc": 0.9729166, "epoch": 41.11319428169674, "grad_norm": 6.610326766967773, "learning_rate": 8.382409282978593e-07, "loss": 0.04574378, "memory(GiB)": 13.7, "step": 87715, "train_speed(iter/s)": 1.531031 }, { "acc": 0.9864584, "epoch": 41.11553784860558, "grad_norm": 2.577434539794922, "learning_rate": 8.378115880384339e-07, "loss": 0.02868567, "memory(GiB)": 13.7, "step": 87720, "train_speed(iter/s)": 1.53103 }, { "acc": 0.99125004, "epoch": 41.11788141551441, "grad_norm": 4.298421859741211, "learning_rate": 8.37382347836362e-07, "loss": 0.0505886, "memory(GiB)": 13.7, "step": 87725, "train_speed(iter/s)": 1.531033 }, { "acc": 0.99333334, "epoch": 41.120224982423245, "grad_norm": 4.279569149017334, "learning_rate": 8.369532077019631e-07, "loss": 0.03593925, "memory(GiB)": 13.7, "step": 87730, "train_speed(iter/s)": 1.531033 }, { "acc": 0.98495464, "epoch": 41.12256854933209, "grad_norm": 0.0013082667719572783, "learning_rate": 8.365241676455499e-07, "loss": 0.03827634, "memory(GiB)": 13.7, "step": 87735, "train_speed(iter/s)": 1.531035 }, { "acc": 0.97983627, "epoch": 41.12491211624092, "grad_norm": 3.34661602973938, "learning_rate": 8.360952276774353e-07, "loss": 0.05143911, "memory(GiB)": 13.7, "step": 87740, "train_speed(iter/s)": 1.531037 }, { "acc": 0.98500004, "epoch": 41.127255683149755, "grad_norm": 2.765542984008789, "learning_rate": 8.35666387807932e-07, "loss": 0.05164181, "memory(GiB)": 13.7, "step": 87745, "train_speed(iter/s)": 1.531037 }, { "acc": 0.9840476, "epoch": 41.12959925005859, "grad_norm": 2.370102882385254, "learning_rate": 8.352376480473449e-07, "loss": 0.03471271, "memory(GiB)": 13.7, "step": 87750, "train_speed(iter/s)": 1.531032 }, { "acc": 0.96342106, "epoch": 41.131942816967424, "grad_norm": 4.945807933807373, "learning_rate": 8.348090084059812e-07, "loss": 0.06035027, "memory(GiB)": 13.7, "step": 87755, "train_speed(iter/s)": 1.531036 }, { "acc": 0.98562498, "epoch": 41.13428638387626, "grad_norm": 2.1823158264160156, "learning_rate": 8.343804688941433e-07, "loss": 0.03120867, "memory(GiB)": 13.7, "step": 87760, "train_speed(iter/s)": 1.531042 }, { "acc": 0.9854166, "epoch": 41.13662995078509, "grad_norm": 1.3107645511627197, "learning_rate": 8.339520295221331e-07, "loss": 0.02786895, "memory(GiB)": 13.7, "step": 87765, "train_speed(iter/s)": 1.531039 }, { "acc": 0.98083324, "epoch": 41.13897351769393, "grad_norm": 4.937810897827148, "learning_rate": 8.335236903002465e-07, "loss": 0.03327922, "memory(GiB)": 13.7, "step": 87770, "train_speed(iter/s)": 1.531042 }, { "acc": 0.9947917, "epoch": 41.14131708460277, "grad_norm": 0.08280780166387558, "learning_rate": 8.330954512387811e-07, "loss": 0.01765087, "memory(GiB)": 13.7, "step": 87775, "train_speed(iter/s)": 1.531041 }, { "acc": 0.98995533, "epoch": 41.1436606515116, "grad_norm": 2.2949631214141846, "learning_rate": 8.326673123480316e-07, "loss": 0.03370599, "memory(GiB)": 13.7, "step": 87780, "train_speed(iter/s)": 1.531043 }, { "acc": 0.97999992, "epoch": 41.14600421842044, "grad_norm": 0.0010911126155406237, "learning_rate": 8.32239273638286e-07, "loss": 0.03377256, "memory(GiB)": 13.7, "step": 87785, "train_speed(iter/s)": 1.531045 }, { "acc": 0.97739582, "epoch": 41.14834778532927, "grad_norm": 2.540990114212036, "learning_rate": 8.318113351198339e-07, "loss": 0.04936567, "memory(GiB)": 13.7, "step": 87790, "train_speed(iter/s)": 1.53105 }, { "acc": 0.98604164, "epoch": 41.150691352238105, "grad_norm": 2.4267725944519043, "learning_rate": 8.313834968029638e-07, "loss": 0.02717065, "memory(GiB)": 13.7, "step": 87795, "train_speed(iter/s)": 1.531049 }, { "acc": 0.98217258, "epoch": 41.15303491914694, "grad_norm": 2.618528127670288, "learning_rate": 8.309557586979566e-07, "loss": 0.0313141, "memory(GiB)": 13.7, "step": 87800, "train_speed(iter/s)": 1.531049 }, { "acc": 0.99437504, "epoch": 41.155378486055774, "grad_norm": 2.365701913833618, "learning_rate": 8.305281208150966e-07, "loss": 0.01937188, "memory(GiB)": 13.7, "step": 87805, "train_speed(iter/s)": 1.531048 }, { "acc": 0.98758011, "epoch": 41.157722052964616, "grad_norm": 2.137967109680176, "learning_rate": 8.301005831646594e-07, "loss": 0.04047357, "memory(GiB)": 13.7, "step": 87810, "train_speed(iter/s)": 1.531048 }, { "acc": 0.98239584, "epoch": 41.16006561987345, "grad_norm": 3.9482650756835938, "learning_rate": 8.296731457569236e-07, "loss": 0.06373571, "memory(GiB)": 13.7, "step": 87815, "train_speed(iter/s)": 1.531045 }, { "acc": 0.99333334, "epoch": 41.162409186782284, "grad_norm": 3.1909375190734863, "learning_rate": 8.292458086021639e-07, "loss": 0.03503253, "memory(GiB)": 13.7, "step": 87820, "train_speed(iter/s)": 1.531042 }, { "acc": 0.99125004, "epoch": 41.16475275369112, "grad_norm": 2.705366611480713, "learning_rate": 8.288185717106505e-07, "loss": 0.01681978, "memory(GiB)": 13.7, "step": 87825, "train_speed(iter/s)": 1.531039 }, { "acc": 0.9854167, "epoch": 41.16709632059995, "grad_norm": 0.0014493126654997468, "learning_rate": 8.283914350926535e-07, "loss": 0.02151531, "memory(GiB)": 13.7, "step": 87830, "train_speed(iter/s)": 1.53104 }, { "acc": 0.98488102, "epoch": 41.16943988750879, "grad_norm": 3.8824267387390137, "learning_rate": 8.279643987584414e-07, "loss": 0.03756261, "memory(GiB)": 13.7, "step": 87835, "train_speed(iter/s)": 1.531045 }, { "acc": 0.98145828, "epoch": 41.17178345441762, "grad_norm": 2.5574734210968018, "learning_rate": 8.275374627182773e-07, "loss": 0.08205481, "memory(GiB)": 13.7, "step": 87840, "train_speed(iter/s)": 1.531049 }, { "acc": 0.9888195, "epoch": 41.174127021326456, "grad_norm": 3.2567481994628906, "learning_rate": 8.271106269824219e-07, "loss": 0.0299006, "memory(GiB)": 13.7, "step": 87845, "train_speed(iter/s)": 1.531049 }, { "acc": 0.97625008, "epoch": 41.1764705882353, "grad_norm": 0.8803238272666931, "learning_rate": 8.266838915611361e-07, "loss": 0.02966112, "memory(GiB)": 13.7, "step": 87850, "train_speed(iter/s)": 1.531046 }, { "acc": 0.97624998, "epoch": 41.17881415514413, "grad_norm": 4.101651668548584, "learning_rate": 8.262572564646778e-07, "loss": 0.03561836, "memory(GiB)": 13.7, "step": 87855, "train_speed(iter/s)": 1.53105 }, { "acc": 0.98733139, "epoch": 41.181157722052966, "grad_norm": 4.137496471405029, "learning_rate": 8.258307217033034e-07, "loss": 0.05825933, "memory(GiB)": 13.7, "step": 87860, "train_speed(iter/s)": 1.531052 }, { "acc": 0.97781258, "epoch": 41.1835012889618, "grad_norm": 4.691808223724365, "learning_rate": 8.254042872872616e-07, "loss": 0.05374932, "memory(GiB)": 13.7, "step": 87865, "train_speed(iter/s)": 1.53106 }, { "acc": 0.98152781, "epoch": 41.185844855870634, "grad_norm": 4.6935200691223145, "learning_rate": 8.249779532268048e-07, "loss": 0.04060803, "memory(GiB)": 13.7, "step": 87870, "train_speed(iter/s)": 1.531062 }, { "acc": 0.9864584, "epoch": 41.18818842277947, "grad_norm": 2.441805601119995, "learning_rate": 8.245517195321815e-07, "loss": 0.01756758, "memory(GiB)": 13.7, "step": 87875, "train_speed(iter/s)": 1.531063 }, { "acc": 1.0, "epoch": 41.1905319896883, "grad_norm": 1.7398719787597656, "learning_rate": 8.241255862136355e-07, "loss": 0.00785382, "memory(GiB)": 13.7, "step": 87880, "train_speed(iter/s)": 1.531063 }, { "acc": 0.990625, "epoch": 41.192875556597144, "grad_norm": 3.5844831466674805, "learning_rate": 8.236995532814095e-07, "loss": 0.0239881, "memory(GiB)": 13.7, "step": 87885, "train_speed(iter/s)": 1.531065 }, { "acc": 0.99196434, "epoch": 41.19521912350598, "grad_norm": 2.4592819213867188, "learning_rate": 8.232736207457435e-07, "loss": 0.02365793, "memory(GiB)": 13.7, "step": 87890, "train_speed(iter/s)": 1.531063 }, { "acc": 0.99300594, "epoch": 41.19756269041481, "grad_norm": 0.9035905599594116, "learning_rate": 8.22847788616878e-07, "loss": 0.02938371, "memory(GiB)": 13.7, "step": 87895, "train_speed(iter/s)": 1.53106 }, { "acc": 0.98500004, "epoch": 41.19990625732365, "grad_norm": 2.3990695476531982, "learning_rate": 8.224220569050459e-07, "loss": 0.02943197, "memory(GiB)": 13.7, "step": 87900, "train_speed(iter/s)": 1.531059 }, { "acc": 0.99020834, "epoch": 41.20224982423248, "grad_norm": 6.522613048553467, "learning_rate": 8.219964256204813e-07, "loss": 0.02343227, "memory(GiB)": 13.7, "step": 87905, "train_speed(iter/s)": 1.53106 }, { "acc": 0.98454857, "epoch": 41.204593391141316, "grad_norm": 1.5773104429244995, "learning_rate": 8.215708947734167e-07, "loss": 0.05380172, "memory(GiB)": 13.7, "step": 87910, "train_speed(iter/s)": 1.531064 }, { "acc": 0.97762499, "epoch": 41.20693695805015, "grad_norm": 1.1171587705612183, "learning_rate": 8.211454643740778e-07, "loss": 0.05463178, "memory(GiB)": 13.7, "step": 87915, "train_speed(iter/s)": 1.531065 }, { "acc": 0.98875008, "epoch": 41.209280524958984, "grad_norm": 3.406303882598877, "learning_rate": 8.207201344326926e-07, "loss": 0.0159955, "memory(GiB)": 13.7, "step": 87920, "train_speed(iter/s)": 1.531067 }, { "acc": 0.98602886, "epoch": 41.211624091867826, "grad_norm": 0.004760262556374073, "learning_rate": 8.202949049594828e-07, "loss": 0.03961686, "memory(GiB)": 13.7, "step": 87925, "train_speed(iter/s)": 1.531065 }, { "acc": 0.97354164, "epoch": 41.21396765877666, "grad_norm": 2.890871047973633, "learning_rate": 8.198697759646699e-07, "loss": 0.03749859, "memory(GiB)": 13.7, "step": 87930, "train_speed(iter/s)": 1.531066 }, { "acc": 0.99125004, "epoch": 41.216311225685494, "grad_norm": 0.9193974137306213, "learning_rate": 8.194447474584747e-07, "loss": 0.0178389, "memory(GiB)": 13.7, "step": 87935, "train_speed(iter/s)": 1.53107 }, { "acc": 0.98550596, "epoch": 41.21865479259433, "grad_norm": 2.6702427864074707, "learning_rate": 8.190198194511106e-07, "loss": 0.02918104, "memory(GiB)": 13.7, "step": 87940, "train_speed(iter/s)": 1.531069 }, { "acc": 0.98645201, "epoch": 41.22099835950316, "grad_norm": 1.6869057416915894, "learning_rate": 8.185949919527932e-07, "loss": 0.04031045, "memory(GiB)": 13.7, "step": 87945, "train_speed(iter/s)": 1.531074 }, { "acc": 0.9854167, "epoch": 41.223341926412, "grad_norm": 3.479445695877075, "learning_rate": 8.181702649737342e-07, "loss": 0.02501096, "memory(GiB)": 13.7, "step": 87950, "train_speed(iter/s)": 1.531076 }, { "acc": 0.9848011, "epoch": 41.22568549332083, "grad_norm": 0.9576112031936646, "learning_rate": 8.17745638524141e-07, "loss": 0.03005642, "memory(GiB)": 13.7, "step": 87955, "train_speed(iter/s)": 1.531078 }, { "acc": 0.98071423, "epoch": 41.22802906022967, "grad_norm": 5.44652795791626, "learning_rate": 8.173211126142212e-07, "loss": 0.03264374, "memory(GiB)": 13.7, "step": 87960, "train_speed(iter/s)": 1.53108 }, { "acc": 0.99624996, "epoch": 41.23037262713851, "grad_norm": 2.3700623512268066, "learning_rate": 8.16896687254181e-07, "loss": 0.03504789, "memory(GiB)": 13.7, "step": 87965, "train_speed(iter/s)": 1.531082 }, { "acc": 0.987955, "epoch": 41.23271619404734, "grad_norm": 0.00882594846189022, "learning_rate": 8.164723624542197e-07, "loss": 0.02781224, "memory(GiB)": 13.7, "step": 87970, "train_speed(iter/s)": 1.531083 }, { "acc": 0.99154758, "epoch": 41.235059760956176, "grad_norm": 2.176133155822754, "learning_rate": 8.160481382245355e-07, "loss": 0.03537769, "memory(GiB)": 13.7, "step": 87975, "train_speed(iter/s)": 1.531085 }, { "acc": 0.9875, "epoch": 41.23740332786501, "grad_norm": 2.242006778717041, "learning_rate": 8.156240145753277e-07, "loss": 0.01722867, "memory(GiB)": 13.7, "step": 87980, "train_speed(iter/s)": 1.531089 }, { "acc": 0.99196434, "epoch": 41.239746894773845, "grad_norm": 0.37168869376182556, "learning_rate": 8.151999915167909e-07, "loss": 0.01182541, "memory(GiB)": 13.7, "step": 87985, "train_speed(iter/s)": 1.531092 }, { "acc": 0.9770834, "epoch": 41.24209046168268, "grad_norm": 3.0782670974731445, "learning_rate": 8.147760690591159e-07, "loss": 0.03749348, "memory(GiB)": 13.7, "step": 87990, "train_speed(iter/s)": 1.531094 }, { "acc": 0.9819643, "epoch": 41.24443402859151, "grad_norm": 3.3227593898773193, "learning_rate": 8.14352247212493e-07, "loss": 0.03870852, "memory(GiB)": 13.7, "step": 87995, "train_speed(iter/s)": 1.531099 }, { "acc": 0.98666668, "epoch": 41.246777595500355, "grad_norm": 0.03673994541168213, "learning_rate": 8.139285259871092e-07, "loss": 0.02295073, "memory(GiB)": 13.7, "step": 88000, "train_speed(iter/s)": 1.531098 }, { "acc": 0.98968754, "epoch": 41.24912116240919, "grad_norm": 2.431539297103882, "learning_rate": 8.135049053931508e-07, "loss": 0.01879092, "memory(GiB)": 13.7, "step": 88005, "train_speed(iter/s)": 1.531102 }, { "acc": 0.98363094, "epoch": 41.25146472931802, "grad_norm": 2.425715684890747, "learning_rate": 8.130813854407992e-07, "loss": 0.02986068, "memory(GiB)": 13.7, "step": 88010, "train_speed(iter/s)": 1.531109 }, { "acc": 0.99229164, "epoch": 41.25380829622686, "grad_norm": 5.262979030609131, "learning_rate": 8.126579661402331e-07, "loss": 0.04142394, "memory(GiB)": 13.7, "step": 88015, "train_speed(iter/s)": 1.531106 }, { "acc": 0.99092255, "epoch": 41.25615186313569, "grad_norm": 2.8291523456573486, "learning_rate": 8.122346475016316e-07, "loss": 0.01701888, "memory(GiB)": 13.7, "step": 88020, "train_speed(iter/s)": 1.531108 }, { "acc": 0.97052078, "epoch": 41.258495430044526, "grad_norm": 4.145157814025879, "learning_rate": 8.118114295351711e-07, "loss": 0.05215285, "memory(GiB)": 13.7, "step": 88025, "train_speed(iter/s)": 1.531116 }, { "acc": 0.99611111, "epoch": 41.26083899695336, "grad_norm": 3.534475803375244, "learning_rate": 8.113883122510213e-07, "loss": 0.03443456, "memory(GiB)": 13.7, "step": 88030, "train_speed(iter/s)": 1.531123 }, { "acc": 0.99187498, "epoch": 41.2631825638622, "grad_norm": 2.0396976470947266, "learning_rate": 8.109652956593543e-07, "loss": 0.0242959, "memory(GiB)": 13.7, "step": 88035, "train_speed(iter/s)": 1.531128 }, { "acc": 0.99281254, "epoch": 41.265526130771036, "grad_norm": 2.363016128540039, "learning_rate": 8.105423797703392e-07, "loss": 0.0213975, "memory(GiB)": 13.7, "step": 88040, "train_speed(iter/s)": 1.531135 }, { "acc": 0.9854167, "epoch": 41.26786969767987, "grad_norm": 0.0020548594184219837, "learning_rate": 8.101195645941385e-07, "loss": 0.0346239, "memory(GiB)": 13.7, "step": 88045, "train_speed(iter/s)": 1.531133 }, { "acc": 0.96447306, "epoch": 41.270213264588705, "grad_norm": 3.34423828125, "learning_rate": 8.096968501409186e-07, "loss": 0.08104292, "memory(GiB)": 13.7, "step": 88050, "train_speed(iter/s)": 1.531133 }, { "acc": 0.99437504, "epoch": 41.27255683149754, "grad_norm": 3.3394622802734375, "learning_rate": 8.092742364208375e-07, "loss": 0.01993455, "memory(GiB)": 13.7, "step": 88055, "train_speed(iter/s)": 1.531133 }, { "acc": 0.98332729, "epoch": 41.27490039840637, "grad_norm": 0.8680428266525269, "learning_rate": 8.088517234440557e-07, "loss": 0.0380769, "memory(GiB)": 13.7, "step": 88060, "train_speed(iter/s)": 1.531134 }, { "acc": 0.98468742, "epoch": 41.27724396531521, "grad_norm": 3.8236889839172363, "learning_rate": 8.084293112207262e-07, "loss": 0.04021342, "memory(GiB)": 13.7, "step": 88065, "train_speed(iter/s)": 1.531136 }, { "acc": 0.98874998, "epoch": 41.27958753222404, "grad_norm": 3.98883318901062, "learning_rate": 8.080069997610042e-07, "loss": 0.11097808, "memory(GiB)": 13.7, "step": 88070, "train_speed(iter/s)": 1.531141 }, { "acc": 0.97828093, "epoch": 41.28193109913288, "grad_norm": 1.3116387128829956, "learning_rate": 8.075847890750408e-07, "loss": 0.02873364, "memory(GiB)": 13.7, "step": 88075, "train_speed(iter/s)": 1.531147 }, { "acc": 0.98571434, "epoch": 41.28427466604172, "grad_norm": 1.2263811826705933, "learning_rate": 8.071626791729854e-07, "loss": 0.03737393, "memory(GiB)": 13.7, "step": 88080, "train_speed(iter/s)": 1.531149 }, { "acc": 0.98217258, "epoch": 41.28661823295055, "grad_norm": 3.890651226043701, "learning_rate": 8.06740670064982e-07, "loss": 0.0308419, "memory(GiB)": 13.7, "step": 88085, "train_speed(iter/s)": 1.531146 }, { "acc": 0.9863636, "epoch": 41.288961799859386, "grad_norm": 1.9590531587600708, "learning_rate": 8.063187617611762e-07, "loss": 0.04114074, "memory(GiB)": 13.7, "step": 88090, "train_speed(iter/s)": 1.531148 }, { "acc": 0.97612171, "epoch": 41.29130536676822, "grad_norm": 0.005908551625907421, "learning_rate": 8.058969542717074e-07, "loss": 0.05762404, "memory(GiB)": 13.7, "step": 88095, "train_speed(iter/s)": 1.53115 }, { "acc": 0.9777462, "epoch": 41.293648933677055, "grad_norm": 4.102040767669678, "learning_rate": 8.054752476067164e-07, "loss": 0.06538371, "memory(GiB)": 13.7, "step": 88100, "train_speed(iter/s)": 1.531152 }, { "acc": 0.97145834, "epoch": 41.29599250058589, "grad_norm": 5.988905906677246, "learning_rate": 8.050536417763372e-07, "loss": 0.05490416, "memory(GiB)": 13.7, "step": 88105, "train_speed(iter/s)": 1.531156 }, { "acc": 0.97514877, "epoch": 41.29833606749472, "grad_norm": 5.214165210723877, "learning_rate": 8.046321367907062e-07, "loss": 0.05873033, "memory(GiB)": 13.7, "step": 88110, "train_speed(iter/s)": 1.531158 }, { "acc": 0.97488098, "epoch": 41.300679634403565, "grad_norm": 4.228105545043945, "learning_rate": 8.042107326599551e-07, "loss": 0.07523938, "memory(GiB)": 13.7, "step": 88115, "train_speed(iter/s)": 1.531159 }, { "acc": 0.98564491, "epoch": 41.3030232013124, "grad_norm": 3.6914174556732178, "learning_rate": 8.037894293942107e-07, "loss": 0.03623507, "memory(GiB)": 13.7, "step": 88120, "train_speed(iter/s)": 1.531163 }, { "acc": 0.99437504, "epoch": 41.30536676822123, "grad_norm": 3.675431966781616, "learning_rate": 8.033682270036012e-07, "loss": 0.01484593, "memory(GiB)": 13.7, "step": 88125, "train_speed(iter/s)": 1.531168 }, { "acc": 0.9875, "epoch": 41.30771033513007, "grad_norm": 2.0140810012817383, "learning_rate": 8.029471254982522e-07, "loss": 0.0187144, "memory(GiB)": 13.7, "step": 88130, "train_speed(iter/s)": 1.531166 }, { "acc": 0.98883934, "epoch": 41.3100539020389, "grad_norm": 1.2965048551559448, "learning_rate": 8.025261248882828e-07, "loss": 0.02288164, "memory(GiB)": 13.7, "step": 88135, "train_speed(iter/s)": 1.531164 }, { "acc": 0.984375, "epoch": 41.31239746894774, "grad_norm": 3.229393720626831, "learning_rate": 8.021052251838153e-07, "loss": 0.07017095, "memory(GiB)": 13.7, "step": 88140, "train_speed(iter/s)": 1.531167 }, { "acc": 0.98302078, "epoch": 41.31474103585657, "grad_norm": 5.6438117027282715, "learning_rate": 8.016844263949643e-07, "loss": 0.03738627, "memory(GiB)": 13.7, "step": 88145, "train_speed(iter/s)": 1.531169 }, { "acc": 0.9764286, "epoch": 41.31708460276541, "grad_norm": 4.861073970794678, "learning_rate": 8.012637285318454e-07, "loss": 0.04645836, "memory(GiB)": 13.7, "step": 88150, "train_speed(iter/s)": 1.531169 }, { "acc": 0.9963315, "epoch": 41.31942816967425, "grad_norm": 2.0385890007019043, "learning_rate": 8.008431316045719e-07, "loss": 0.02164531, "memory(GiB)": 13.7, "step": 88155, "train_speed(iter/s)": 1.531172 }, { "acc": 0.99027786, "epoch": 41.32177173658308, "grad_norm": 5.348177909851074, "learning_rate": 8.00422635623252e-07, "loss": 0.03256218, "memory(GiB)": 13.7, "step": 88160, "train_speed(iter/s)": 1.531171 }, { "acc": 0.99437504, "epoch": 41.324115303491915, "grad_norm": 0.7873550653457642, "learning_rate": 8.000022405979929e-07, "loss": 0.01886663, "memory(GiB)": 13.7, "step": 88165, "train_speed(iter/s)": 1.531171 }, { "acc": 0.98508387, "epoch": 41.32645887040075, "grad_norm": 0.003985342103987932, "learning_rate": 7.995819465389014e-07, "loss": 0.04470713, "memory(GiB)": 13.7, "step": 88170, "train_speed(iter/s)": 1.531174 }, { "acc": 0.96610126, "epoch": 41.328802437309584, "grad_norm": 3.382383108139038, "learning_rate": 7.991617534560795e-07, "loss": 0.10374892, "memory(GiB)": 13.7, "step": 88175, "train_speed(iter/s)": 1.531173 }, { "acc": 0.9958334, "epoch": 41.33114600421842, "grad_norm": 3.7760133743286133, "learning_rate": 7.987416613596251e-07, "loss": 0.01649274, "memory(GiB)": 13.7, "step": 88180, "train_speed(iter/s)": 1.531179 }, { "acc": 0.9947916, "epoch": 41.33348957112725, "grad_norm": 2.911881685256958, "learning_rate": 7.983216702596372e-07, "loss": 0.03040197, "memory(GiB)": 13.7, "step": 88185, "train_speed(iter/s)": 1.531179 }, { "acc": 0.98074398, "epoch": 41.335833138036094, "grad_norm": 3.0208799839019775, "learning_rate": 7.97901780166212e-07, "loss": 0.03970542, "memory(GiB)": 13.7, "step": 88190, "train_speed(iter/s)": 1.531181 }, { "acc": 0.99208336, "epoch": 41.33817670494493, "grad_norm": 0.8145540952682495, "learning_rate": 7.974819910894406e-07, "loss": 0.03740069, "memory(GiB)": 13.7, "step": 88195, "train_speed(iter/s)": 1.531181 }, { "acc": 0.98812504, "epoch": 41.34052027185376, "grad_norm": 3.798802614212036, "learning_rate": 7.970623030394144e-07, "loss": 0.03843096, "memory(GiB)": 13.7, "step": 88200, "train_speed(iter/s)": 1.531184 }, { "acc": 0.98812504, "epoch": 41.3428638387626, "grad_norm": 2.7261316776275635, "learning_rate": 7.96642716026222e-07, "loss": 0.01655819, "memory(GiB)": 13.7, "step": 88205, "train_speed(iter/s)": 1.531185 }, { "acc": 0.99375, "epoch": 41.34520740567143, "grad_norm": 1.7695558071136475, "learning_rate": 7.962232300599468e-07, "loss": 0.02135442, "memory(GiB)": 13.7, "step": 88210, "train_speed(iter/s)": 1.531189 }, { "acc": 0.98205357, "epoch": 41.347550972580265, "grad_norm": 1.429825782775879, "learning_rate": 7.958038451506739e-07, "loss": 0.0499661, "memory(GiB)": 13.7, "step": 88215, "train_speed(iter/s)": 1.531195 }, { "acc": 0.97999992, "epoch": 41.3498945394891, "grad_norm": 4.247665882110596, "learning_rate": 7.95384561308482e-07, "loss": 0.05782229, "memory(GiB)": 13.7, "step": 88220, "train_speed(iter/s)": 1.531194 }, { "acc": 0.98874998, "epoch": 41.35223810639794, "grad_norm": 3.9726414680480957, "learning_rate": 7.949653785434508e-07, "loss": 0.02227485, "memory(GiB)": 13.7, "step": 88225, "train_speed(iter/s)": 1.531199 }, { "acc": 0.99050598, "epoch": 41.354581673306775, "grad_norm": 2.906618595123291, "learning_rate": 7.945462968656569e-07, "loss": 0.01524355, "memory(GiB)": 13.7, "step": 88230, "train_speed(iter/s)": 1.5312 }, { "acc": 0.98187494, "epoch": 41.35692524021561, "grad_norm": 2.5937507152557373, "learning_rate": 7.941273162851708e-07, "loss": 0.0460058, "memory(GiB)": 13.7, "step": 88235, "train_speed(iter/s)": 1.531205 }, { "acc": 0.9811161, "epoch": 41.359268807124444, "grad_norm": 2.4154746532440186, "learning_rate": 7.93708436812065e-07, "loss": 0.05368437, "memory(GiB)": 13.7, "step": 88240, "train_speed(iter/s)": 1.531201 }, { "acc": 0.98208332, "epoch": 41.36161237403328, "grad_norm": 2.936232805252075, "learning_rate": 7.932896584564089e-07, "loss": 0.03104893, "memory(GiB)": 13.7, "step": 88245, "train_speed(iter/s)": 1.531203 }, { "acc": 0.97416668, "epoch": 41.36395594094211, "grad_norm": 3.7325239181518555, "learning_rate": 7.928709812282668e-07, "loss": 0.06604981, "memory(GiB)": 13.7, "step": 88250, "train_speed(iter/s)": 1.531206 }, { "acc": 0.99309521, "epoch": 41.36629950785095, "grad_norm": 1.5898122787475586, "learning_rate": 7.924524051377048e-07, "loss": 0.0213607, "memory(GiB)": 13.7, "step": 88255, "train_speed(iter/s)": 1.531209 }, { "acc": 0.98916664, "epoch": 41.36864307475978, "grad_norm": 5.476379871368408, "learning_rate": 7.920339301947806e-07, "loss": 0.06119636, "memory(GiB)": 13.7, "step": 88260, "train_speed(iter/s)": 1.531208 }, { "acc": 0.98363104, "epoch": 41.37098664166862, "grad_norm": 0.011891382746398449, "learning_rate": 7.916155564095565e-07, "loss": 0.06891969, "memory(GiB)": 13.7, "step": 88265, "train_speed(iter/s)": 1.53121 }, { "acc": 0.97894344, "epoch": 41.37333020857746, "grad_norm": 2.061687469482422, "learning_rate": 7.911972837920853e-07, "loss": 0.03900193, "memory(GiB)": 13.7, "step": 88270, "train_speed(iter/s)": 1.531216 }, { "acc": 0.98562498, "epoch": 41.37567377548629, "grad_norm": 3.6335835456848145, "learning_rate": 7.907791123524234e-07, "loss": 0.04497471, "memory(GiB)": 13.7, "step": 88275, "train_speed(iter/s)": 1.531217 }, { "acc": 0.98008013, "epoch": 41.378017342395125, "grad_norm": 5.523326873779297, "learning_rate": 7.903610421006215e-07, "loss": 0.06104043, "memory(GiB)": 13.7, "step": 88280, "train_speed(iter/s)": 1.531221 }, { "acc": 0.97729168, "epoch": 41.38036090930396, "grad_norm": 4.182830810546875, "learning_rate": 7.899430730467303e-07, "loss": 0.04940026, "memory(GiB)": 13.7, "step": 88285, "train_speed(iter/s)": 1.531225 }, { "acc": 0.9895833, "epoch": 41.382704476212794, "grad_norm": 3.0442240238189697, "learning_rate": 7.895252052007927e-07, "loss": 0.02690247, "memory(GiB)": 13.7, "step": 88290, "train_speed(iter/s)": 1.531227 }, { "acc": 0.97895832, "epoch": 41.38504804312163, "grad_norm": 3.9329490661621094, "learning_rate": 7.891074385728562e-07, "loss": 0.03377182, "memory(GiB)": 13.7, "step": 88295, "train_speed(iter/s)": 1.531228 }, { "acc": 0.97979164, "epoch": 41.38739161003047, "grad_norm": 0.0007981239468790591, "learning_rate": 7.886897731729619e-07, "loss": 0.02981174, "memory(GiB)": 13.7, "step": 88300, "train_speed(iter/s)": 1.531229 }, { "acc": 0.99020834, "epoch": 41.389735176939304, "grad_norm": 1.03253173828125, "learning_rate": 7.882722090111489e-07, "loss": 0.04068792, "memory(GiB)": 13.7, "step": 88305, "train_speed(iter/s)": 1.53123 }, { "acc": 0.9895833, "epoch": 41.39207874384814, "grad_norm": 3.1585659980773926, "learning_rate": 7.878547460974523e-07, "loss": 0.02758023, "memory(GiB)": 13.7, "step": 88310, "train_speed(iter/s)": 1.531225 }, { "acc": 0.9850893, "epoch": 41.39442231075697, "grad_norm": 4.80975866317749, "learning_rate": 7.874373844419075e-07, "loss": 0.03262882, "memory(GiB)": 13.7, "step": 88315, "train_speed(iter/s)": 1.531228 }, { "acc": 0.98624992, "epoch": 41.39676587766581, "grad_norm": 0.003226327942684293, "learning_rate": 7.870201240545491e-07, "loss": 0.03143851, "memory(GiB)": 13.7, "step": 88320, "train_speed(iter/s)": 1.531229 }, { "acc": 0.99300594, "epoch": 41.39910944457464, "grad_norm": 0.008236606605350971, "learning_rate": 7.86602964945403e-07, "loss": 0.00957286, "memory(GiB)": 13.7, "step": 88325, "train_speed(iter/s)": 1.531236 }, { "acc": 0.9885417, "epoch": 41.401453011483476, "grad_norm": 0.001129767159000039, "learning_rate": 7.861859071244978e-07, "loss": 0.01511272, "memory(GiB)": 13.7, "step": 88330, "train_speed(iter/s)": 1.531241 }, { "acc": 0.98916664, "epoch": 41.40379657839231, "grad_norm": 2.241689682006836, "learning_rate": 7.857689506018594e-07, "loss": 0.02100281, "memory(GiB)": 13.7, "step": 88335, "train_speed(iter/s)": 1.531243 }, { "acc": 0.98258924, "epoch": 41.40614014530115, "grad_norm": 3.6315767765045166, "learning_rate": 7.853520953875094e-07, "loss": 0.03554592, "memory(GiB)": 13.7, "step": 88340, "train_speed(iter/s)": 1.531241 }, { "acc": 0.98708334, "epoch": 41.408483712209986, "grad_norm": 1.1778737306594849, "learning_rate": 7.849353414914659e-07, "loss": 0.05390241, "memory(GiB)": 13.7, "step": 88345, "train_speed(iter/s)": 1.531244 }, { "acc": 0.98130455, "epoch": 41.41082727911882, "grad_norm": 0.0022542246151715517, "learning_rate": 7.84518688923747e-07, "loss": 0.04127913, "memory(GiB)": 13.7, "step": 88350, "train_speed(iter/s)": 1.531246 }, { "acc": 0.996875, "epoch": 41.413170846027654, "grad_norm": 0.002528533572331071, "learning_rate": 7.841021376943692e-07, "loss": 0.03545187, "memory(GiB)": 13.7, "step": 88355, "train_speed(iter/s)": 1.53125 }, { "acc": 0.98708324, "epoch": 41.41551441293649, "grad_norm": 3.3671042919158936, "learning_rate": 7.836856878133447e-07, "loss": 0.03246824, "memory(GiB)": 13.7, "step": 88360, "train_speed(iter/s)": 1.531251 }, { "acc": 0.98770828, "epoch": 41.41785797984532, "grad_norm": 0.5838053226470947, "learning_rate": 7.832693392906815e-07, "loss": 0.03119088, "memory(GiB)": 13.7, "step": 88365, "train_speed(iter/s)": 1.531245 }, { "acc": 0.98883924, "epoch": 41.42020154675416, "grad_norm": 5.53444242477417, "learning_rate": 7.828530921363893e-07, "loss": 0.06999587, "memory(GiB)": 13.7, "step": 88370, "train_speed(iter/s)": 1.531247 }, { "acc": 0.996875, "epoch": 41.422545113663, "grad_norm": 0.839365541934967, "learning_rate": 7.824369463604737e-07, "loss": 0.02543664, "memory(GiB)": 13.7, "step": 88375, "train_speed(iter/s)": 1.531252 }, { "acc": 0.97344704, "epoch": 41.42488868057183, "grad_norm": 3.349010944366455, "learning_rate": 7.820209019729351e-07, "loss": 0.04163796, "memory(GiB)": 13.7, "step": 88380, "train_speed(iter/s)": 1.531256 }, { "acc": 0.97590275, "epoch": 41.42723224748067, "grad_norm": 5.071404933929443, "learning_rate": 7.816049589837765e-07, "loss": 0.0397163, "memory(GiB)": 13.7, "step": 88385, "train_speed(iter/s)": 1.531263 }, { "acc": 0.98544636, "epoch": 41.4295758143895, "grad_norm": 0.8487826585769653, "learning_rate": 7.811891174029937e-07, "loss": 0.02569786, "memory(GiB)": 13.7, "step": 88390, "train_speed(iter/s)": 1.531267 }, { "acc": 0.98676548, "epoch": 41.431919381298336, "grad_norm": 2.1351675987243652, "learning_rate": 7.807733772405834e-07, "loss": 0.07301021, "memory(GiB)": 13.7, "step": 88395, "train_speed(iter/s)": 1.531264 }, { "acc": 0.98133011, "epoch": 41.43426294820717, "grad_norm": 4.300286293029785, "learning_rate": 7.803577385065376e-07, "loss": 0.03692076, "memory(GiB)": 13.7, "step": 88400, "train_speed(iter/s)": 1.531266 }, { "acc": 0.99875002, "epoch": 41.436606515116004, "grad_norm": 0.023187901824712753, "learning_rate": 7.799422012108476e-07, "loss": 0.0272427, "memory(GiB)": 13.7, "step": 88405, "train_speed(iter/s)": 1.531265 }, { "acc": 0.99821434, "epoch": 41.43895008202484, "grad_norm": 0.7663971185684204, "learning_rate": 7.795267653635029e-07, "loss": 0.03888622, "memory(GiB)": 13.7, "step": 88410, "train_speed(iter/s)": 1.531271 }, { "acc": 0.984375, "epoch": 41.44129364893368, "grad_norm": 3.1631295680999756, "learning_rate": 7.79111430974486e-07, "loss": 0.02777458, "memory(GiB)": 13.7, "step": 88415, "train_speed(iter/s)": 1.531278 }, { "acc": 0.99430809, "epoch": 41.443637215842514, "grad_norm": 0.004094946663826704, "learning_rate": 7.786961980537824e-07, "loss": 0.03081121, "memory(GiB)": 13.7, "step": 88420, "train_speed(iter/s)": 1.531276 }, { "acc": 0.98968754, "epoch": 41.44598078275135, "grad_norm": 3.3792994022369385, "learning_rate": 7.782810666113733e-07, "loss": 0.02432677, "memory(GiB)": 13.7, "step": 88425, "train_speed(iter/s)": 1.531275 }, { "acc": 0.98961391, "epoch": 41.44832434966018, "grad_norm": 5.193993091583252, "learning_rate": 7.778660366572355e-07, "loss": 0.0341976, "memory(GiB)": 13.7, "step": 88430, "train_speed(iter/s)": 1.531276 }, { "acc": 0.97375002, "epoch": 41.45066791656902, "grad_norm": 6.351323127746582, "learning_rate": 7.774511082013464e-07, "loss": 0.05763139, "memory(GiB)": 13.7, "step": 88435, "train_speed(iter/s)": 1.531277 }, { "acc": 0.99598217, "epoch": 41.45301148347785, "grad_norm": 3.1708028316497803, "learning_rate": 7.77036281253678e-07, "loss": 0.02815551, "memory(GiB)": 13.7, "step": 88440, "train_speed(iter/s)": 1.531278 }, { "acc": 0.97800598, "epoch": 41.455355050386686, "grad_norm": 2.845968008041382, "learning_rate": 7.766215558242021e-07, "loss": 0.07866133, "memory(GiB)": 13.7, "step": 88445, "train_speed(iter/s)": 1.53128 }, { "acc": 0.97729168, "epoch": 41.45769861729553, "grad_norm": 2.2435367107391357, "learning_rate": 7.762069319228889e-07, "loss": 0.06001372, "memory(GiB)": 13.7, "step": 88450, "train_speed(iter/s)": 1.531283 }, { "acc": 0.9859375, "epoch": 41.46004218420436, "grad_norm": 0.00471490016207099, "learning_rate": 7.757924095597017e-07, "loss": 0.04782258, "memory(GiB)": 13.7, "step": 88455, "train_speed(iter/s)": 1.531286 }, { "acc": 0.98633938, "epoch": 41.462385751113196, "grad_norm": 4.890260219573975, "learning_rate": 7.753779887446062e-07, "loss": 0.06252931, "memory(GiB)": 13.7, "step": 88460, "train_speed(iter/s)": 1.531286 }, { "acc": 0.98527775, "epoch": 41.46472931802203, "grad_norm": 0.1672283262014389, "learning_rate": 7.749636694875644e-07, "loss": 0.03427516, "memory(GiB)": 13.7, "step": 88465, "train_speed(iter/s)": 1.531288 }, { "acc": 0.98770828, "epoch": 41.467072884930865, "grad_norm": 3.9946391582489014, "learning_rate": 7.74549451798534e-07, "loss": 0.02068589, "memory(GiB)": 13.7, "step": 88470, "train_speed(iter/s)": 1.531291 }, { "acc": 0.9875, "epoch": 41.4694164518397, "grad_norm": 6.377342224121094, "learning_rate": 7.741353356874698e-07, "loss": 0.01726752, "memory(GiB)": 13.7, "step": 88475, "train_speed(iter/s)": 1.531294 }, { "acc": 0.99750004, "epoch": 41.47176001874853, "grad_norm": 2.743610143661499, "learning_rate": 7.737213211643279e-07, "loss": 0.01953074, "memory(GiB)": 13.7, "step": 88480, "train_speed(iter/s)": 1.531296 }, { "acc": 0.9875, "epoch": 41.47410358565737, "grad_norm": 4.5433549880981445, "learning_rate": 7.733074082390602e-07, "loss": 0.02233685, "memory(GiB)": 13.7, "step": 88485, "train_speed(iter/s)": 1.531297 }, { "acc": 0.97383928, "epoch": 41.47644715256621, "grad_norm": 3.3868257999420166, "learning_rate": 7.728935969216139e-07, "loss": 0.03514502, "memory(GiB)": 13.7, "step": 88490, "train_speed(iter/s)": 1.531298 }, { "acc": 0.99499998, "epoch": 41.47879071947504, "grad_norm": 0.8927920460700989, "learning_rate": 7.724798872219371e-07, "loss": 0.01794929, "memory(GiB)": 13.7, "step": 88495, "train_speed(iter/s)": 1.531301 }, { "acc": 0.990625, "epoch": 41.48113428638388, "grad_norm": 0.03634144738316536, "learning_rate": 7.720662791499736e-07, "loss": 0.03368811, "memory(GiB)": 13.7, "step": 88500, "train_speed(iter/s)": 1.531299 }, { "acc": 0.99092264, "epoch": 41.48347785329271, "grad_norm": 1.7909499406814575, "learning_rate": 7.71652772715666e-07, "loss": 0.02949005, "memory(GiB)": 13.7, "step": 88505, "train_speed(iter/s)": 1.531299 }, { "acc": 0.99821434, "epoch": 41.485821420201546, "grad_norm": 2.8660058975219727, "learning_rate": 7.712393679289532e-07, "loss": 0.01618153, "memory(GiB)": 13.7, "step": 88510, "train_speed(iter/s)": 1.531294 }, { "acc": 0.98708324, "epoch": 41.48816498711038, "grad_norm": 4.1621012687683105, "learning_rate": 7.708260647997706e-07, "loss": 0.05318443, "memory(GiB)": 13.7, "step": 88515, "train_speed(iter/s)": 1.531294 }, { "acc": 0.98354168, "epoch": 41.490508554019215, "grad_norm": 1.660990595817566, "learning_rate": 7.704128633380537e-07, "loss": 0.0603564, "memory(GiB)": 13.7, "step": 88520, "train_speed(iter/s)": 1.531301 }, { "acc": 0.99437504, "epoch": 41.49285212092805, "grad_norm": 0.9575712084770203, "learning_rate": 7.699997635537365e-07, "loss": 0.01539924, "memory(GiB)": 13.7, "step": 88525, "train_speed(iter/s)": 1.531301 }, { "acc": 0.99541664, "epoch": 41.49519568783689, "grad_norm": 1.2057782411575317, "learning_rate": 7.69586765456745e-07, "loss": 0.02276142, "memory(GiB)": 13.7, "step": 88530, "train_speed(iter/s)": 1.531298 }, { "acc": 0.99229164, "epoch": 41.497539254745725, "grad_norm": 2.78863525390625, "learning_rate": 7.69173869057008e-07, "loss": 0.02713448, "memory(GiB)": 13.7, "step": 88535, "train_speed(iter/s)": 1.531303 }, { "acc": 0.98604164, "epoch": 41.49988282165456, "grad_norm": 2.2247936725616455, "learning_rate": 7.687610743644516e-07, "loss": 0.02429008, "memory(GiB)": 13.7, "step": 88540, "train_speed(iter/s)": 1.531302 }, { "acc": 0.97407198, "epoch": 41.50222638856339, "grad_norm": 1.030295491218567, "learning_rate": 7.683483813889951e-07, "loss": 0.07053387, "memory(GiB)": 13.7, "step": 88545, "train_speed(iter/s)": 1.5313 }, { "acc": 0.99333334, "epoch": 41.50456995547223, "grad_norm": 1.6228619813919067, "learning_rate": 7.679357901405614e-07, "loss": 0.01819973, "memory(GiB)": 13.7, "step": 88550, "train_speed(iter/s)": 1.531304 }, { "acc": 0.96970959, "epoch": 41.50691352238106, "grad_norm": 7.045798301696777, "learning_rate": 7.675233006290649e-07, "loss": 0.08858021, "memory(GiB)": 13.7, "step": 88555, "train_speed(iter/s)": 1.531305 }, { "acc": 0.98488102, "epoch": 41.509257089289896, "grad_norm": 0.11098078638315201, "learning_rate": 7.671109128644226e-07, "loss": 0.09189577, "memory(GiB)": 13.7, "step": 88560, "train_speed(iter/s)": 1.531305 }, { "acc": 0.98312502, "epoch": 41.51160065619874, "grad_norm": 5.782787799835205, "learning_rate": 7.666986268565458e-07, "loss": 0.04476254, "memory(GiB)": 13.7, "step": 88565, "train_speed(iter/s)": 1.531306 }, { "acc": 0.99145832, "epoch": 41.51394422310757, "grad_norm": 0.7751631140708923, "learning_rate": 7.662864426153443e-07, "loss": 0.01728318, "memory(GiB)": 13.7, "step": 88570, "train_speed(iter/s)": 1.531308 }, { "acc": 0.99508934, "epoch": 41.516287790016406, "grad_norm": 4.385473728179932, "learning_rate": 7.658743601507263e-07, "loss": 0.01455372, "memory(GiB)": 13.7, "step": 88575, "train_speed(iter/s)": 1.531307 }, { "acc": 0.97800598, "epoch": 41.51863135692524, "grad_norm": 1.5536366701126099, "learning_rate": 7.654623794725984e-07, "loss": 0.04499129, "memory(GiB)": 13.7, "step": 88580, "train_speed(iter/s)": 1.531308 }, { "acc": 0.97645836, "epoch": 41.520974923834075, "grad_norm": 3.0481579303741455, "learning_rate": 7.650505005908606e-07, "loss": 0.07306982, "memory(GiB)": 13.7, "step": 88585, "train_speed(iter/s)": 1.531307 }, { "acc": 0.98270836, "epoch": 41.52331849074291, "grad_norm": 0.23615339398384094, "learning_rate": 7.646387235154137e-07, "loss": 0.05296409, "memory(GiB)": 13.7, "step": 88590, "train_speed(iter/s)": 1.531312 }, { "acc": 0.9854167, "epoch": 41.52566205765174, "grad_norm": 4.728418350219727, "learning_rate": 7.642270482561576e-07, "loss": 0.04684267, "memory(GiB)": 13.7, "step": 88595, "train_speed(iter/s)": 1.531313 }, { "acc": 0.97527771, "epoch": 41.52800562456058, "grad_norm": 3.3407812118530273, "learning_rate": 7.638154748229856e-07, "loss": 0.04839862, "memory(GiB)": 13.7, "step": 88600, "train_speed(iter/s)": 1.531315 }, { "acc": 0.9958334, "epoch": 41.53034919146942, "grad_norm": 0.0008638558210805058, "learning_rate": 7.634040032257904e-07, "loss": 0.01055822, "memory(GiB)": 13.7, "step": 88605, "train_speed(iter/s)": 1.531321 }, { "acc": 0.9895834, "epoch": 41.53269275837825, "grad_norm": 2.1929728984832764, "learning_rate": 7.629926334744628e-07, "loss": 0.03053824, "memory(GiB)": 13.7, "step": 88610, "train_speed(iter/s)": 1.531328 }, { "acc": 0.99333334, "epoch": 41.53503632528709, "grad_norm": 4.948556423187256, "learning_rate": 7.62581365578892e-07, "loss": 0.0293826, "memory(GiB)": 13.7, "step": 88615, "train_speed(iter/s)": 1.531334 }, { "acc": 0.98562498, "epoch": 41.53737989219592, "grad_norm": 3.1259148120880127, "learning_rate": 7.621701995489614e-07, "loss": 0.03375182, "memory(GiB)": 13.7, "step": 88620, "train_speed(iter/s)": 1.531333 }, { "acc": 0.98175592, "epoch": 41.539723459104756, "grad_norm": 2.6124768257141113, "learning_rate": 7.617591353945554e-07, "loss": 0.03480408, "memory(GiB)": 13.7, "step": 88625, "train_speed(iter/s)": 1.531327 }, { "acc": 0.97833328, "epoch": 41.54206702601359, "grad_norm": 2.1803669929504395, "learning_rate": 7.613481731255551e-07, "loss": 0.05872333, "memory(GiB)": 13.7, "step": 88630, "train_speed(iter/s)": 1.531326 }, { "acc": 0.98916664, "epoch": 41.544410592922425, "grad_norm": 3.589759588241577, "learning_rate": 7.609373127518373e-07, "loss": 0.01911791, "memory(GiB)": 13.7, "step": 88635, "train_speed(iter/s)": 1.531331 }, { "acc": 0.97736111, "epoch": 41.546754159831266, "grad_norm": 2.8947885036468506, "learning_rate": 7.605265542832796e-07, "loss": 0.07915151, "memory(GiB)": 13.7, "step": 88640, "train_speed(iter/s)": 1.531334 }, { "acc": 0.98602676, "epoch": 41.5490977267401, "grad_norm": 1.6434003114700317, "learning_rate": 7.601158977297532e-07, "loss": 0.03015999, "memory(GiB)": 13.7, "step": 88645, "train_speed(iter/s)": 1.531331 }, { "acc": 0.99020834, "epoch": 41.551441293648935, "grad_norm": 1.0021231174468994, "learning_rate": 7.597053431011292e-07, "loss": 0.02496665, "memory(GiB)": 13.7, "step": 88650, "train_speed(iter/s)": 1.531329 }, { "acc": 0.9885416, "epoch": 41.55378486055777, "grad_norm": 3.9398741722106934, "learning_rate": 7.592948904072782e-07, "loss": 0.03408532, "memory(GiB)": 13.7, "step": 88655, "train_speed(iter/s)": 1.531334 }, { "acc": 0.9796875, "epoch": 41.556128427466604, "grad_norm": 2.4682509899139404, "learning_rate": 7.588845396580635e-07, "loss": 0.03716055, "memory(GiB)": 13.7, "step": 88660, "train_speed(iter/s)": 1.531333 }, { "acc": 0.9885417, "epoch": 41.55847199437544, "grad_norm": 4.962937831878662, "learning_rate": 7.584742908633495e-07, "loss": 0.05396656, "memory(GiB)": 13.7, "step": 88665, "train_speed(iter/s)": 1.531331 }, { "acc": 0.99416666, "epoch": 41.56081556128427, "grad_norm": 1.1974900960922241, "learning_rate": 7.580641440329993e-07, "loss": 0.02429629, "memory(GiB)": 13.7, "step": 88670, "train_speed(iter/s)": 1.531334 }, { "acc": 0.97833328, "epoch": 41.56315912819311, "grad_norm": 8.022977828979492, "learning_rate": 7.576540991768685e-07, "loss": 0.04464907, "memory(GiB)": 13.7, "step": 88675, "train_speed(iter/s)": 1.531339 }, { "acc": 0.97761364, "epoch": 41.56550269510195, "grad_norm": 4.621831893920898, "learning_rate": 7.572441563048137e-07, "loss": 0.08694149, "memory(GiB)": 13.7, "step": 88680, "train_speed(iter/s)": 1.531345 }, { "acc": 0.97967949, "epoch": 41.56784626201078, "grad_norm": 3.2522478103637695, "learning_rate": 7.568343154266891e-07, "loss": 0.06399384, "memory(GiB)": 13.7, "step": 88685, "train_speed(iter/s)": 1.531346 }, { "acc": 0.9921875, "epoch": 41.57018982891962, "grad_norm": 2.002397060394287, "learning_rate": 7.564245765523477e-07, "loss": 0.02726466, "memory(GiB)": 13.7, "step": 88690, "train_speed(iter/s)": 1.531348 }, { "acc": 0.99020824, "epoch": 41.57253339582845, "grad_norm": 3.0676798820495605, "learning_rate": 7.560149396916348e-07, "loss": 0.01647692, "memory(GiB)": 13.7, "step": 88695, "train_speed(iter/s)": 1.531353 }, { "acc": 0.98312492, "epoch": 41.574876962737285, "grad_norm": 2.9397265911102295, "learning_rate": 7.556054048543991e-07, "loss": 0.02578886, "memory(GiB)": 13.7, "step": 88700, "train_speed(iter/s)": 1.531356 }, { "acc": 0.98273811, "epoch": 41.57722052964612, "grad_norm": 0.9448094964027405, "learning_rate": 7.551959720504831e-07, "loss": 0.04668421, "memory(GiB)": 13.7, "step": 88705, "train_speed(iter/s)": 1.531359 }, { "acc": 0.98883934, "epoch": 41.579564096554954, "grad_norm": 3.899930238723755, "learning_rate": 7.547866412897309e-07, "loss": 0.03207895, "memory(GiB)": 13.7, "step": 88710, "train_speed(iter/s)": 1.531363 }, { "acc": 0.98170643, "epoch": 41.581907663463795, "grad_norm": 3.8169844150543213, "learning_rate": 7.543774125819781e-07, "loss": 0.04945797, "memory(GiB)": 13.7, "step": 88715, "train_speed(iter/s)": 1.531362 }, { "acc": 0.9916667, "epoch": 41.58425123037263, "grad_norm": 4.710178375244141, "learning_rate": 7.539682859370641e-07, "loss": 0.03322462, "memory(GiB)": 13.7, "step": 88720, "train_speed(iter/s)": 1.531362 }, { "acc": 0.99145832, "epoch": 41.586594797281464, "grad_norm": 3.3048830032348633, "learning_rate": 7.535592613648201e-07, "loss": 0.04564133, "memory(GiB)": 13.7, "step": 88725, "train_speed(iter/s)": 1.531367 }, { "acc": 0.9958333, "epoch": 41.5889383641903, "grad_norm": 1.5156344175338745, "learning_rate": 7.531503388750803e-07, "loss": 0.01632716, "memory(GiB)": 13.7, "step": 88730, "train_speed(iter/s)": 1.531376 }, { "acc": 0.9854167, "epoch": 41.59128193109913, "grad_norm": 4.445953845977783, "learning_rate": 7.527415184776719e-07, "loss": 0.03479112, "memory(GiB)": 13.7, "step": 88735, "train_speed(iter/s)": 1.531378 }, { "acc": 0.99301472, "epoch": 41.59362549800797, "grad_norm": 4.24528694152832, "learning_rate": 7.523328001824225e-07, "loss": 0.02313641, "memory(GiB)": 13.7, "step": 88740, "train_speed(iter/s)": 1.531384 }, { "acc": 0.9927084, "epoch": 41.5959690649168, "grad_norm": 2.0331156253814697, "learning_rate": 7.519241839991571e-07, "loss": 0.03673681, "memory(GiB)": 13.7, "step": 88745, "train_speed(iter/s)": 1.531388 }, { "acc": 0.96895838, "epoch": 41.598312631825635, "grad_norm": 0.03278697282075882, "learning_rate": 7.515156699376958e-07, "loss": 0.04788961, "memory(GiB)": 13.7, "step": 88750, "train_speed(iter/s)": 1.531392 }, { "acc": 0.99125004, "epoch": 41.60065619873448, "grad_norm": 0.9319965243339539, "learning_rate": 7.511072580078586e-07, "loss": 0.02051663, "memory(GiB)": 13.7, "step": 88755, "train_speed(iter/s)": 1.531396 }, { "acc": 0.996875, "epoch": 41.60299976564331, "grad_norm": 0.018850304186344147, "learning_rate": 7.506989482194633e-07, "loss": 0.01162122, "memory(GiB)": 13.7, "step": 88760, "train_speed(iter/s)": 1.531398 }, { "acc": 0.99125004, "epoch": 41.605343332552145, "grad_norm": 1.140569806098938, "learning_rate": 7.502907405823247e-07, "loss": 0.02968852, "memory(GiB)": 13.7, "step": 88765, "train_speed(iter/s)": 1.531401 }, { "acc": 0.9802083, "epoch": 41.60768689946098, "grad_norm": 1.4436986446380615, "learning_rate": 7.498826351062518e-07, "loss": 0.06187242, "memory(GiB)": 13.7, "step": 88770, "train_speed(iter/s)": 1.531405 }, { "acc": 0.9927084, "epoch": 41.610030466369814, "grad_norm": 0.004890323616564274, "learning_rate": 7.494746318010556e-07, "loss": 0.02454172, "memory(GiB)": 13.7, "step": 88775, "train_speed(iter/s)": 1.531411 }, { "acc": 0.97479172, "epoch": 41.61237403327865, "grad_norm": 3.7586841583251953, "learning_rate": 7.490667306765456e-07, "loss": 0.05233037, "memory(GiB)": 13.7, "step": 88780, "train_speed(iter/s)": 1.531411 }, { "acc": 0.97104168, "epoch": 41.61471760018748, "grad_norm": 5.765083312988281, "learning_rate": 7.486589317425228e-07, "loss": 0.08250256, "memory(GiB)": 13.7, "step": 88785, "train_speed(iter/s)": 1.531412 }, { "acc": 0.97365074, "epoch": 41.617061167096324, "grad_norm": 3.345477819442749, "learning_rate": 7.482512350087917e-07, "loss": 0.06729835, "memory(GiB)": 13.7, "step": 88790, "train_speed(iter/s)": 1.531418 }, { "acc": 0.996875, "epoch": 41.61940473400516, "grad_norm": 0.012499242089688778, "learning_rate": 7.478436404851491e-07, "loss": 0.0106779, "memory(GiB)": 13.7, "step": 88795, "train_speed(iter/s)": 1.53142 }, { "acc": 0.9833334, "epoch": 41.62174830091399, "grad_norm": 5.008560657501221, "learning_rate": 7.474361481813968e-07, "loss": 0.05864333, "memory(GiB)": 13.7, "step": 88800, "train_speed(iter/s)": 1.531422 }, { "acc": 0.99001989, "epoch": 41.62409186782283, "grad_norm": 1.6308274269104004, "learning_rate": 7.47028758107326e-07, "loss": 0.0302142, "memory(GiB)": 13.7, "step": 88805, "train_speed(iter/s)": 1.531428 }, { "acc": 0.97195644, "epoch": 41.62643543473166, "grad_norm": 1.0384116172790527, "learning_rate": 7.466214702727312e-07, "loss": 0.09614434, "memory(GiB)": 13.7, "step": 88810, "train_speed(iter/s)": 1.531429 }, { "acc": 0.97895832, "epoch": 41.628779001640495, "grad_norm": 3.6450510025024414, "learning_rate": 7.462142846874002e-07, "loss": 0.03793755, "memory(GiB)": 13.7, "step": 88815, "train_speed(iter/s)": 1.531436 }, { "acc": 0.97833328, "epoch": 41.63112256854933, "grad_norm": 4.244616985321045, "learning_rate": 7.458072013611224e-07, "loss": 0.0618873, "memory(GiB)": 13.7, "step": 88820, "train_speed(iter/s)": 1.531442 }, { "acc": 0.99508934, "epoch": 41.633466135458164, "grad_norm": 0.0007141928654164076, "learning_rate": 7.454002203036814e-07, "loss": 0.0218022, "memory(GiB)": 13.7, "step": 88825, "train_speed(iter/s)": 1.531443 }, { "acc": 0.98083334, "epoch": 41.635809702367006, "grad_norm": 4.0204644203186035, "learning_rate": 7.449933415248578e-07, "loss": 0.03781625, "memory(GiB)": 13.7, "step": 88830, "train_speed(iter/s)": 1.531445 }, { "acc": 0.98592262, "epoch": 41.63815326927584, "grad_norm": 4.028261661529541, "learning_rate": 7.445865650344367e-07, "loss": 0.02868079, "memory(GiB)": 13.7, "step": 88835, "train_speed(iter/s)": 1.531444 }, { "acc": 0.99083328, "epoch": 41.640496836184674, "grad_norm": 5.199516773223877, "learning_rate": 7.44179890842191e-07, "loss": 0.0634447, "memory(GiB)": 13.7, "step": 88840, "train_speed(iter/s)": 1.531443 }, { "acc": 0.98282204, "epoch": 41.64284040309351, "grad_norm": 1.3368395566940308, "learning_rate": 7.437733189578989e-07, "loss": 0.03847984, "memory(GiB)": 13.7, "step": 88845, "train_speed(iter/s)": 1.531442 }, { "acc": 0.9880209, "epoch": 41.64518397000234, "grad_norm": 4.9234161376953125, "learning_rate": 7.433668493913298e-07, "loss": 0.02207802, "memory(GiB)": 13.7, "step": 88850, "train_speed(iter/s)": 1.531446 }, { "acc": 0.97488098, "epoch": 41.64752753691118, "grad_norm": 5.158865928649902, "learning_rate": 7.429604821522579e-07, "loss": 0.04816028, "memory(GiB)": 13.7, "step": 88855, "train_speed(iter/s)": 1.531448 }, { "acc": 0.98758011, "epoch": 41.64987110382001, "grad_norm": 2.542296886444092, "learning_rate": 7.425542172504484e-07, "loss": 0.03129701, "memory(GiB)": 13.7, "step": 88860, "train_speed(iter/s)": 1.531454 }, { "acc": 0.97946434, "epoch": 41.65221467072885, "grad_norm": 2.910588026046753, "learning_rate": 7.421480546956675e-07, "loss": 0.05067187, "memory(GiB)": 13.7, "step": 88865, "train_speed(iter/s)": 1.531453 }, { "acc": 0.98946428, "epoch": 41.65455823763769, "grad_norm": 0.7525705695152283, "learning_rate": 7.417419944976774e-07, "loss": 0.03117481, "memory(GiB)": 13.7, "step": 88870, "train_speed(iter/s)": 1.531455 }, { "acc": 0.984375, "epoch": 41.65690180454652, "grad_norm": 6.470642566680908, "learning_rate": 7.41336036666238e-07, "loss": 0.03323879, "memory(GiB)": 13.7, "step": 88875, "train_speed(iter/s)": 1.531459 }, { "acc": 0.99363098, "epoch": 41.659245371455356, "grad_norm": 4.7383575439453125, "learning_rate": 7.409301812111091e-07, "loss": 0.01253231, "memory(GiB)": 13.7, "step": 88880, "train_speed(iter/s)": 1.531456 }, { "acc": 0.984375, "epoch": 41.66158893836419, "grad_norm": 0.010683063417673111, "learning_rate": 7.405244281420425e-07, "loss": 0.03135926, "memory(GiB)": 13.7, "step": 88885, "train_speed(iter/s)": 1.531457 }, { "acc": 0.9958333, "epoch": 41.663932505273024, "grad_norm": 0.5203604102134705, "learning_rate": 7.401187774687955e-07, "loss": 0.02596527, "memory(GiB)": 13.7, "step": 88890, "train_speed(iter/s)": 1.531461 }, { "acc": 0.98592262, "epoch": 41.66627607218186, "grad_norm": 1.8282808065414429, "learning_rate": 7.397132292011157e-07, "loss": 0.04925879, "memory(GiB)": 13.7, "step": 88895, "train_speed(iter/s)": 1.531465 }, { "acc": 0.96340275, "epoch": 41.66861963909069, "grad_norm": 6.535445690155029, "learning_rate": 7.39307783348753e-07, "loss": 0.06120821, "memory(GiB)": 13.7, "step": 88900, "train_speed(iter/s)": 1.531469 }, { "acc": 0.98582792, "epoch": 41.670963205999534, "grad_norm": 2.572345733642578, "learning_rate": 7.389024399214525e-07, "loss": 0.03477171, "memory(GiB)": 13.7, "step": 88905, "train_speed(iter/s)": 1.531477 }, { "acc": 0.9957386, "epoch": 41.67330677290837, "grad_norm": 0.14761583507061005, "learning_rate": 7.384971989289556e-07, "loss": 0.02231648, "memory(GiB)": 13.7, "step": 88910, "train_speed(iter/s)": 1.531477 }, { "acc": 0.98135414, "epoch": 41.6756503398172, "grad_norm": 4.415308475494385, "learning_rate": 7.380920603810046e-07, "loss": 0.04999487, "memory(GiB)": 13.7, "step": 88915, "train_speed(iter/s)": 1.531483 }, { "acc": 0.97970829, "epoch": 41.67799390672604, "grad_norm": 6.167702674865723, "learning_rate": 7.376870242873368e-07, "loss": 0.04429518, "memory(GiB)": 13.7, "step": 88920, "train_speed(iter/s)": 1.531484 }, { "acc": 0.98374996, "epoch": 41.68033747363487, "grad_norm": 3.411641836166382, "learning_rate": 7.372820906576898e-07, "loss": 0.02738615, "memory(GiB)": 13.7, "step": 88925, "train_speed(iter/s)": 1.531487 }, { "acc": 0.98395834, "epoch": 41.682681040543706, "grad_norm": 5.655674457550049, "learning_rate": 7.368772595017948e-07, "loss": 0.03458077, "memory(GiB)": 13.7, "step": 88930, "train_speed(iter/s)": 1.531486 }, { "acc": 0.9822916, "epoch": 41.68502460745254, "grad_norm": 4.528051853179932, "learning_rate": 7.364725308293838e-07, "loss": 0.03597691, "memory(GiB)": 13.7, "step": 88935, "train_speed(iter/s)": 1.53149 }, { "acc": 0.98562498, "epoch": 41.68736817436138, "grad_norm": 1.6047090291976929, "learning_rate": 7.360679046501833e-07, "loss": 0.03016151, "memory(GiB)": 13.7, "step": 88940, "train_speed(iter/s)": 1.531492 }, { "acc": 0.99083328, "epoch": 41.689711741270216, "grad_norm": 4.839596748352051, "learning_rate": 7.356633809739228e-07, "loss": 0.04276725, "memory(GiB)": 13.7, "step": 88945, "train_speed(iter/s)": 1.531492 }, { "acc": 0.98291664, "epoch": 41.69205530817905, "grad_norm": 1.6045128107070923, "learning_rate": 7.352589598103227e-07, "loss": 0.04419983, "memory(GiB)": 13.7, "step": 88950, "train_speed(iter/s)": 1.531496 }, { "acc": 0.98031254, "epoch": 41.694398875087884, "grad_norm": 0.004395897500216961, "learning_rate": 7.348546411691048e-07, "loss": 0.03300772, "memory(GiB)": 13.7, "step": 88955, "train_speed(iter/s)": 1.531498 }, { "acc": 0.97583332, "epoch": 41.69674244199672, "grad_norm": 1.9435518980026245, "learning_rate": 7.344504250599881e-07, "loss": 0.04825538, "memory(GiB)": 13.7, "step": 88960, "train_speed(iter/s)": 1.5315 }, { "acc": 0.97998514, "epoch": 41.69908600890555, "grad_norm": 2.880110740661621, "learning_rate": 7.34046311492687e-07, "loss": 0.05210434, "memory(GiB)": 13.7, "step": 88965, "train_speed(iter/s)": 1.531504 }, { "acc": 0.99070511, "epoch": 41.70142957581439, "grad_norm": 0.001871066284365952, "learning_rate": 7.336423004769172e-07, "loss": 0.02914272, "memory(GiB)": 13.7, "step": 88970, "train_speed(iter/s)": 1.531504 }, { "acc": 0.99340277, "epoch": 41.70377314272322, "grad_norm": 2.499939203262329, "learning_rate": 7.332383920223865e-07, "loss": 0.03602875, "memory(GiB)": 13.7, "step": 88975, "train_speed(iter/s)": 1.531503 }, { "acc": 0.9762248, "epoch": 41.70611670963206, "grad_norm": 4.7823591232299805, "learning_rate": 7.328345861388085e-07, "loss": 0.0414039, "memory(GiB)": 13.7, "step": 88980, "train_speed(iter/s)": 1.531507 }, { "acc": 0.9916666, "epoch": 41.7084602765409, "grad_norm": 0.00013239894178695977, "learning_rate": 7.324308828358853e-07, "loss": 0.0107439, "memory(GiB)": 13.7, "step": 88985, "train_speed(iter/s)": 1.531509 }, { "acc": 0.99131947, "epoch": 41.71080384344973, "grad_norm": 0.07769221067428589, "learning_rate": 7.32027282123323e-07, "loss": 0.02749849, "memory(GiB)": 13.7, "step": 88990, "train_speed(iter/s)": 1.531509 }, { "acc": 0.99035721, "epoch": 41.713147410358566, "grad_norm": 0.0023161841090768576, "learning_rate": 7.316237840108221e-07, "loss": 0.01686021, "memory(GiB)": 13.7, "step": 88995, "train_speed(iter/s)": 1.53151 }, { "acc": 0.9843523, "epoch": 41.7154909772674, "grad_norm": 3.4413068294525146, "learning_rate": 7.312203885080789e-07, "loss": 0.03733137, "memory(GiB)": 13.7, "step": 89000, "train_speed(iter/s)": 1.531513 }, { "acc": 0.98279762, "epoch": 41.717834544176235, "grad_norm": 2.3244686126708984, "learning_rate": 7.308170956247936e-07, "loss": 0.03355365, "memory(GiB)": 13.7, "step": 89005, "train_speed(iter/s)": 1.531517 }, { "acc": 0.99131947, "epoch": 41.72017811108507, "grad_norm": 0.0009934079134836793, "learning_rate": 7.304139053706572e-07, "loss": 0.02278841, "memory(GiB)": 13.7, "step": 89010, "train_speed(iter/s)": 1.531522 }, { "acc": 0.98770828, "epoch": 41.72252167799391, "grad_norm": 3.0585408210754395, "learning_rate": 7.300108177553642e-07, "loss": 0.03370346, "memory(GiB)": 13.7, "step": 89015, "train_speed(iter/s)": 1.531523 }, { "acc": 0.983848, "epoch": 41.724865244902745, "grad_norm": 1.7738207578659058, "learning_rate": 7.296078327885997e-07, "loss": 0.03381503, "memory(GiB)": 13.7, "step": 89020, "train_speed(iter/s)": 1.531524 }, { "acc": 0.98686008, "epoch": 41.72720881181158, "grad_norm": 3.6176681518554688, "learning_rate": 7.292049504800531e-07, "loss": 0.03564031, "memory(GiB)": 13.7, "step": 89025, "train_speed(iter/s)": 1.53153 }, { "acc": 0.98249998, "epoch": 41.72955237872041, "grad_norm": 4.268128871917725, "learning_rate": 7.288021708394049e-07, "loss": 0.04455211, "memory(GiB)": 13.7, "step": 89030, "train_speed(iter/s)": 1.531534 }, { "acc": 0.9854167, "epoch": 41.73189594562925, "grad_norm": 3.650205373764038, "learning_rate": 7.283994938763414e-07, "loss": 0.03044689, "memory(GiB)": 13.7, "step": 89035, "train_speed(iter/s)": 1.531538 }, { "acc": 0.96967258, "epoch": 41.73423951253808, "grad_norm": 1.2198318243026733, "learning_rate": 7.279969196005392e-07, "loss": 0.05930613, "memory(GiB)": 13.7, "step": 89040, "train_speed(iter/s)": 1.531539 }, { "acc": 0.97937508, "epoch": 41.736583079446916, "grad_norm": 4.224069595336914, "learning_rate": 7.275944480216735e-07, "loss": 0.06873121, "memory(GiB)": 13.7, "step": 89045, "train_speed(iter/s)": 1.53154 }, { "acc": 0.98458328, "epoch": 41.73892664635575, "grad_norm": 2.0370771884918213, "learning_rate": 7.271920791494204e-07, "loss": 0.03713827, "memory(GiB)": 13.7, "step": 89050, "train_speed(iter/s)": 1.531539 }, { "acc": 0.9876873, "epoch": 41.74127021326459, "grad_norm": 4.18330192565918, "learning_rate": 7.267898129934503e-07, "loss": 0.02986578, "memory(GiB)": 13.7, "step": 89055, "train_speed(iter/s)": 1.531543 }, { "acc": 0.98847227, "epoch": 41.743613780173426, "grad_norm": 3.8819754123687744, "learning_rate": 7.263876495634327e-07, "loss": 0.04980816, "memory(GiB)": 13.7, "step": 89060, "train_speed(iter/s)": 1.531545 }, { "acc": 0.98520832, "epoch": 41.74595734708226, "grad_norm": 2.1491448879241943, "learning_rate": 7.259855888690339e-07, "loss": 0.04771628, "memory(GiB)": 13.7, "step": 89065, "train_speed(iter/s)": 1.531546 }, { "acc": 0.98000002, "epoch": 41.748300913991095, "grad_norm": 3.512099504470825, "learning_rate": 7.255836309199206e-07, "loss": 0.0441466, "memory(GiB)": 13.7, "step": 89070, "train_speed(iter/s)": 1.531551 }, { "acc": 0.97625008, "epoch": 41.75064448089993, "grad_norm": 6.641841411590576, "learning_rate": 7.251817757257525e-07, "loss": 0.06385828, "memory(GiB)": 13.7, "step": 89075, "train_speed(iter/s)": 1.531552 }, { "acc": 0.99375, "epoch": 41.75298804780876, "grad_norm": 0.001093065831810236, "learning_rate": 7.247800232961877e-07, "loss": 0.03398331, "memory(GiB)": 13.7, "step": 89080, "train_speed(iter/s)": 1.531552 }, { "acc": 0.990625, "epoch": 41.7553316147176, "grad_norm": 2.1497232913970947, "learning_rate": 7.243783736408854e-07, "loss": 0.02697549, "memory(GiB)": 13.7, "step": 89085, "train_speed(iter/s)": 1.531554 }, { "acc": 0.98529758, "epoch": 41.75767518162643, "grad_norm": 3.6619873046875, "learning_rate": 7.239768267694966e-07, "loss": 0.02779104, "memory(GiB)": 13.7, "step": 89090, "train_speed(iter/s)": 1.531557 }, { "acc": 0.98249998, "epoch": 41.76001874853527, "grad_norm": 2.8007731437683105, "learning_rate": 7.235753826916775e-07, "loss": 0.02007757, "memory(GiB)": 13.7, "step": 89095, "train_speed(iter/s)": 1.531562 }, { "acc": 0.99508934, "epoch": 41.76236231544411, "grad_norm": 2.8798043727874756, "learning_rate": 7.231740414170748e-07, "loss": 0.02565918, "memory(GiB)": 13.7, "step": 89100, "train_speed(iter/s)": 1.531561 }, { "acc": 0.98812504, "epoch": 41.76470588235294, "grad_norm": 3.2488152980804443, "learning_rate": 7.227728029553363e-07, "loss": 0.0203901, "memory(GiB)": 13.7, "step": 89105, "train_speed(iter/s)": 1.531563 }, { "acc": 0.98996716, "epoch": 41.767049449261776, "grad_norm": 3.7182228565216064, "learning_rate": 7.223716673161058e-07, "loss": 0.02239304, "memory(GiB)": 13.7, "step": 89110, "train_speed(iter/s)": 1.531567 }, { "acc": 0.98685398, "epoch": 41.76939301617061, "grad_norm": 3.5551564693450928, "learning_rate": 7.219706345090267e-07, "loss": 0.02302619, "memory(GiB)": 13.7, "step": 89115, "train_speed(iter/s)": 1.531569 }, { "acc": 0.97256947, "epoch": 41.771736583079445, "grad_norm": 5.334146499633789, "learning_rate": 7.215697045437377e-07, "loss": 0.05515172, "memory(GiB)": 13.7, "step": 89120, "train_speed(iter/s)": 1.531568 }, { "acc": 0.98842258, "epoch": 41.77408014998828, "grad_norm": 3.4212145805358887, "learning_rate": 7.211688774298727e-07, "loss": 0.02941936, "memory(GiB)": 13.7, "step": 89125, "train_speed(iter/s)": 1.531575 }, { "acc": 0.9927084, "epoch": 41.77642371689712, "grad_norm": 2.410071611404419, "learning_rate": 7.207681531770719e-07, "loss": 0.03306548, "memory(GiB)": 13.7, "step": 89130, "train_speed(iter/s)": 1.531576 }, { "acc": 0.99174681, "epoch": 41.778767283805955, "grad_norm": 3.2192466259002686, "learning_rate": 7.203675317949635e-07, "loss": 0.04142575, "memory(GiB)": 13.7, "step": 89135, "train_speed(iter/s)": 1.531579 }, { "acc": 0.98520832, "epoch": 41.78111085071479, "grad_norm": 0.0008649398805573583, "learning_rate": 7.199670132931793e-07, "loss": 0.0569777, "memory(GiB)": 13.7, "step": 89140, "train_speed(iter/s)": 1.531583 }, { "acc": 0.98696423, "epoch": 41.783454417623624, "grad_norm": 3.3864872455596924, "learning_rate": 7.195665976813434e-07, "loss": 0.02881733, "memory(GiB)": 13.7, "step": 89145, "train_speed(iter/s)": 1.531585 }, { "acc": 0.98458328, "epoch": 41.78579798453246, "grad_norm": 3.668384552001953, "learning_rate": 7.191662849690849e-07, "loss": 0.04192862, "memory(GiB)": 13.7, "step": 89150, "train_speed(iter/s)": 1.531588 }, { "acc": 0.9700695, "epoch": 41.78814155144129, "grad_norm": 5.025623798370361, "learning_rate": 7.187660751660218e-07, "loss": 0.05816491, "memory(GiB)": 13.7, "step": 89155, "train_speed(iter/s)": 1.531586 }, { "acc": 0.99182539, "epoch": 41.79048511835013, "grad_norm": 2.252763032913208, "learning_rate": 7.183659682817777e-07, "loss": 0.0445904, "memory(GiB)": 13.7, "step": 89160, "train_speed(iter/s)": 1.531589 }, { "acc": 0.97467442, "epoch": 41.79282868525896, "grad_norm": 1.0909111499786377, "learning_rate": 7.179659643259673e-07, "loss": 0.03666337, "memory(GiB)": 13.7, "step": 89165, "train_speed(iter/s)": 1.531594 }, { "acc": 0.9822917, "epoch": 41.7951722521678, "grad_norm": 7.54702091217041, "learning_rate": 7.175660633082046e-07, "loss": 0.03443835, "memory(GiB)": 13.7, "step": 89170, "train_speed(iter/s)": 1.531593 }, { "acc": 0.98125, "epoch": 41.79751581907664, "grad_norm": 7.142403602600098, "learning_rate": 7.171662652381051e-07, "loss": 0.03102328, "memory(GiB)": 13.7, "step": 89175, "train_speed(iter/s)": 1.5316 }, { "acc": 0.98482571, "epoch": 41.79985938598547, "grad_norm": 4.552088260650635, "learning_rate": 7.167665701252738e-07, "loss": 0.04069876, "memory(GiB)": 13.7, "step": 89180, "train_speed(iter/s)": 1.531603 }, { "acc": 0.98986111, "epoch": 41.802202952894305, "grad_norm": 0.000578639970626682, "learning_rate": 7.163669779793238e-07, "loss": 0.01786522, "memory(GiB)": 13.7, "step": 89185, "train_speed(iter/s)": 1.531604 }, { "acc": 0.9875, "epoch": 41.80454651980314, "grad_norm": 2.9702558517456055, "learning_rate": 7.159674888098553e-07, "loss": 0.03435314, "memory(GiB)": 13.7, "step": 89190, "train_speed(iter/s)": 1.531607 }, { "acc": 0.98125, "epoch": 41.806890086711974, "grad_norm": 4.94070291519165, "learning_rate": 7.15568102626474e-07, "loss": 0.03667519, "memory(GiB)": 13.7, "step": 89195, "train_speed(iter/s)": 1.531608 }, { "acc": 0.99298611, "epoch": 41.80923365362081, "grad_norm": 0.006949952337890863, "learning_rate": 7.151688194387789e-07, "loss": 0.04250154, "memory(GiB)": 13.7, "step": 89200, "train_speed(iter/s)": 1.531608 }, { "acc": 0.98666668, "epoch": 41.81157722052965, "grad_norm": 0.0025963399093598127, "learning_rate": 7.147696392563643e-07, "loss": 0.02402664, "memory(GiB)": 13.7, "step": 89205, "train_speed(iter/s)": 1.531611 }, { "acc": 0.97875004, "epoch": 41.813920787438484, "grad_norm": 3.0368363857269287, "learning_rate": 7.1437056208883e-07, "loss": 0.03296821, "memory(GiB)": 13.7, "step": 89210, "train_speed(iter/s)": 1.53161 }, { "acc": 0.99375, "epoch": 41.81626435434732, "grad_norm": 1.4532133340835571, "learning_rate": 7.139715879457657e-07, "loss": 0.01460489, "memory(GiB)": 13.7, "step": 89215, "train_speed(iter/s)": 1.531613 }, { "acc": 0.99385414, "epoch": 41.81860792125615, "grad_norm": 1.3950380086898804, "learning_rate": 7.135727168367627e-07, "loss": 0.04543266, "memory(GiB)": 13.7, "step": 89220, "train_speed(iter/s)": 1.531618 }, { "acc": 0.98069439, "epoch": 41.82095148816499, "grad_norm": 2.7123570442199707, "learning_rate": 7.13173948771407e-07, "loss": 0.07469705, "memory(GiB)": 13.7, "step": 89225, "train_speed(iter/s)": 1.531621 }, { "acc": 0.98823862, "epoch": 41.82329505507382, "grad_norm": 6.522487640380859, "learning_rate": 7.12775283759286e-07, "loss": 0.0290871, "memory(GiB)": 13.7, "step": 89230, "train_speed(iter/s)": 1.531625 }, { "acc": 0.996875, "epoch": 41.825638621982655, "grad_norm": 1.48436439037323, "learning_rate": 7.123767218099782e-07, "loss": 0.01493612, "memory(GiB)": 13.7, "step": 89235, "train_speed(iter/s)": 1.531626 }, { "acc": 0.9854167, "epoch": 41.82798218889149, "grad_norm": 0.28743618726730347, "learning_rate": 7.11978262933069e-07, "loss": 0.03896397, "memory(GiB)": 13.7, "step": 89240, "train_speed(iter/s)": 1.531625 }, { "acc": 0.98748512, "epoch": 41.83032575580033, "grad_norm": 2.6015119552612305, "learning_rate": 7.115799071381337e-07, "loss": 0.02887394, "memory(GiB)": 13.7, "step": 89245, "train_speed(iter/s)": 1.53163 }, { "acc": 0.98979168, "epoch": 41.832669322709165, "grad_norm": 0.8298392295837402, "learning_rate": 7.111816544347457e-07, "loss": 0.038507, "memory(GiB)": 13.7, "step": 89250, "train_speed(iter/s)": 1.531634 }, { "acc": 0.99301472, "epoch": 41.835012889618, "grad_norm": 0.03158018738031387, "learning_rate": 7.107835048324804e-07, "loss": 0.02408399, "memory(GiB)": 13.7, "step": 89255, "train_speed(iter/s)": 1.531634 }, { "acc": 0.9833333, "epoch": 41.837356456526834, "grad_norm": 2.738525390625, "learning_rate": 7.103854583409059e-07, "loss": 0.02041004, "memory(GiB)": 13.7, "step": 89260, "train_speed(iter/s)": 1.531634 }, { "acc": 0.9916667, "epoch": 41.83970002343567, "grad_norm": 0.010702861472964287, "learning_rate": 7.099875149695918e-07, "loss": 0.03387771, "memory(GiB)": 13.7, "step": 89265, "train_speed(iter/s)": 1.531633 }, { "acc": 0.99499998, "epoch": 41.8420435903445, "grad_norm": 4.2520527839660645, "learning_rate": 7.095896747281002e-07, "loss": 0.02817466, "memory(GiB)": 13.7, "step": 89270, "train_speed(iter/s)": 1.531638 }, { "acc": 0.97583332, "epoch": 41.84438715725334, "grad_norm": 3.424704074859619, "learning_rate": 7.091919376259984e-07, "loss": 0.04782797, "memory(GiB)": 13.7, "step": 89275, "train_speed(iter/s)": 1.531646 }, { "acc": 0.98363094, "epoch": 41.84673072416218, "grad_norm": 6.123881816864014, "learning_rate": 7.087943036728431e-07, "loss": 0.04393797, "memory(GiB)": 13.7, "step": 89280, "train_speed(iter/s)": 1.53165 }, { "acc": 0.97854166, "epoch": 41.84907429107101, "grad_norm": 2.1276285648345947, "learning_rate": 7.083967728781949e-07, "loss": 0.04700626, "memory(GiB)": 13.7, "step": 89285, "train_speed(iter/s)": 1.531654 }, { "acc": 0.9942709, "epoch": 41.85141785797985, "grad_norm": 1.9989405870437622, "learning_rate": 7.079993452516073e-07, "loss": 0.03576009, "memory(GiB)": 13.7, "step": 89290, "train_speed(iter/s)": 1.531659 }, { "acc": 0.97867565, "epoch": 41.85376142488868, "grad_norm": 4.652810573577881, "learning_rate": 7.076020208026316e-07, "loss": 0.05329635, "memory(GiB)": 13.7, "step": 89295, "train_speed(iter/s)": 1.53166 }, { "acc": 0.98110123, "epoch": 41.856104991797515, "grad_norm": 4.585611820220947, "learning_rate": 7.072047995408216e-07, "loss": 0.06817314, "memory(GiB)": 13.7, "step": 89300, "train_speed(iter/s)": 1.53166 }, { "acc": 0.984375, "epoch": 41.85844855870635, "grad_norm": 3.2234675884246826, "learning_rate": 7.068076814757231e-07, "loss": 0.02845688, "memory(GiB)": 13.7, "step": 89305, "train_speed(iter/s)": 1.531659 }, { "acc": 0.98922272, "epoch": 41.860792125615184, "grad_norm": 0.7892942428588867, "learning_rate": 7.064106666168828e-07, "loss": 0.0419827, "memory(GiB)": 13.7, "step": 89310, "train_speed(iter/s)": 1.531665 }, { "acc": 0.9885417, "epoch": 41.86313569252402, "grad_norm": 5.5241594314575195, "learning_rate": 7.060137549738421e-07, "loss": 0.03300322, "memory(GiB)": 13.7, "step": 89315, "train_speed(iter/s)": 1.531667 }, { "acc": 0.98812504, "epoch": 41.86547925943286, "grad_norm": 4.162662506103516, "learning_rate": 7.056169465561431e-07, "loss": 0.03100931, "memory(GiB)": 13.7, "step": 89320, "train_speed(iter/s)": 1.53167 }, { "acc": 1.0, "epoch": 41.867822826341694, "grad_norm": 2.0451183319091797, "learning_rate": 7.052202413733233e-07, "loss": 0.0188184, "memory(GiB)": 13.7, "step": 89325, "train_speed(iter/s)": 1.531673 }, { "acc": 0.97588539, "epoch": 41.87016639325053, "grad_norm": 0.6596799492835999, "learning_rate": 7.048236394349151e-07, "loss": 0.04508312, "memory(GiB)": 13.7, "step": 89330, "train_speed(iter/s)": 1.531675 }, { "acc": 0.9894886, "epoch": 41.87250996015936, "grad_norm": 0.06298664957284927, "learning_rate": 7.044271407504569e-07, "loss": 0.0164888, "memory(GiB)": 13.7, "step": 89335, "train_speed(iter/s)": 1.531678 }, { "acc": 0.99187498, "epoch": 41.8748535270682, "grad_norm": 3.511585235595703, "learning_rate": 7.040307453294753e-07, "loss": 0.02261313, "memory(GiB)": 13.7, "step": 89340, "train_speed(iter/s)": 1.531683 }, { "acc": 0.984375, "epoch": 41.87719709397703, "grad_norm": 4.841306209564209, "learning_rate": 7.036344531815005e-07, "loss": 0.0315095, "memory(GiB)": 13.7, "step": 89345, "train_speed(iter/s)": 1.531686 }, { "acc": 0.96997471, "epoch": 41.879540660885866, "grad_norm": 8.118880271911621, "learning_rate": 7.032382643160553e-07, "loss": 0.07078373, "memory(GiB)": 13.7, "step": 89350, "train_speed(iter/s)": 1.531688 }, { "acc": 0.99439487, "epoch": 41.88188422779471, "grad_norm": 0.5844577550888062, "learning_rate": 7.028421787426663e-07, "loss": 0.03319767, "memory(GiB)": 13.7, "step": 89355, "train_speed(iter/s)": 1.531694 }, { "acc": 0.98723221, "epoch": 41.88422779470354, "grad_norm": 0.010131175629794598, "learning_rate": 7.024461964708513e-07, "loss": 0.04248854, "memory(GiB)": 13.7, "step": 89360, "train_speed(iter/s)": 1.531702 }, { "acc": 0.97592258, "epoch": 41.886571361612376, "grad_norm": 3.4966347217559814, "learning_rate": 7.020503175101313e-07, "loss": 0.04623744, "memory(GiB)": 13.7, "step": 89365, "train_speed(iter/s)": 1.5317 }, { "acc": 0.97696428, "epoch": 41.88891492852121, "grad_norm": 4.929872035980225, "learning_rate": 7.016545418700197e-07, "loss": 0.06633517, "memory(GiB)": 13.7, "step": 89370, "train_speed(iter/s)": 1.531705 }, { "acc": 0.97833328, "epoch": 41.891258495430044, "grad_norm": 4.576022624969482, "learning_rate": 7.01258869560029e-07, "loss": 0.07401198, "memory(GiB)": 13.7, "step": 89375, "train_speed(iter/s)": 1.531706 }, { "acc": 0.97854166, "epoch": 41.89360206233888, "grad_norm": 1.4911943674087524, "learning_rate": 7.008633005896727e-07, "loss": 0.04563703, "memory(GiB)": 13.7, "step": 89380, "train_speed(iter/s)": 1.531706 }, { "acc": 0.99613094, "epoch": 41.89594562924771, "grad_norm": 2.0957629680633545, "learning_rate": 7.004678349684546e-07, "loss": 0.01814526, "memory(GiB)": 13.7, "step": 89385, "train_speed(iter/s)": 1.531714 }, { "acc": 0.98187504, "epoch": 41.89828919615655, "grad_norm": 6.298682689666748, "learning_rate": 7.000724727058851e-07, "loss": 0.05469023, "memory(GiB)": 13.7, "step": 89390, "train_speed(iter/s)": 1.531718 }, { "acc": 0.99437504, "epoch": 41.90063276306539, "grad_norm": 3.120750665664673, "learning_rate": 6.996772138114642e-07, "loss": 0.02444487, "memory(GiB)": 13.7, "step": 89395, "train_speed(iter/s)": 1.531721 }, { "acc": 0.96791668, "epoch": 41.90297632997422, "grad_norm": 0.9542737007141113, "learning_rate": 6.992820582946949e-07, "loss": 0.04204594, "memory(GiB)": 13.7, "step": 89400, "train_speed(iter/s)": 1.531726 }, { "acc": 0.97800598, "epoch": 41.90531989688306, "grad_norm": 3.755878210067749, "learning_rate": 6.988870061650737e-07, "loss": 0.04699678, "memory(GiB)": 13.7, "step": 89405, "train_speed(iter/s)": 1.531728 }, { "acc": 0.97833328, "epoch": 41.90766346379189, "grad_norm": 2.303333282470703, "learning_rate": 6.984920574320975e-07, "loss": 0.04680164, "memory(GiB)": 13.7, "step": 89410, "train_speed(iter/s)": 1.531732 }, { "acc": 0.99434204, "epoch": 41.910007030700726, "grad_norm": 4.870516777038574, "learning_rate": 6.980972121052583e-07, "loss": 0.02801954, "memory(GiB)": 13.7, "step": 89415, "train_speed(iter/s)": 1.531727 }, { "acc": 0.98937502, "epoch": 41.91235059760956, "grad_norm": 3.3618721961975098, "learning_rate": 6.977024701940472e-07, "loss": 0.0236854, "memory(GiB)": 13.7, "step": 89420, "train_speed(iter/s)": 1.531726 }, { "acc": 0.98812504, "epoch": 41.914694164518394, "grad_norm": 2.621448516845703, "learning_rate": 6.973078317079543e-07, "loss": 0.04242776, "memory(GiB)": 13.7, "step": 89425, "train_speed(iter/s)": 1.531726 }, { "acc": 0.97541666, "epoch": 41.91703773142723, "grad_norm": 4.970938205718994, "learning_rate": 6.969132966564633e-07, "loss": 0.06484334, "memory(GiB)": 13.7, "step": 89430, "train_speed(iter/s)": 1.53173 }, { "acc": 0.99498358, "epoch": 41.91938129833607, "grad_norm": 4.262913227081299, "learning_rate": 6.965188650490588e-07, "loss": 0.02130657, "memory(GiB)": 13.7, "step": 89435, "train_speed(iter/s)": 1.531738 }, { "acc": 0.98193188, "epoch": 41.921724865244904, "grad_norm": 1.4716328382492065, "learning_rate": 6.961245368952191e-07, "loss": 0.05211946, "memory(GiB)": 13.7, "step": 89440, "train_speed(iter/s)": 1.531739 }, { "acc": 0.98916664, "epoch": 41.92406843215374, "grad_norm": 3.5800089836120605, "learning_rate": 6.957303122044269e-07, "loss": 0.04284307, "memory(GiB)": 13.7, "step": 89445, "train_speed(iter/s)": 1.531743 }, { "acc": 0.9802083, "epoch": 41.92641199906257, "grad_norm": 1.699715495109558, "learning_rate": 6.95336190986156e-07, "loss": 0.03661049, "memory(GiB)": 13.7, "step": 89450, "train_speed(iter/s)": 1.531741 }, { "acc": 0.98729172, "epoch": 41.92875556597141, "grad_norm": 1.2447375059127808, "learning_rate": 6.949421732498776e-07, "loss": 0.03212519, "memory(GiB)": 13.7, "step": 89455, "train_speed(iter/s)": 1.531741 }, { "acc": 0.98986111, "epoch": 41.93109913288024, "grad_norm": 2.260798931121826, "learning_rate": 6.945482590050663e-07, "loss": 0.01744986, "memory(GiB)": 13.7, "step": 89460, "train_speed(iter/s)": 1.531748 }, { "acc": 0.97833328, "epoch": 41.933442699789076, "grad_norm": 2.738351583480835, "learning_rate": 6.941544482611873e-07, "loss": 0.05690571, "memory(GiB)": 13.7, "step": 89465, "train_speed(iter/s)": 1.531751 }, { "acc": 0.98297043, "epoch": 41.93578626669792, "grad_norm": 1.7316516637802124, "learning_rate": 6.937607410277093e-07, "loss": 0.04220806, "memory(GiB)": 13.7, "step": 89470, "train_speed(iter/s)": 1.531754 }, { "acc": 0.99125004, "epoch": 41.93812983360675, "grad_norm": 6.423635959625244, "learning_rate": 6.933671373140916e-07, "loss": 0.03841078, "memory(GiB)": 13.7, "step": 89475, "train_speed(iter/s)": 1.531759 }, { "acc": 0.97833328, "epoch": 41.940473400515586, "grad_norm": 0.020337415859103203, "learning_rate": 6.929736371298004e-07, "loss": 0.04091304, "memory(GiB)": 13.7, "step": 89480, "train_speed(iter/s)": 1.531762 }, { "acc": 0.9927083, "epoch": 41.94281696742442, "grad_norm": 3.207707405090332, "learning_rate": 6.925802404842905e-07, "loss": 0.01623935, "memory(GiB)": 13.7, "step": 89485, "train_speed(iter/s)": 1.531768 }, { "acc": 0.97354164, "epoch": 41.945160534333255, "grad_norm": 4.474431991577148, "learning_rate": 6.921869473870201e-07, "loss": 0.05051565, "memory(GiB)": 13.7, "step": 89490, "train_speed(iter/s)": 1.531773 }, { "acc": 0.98865528, "epoch": 41.94750410124209, "grad_norm": 3.209218740463257, "learning_rate": 6.917937578474417e-07, "loss": 0.03388091, "memory(GiB)": 13.7, "step": 89495, "train_speed(iter/s)": 1.531776 }, { "acc": 0.97175598, "epoch": 41.94984766815092, "grad_norm": 4.10689115524292, "learning_rate": 6.914006718750044e-07, "loss": 0.0405456, "memory(GiB)": 13.7, "step": 89500, "train_speed(iter/s)": 1.53178 }, { "acc": 0.97416668, "epoch": 41.95219123505976, "grad_norm": 5.787858963012695, "learning_rate": 6.910076894791599e-07, "loss": 0.1055517, "memory(GiB)": 13.7, "step": 89505, "train_speed(iter/s)": 1.531784 }, { "acc": 0.98000002, "epoch": 41.9545348019686, "grad_norm": 4.21036958694458, "learning_rate": 6.90614810669352e-07, "loss": 0.05317822, "memory(GiB)": 13.7, "step": 89510, "train_speed(iter/s)": 1.531788 }, { "acc": 0.98520832, "epoch": 41.95687836887743, "grad_norm": 5.957292079925537, "learning_rate": 6.90222035455026e-07, "loss": 0.04574427, "memory(GiB)": 13.7, "step": 89515, "train_speed(iter/s)": 1.531791 }, { "acc": 0.9832386, "epoch": 41.95922193578627, "grad_norm": 4.197413921356201, "learning_rate": 6.898293638456205e-07, "loss": 0.03020147, "memory(GiB)": 13.7, "step": 89520, "train_speed(iter/s)": 1.531797 }, { "acc": 0.98041668, "epoch": 41.9615655026951, "grad_norm": 2.249055862426758, "learning_rate": 6.894367958505769e-07, "loss": 0.07248181, "memory(GiB)": 13.7, "step": 89525, "train_speed(iter/s)": 1.531801 }, { "acc": 0.97937498, "epoch": 41.963909069603936, "grad_norm": 5.836747169494629, "learning_rate": 6.890443314793279e-07, "loss": 0.05807686, "memory(GiB)": 13.7, "step": 89530, "train_speed(iter/s)": 1.531804 }, { "acc": 0.98770838, "epoch": 41.96625263651277, "grad_norm": 4.006913661956787, "learning_rate": 6.886519707413109e-07, "loss": 0.03670086, "memory(GiB)": 13.7, "step": 89535, "train_speed(iter/s)": 1.531804 }, { "acc": 0.98465271, "epoch": 41.968596203421605, "grad_norm": 0.017566680908203125, "learning_rate": 6.882597136459549e-07, "loss": 0.04607303, "memory(GiB)": 13.7, "step": 89540, "train_speed(iter/s)": 1.531805 }, { "acc": 0.98217258, "epoch": 41.970939770330446, "grad_norm": 1.6996155977249146, "learning_rate": 6.878675602026871e-07, "loss": 0.03539185, "memory(GiB)": 13.7, "step": 89545, "train_speed(iter/s)": 1.53181 }, { "acc": 0.98125, "epoch": 41.97328333723928, "grad_norm": 1.3830264806747437, "learning_rate": 6.874755104209367e-07, "loss": 0.03130934, "memory(GiB)": 13.7, "step": 89550, "train_speed(iter/s)": 1.531812 }, { "acc": 0.99508934, "epoch": 41.975626904148115, "grad_norm": 1.3745046854019165, "learning_rate": 6.870835643101245e-07, "loss": 0.01696813, "memory(GiB)": 13.7, "step": 89555, "train_speed(iter/s)": 1.531821 }, { "acc": 0.98895836, "epoch": 41.97797047105695, "grad_norm": 4.122488498687744, "learning_rate": 6.866917218796726e-07, "loss": 0.05618817, "memory(GiB)": 13.7, "step": 89560, "train_speed(iter/s)": 1.531824 }, { "acc": 0.98706226, "epoch": 41.98031403796578, "grad_norm": 2.8529441356658936, "learning_rate": 6.862999831389996e-07, "loss": 0.03975636, "memory(GiB)": 13.7, "step": 89565, "train_speed(iter/s)": 1.531825 }, { "acc": 0.971875, "epoch": 41.98265760487462, "grad_norm": 3.378023386001587, "learning_rate": 6.859083480975234e-07, "loss": 0.08189551, "memory(GiB)": 13.7, "step": 89570, "train_speed(iter/s)": 1.53183 }, { "acc": 0.97416668, "epoch": 41.98500117178345, "grad_norm": 5.703723430633545, "learning_rate": 6.855168167646542e-07, "loss": 0.05988465, "memory(GiB)": 13.7, "step": 89575, "train_speed(iter/s)": 1.531833 }, { "acc": 0.98812504, "epoch": 41.987344738692286, "grad_norm": 2.201218605041504, "learning_rate": 6.851253891498062e-07, "loss": 0.03282477, "memory(GiB)": 13.7, "step": 89580, "train_speed(iter/s)": 1.531836 }, { "acc": 0.98529758, "epoch": 41.98968830560113, "grad_norm": 5.385140419006348, "learning_rate": 6.847340652623869e-07, "loss": 0.02781913, "memory(GiB)": 13.7, "step": 89585, "train_speed(iter/s)": 1.531836 }, { "acc": 0.98520832, "epoch": 41.99203187250996, "grad_norm": 0.02740614302456379, "learning_rate": 6.843428451118001e-07, "loss": 0.04404171, "memory(GiB)": 13.7, "step": 89590, "train_speed(iter/s)": 1.531838 }, { "acc": 0.98500004, "epoch": 41.994375439418796, "grad_norm": 2.8093838691711426, "learning_rate": 6.839517287074535e-07, "loss": 0.03347013, "memory(GiB)": 13.7, "step": 89595, "train_speed(iter/s)": 1.531837 }, { "acc": 0.996875, "epoch": 41.99671900632763, "grad_norm": 0.9829486608505249, "learning_rate": 6.835607160587458e-07, "loss": 0.01686562, "memory(GiB)": 13.7, "step": 89600, "train_speed(iter/s)": 1.531845 }, { "acc": 0.98618784, "epoch": 41.999062573236465, "grad_norm": 0.8307217359542847, "learning_rate": 6.831698071750773e-07, "loss": 0.02627357, "memory(GiB)": 13.7, "step": 89605, "train_speed(iter/s)": 1.531849 }, { "acc": 0.9927083, "epoch": 42.0014061401453, "grad_norm": 0.9075208306312561, "learning_rate": 6.827790020658416e-07, "loss": 0.01445798, "memory(GiB)": 13.7, "step": 89610, "train_speed(iter/s)": 1.531832 }, { "acc": 1.0, "epoch": 42.00374970705413, "grad_norm": 2.479840040206909, "learning_rate": 6.823883007404353e-07, "loss": 0.02006444, "memory(GiB)": 13.7, "step": 89615, "train_speed(iter/s)": 1.531838 }, { "acc": 0.97990532, "epoch": 42.006093273962975, "grad_norm": 4.6818742752075195, "learning_rate": 6.819977032082475e-07, "loss": 0.06123952, "memory(GiB)": 13.7, "step": 89620, "train_speed(iter/s)": 1.531835 }, { "acc": 0.98937492, "epoch": 42.00843684087181, "grad_norm": 0.002424938604235649, "learning_rate": 6.816072094786662e-07, "loss": 0.0199322, "memory(GiB)": 13.7, "step": 89625, "train_speed(iter/s)": 1.531836 }, { "acc": 0.9927083, "epoch": 42.01078040778064, "grad_norm": 0.0464288592338562, "learning_rate": 6.812168195610806e-07, "loss": 0.01658739, "memory(GiB)": 13.7, "step": 89630, "train_speed(iter/s)": 1.531839 }, { "acc": 0.97807999, "epoch": 42.01312397468948, "grad_norm": 1.4751532077789307, "learning_rate": 6.808265334648716e-07, "loss": 0.08779594, "memory(GiB)": 13.7, "step": 89635, "train_speed(iter/s)": 1.531843 }, { "acc": 0.98863087, "epoch": 42.01546754159831, "grad_norm": 6.271917819976807, "learning_rate": 6.804363511994231e-07, "loss": 0.03035241, "memory(GiB)": 13.7, "step": 89640, "train_speed(iter/s)": 1.531846 }, { "acc": 0.98458328, "epoch": 42.017811108507146, "grad_norm": 1.8610050678253174, "learning_rate": 6.800462727741094e-07, "loss": 0.04541744, "memory(GiB)": 13.7, "step": 89645, "train_speed(iter/s)": 1.531852 }, { "acc": 0.98604164, "epoch": 42.02015467541598, "grad_norm": 3.829665184020996, "learning_rate": 6.796562981983121e-07, "loss": 0.03621132, "memory(GiB)": 13.7, "step": 89650, "train_speed(iter/s)": 1.531856 }, { "acc": 0.9864584, "epoch": 42.022498242324815, "grad_norm": 4.975039958953857, "learning_rate": 6.792664274814015e-07, "loss": 0.06111983, "memory(GiB)": 13.7, "step": 89655, "train_speed(iter/s)": 1.531858 }, { "acc": 0.97535172, "epoch": 42.024841809233656, "grad_norm": 5.333486080169678, "learning_rate": 6.788766606327499e-07, "loss": 0.04214641, "memory(GiB)": 13.7, "step": 89660, "train_speed(iter/s)": 1.531857 }, { "acc": 0.9854167, "epoch": 42.02718537614249, "grad_norm": 2.909783124923706, "learning_rate": 6.784869976617266e-07, "loss": 0.02700193, "memory(GiB)": 13.7, "step": 89665, "train_speed(iter/s)": 1.531858 }, { "acc": 0.98833332, "epoch": 42.029528943051325, "grad_norm": 5.0583906173706055, "learning_rate": 6.780974385776954e-07, "loss": 0.0254001, "memory(GiB)": 13.7, "step": 89670, "train_speed(iter/s)": 1.531858 }, { "acc": 0.98770294, "epoch": 42.03187250996016, "grad_norm": 4.213674068450928, "learning_rate": 6.777079833900227e-07, "loss": 0.03985934, "memory(GiB)": 13.7, "step": 89675, "train_speed(iter/s)": 1.531862 }, { "acc": 0.9916667, "epoch": 42.034216076868994, "grad_norm": 3.7254910469055176, "learning_rate": 6.773186321080663e-07, "loss": 0.0284638, "memory(GiB)": 13.7, "step": 89680, "train_speed(iter/s)": 1.531864 }, { "acc": 0.97208328, "epoch": 42.03655964377783, "grad_norm": 3.4692773818969727, "learning_rate": 6.769293847411896e-07, "loss": 0.0467234, "memory(GiB)": 13.7, "step": 89685, "train_speed(iter/s)": 1.531869 }, { "acc": 0.9854167, "epoch": 42.03890321068666, "grad_norm": 2.7976038455963135, "learning_rate": 6.765402412987451e-07, "loss": 0.02678166, "memory(GiB)": 13.7, "step": 89690, "train_speed(iter/s)": 1.531875 }, { "acc": 0.97577381, "epoch": 42.041246777595504, "grad_norm": 3.6959691047668457, "learning_rate": 6.761512017900896e-07, "loss": 0.05297049, "memory(GiB)": 13.7, "step": 89695, "train_speed(iter/s)": 1.531875 }, { "acc": 0.98217106, "epoch": 42.04359034450434, "grad_norm": 5.178711414337158, "learning_rate": 6.757622662245711e-07, "loss": 0.05676674, "memory(GiB)": 13.7, "step": 89700, "train_speed(iter/s)": 1.531874 }, { "acc": 0.98656254, "epoch": 42.04593391141317, "grad_norm": 4.007388114929199, "learning_rate": 6.753734346115401e-07, "loss": 0.05303366, "memory(GiB)": 13.7, "step": 89705, "train_speed(iter/s)": 1.531878 }, { "acc": 0.9916667, "epoch": 42.04827747832201, "grad_norm": 3.0233166217803955, "learning_rate": 6.749847069603432e-07, "loss": 0.02277643, "memory(GiB)": 13.7, "step": 89710, "train_speed(iter/s)": 1.531885 }, { "acc": 0.99104166, "epoch": 42.05062104523084, "grad_norm": 0.011333838105201721, "learning_rate": 6.745960832803227e-07, "loss": 0.01535701, "memory(GiB)": 13.7, "step": 89715, "train_speed(iter/s)": 1.531887 }, { "acc": 0.98152781, "epoch": 42.052964612139675, "grad_norm": 0.09040027111768723, "learning_rate": 6.74207563580822e-07, "loss": 0.04360172, "memory(GiB)": 13.7, "step": 89720, "train_speed(iter/s)": 1.531888 }, { "acc": 0.99305916, "epoch": 42.05530817904851, "grad_norm": 4.467565059661865, "learning_rate": 6.738191478711771e-07, "loss": 0.02500051, "memory(GiB)": 13.7, "step": 89725, "train_speed(iter/s)": 1.531892 }, { "acc": 0.9864584, "epoch": 42.057651745957344, "grad_norm": 2.543248176574707, "learning_rate": 6.734308361607266e-07, "loss": 0.04039394, "memory(GiB)": 13.7, "step": 89730, "train_speed(iter/s)": 1.53189 }, { "acc": 0.9822916, "epoch": 42.059995312866185, "grad_norm": 3.1764702796936035, "learning_rate": 6.730426284588012e-07, "loss": 0.03612865, "memory(GiB)": 13.7, "step": 89735, "train_speed(iter/s)": 1.531892 }, { "acc": 0.99273815, "epoch": 42.06233887977502, "grad_norm": 0.8631437420845032, "learning_rate": 6.726545247747368e-07, "loss": 0.03888297, "memory(GiB)": 13.7, "step": 89740, "train_speed(iter/s)": 1.531894 }, { "acc": 0.9822917, "epoch": 42.064682446683854, "grad_norm": 3.562901258468628, "learning_rate": 6.722665251178594e-07, "loss": 0.05332491, "memory(GiB)": 13.7, "step": 89745, "train_speed(iter/s)": 1.531896 }, { "acc": 0.97371712, "epoch": 42.06702601359269, "grad_norm": 0.7021540403366089, "learning_rate": 6.718786294974939e-07, "loss": 0.05064753, "memory(GiB)": 13.7, "step": 89750, "train_speed(iter/s)": 1.531899 }, { "acc": 0.99125004, "epoch": 42.06936958050152, "grad_norm": 3.7833030223846436, "learning_rate": 6.714908379229669e-07, "loss": 0.02427753, "memory(GiB)": 13.7, "step": 89755, "train_speed(iter/s)": 1.531898 }, { "acc": 0.98723211, "epoch": 42.07171314741036, "grad_norm": 5.391718864440918, "learning_rate": 6.711031504035972e-07, "loss": 0.03010061, "memory(GiB)": 13.7, "step": 89760, "train_speed(iter/s)": 1.531896 }, { "acc": 0.9842803, "epoch": 42.07405671431919, "grad_norm": 2.496244192123413, "learning_rate": 6.707155669487056e-07, "loss": 0.04926431, "memory(GiB)": 13.7, "step": 89765, "train_speed(iter/s)": 1.531904 }, { "acc": 0.97562504, "epoch": 42.07640028122803, "grad_norm": 2.8316586017608643, "learning_rate": 6.70328087567605e-07, "loss": 0.02568236, "memory(GiB)": 13.7, "step": 89770, "train_speed(iter/s)": 1.531907 }, { "acc": 0.980966, "epoch": 42.07874384813687, "grad_norm": 3.577766180038452, "learning_rate": 6.699407122696139e-07, "loss": 0.0582257, "memory(GiB)": 13.7, "step": 89775, "train_speed(iter/s)": 1.531907 }, { "acc": 0.97770834, "epoch": 42.0810874150457, "grad_norm": 2.1465251445770264, "learning_rate": 6.695534410640395e-07, "loss": 0.08291873, "memory(GiB)": 13.7, "step": 89780, "train_speed(iter/s)": 1.53191 }, { "acc": 0.97937498, "epoch": 42.083430981954535, "grad_norm": 0.0012654035817831755, "learning_rate": 6.691662739601938e-07, "loss": 0.03333258, "memory(GiB)": 13.7, "step": 89785, "train_speed(iter/s)": 1.531909 }, { "acc": 0.9822917, "epoch": 42.08577454886337, "grad_norm": 5.051346778869629, "learning_rate": 6.687792109673814e-07, "loss": 0.02434422, "memory(GiB)": 13.7, "step": 89790, "train_speed(iter/s)": 1.531912 }, { "acc": 0.9869791, "epoch": 42.088118115772204, "grad_norm": 3.789720058441162, "learning_rate": 6.683922520949032e-07, "loss": 0.02351657, "memory(GiB)": 13.7, "step": 89795, "train_speed(iter/s)": 1.531911 }, { "acc": 0.98395834, "epoch": 42.09046168268104, "grad_norm": 0.019829271361231804, "learning_rate": 6.680053973520654e-07, "loss": 0.05344028, "memory(GiB)": 13.7, "step": 89800, "train_speed(iter/s)": 1.531913 }, { "acc": 0.97583332, "epoch": 42.09280524958987, "grad_norm": 4.007724761962891, "learning_rate": 6.67618646748164e-07, "loss": 0.039549, "memory(GiB)": 13.7, "step": 89805, "train_speed(iter/s)": 1.531913 }, { "acc": 0.98383932, "epoch": 42.095148816498714, "grad_norm": 2.6687369346618652, "learning_rate": 6.672320002924963e-07, "loss": 0.02865389, "memory(GiB)": 13.7, "step": 89810, "train_speed(iter/s)": 1.531915 }, { "acc": 1.0, "epoch": 42.09749238340755, "grad_norm": 0.059165459126234055, "learning_rate": 6.668454579943544e-07, "loss": 0.01831298, "memory(GiB)": 13.7, "step": 89815, "train_speed(iter/s)": 1.531911 }, { "acc": 0.9838789, "epoch": 42.09983595031638, "grad_norm": 3.3410606384277344, "learning_rate": 6.664590198630322e-07, "loss": 0.04881326, "memory(GiB)": 13.7, "step": 89820, "train_speed(iter/s)": 1.531912 }, { "acc": 0.98083334, "epoch": 42.10217951722522, "grad_norm": 0.9528727531433105, "learning_rate": 6.66072685907814e-07, "loss": 0.0350175, "memory(GiB)": 13.7, "step": 89825, "train_speed(iter/s)": 1.531918 }, { "acc": 0.96830359, "epoch": 42.10452308413405, "grad_norm": 2.903970241546631, "learning_rate": 6.656864561379909e-07, "loss": 0.0471805, "memory(GiB)": 13.7, "step": 89830, "train_speed(iter/s)": 1.531924 }, { "acc": 0.98729172, "epoch": 42.106866651042886, "grad_norm": 3.4790701866149902, "learning_rate": 6.653003305628443e-07, "loss": 0.03274803, "memory(GiB)": 13.7, "step": 89835, "train_speed(iter/s)": 1.531927 }, { "acc": 0.99309216, "epoch": 42.10921021795172, "grad_norm": 2.0562403202056885, "learning_rate": 6.649143091916549e-07, "loss": 0.03500574, "memory(GiB)": 13.7, "step": 89840, "train_speed(iter/s)": 1.531931 }, { "acc": 0.97863102, "epoch": 42.11155378486056, "grad_norm": 5.091821193695068, "learning_rate": 6.645283920337028e-07, "loss": 0.04914023, "memory(GiB)": 13.7, "step": 89845, "train_speed(iter/s)": 1.53193 }, { "acc": 0.98113098, "epoch": 42.113897351769396, "grad_norm": 3.5107736587524414, "learning_rate": 6.641425790982614e-07, "loss": 0.05171738, "memory(GiB)": 13.7, "step": 89850, "train_speed(iter/s)": 1.531932 }, { "acc": 0.98465281, "epoch": 42.11624091867823, "grad_norm": 1.857271432876587, "learning_rate": 6.637568703946084e-07, "loss": 0.03342376, "memory(GiB)": 13.7, "step": 89855, "train_speed(iter/s)": 1.53193 }, { "acc": 0.99050598, "epoch": 42.118584485587064, "grad_norm": 2.5566437244415283, "learning_rate": 6.633712659320117e-07, "loss": 0.02030198, "memory(GiB)": 13.7, "step": 89860, "train_speed(iter/s)": 1.531934 }, { "acc": 0.9802084, "epoch": 42.1209280524959, "grad_norm": 6.873855113983154, "learning_rate": 6.629857657197422e-07, "loss": 0.05355921, "memory(GiB)": 13.7, "step": 89865, "train_speed(iter/s)": 1.531936 }, { "acc": 0.991572, "epoch": 42.12327161940473, "grad_norm": 5.596114158630371, "learning_rate": 6.626003697670655e-07, "loss": 0.02321803, "memory(GiB)": 13.7, "step": 89870, "train_speed(iter/s)": 1.531936 }, { "acc": 0.99118309, "epoch": 42.12561518631357, "grad_norm": 2.5607166290283203, "learning_rate": 6.622150780832434e-07, "loss": 0.02207004, "memory(GiB)": 13.7, "step": 89875, "train_speed(iter/s)": 1.531934 }, { "acc": 0.97871246, "epoch": 42.1279587532224, "grad_norm": 0.000985626014880836, "learning_rate": 6.618298906775396e-07, "loss": 0.04644037, "memory(GiB)": 13.7, "step": 89880, "train_speed(iter/s)": 1.531935 }, { "acc": 0.98395824, "epoch": 42.13030232013124, "grad_norm": 0.000790321035310626, "learning_rate": 6.614448075592092e-07, "loss": 0.02101512, "memory(GiB)": 13.7, "step": 89885, "train_speed(iter/s)": 1.53194 }, { "acc": 0.98126068, "epoch": 42.13264588704008, "grad_norm": 3.367717981338501, "learning_rate": 6.610598287375134e-07, "loss": 0.05396785, "memory(GiB)": 13.7, "step": 89890, "train_speed(iter/s)": 1.531942 }, { "acc": 0.98249998, "epoch": 42.13498945394891, "grad_norm": 1.7087291479110718, "learning_rate": 6.60674954221702e-07, "loss": 0.02988787, "memory(GiB)": 13.7, "step": 89895, "train_speed(iter/s)": 1.531943 }, { "acc": 0.98133926, "epoch": 42.137333020857746, "grad_norm": 3.7042529582977295, "learning_rate": 6.602901840210279e-07, "loss": 0.03609711, "memory(GiB)": 13.7, "step": 89900, "train_speed(iter/s)": 1.531945 }, { "acc": 0.98312502, "epoch": 42.13967658776658, "grad_norm": 3.5919785499572754, "learning_rate": 6.599055181447384e-07, "loss": 0.05352694, "memory(GiB)": 13.7, "step": 89905, "train_speed(iter/s)": 1.531946 }, { "acc": 0.98874998, "epoch": 42.142020154675414, "grad_norm": 2.1782689094543457, "learning_rate": 6.595209566020801e-07, "loss": 0.03227577, "memory(GiB)": 13.7, "step": 89910, "train_speed(iter/s)": 1.531947 }, { "acc": 0.98715286, "epoch": 42.14436372158425, "grad_norm": 0.02149374783039093, "learning_rate": 6.591364994022979e-07, "loss": 0.04438367, "memory(GiB)": 13.7, "step": 89915, "train_speed(iter/s)": 1.531951 }, { "acc": 0.990625, "epoch": 42.14670728849309, "grad_norm": 2.4072890281677246, "learning_rate": 6.587521465546309e-07, "loss": 0.04954214, "memory(GiB)": 13.7, "step": 89920, "train_speed(iter/s)": 1.531952 }, { "acc": 0.9833334, "epoch": 42.149050855401924, "grad_norm": 3.0586633682250977, "learning_rate": 6.583678980683191e-07, "loss": 0.04675286, "memory(GiB)": 13.7, "step": 89925, "train_speed(iter/s)": 1.531955 }, { "acc": 0.9921875, "epoch": 42.15139442231076, "grad_norm": 2.6324706077575684, "learning_rate": 6.579837539525974e-07, "loss": 0.01212086, "memory(GiB)": 13.7, "step": 89930, "train_speed(iter/s)": 1.53196 }, { "acc": 0.98247023, "epoch": 42.15373798921959, "grad_norm": 4.041868209838867, "learning_rate": 6.575997142167008e-07, "loss": 0.04175785, "memory(GiB)": 13.7, "step": 89935, "train_speed(iter/s)": 1.531964 }, { "acc": 0.9833334, "epoch": 42.15608155612843, "grad_norm": 4.772237777709961, "learning_rate": 6.572157788698573e-07, "loss": 0.04295248, "memory(GiB)": 13.7, "step": 89940, "train_speed(iter/s)": 1.531966 }, { "acc": 0.99196424, "epoch": 42.15842512303726, "grad_norm": 0.026178210973739624, "learning_rate": 6.568319479213e-07, "loss": 0.03274221, "memory(GiB)": 13.7, "step": 89945, "train_speed(iter/s)": 1.531966 }, { "acc": 0.98520832, "epoch": 42.160768689946096, "grad_norm": 2.5023365020751953, "learning_rate": 6.564482213802516e-07, "loss": 0.03044843, "memory(GiB)": 13.7, "step": 89950, "train_speed(iter/s)": 1.53197 }, { "acc": 0.9888195, "epoch": 42.16311225685493, "grad_norm": 1.5595072507858276, "learning_rate": 6.560645992559376e-07, "loss": 0.03629062, "memory(GiB)": 13.7, "step": 89955, "train_speed(iter/s)": 1.531973 }, { "acc": 0.9958333, "epoch": 42.16545582376377, "grad_norm": 0.3817671239376068, "learning_rate": 6.556810815575787e-07, "loss": 0.02555407, "memory(GiB)": 13.7, "step": 89960, "train_speed(iter/s)": 1.531973 }, { "acc": 0.97124996, "epoch": 42.167799390672606, "grad_norm": 0.9699777960777283, "learning_rate": 6.552976682943911e-07, "loss": 0.04986458, "memory(GiB)": 13.7, "step": 89965, "train_speed(iter/s)": 1.531976 }, { "acc": 0.98986111, "epoch": 42.17014295758144, "grad_norm": 0.005100638139992952, "learning_rate": 6.549143594755934e-07, "loss": 0.02018668, "memory(GiB)": 13.7, "step": 89970, "train_speed(iter/s)": 1.531977 }, { "acc": 0.99237938, "epoch": 42.172486524490274, "grad_norm": 3.8696227073669434, "learning_rate": 6.545311551103961e-07, "loss": 0.01790537, "memory(GiB)": 13.7, "step": 89975, "train_speed(iter/s)": 1.531984 }, { "acc": 0.9807065, "epoch": 42.17483009139911, "grad_norm": 4.461811065673828, "learning_rate": 6.541480552080143e-07, "loss": 0.07324919, "memory(GiB)": 13.7, "step": 89980, "train_speed(iter/s)": 1.531989 }, { "acc": 0.98760424, "epoch": 42.17717365830794, "grad_norm": 4.434698581695557, "learning_rate": 6.537650597776536e-07, "loss": 0.027598, "memory(GiB)": 13.7, "step": 89985, "train_speed(iter/s)": 1.531992 }, { "acc": 0.98008938, "epoch": 42.17951722521678, "grad_norm": 1.919257640838623, "learning_rate": 6.533821688285213e-07, "loss": 0.04903122, "memory(GiB)": 13.7, "step": 89990, "train_speed(iter/s)": 1.531991 }, { "acc": 0.99072914, "epoch": 42.18186079212561, "grad_norm": 0.9514403939247131, "learning_rate": 6.529993823698209e-07, "loss": 0.01835977, "memory(GiB)": 13.7, "step": 89995, "train_speed(iter/s)": 1.531998 }, { "acc": 0.97520838, "epoch": 42.18420435903445, "grad_norm": 0.5922716856002808, "learning_rate": 6.526167004107503e-07, "loss": 0.07039535, "memory(GiB)": 13.7, "step": 90000, "train_speed(iter/s)": 1.532001 }, { "epoch": 42.18420435903445, "eval_acc": 0.7790854522155527, "eval_loss": 1.2584766149520874, "eval_runtime": 143.9519, "eval_samples_per_second": 56.047, "eval_steps_per_second": 7.009, "step": 90000 }, { "acc": 0.99571428, "epoch": 42.18654792594329, "grad_norm": 0.3559039831161499, "learning_rate": 6.522341229605124e-07, "loss": 0.02631581, "memory(GiB)": 13.7, "step": 90005, "train_speed(iter/s)": 1.527493 }, { "acc": 0.9901042, "epoch": 42.18889149285212, "grad_norm": 3.7856149673461914, "learning_rate": 6.518516500283001e-07, "loss": 0.05474343, "memory(GiB)": 13.7, "step": 90010, "train_speed(iter/s)": 1.527495 }, { "acc": 0.98666668, "epoch": 42.191235059760956, "grad_norm": 3.5777297019958496, "learning_rate": 6.514692816233081e-07, "loss": 0.03050245, "memory(GiB)": 13.7, "step": 90015, "train_speed(iter/s)": 1.5275 }, { "acc": 0.97416668, "epoch": 42.19357862666979, "grad_norm": 3.7777440547943115, "learning_rate": 6.510870177547267e-07, "loss": 0.04075505, "memory(GiB)": 13.7, "step": 90020, "train_speed(iter/s)": 1.527505 }, { "acc": 0.9916667, "epoch": 42.195922193578625, "grad_norm": 0.0022711935453116894, "learning_rate": 6.507048584317451e-07, "loss": 0.01609447, "memory(GiB)": 13.7, "step": 90025, "train_speed(iter/s)": 1.527511 }, { "acc": 0.98377514, "epoch": 42.19826576048746, "grad_norm": 1.959976315498352, "learning_rate": 6.503228036635466e-07, "loss": 0.04138708, "memory(GiB)": 13.7, "step": 90030, "train_speed(iter/s)": 1.527513 }, { "acc": 0.98978624, "epoch": 42.2006093273963, "grad_norm": 1.2442831993103027, "learning_rate": 6.499408534593187e-07, "loss": 0.03838492, "memory(GiB)": 13.7, "step": 90035, "train_speed(iter/s)": 1.527516 }, { "acc": 0.99395838, "epoch": 42.202952894305135, "grad_norm": 0.7819015383720398, "learning_rate": 6.495590078282393e-07, "loss": 0.04542353, "memory(GiB)": 13.7, "step": 90040, "train_speed(iter/s)": 1.527522 }, { "acc": 0.9875, "epoch": 42.20529646121397, "grad_norm": 0.16779683530330658, "learning_rate": 6.491772667794873e-07, "loss": 0.02172245, "memory(GiB)": 13.7, "step": 90045, "train_speed(iter/s)": 1.527522 }, { "acc": 0.98090277, "epoch": 42.2076400281228, "grad_norm": 2.0008914470672607, "learning_rate": 6.487956303222388e-07, "loss": 0.0558386, "memory(GiB)": 13.7, "step": 90050, "train_speed(iter/s)": 1.527528 }, { "acc": 0.99437504, "epoch": 42.20998359503164, "grad_norm": 1.702646255493164, "learning_rate": 6.484140984656651e-07, "loss": 0.01688477, "memory(GiB)": 13.7, "step": 90055, "train_speed(iter/s)": 1.527527 }, { "acc": 0.98083334, "epoch": 42.21232716194047, "grad_norm": 3.963956356048584, "learning_rate": 6.480326712189412e-07, "loss": 0.04114123, "memory(GiB)": 13.7, "step": 90060, "train_speed(iter/s)": 1.527526 }, { "acc": 0.98874998, "epoch": 42.214670728849306, "grad_norm": 3.005169630050659, "learning_rate": 6.476513485912317e-07, "loss": 0.02707028, "memory(GiB)": 13.7, "step": 90065, "train_speed(iter/s)": 1.527525 }, { "acc": 0.99750004, "epoch": 42.21701429575814, "grad_norm": 0.4512316882610321, "learning_rate": 6.472701305917044e-07, "loss": 0.01719114, "memory(GiB)": 13.7, "step": 90070, "train_speed(iter/s)": 1.52753 }, { "acc": 0.97956848, "epoch": 42.21935786266698, "grad_norm": 5.011229991912842, "learning_rate": 6.468890172295205e-07, "loss": 0.05393359, "memory(GiB)": 13.7, "step": 90075, "train_speed(iter/s)": 1.527538 }, { "acc": 0.98381681, "epoch": 42.221701429575816, "grad_norm": 3.618579864501953, "learning_rate": 6.465080085138433e-07, "loss": 0.09951736, "memory(GiB)": 13.7, "step": 90080, "train_speed(iter/s)": 1.527543 }, { "acc": 0.99437504, "epoch": 42.22404499648465, "grad_norm": 4.278140544891357, "learning_rate": 6.461271044538299e-07, "loss": 0.03730637, "memory(GiB)": 13.7, "step": 90085, "train_speed(iter/s)": 1.527547 }, { "acc": 0.99020824, "epoch": 42.226388563393485, "grad_norm": 0.004120268393307924, "learning_rate": 6.45746305058633e-07, "loss": 0.04788654, "memory(GiB)": 13.7, "step": 90090, "train_speed(iter/s)": 1.527555 }, { "acc": 0.97620039, "epoch": 42.22873213030232, "grad_norm": 4.613781452178955, "learning_rate": 6.453656103374108e-07, "loss": 0.05895223, "memory(GiB)": 13.7, "step": 90095, "train_speed(iter/s)": 1.527562 }, { "acc": 0.97479172, "epoch": 42.23107569721115, "grad_norm": 3.823171377182007, "learning_rate": 6.449850202993104e-07, "loss": 0.063695, "memory(GiB)": 13.7, "step": 90100, "train_speed(iter/s)": 1.527565 }, { "acc": 0.99258928, "epoch": 42.23341926411999, "grad_norm": 1.2082831859588623, "learning_rate": 6.446045349534821e-07, "loss": 0.03586466, "memory(GiB)": 13.7, "step": 90105, "train_speed(iter/s)": 1.527564 }, { "acc": 0.99652777, "epoch": 42.23576283102883, "grad_norm": 1.6724824905395508, "learning_rate": 6.442241543090698e-07, "loss": 0.02384888, "memory(GiB)": 13.7, "step": 90110, "train_speed(iter/s)": 1.527567 }, { "acc": 0.98292618, "epoch": 42.23810639793766, "grad_norm": 3.194357395172119, "learning_rate": 6.438438783752182e-07, "loss": 0.03686664, "memory(GiB)": 13.7, "step": 90115, "train_speed(iter/s)": 1.52757 }, { "acc": 0.98505678, "epoch": 42.2404499648465, "grad_norm": 2.84985089302063, "learning_rate": 6.434637071610648e-07, "loss": 0.04144925, "memory(GiB)": 13.7, "step": 90120, "train_speed(iter/s)": 1.527575 }, { "acc": 1.0, "epoch": 42.24279353175533, "grad_norm": 2.21683669090271, "learning_rate": 6.430836406757525e-07, "loss": 0.01200244, "memory(GiB)": 13.7, "step": 90125, "train_speed(iter/s)": 1.527577 }, { "acc": 0.97270832, "epoch": 42.245137098664166, "grad_norm": 1.875139594078064, "learning_rate": 6.427036789284143e-07, "loss": 0.06243771, "memory(GiB)": 13.7, "step": 90130, "train_speed(iter/s)": 1.527584 }, { "acc": 0.9989584, "epoch": 42.247480665573, "grad_norm": 2.7131400108337402, "learning_rate": 6.423238219281815e-07, "loss": 0.01981336, "memory(GiB)": 13.7, "step": 90135, "train_speed(iter/s)": 1.527583 }, { "acc": 0.98395834, "epoch": 42.249824232481835, "grad_norm": 3.79225754737854, "learning_rate": 6.419440696841876e-07, "loss": 0.05603323, "memory(GiB)": 13.7, "step": 90140, "train_speed(iter/s)": 1.527584 }, { "acc": 0.9895834, "epoch": 42.25216779939067, "grad_norm": 5.347701549530029, "learning_rate": 6.415644222055574e-07, "loss": 0.03175081, "memory(GiB)": 13.7, "step": 90145, "train_speed(iter/s)": 1.527588 }, { "acc": 0.98872023, "epoch": 42.25451136629951, "grad_norm": 2.5598716735839844, "learning_rate": 6.4118487950142e-07, "loss": 0.02701851, "memory(GiB)": 13.7, "step": 90150, "train_speed(iter/s)": 1.52759 }, { "acc": 0.98583336, "epoch": 42.256854933208345, "grad_norm": 2.2248387336730957, "learning_rate": 6.408054415808961e-07, "loss": 0.02459412, "memory(GiB)": 13.7, "step": 90155, "train_speed(iter/s)": 1.527593 }, { "acc": 0.97773809, "epoch": 42.25919850011718, "grad_norm": 5.096124649047852, "learning_rate": 6.404261084531071e-07, "loss": 0.07462562, "memory(GiB)": 13.7, "step": 90160, "train_speed(iter/s)": 1.527597 }, { "acc": 0.98803024, "epoch": 42.261542067026014, "grad_norm": 5.020758628845215, "learning_rate": 6.40046880127171e-07, "loss": 0.050591, "memory(GiB)": 13.7, "step": 90165, "train_speed(iter/s)": 1.527596 }, { "acc": 0.98173609, "epoch": 42.26388563393485, "grad_norm": 2.845292806625366, "learning_rate": 6.396677566122014e-07, "loss": 0.04126679, "memory(GiB)": 13.7, "step": 90170, "train_speed(iter/s)": 1.527598 }, { "acc": 0.97104168, "epoch": 42.26622920084368, "grad_norm": 5.113269329071045, "learning_rate": 6.392887379173141e-07, "loss": 0.03778934, "memory(GiB)": 13.7, "step": 90175, "train_speed(iter/s)": 1.527604 }, { "acc": 0.99437504, "epoch": 42.26857276775252, "grad_norm": 2.6413750648498535, "learning_rate": 6.389098240516153e-07, "loss": 0.01930376, "memory(GiB)": 13.7, "step": 90180, "train_speed(iter/s)": 1.527604 }, { "acc": 0.9947916, "epoch": 42.27091633466136, "grad_norm": 0.7477543950080872, "learning_rate": 6.385310150242175e-07, "loss": 0.02645074, "memory(GiB)": 13.7, "step": 90185, "train_speed(iter/s)": 1.527606 }, { "acc": 0.99333324, "epoch": 42.27325990157019, "grad_norm": 0.9864184260368347, "learning_rate": 6.381523108442227e-07, "loss": 0.02872041, "memory(GiB)": 13.7, "step": 90190, "train_speed(iter/s)": 1.527607 }, { "acc": 0.9848959, "epoch": 42.27560346847903, "grad_norm": 1.7712455987930298, "learning_rate": 6.377737115207359e-07, "loss": 0.02493585, "memory(GiB)": 13.7, "step": 90195, "train_speed(iter/s)": 1.527608 }, { "acc": 0.99821434, "epoch": 42.27794703538786, "grad_norm": 3.846961498260498, "learning_rate": 6.373952170628549e-07, "loss": 0.01903581, "memory(GiB)": 13.7, "step": 90200, "train_speed(iter/s)": 1.52761 }, { "acc": 0.9786458, "epoch": 42.280290602296695, "grad_norm": 2.476229190826416, "learning_rate": 6.370168274796802e-07, "loss": 0.04172187, "memory(GiB)": 13.7, "step": 90205, "train_speed(iter/s)": 1.527614 }, { "acc": 0.98708334, "epoch": 42.28263416920553, "grad_norm": 2.6945900917053223, "learning_rate": 6.366385427803067e-07, "loss": 0.037775, "memory(GiB)": 13.7, "step": 90210, "train_speed(iter/s)": 1.527617 }, { "acc": 0.96937504, "epoch": 42.284977736114364, "grad_norm": 2.9032819271087646, "learning_rate": 6.362603629738248e-07, "loss": 0.06869646, "memory(GiB)": 13.7, "step": 90215, "train_speed(iter/s)": 1.527621 }, { "acc": 0.9885417, "epoch": 42.2873213030232, "grad_norm": 3.52475643157959, "learning_rate": 6.358822880693271e-07, "loss": 0.02997349, "memory(GiB)": 13.7, "step": 90220, "train_speed(iter/s)": 1.52762 }, { "acc": 0.98500004, "epoch": 42.28966486993204, "grad_norm": 6.863755226135254, "learning_rate": 6.355043180758993e-07, "loss": 0.03339345, "memory(GiB)": 13.7, "step": 90225, "train_speed(iter/s)": 1.527623 }, { "acc": 0.99011364, "epoch": 42.292008436840874, "grad_norm": 2.669844388961792, "learning_rate": 6.351264530026287e-07, "loss": 0.02707155, "memory(GiB)": 13.7, "step": 90230, "train_speed(iter/s)": 1.527624 }, { "acc": 0.98354168, "epoch": 42.29435200374971, "grad_norm": 2.660470724105835, "learning_rate": 6.347486928585948e-07, "loss": 0.03526184, "memory(GiB)": 13.7, "step": 90235, "train_speed(iter/s)": 1.527623 }, { "acc": 0.9770834, "epoch": 42.29669557065854, "grad_norm": 6.642426013946533, "learning_rate": 6.343710376528813e-07, "loss": 0.05151174, "memory(GiB)": 13.7, "step": 90240, "train_speed(iter/s)": 1.527624 }, { "acc": 0.98666668, "epoch": 42.29903913756738, "grad_norm": 3.0700643062591553, "learning_rate": 6.339934873945638e-07, "loss": 0.03063779, "memory(GiB)": 13.7, "step": 90245, "train_speed(iter/s)": 1.527624 }, { "acc": 0.98500004, "epoch": 42.30138270447621, "grad_norm": 3.193657398223877, "learning_rate": 6.336160420927187e-07, "loss": 0.02401271, "memory(GiB)": 13.7, "step": 90250, "train_speed(iter/s)": 1.527627 }, { "acc": 0.96644344, "epoch": 42.303726271385045, "grad_norm": 2.9274044036865234, "learning_rate": 6.332387017564177e-07, "loss": 0.05846437, "memory(GiB)": 13.7, "step": 90255, "train_speed(iter/s)": 1.527633 }, { "acc": 0.9875, "epoch": 42.30606983829389, "grad_norm": 3.485428810119629, "learning_rate": 6.328614663947294e-07, "loss": 0.03116704, "memory(GiB)": 13.7, "step": 90260, "train_speed(iter/s)": 1.527637 }, { "acc": 0.98458328, "epoch": 42.30841340520272, "grad_norm": 0.001843435107730329, "learning_rate": 6.324843360167227e-07, "loss": 0.05162601, "memory(GiB)": 13.7, "step": 90265, "train_speed(iter/s)": 1.527637 }, { "acc": 0.98937502, "epoch": 42.310756972111555, "grad_norm": 7.07429838180542, "learning_rate": 6.321073106314623e-07, "loss": 0.05497672, "memory(GiB)": 13.7, "step": 90270, "train_speed(iter/s)": 1.527638 }, { "acc": 0.98488102, "epoch": 42.31310053902039, "grad_norm": 3.184077024459839, "learning_rate": 6.317303902480125e-07, "loss": 0.02909756, "memory(GiB)": 13.7, "step": 90275, "train_speed(iter/s)": 1.527646 }, { "acc": 0.98562498, "epoch": 42.315444105929224, "grad_norm": 2.713165283203125, "learning_rate": 6.313535748754303e-07, "loss": 0.05827509, "memory(GiB)": 13.7, "step": 90280, "train_speed(iter/s)": 1.527653 }, { "acc": 0.98363094, "epoch": 42.31778767283806, "grad_norm": 2.6530606746673584, "learning_rate": 6.30976864522775e-07, "loss": 0.0336893, "memory(GiB)": 13.7, "step": 90285, "train_speed(iter/s)": 1.527659 }, { "acc": 0.98299818, "epoch": 42.32013123974689, "grad_norm": 1.303055763244629, "learning_rate": 6.306002591991013e-07, "loss": 0.03800581, "memory(GiB)": 13.7, "step": 90290, "train_speed(iter/s)": 1.52766 }, { "acc": 0.98041668, "epoch": 42.32247480665573, "grad_norm": 5.15695333480835, "learning_rate": 6.302237589134591e-07, "loss": 0.05421384, "memory(GiB)": 13.7, "step": 90295, "train_speed(iter/s)": 1.527661 }, { "acc": 0.9864584, "epoch": 42.32481837356457, "grad_norm": 2.172729015350342, "learning_rate": 6.298473636749023e-07, "loss": 0.02389054, "memory(GiB)": 13.7, "step": 90300, "train_speed(iter/s)": 1.527662 }, { "acc": 0.98125, "epoch": 42.3271619404734, "grad_norm": 1.3578577041625977, "learning_rate": 6.294710734924751e-07, "loss": 0.04554261, "memory(GiB)": 13.7, "step": 90305, "train_speed(iter/s)": 1.527668 }, { "acc": 0.97925596, "epoch": 42.32950550738224, "grad_norm": 2.1062698364257812, "learning_rate": 6.290948883752243e-07, "loss": 0.06782062, "memory(GiB)": 13.7, "step": 90310, "train_speed(iter/s)": 1.527668 }, { "acc": 0.9666666, "epoch": 42.33184907429107, "grad_norm": 5.3760600090026855, "learning_rate": 6.287188083321903e-07, "loss": 0.06352295, "memory(GiB)": 13.7, "step": 90315, "train_speed(iter/s)": 1.527669 }, { "acc": 0.99437504, "epoch": 42.334192641199905, "grad_norm": 0.011073010042309761, "learning_rate": 6.283428333724144e-07, "loss": 0.02947535, "memory(GiB)": 13.7, "step": 90320, "train_speed(iter/s)": 1.527673 }, { "acc": 0.9875, "epoch": 42.33653620810874, "grad_norm": 0.00044480914948508143, "learning_rate": 6.279669635049316e-07, "loss": 0.02386287, "memory(GiB)": 13.7, "step": 90325, "train_speed(iter/s)": 1.527674 }, { "acc": 0.98904762, "epoch": 42.338879775017574, "grad_norm": 2.613807439804077, "learning_rate": 6.275911987387799e-07, "loss": 0.02680737, "memory(GiB)": 13.7, "step": 90330, "train_speed(iter/s)": 1.527675 }, { "acc": 0.97383928, "epoch": 42.341223341926415, "grad_norm": 5.750990867614746, "learning_rate": 6.272155390829898e-07, "loss": 0.0721204, "memory(GiB)": 13.7, "step": 90335, "train_speed(iter/s)": 1.527674 }, { "acc": 0.99226189, "epoch": 42.34356690883525, "grad_norm": 4.552734851837158, "learning_rate": 6.268399845465895e-07, "loss": 0.03806431, "memory(GiB)": 13.7, "step": 90340, "train_speed(iter/s)": 1.527679 }, { "acc": 0.97788687, "epoch": 42.345910475744084, "grad_norm": 2.2556545734405518, "learning_rate": 6.264645351386088e-07, "loss": 0.03488319, "memory(GiB)": 13.7, "step": 90345, "train_speed(iter/s)": 1.527685 }, { "acc": 0.990625, "epoch": 42.34825404265292, "grad_norm": 4.2559494972229, "learning_rate": 6.260891908680687e-07, "loss": 0.02735924, "memory(GiB)": 13.7, "step": 90350, "train_speed(iter/s)": 1.52768 }, { "acc": 0.97624998, "epoch": 42.35059760956175, "grad_norm": 5.839428424835205, "learning_rate": 6.257139517439949e-07, "loss": 0.04348054, "memory(GiB)": 13.7, "step": 90355, "train_speed(iter/s)": 1.527682 }, { "acc": 0.99258928, "epoch": 42.35294117647059, "grad_norm": 2.620664596557617, "learning_rate": 6.253388177754048e-07, "loss": 0.02661448, "memory(GiB)": 13.7, "step": 90360, "train_speed(iter/s)": 1.527687 }, { "acc": 0.99187498, "epoch": 42.35528474337942, "grad_norm": 0.8606276512145996, "learning_rate": 6.24963788971317e-07, "loss": 0.02692425, "memory(GiB)": 13.7, "step": 90365, "train_speed(iter/s)": 1.527691 }, { "acc": 0.98008928, "epoch": 42.357628310288256, "grad_norm": 5.180434703826904, "learning_rate": 6.245888653407441e-07, "loss": 0.04209325, "memory(GiB)": 13.7, "step": 90370, "train_speed(iter/s)": 1.527695 }, { "acc": 0.98666134, "epoch": 42.3599718771971, "grad_norm": 3.6374528408050537, "learning_rate": 6.242140468926999e-07, "loss": 0.04632549, "memory(GiB)": 13.7, "step": 90375, "train_speed(iter/s)": 1.527697 }, { "acc": 0.99736109, "epoch": 42.36231544410593, "grad_norm": 1.2990257740020752, "learning_rate": 6.238393336361926e-07, "loss": 0.03502041, "memory(GiB)": 13.7, "step": 90380, "train_speed(iter/s)": 1.527697 }, { "acc": 0.9988636, "epoch": 42.364659011014766, "grad_norm": 2.5813865661621094, "learning_rate": 6.234647255802274e-07, "loss": 0.01285556, "memory(GiB)": 13.7, "step": 90385, "train_speed(iter/s)": 1.527703 }, { "acc": 0.978125, "epoch": 42.3670025779236, "grad_norm": 0.0025571519508957863, "learning_rate": 6.230902227338123e-07, "loss": 0.06353388, "memory(GiB)": 13.7, "step": 90390, "train_speed(iter/s)": 1.527706 }, { "acc": 0.98934517, "epoch": 42.369346144832434, "grad_norm": 2.9122490882873535, "learning_rate": 6.227158251059464e-07, "loss": 0.0367663, "memory(GiB)": 13.7, "step": 90395, "train_speed(iter/s)": 1.527708 }, { "acc": 0.98529758, "epoch": 42.37168971174127, "grad_norm": 3.4136104583740234, "learning_rate": 6.223415327056311e-07, "loss": 0.03930548, "memory(GiB)": 13.7, "step": 90400, "train_speed(iter/s)": 1.527708 }, { "acc": 0.99104166, "epoch": 42.3740332786501, "grad_norm": 1.5249987840652466, "learning_rate": 6.21967345541861e-07, "loss": 0.03399805, "memory(GiB)": 13.7, "step": 90405, "train_speed(iter/s)": 1.52771 }, { "acc": 0.9864584, "epoch": 42.37637684555894, "grad_norm": 1.352826476097107, "learning_rate": 6.215932636236314e-07, "loss": 0.03127314, "memory(GiB)": 13.7, "step": 90410, "train_speed(iter/s)": 1.527712 }, { "acc": 0.98571434, "epoch": 42.37872041246778, "grad_norm": 2.224079132080078, "learning_rate": 6.212192869599348e-07, "loss": 0.03733084, "memory(GiB)": 13.7, "step": 90415, "train_speed(iter/s)": 1.527715 }, { "acc": 0.99690475, "epoch": 42.38106397937661, "grad_norm": 3.261612892150879, "learning_rate": 6.208454155597585e-07, "loss": 0.01695757, "memory(GiB)": 13.7, "step": 90420, "train_speed(iter/s)": 1.527717 }, { "acc": 1.0, "epoch": 42.38340754628545, "grad_norm": 3.9224343299865723, "learning_rate": 6.204716494320912e-07, "loss": 0.03844107, "memory(GiB)": 13.7, "step": 90425, "train_speed(iter/s)": 1.52772 }, { "acc": 0.9819643, "epoch": 42.38575111319428, "grad_norm": 2.5315897464752197, "learning_rate": 6.200979885859148e-07, "loss": 0.02975866, "memory(GiB)": 13.7, "step": 90430, "train_speed(iter/s)": 1.527722 }, { "acc": 0.99511366, "epoch": 42.388094680103116, "grad_norm": 2.2017898559570312, "learning_rate": 6.19724433030213e-07, "loss": 0.02523771, "memory(GiB)": 13.7, "step": 90435, "train_speed(iter/s)": 1.527726 }, { "acc": 0.98467264, "epoch": 42.39043824701195, "grad_norm": 2.6773343086242676, "learning_rate": 6.193509827739619e-07, "loss": 0.03237399, "memory(GiB)": 13.7, "step": 90440, "train_speed(iter/s)": 1.527727 }, { "acc": 0.99458332, "epoch": 42.392781813920784, "grad_norm": 0.07149052619934082, "learning_rate": 6.189776378261419e-07, "loss": 0.01443396, "memory(GiB)": 13.7, "step": 90445, "train_speed(iter/s)": 1.527726 }, { "acc": 0.9979166, "epoch": 42.395125380829626, "grad_norm": 2.464224100112915, "learning_rate": 6.186043981957239e-07, "loss": 0.01902512, "memory(GiB)": 13.7, "step": 90450, "train_speed(iter/s)": 1.527726 }, { "acc": 0.98916664, "epoch": 42.39746894773846, "grad_norm": 0.9314488172531128, "learning_rate": 6.182312638916819e-07, "loss": 0.04390604, "memory(GiB)": 13.7, "step": 90455, "train_speed(iter/s)": 1.52773 }, { "acc": 0.9958333, "epoch": 42.399812514647294, "grad_norm": 9.640008647693321e-05, "learning_rate": 6.178582349229835e-07, "loss": 0.0171641, "memory(GiB)": 13.7, "step": 90460, "train_speed(iter/s)": 1.527736 }, { "acc": 0.98377972, "epoch": 42.40215608155613, "grad_norm": 6.064453601837158, "learning_rate": 6.174853112985939e-07, "loss": 0.03220966, "memory(GiB)": 13.7, "step": 90465, "train_speed(iter/s)": 1.527738 }, { "acc": 0.97800598, "epoch": 42.40449964846496, "grad_norm": 2.6985225677490234, "learning_rate": 6.171124930274789e-07, "loss": 0.04959755, "memory(GiB)": 13.7, "step": 90470, "train_speed(iter/s)": 1.527737 }, { "acc": 0.97607136, "epoch": 42.4068432153738, "grad_norm": 5.784263610839844, "learning_rate": 6.16739780118597e-07, "loss": 0.06068528, "memory(GiB)": 13.7, "step": 90475, "train_speed(iter/s)": 1.527739 }, { "acc": 0.97325764, "epoch": 42.40918678228263, "grad_norm": 1.1913617849349976, "learning_rate": 6.163671725809117e-07, "loss": 0.06387178, "memory(GiB)": 13.7, "step": 90480, "train_speed(iter/s)": 1.52774 }, { "acc": 0.97413692, "epoch": 42.411530349191466, "grad_norm": 4.329389572143555, "learning_rate": 6.159946704233749e-07, "loss": 0.04616426, "memory(GiB)": 13.7, "step": 90485, "train_speed(iter/s)": 1.527744 }, { "acc": 0.98527775, "epoch": 42.41387391610031, "grad_norm": 2.9206717014312744, "learning_rate": 6.156222736549433e-07, "loss": 0.04475141, "memory(GiB)": 13.7, "step": 90490, "train_speed(iter/s)": 1.527746 }, { "acc": 0.97748508, "epoch": 42.41621748300914, "grad_norm": 4.726659297943115, "learning_rate": 6.152499822845645e-07, "loss": 0.05448396, "memory(GiB)": 13.7, "step": 90495, "train_speed(iter/s)": 1.52775 }, { "acc": 0.98312492, "epoch": 42.418561049917976, "grad_norm": 0.028284722939133644, "learning_rate": 6.148777963211914e-07, "loss": 0.0556376, "memory(GiB)": 13.7, "step": 90500, "train_speed(iter/s)": 1.527752 }, { "acc": 0.9875, "epoch": 42.42090461682681, "grad_norm": 0.0019380835583433509, "learning_rate": 6.145057157737686e-07, "loss": 0.04031209, "memory(GiB)": 13.7, "step": 90505, "train_speed(iter/s)": 1.527753 }, { "acc": 0.99011364, "epoch": 42.423248183735645, "grad_norm": 1.0731494426727295, "learning_rate": 6.141337406512383e-07, "loss": 0.026018, "memory(GiB)": 13.7, "step": 90510, "train_speed(iter/s)": 1.527754 }, { "acc": 0.97666664, "epoch": 42.42559175064448, "grad_norm": 3.658717155456543, "learning_rate": 6.137618709625434e-07, "loss": 0.03749152, "memory(GiB)": 13.7, "step": 90515, "train_speed(iter/s)": 1.527758 }, { "acc": 0.98048611, "epoch": 42.42793531755331, "grad_norm": 4.660752296447754, "learning_rate": 6.133901067166204e-07, "loss": 0.05468266, "memory(GiB)": 13.7, "step": 90520, "train_speed(iter/s)": 1.527763 }, { "acc": 0.98779755, "epoch": 42.430278884462155, "grad_norm": 4.821154594421387, "learning_rate": 6.130184479224078e-07, "loss": 0.02951469, "memory(GiB)": 13.7, "step": 90525, "train_speed(iter/s)": 1.527766 }, { "acc": 0.99571428, "epoch": 42.43262245137099, "grad_norm": 3.7943248748779297, "learning_rate": 6.126468945888354e-07, "loss": 0.01734301, "memory(GiB)": 13.7, "step": 90530, "train_speed(iter/s)": 1.527772 }, { "acc": 0.99541664, "epoch": 42.43496601827982, "grad_norm": 2.3994855880737305, "learning_rate": 6.122754467248386e-07, "loss": 0.01798873, "memory(GiB)": 13.7, "step": 90535, "train_speed(iter/s)": 1.527777 }, { "acc": 0.98708334, "epoch": 42.43730958518866, "grad_norm": 3.837102174758911, "learning_rate": 6.119041043393433e-07, "loss": 0.02192969, "memory(GiB)": 13.7, "step": 90540, "train_speed(iter/s)": 1.527781 }, { "acc": 0.9802083, "epoch": 42.43965315209749, "grad_norm": 3.610773801803589, "learning_rate": 6.11532867441275e-07, "loss": 0.02845445, "memory(GiB)": 13.7, "step": 90545, "train_speed(iter/s)": 1.527778 }, { "acc": 0.98812504, "epoch": 42.441996719006326, "grad_norm": 2.903635025024414, "learning_rate": 6.111617360395587e-07, "loss": 0.02651492, "memory(GiB)": 13.7, "step": 90550, "train_speed(iter/s)": 1.52778 }, { "acc": 0.99354172, "epoch": 42.44434028591516, "grad_norm": 3.2109525203704834, "learning_rate": 6.10790710143112e-07, "loss": 0.02901863, "memory(GiB)": 13.7, "step": 90555, "train_speed(iter/s)": 1.52778 }, { "acc": 0.9927084, "epoch": 42.446683852823995, "grad_norm": 1.5428022146224976, "learning_rate": 6.104197897608573e-07, "loss": 0.03103433, "memory(GiB)": 13.7, "step": 90560, "train_speed(iter/s)": 1.527783 }, { "acc": 0.9864584, "epoch": 42.449027419732836, "grad_norm": 3.669572114944458, "learning_rate": 6.100489749017076e-07, "loss": 0.02485611, "memory(GiB)": 13.7, "step": 90565, "train_speed(iter/s)": 1.527781 }, { "acc": 1.0, "epoch": 42.45137098664167, "grad_norm": 1.7875232696533203, "learning_rate": 6.096782655745779e-07, "loss": 0.02754274, "memory(GiB)": 13.7, "step": 90570, "train_speed(iter/s)": 1.527785 }, { "acc": 0.98239584, "epoch": 42.453714553550505, "grad_norm": 1.440976858139038, "learning_rate": 6.093076617883761e-07, "loss": 0.06511641, "memory(GiB)": 13.7, "step": 90575, "train_speed(iter/s)": 1.527794 }, { "acc": 0.98810673, "epoch": 42.45605812045934, "grad_norm": 2.839768409729004, "learning_rate": 6.089371635520136e-07, "loss": 0.05784011, "memory(GiB)": 13.7, "step": 90580, "train_speed(iter/s)": 1.527798 }, { "acc": 0.98633928, "epoch": 42.45840168736817, "grad_norm": 4.087892532348633, "learning_rate": 6.085667708743937e-07, "loss": 0.03672302, "memory(GiB)": 13.7, "step": 90585, "train_speed(iter/s)": 1.527797 }, { "acc": 0.98633928, "epoch": 42.46074525427701, "grad_norm": 3.1152923107147217, "learning_rate": 6.081964837644183e-07, "loss": 0.02814924, "memory(GiB)": 13.7, "step": 90590, "train_speed(iter/s)": 1.5278 }, { "acc": 0.98666668, "epoch": 42.46308882118584, "grad_norm": 1.2420012950897217, "learning_rate": 6.078263022309915e-07, "loss": 0.02242726, "memory(GiB)": 13.7, "step": 90595, "train_speed(iter/s)": 1.527807 }, { "acc": 0.9833334, "epoch": 42.46543238809468, "grad_norm": 0.634856104850769, "learning_rate": 6.074562262830088e-07, "loss": 0.03728251, "memory(GiB)": 13.7, "step": 90600, "train_speed(iter/s)": 1.527806 }, { "acc": 0.97988091, "epoch": 42.46777595500352, "grad_norm": 3.364149332046509, "learning_rate": 6.070862559293664e-07, "loss": 0.0473315, "memory(GiB)": 13.7, "step": 90605, "train_speed(iter/s)": 1.527813 }, { "acc": 0.97937508, "epoch": 42.47011952191235, "grad_norm": 3.7761154174804688, "learning_rate": 6.067163911789566e-07, "loss": 0.03379614, "memory(GiB)": 13.7, "step": 90610, "train_speed(iter/s)": 1.527817 }, { "acc": 0.98136368, "epoch": 42.472463088821186, "grad_norm": 3.0875542163848877, "learning_rate": 6.063466320406706e-07, "loss": 0.03775386, "memory(GiB)": 13.7, "step": 90615, "train_speed(iter/s)": 1.52782 }, { "acc": 0.98026714, "epoch": 42.47480665573002, "grad_norm": 2.0371458530426025, "learning_rate": 6.059769785233939e-07, "loss": 0.04315999, "memory(GiB)": 13.7, "step": 90620, "train_speed(iter/s)": 1.527823 }, { "acc": 0.98604164, "epoch": 42.477150222638855, "grad_norm": 2.7836315631866455, "learning_rate": 6.056074306360156e-07, "loss": 0.03691856, "memory(GiB)": 13.7, "step": 90625, "train_speed(iter/s)": 1.527823 }, { "acc": 0.99187498, "epoch": 42.47949378954769, "grad_norm": 1.7354342937469482, "learning_rate": 6.05237988387416e-07, "loss": 0.0235558, "memory(GiB)": 13.7, "step": 90630, "train_speed(iter/s)": 1.527823 }, { "acc": 0.97911701, "epoch": 42.48183735645652, "grad_norm": 5.988630294799805, "learning_rate": 6.048686517864745e-07, "loss": 0.03875231, "memory(GiB)": 13.7, "step": 90635, "train_speed(iter/s)": 1.527825 }, { "acc": 0.9947917, "epoch": 42.484180923365365, "grad_norm": 2.931433916091919, "learning_rate": 6.044994208420712e-07, "loss": 0.01731949, "memory(GiB)": 13.7, "step": 90640, "train_speed(iter/s)": 1.527824 }, { "acc": 0.9885417, "epoch": 42.4865244902742, "grad_norm": 4.101789951324463, "learning_rate": 6.041302955630779e-07, "loss": 0.04091803, "memory(GiB)": 13.7, "step": 90645, "train_speed(iter/s)": 1.527826 }, { "acc": 0.97870045, "epoch": 42.48886805718303, "grad_norm": 4.046881675720215, "learning_rate": 6.037612759583708e-07, "loss": 0.03489293, "memory(GiB)": 13.7, "step": 90650, "train_speed(iter/s)": 1.527827 }, { "acc": 0.97709274, "epoch": 42.49121162409187, "grad_norm": 4.079565525054932, "learning_rate": 6.033923620368175e-07, "loss": 0.04050485, "memory(GiB)": 13.7, "step": 90655, "train_speed(iter/s)": 1.527829 }, { "acc": 0.97020836, "epoch": 42.4935551910007, "grad_norm": 4.011949062347412, "learning_rate": 6.030235538072869e-07, "loss": 0.04220617, "memory(GiB)": 13.7, "step": 90660, "train_speed(iter/s)": 1.527834 }, { "acc": 0.99092264, "epoch": 42.495898757909536, "grad_norm": 2.9364936351776123, "learning_rate": 6.026548512786424e-07, "loss": 0.03046513, "memory(GiB)": 13.7, "step": 90665, "train_speed(iter/s)": 1.527835 }, { "acc": 0.99125004, "epoch": 42.49824232481837, "grad_norm": 1.3648314476013184, "learning_rate": 6.022862544597484e-07, "loss": 0.0264567, "memory(GiB)": 13.7, "step": 90670, "train_speed(iter/s)": 1.527834 }, { "acc": 0.97312498, "epoch": 42.50058589172721, "grad_norm": 0.9443076252937317, "learning_rate": 6.019177633594635e-07, "loss": 0.0601397, "memory(GiB)": 13.7, "step": 90675, "train_speed(iter/s)": 1.527836 }, { "acc": 0.98562498, "epoch": 42.502929458636046, "grad_norm": 3.304466724395752, "learning_rate": 6.015493779866426e-07, "loss": 0.0256469, "memory(GiB)": 13.7, "step": 90680, "train_speed(iter/s)": 1.527838 }, { "acc": 0.9891964, "epoch": 42.50527302554488, "grad_norm": 3.9242632389068604, "learning_rate": 6.011810983501454e-07, "loss": 0.03842788, "memory(GiB)": 13.7, "step": 90685, "train_speed(iter/s)": 1.527837 }, { "acc": 0.98236113, "epoch": 42.507616592453715, "grad_norm": 3.8232388496398926, "learning_rate": 6.008129244588207e-07, "loss": 0.0538928, "memory(GiB)": 13.7, "step": 90690, "train_speed(iter/s)": 1.527839 }, { "acc": 0.99375, "epoch": 42.50996015936255, "grad_norm": 2.79445219039917, "learning_rate": 6.0044485632152e-07, "loss": 0.04074732, "memory(GiB)": 13.7, "step": 90695, "train_speed(iter/s)": 1.527842 }, { "acc": 0.98070889, "epoch": 42.512303726271384, "grad_norm": 4.5602521896362305, "learning_rate": 6.000768939470882e-07, "loss": 0.05616515, "memory(GiB)": 13.7, "step": 90700, "train_speed(iter/s)": 1.527839 }, { "acc": 0.98154764, "epoch": 42.51464729318022, "grad_norm": 2.6006505489349365, "learning_rate": 5.997090373443731e-07, "loss": 0.0312219, "memory(GiB)": 13.7, "step": 90705, "train_speed(iter/s)": 1.527838 }, { "acc": 0.9864583, "epoch": 42.51699086008905, "grad_norm": 0.08404265344142914, "learning_rate": 5.993412865222148e-07, "loss": 0.01466143, "memory(GiB)": 13.7, "step": 90710, "train_speed(iter/s)": 1.527837 }, { "acc": 0.99014425, "epoch": 42.519334426997894, "grad_norm": 2.724656105041504, "learning_rate": 5.98973641489452e-07, "loss": 0.02263196, "memory(GiB)": 13.7, "step": 90715, "train_speed(iter/s)": 1.527836 }, { "acc": 0.98675594, "epoch": 42.52167799390673, "grad_norm": 2.542569160461426, "learning_rate": 5.986061022549239e-07, "loss": 0.03012804, "memory(GiB)": 13.7, "step": 90720, "train_speed(iter/s)": 1.527838 }, { "acc": 0.98968754, "epoch": 42.52402156081556, "grad_norm": 0.09752406924962997, "learning_rate": 5.982386688274631e-07, "loss": 0.02486292, "memory(GiB)": 13.7, "step": 90725, "train_speed(iter/s)": 1.527838 }, { "acc": 0.9890625, "epoch": 42.5263651277244, "grad_norm": 2.3388307094573975, "learning_rate": 5.978713412159028e-07, "loss": 0.05906174, "memory(GiB)": 13.7, "step": 90730, "train_speed(iter/s)": 1.527837 }, { "acc": 0.99092264, "epoch": 42.52870869463323, "grad_norm": 2.0025041103363037, "learning_rate": 5.975041194290694e-07, "loss": 0.02212059, "memory(GiB)": 13.7, "step": 90735, "train_speed(iter/s)": 1.527842 }, { "acc": 0.97322922, "epoch": 42.531052261542065, "grad_norm": 5.01299524307251, "learning_rate": 5.971370034757945e-07, "loss": 0.03916863, "memory(GiB)": 13.7, "step": 90740, "train_speed(iter/s)": 1.527839 }, { "acc": 0.97927084, "epoch": 42.5333958284509, "grad_norm": 1.4748789072036743, "learning_rate": 5.967699933648986e-07, "loss": 0.04580382, "memory(GiB)": 13.7, "step": 90745, "train_speed(iter/s)": 1.527841 }, { "acc": 0.97805557, "epoch": 42.53573939535974, "grad_norm": 2.948509454727173, "learning_rate": 5.964030891052057e-07, "loss": 0.04871253, "memory(GiB)": 13.7, "step": 90750, "train_speed(iter/s)": 1.527842 }, { "acc": 0.98467264, "epoch": 42.538082962268575, "grad_norm": 5.506198406219482, "learning_rate": 5.960362907055343e-07, "loss": 0.02721691, "memory(GiB)": 13.7, "step": 90755, "train_speed(iter/s)": 1.527843 }, { "acc": 0.9791666, "epoch": 42.54042652917741, "grad_norm": 3.3099606037139893, "learning_rate": 5.956695981746995e-07, "loss": 0.04672008, "memory(GiB)": 13.7, "step": 90760, "train_speed(iter/s)": 1.527844 }, { "acc": 1.0, "epoch": 42.542770096086244, "grad_norm": 2.370565414428711, "learning_rate": 5.953030115215163e-07, "loss": 0.02617807, "memory(GiB)": 13.7, "step": 90765, "train_speed(iter/s)": 1.527848 }, { "acc": 0.98812504, "epoch": 42.54511366299508, "grad_norm": 1.4224110841751099, "learning_rate": 5.949365307547967e-07, "loss": 0.03551648, "memory(GiB)": 13.7, "step": 90770, "train_speed(iter/s)": 1.527853 }, { "acc": 0.99092264, "epoch": 42.54745722990391, "grad_norm": 2.594733238220215, "learning_rate": 5.945701558833502e-07, "loss": 0.03615029, "memory(GiB)": 13.7, "step": 90775, "train_speed(iter/s)": 1.527854 }, { "acc": 0.99008923, "epoch": 42.54980079681275, "grad_norm": 1.8374853134155273, "learning_rate": 5.942038869159819e-07, "loss": 0.02955409, "memory(GiB)": 13.7, "step": 90780, "train_speed(iter/s)": 1.527855 }, { "acc": 0.96652775, "epoch": 42.55214436372158, "grad_norm": 3.5547404289245605, "learning_rate": 5.938377238614974e-07, "loss": 0.06831249, "memory(GiB)": 13.7, "step": 90785, "train_speed(iter/s)": 1.527858 }, { "acc": 0.99291668, "epoch": 42.55448793063042, "grad_norm": 2.636016368865967, "learning_rate": 5.934716667286949e-07, "loss": 0.03879913, "memory(GiB)": 13.7, "step": 90790, "train_speed(iter/s)": 1.527861 }, { "acc": 0.984375, "epoch": 42.55683149753926, "grad_norm": 0.00781638640910387, "learning_rate": 5.931057155263775e-07, "loss": 0.0680742, "memory(GiB)": 13.7, "step": 90795, "train_speed(iter/s)": 1.527857 }, { "acc": 0.9856945, "epoch": 42.55917506444809, "grad_norm": 0.19361759722232819, "learning_rate": 5.927398702633389e-07, "loss": 0.03939573, "memory(GiB)": 13.7, "step": 90800, "train_speed(iter/s)": 1.527863 }, { "acc": 0.9850893, "epoch": 42.561518631356925, "grad_norm": 0.06914510577917099, "learning_rate": 5.923741309483724e-07, "loss": 0.03123159, "memory(GiB)": 13.7, "step": 90805, "train_speed(iter/s)": 1.527859 }, { "acc": 0.9770833, "epoch": 42.56386219826576, "grad_norm": 3.096618175506592, "learning_rate": 5.920084975902713e-07, "loss": 0.03271195, "memory(GiB)": 13.7, "step": 90810, "train_speed(iter/s)": 1.527863 }, { "acc": 0.98594322, "epoch": 42.566205765174594, "grad_norm": 0.00314813619479537, "learning_rate": 5.916429701978214e-07, "loss": 0.03395626, "memory(GiB)": 13.7, "step": 90815, "train_speed(iter/s)": 1.527864 }, { "acc": 0.97124996, "epoch": 42.56854933208343, "grad_norm": 4.0035295486450195, "learning_rate": 5.912775487798115e-07, "loss": 0.03635888, "memory(GiB)": 13.7, "step": 90820, "train_speed(iter/s)": 1.527869 }, { "acc": 0.9916667, "epoch": 42.57089289899227, "grad_norm": 0.002929014153778553, "learning_rate": 5.909122333450221e-07, "loss": 0.03317183, "memory(GiB)": 13.7, "step": 90825, "train_speed(iter/s)": 1.527872 }, { "acc": 0.97381945, "epoch": 42.573236465901104, "grad_norm": 2.3470511436462402, "learning_rate": 5.905470239022374e-07, "loss": 0.05580956, "memory(GiB)": 13.7, "step": 90830, "train_speed(iter/s)": 1.527872 }, { "acc": 0.98395824, "epoch": 42.57558003280994, "grad_norm": 2.2887682914733887, "learning_rate": 5.901819204602352e-07, "loss": 0.03975748, "memory(GiB)": 13.7, "step": 90835, "train_speed(iter/s)": 1.527876 }, { "acc": 0.9875, "epoch": 42.57792359971877, "grad_norm": 4.525044918060303, "learning_rate": 5.898169230277891e-07, "loss": 0.02377997, "memory(GiB)": 13.7, "step": 90840, "train_speed(iter/s)": 1.52788 }, { "acc": 0.98041668, "epoch": 42.58026716662761, "grad_norm": 0.008939751423895359, "learning_rate": 5.894520316136751e-07, "loss": 0.03272361, "memory(GiB)": 13.7, "step": 90845, "train_speed(iter/s)": 1.527881 }, { "acc": 0.96719694, "epoch": 42.58261073353644, "grad_norm": 5.614412307739258, "learning_rate": 5.890872462266606e-07, "loss": 0.05379534, "memory(GiB)": 13.7, "step": 90850, "train_speed(iter/s)": 1.527885 }, { "acc": 0.99154758, "epoch": 42.584954300445276, "grad_norm": 3.28188419342041, "learning_rate": 5.887225668755185e-07, "loss": 0.02552127, "memory(GiB)": 13.7, "step": 90855, "train_speed(iter/s)": 1.527884 }, { "acc": 0.9927084, "epoch": 42.58729786735411, "grad_norm": 3.7761895656585693, "learning_rate": 5.883579935690105e-07, "loss": 0.02491571, "memory(GiB)": 13.7, "step": 90860, "train_speed(iter/s)": 1.527884 }, { "acc": 0.98363094, "epoch": 42.58964143426295, "grad_norm": 4.30078125, "learning_rate": 5.879935263159026e-07, "loss": 0.04456134, "memory(GiB)": 13.7, "step": 90865, "train_speed(iter/s)": 1.527888 }, { "acc": 0.9871726, "epoch": 42.591985001171786, "grad_norm": 1.4405161142349243, "learning_rate": 5.876291651249535e-07, "loss": 0.03923004, "memory(GiB)": 13.7, "step": 90870, "train_speed(iter/s)": 1.527892 }, { "acc": 0.98125, "epoch": 42.59432856808062, "grad_norm": 0.01373822707682848, "learning_rate": 5.872649100049226e-07, "loss": 0.04444146, "memory(GiB)": 13.7, "step": 90875, "train_speed(iter/s)": 1.527894 }, { "acc": 0.97383928, "epoch": 42.596672134989454, "grad_norm": 5.945805072784424, "learning_rate": 5.869007609645647e-07, "loss": 0.08378105, "memory(GiB)": 13.7, "step": 90880, "train_speed(iter/s)": 1.5279 }, { "acc": 0.98562498, "epoch": 42.59901570189829, "grad_norm": 2.190995931625366, "learning_rate": 5.865367180126304e-07, "loss": 0.03528401, "memory(GiB)": 13.7, "step": 90885, "train_speed(iter/s)": 1.527903 }, { "acc": 0.97979164, "epoch": 42.60135926880712, "grad_norm": 3.5308783054351807, "learning_rate": 5.861727811578746e-07, "loss": 0.04441453, "memory(GiB)": 13.7, "step": 90890, "train_speed(iter/s)": 1.527904 }, { "acc": 0.990625, "epoch": 42.60370283571596, "grad_norm": 1.4180045127868652, "learning_rate": 5.858089504090422e-07, "loss": 0.02316477, "memory(GiB)": 13.7, "step": 90895, "train_speed(iter/s)": 1.527909 }, { "acc": 0.98104172, "epoch": 42.6060464026248, "grad_norm": 4.605985641479492, "learning_rate": 5.854452257748801e-07, "loss": 0.06242973, "memory(GiB)": 13.7, "step": 90900, "train_speed(iter/s)": 1.527914 }, { "acc": 0.98312502, "epoch": 42.60838996953363, "grad_norm": 2.417760133743286, "learning_rate": 5.850816072641291e-07, "loss": 0.03727291, "memory(GiB)": 13.7, "step": 90905, "train_speed(iter/s)": 1.527918 }, { "acc": 0.98530636, "epoch": 42.61073353644247, "grad_norm": 3.2753469944000244, "learning_rate": 5.847180948855306e-07, "loss": 0.03245658, "memory(GiB)": 13.7, "step": 90910, "train_speed(iter/s)": 1.527922 }, { "acc": 0.97986107, "epoch": 42.6130771033513, "grad_norm": 1.8455268144607544, "learning_rate": 5.843546886478216e-07, "loss": 0.05946729, "memory(GiB)": 13.7, "step": 90915, "train_speed(iter/s)": 1.527927 }, { "acc": 0.984762, "epoch": 42.615420670260136, "grad_norm": 2.377868413925171, "learning_rate": 5.839913885597396e-07, "loss": 0.04879735, "memory(GiB)": 13.7, "step": 90920, "train_speed(iter/s)": 1.52793 }, { "acc": 0.99333334, "epoch": 42.61776423716897, "grad_norm": 0.17873741686344147, "learning_rate": 5.836281946300147e-07, "loss": 0.01627467, "memory(GiB)": 13.7, "step": 90925, "train_speed(iter/s)": 1.527933 }, { "acc": 0.99125004, "epoch": 42.620107804077804, "grad_norm": 0.005143217276781797, "learning_rate": 5.832651068673766e-07, "loss": 0.01951233, "memory(GiB)": 13.7, "step": 90930, "train_speed(iter/s)": 1.527939 }, { "acc": 0.98968754, "epoch": 42.62245137098664, "grad_norm": 1.0110207796096802, "learning_rate": 5.829021252805544e-07, "loss": 0.04007852, "memory(GiB)": 13.7, "step": 90935, "train_speed(iter/s)": 1.527936 }, { "acc": 0.97823868, "epoch": 42.62479493789548, "grad_norm": 3.7868025302886963, "learning_rate": 5.825392498782704e-07, "loss": 0.06527148, "memory(GiB)": 13.7, "step": 90940, "train_speed(iter/s)": 1.527937 }, { "acc": 0.99008923, "epoch": 42.627138504804314, "grad_norm": 0.8867825269699097, "learning_rate": 5.821764806692511e-07, "loss": 0.02562129, "memory(GiB)": 13.7, "step": 90945, "train_speed(iter/s)": 1.52794 }, { "acc": 0.9791667, "epoch": 42.62948207171315, "grad_norm": 0.44038906693458557, "learning_rate": 5.818138176622121e-07, "loss": 0.04743711, "memory(GiB)": 13.7, "step": 90950, "train_speed(iter/s)": 1.527943 }, { "acc": 0.98571434, "epoch": 42.63182563862198, "grad_norm": 3.546919345855713, "learning_rate": 5.814512608658737e-07, "loss": 0.07186689, "memory(GiB)": 13.7, "step": 90955, "train_speed(iter/s)": 1.527948 }, { "acc": 0.98406248, "epoch": 42.63416920553082, "grad_norm": 2.1933066844940186, "learning_rate": 5.810888102889493e-07, "loss": 0.05529348, "memory(GiB)": 13.7, "step": 90960, "train_speed(iter/s)": 1.527954 }, { "acc": 0.97800598, "epoch": 42.63651277243965, "grad_norm": 2.377851724624634, "learning_rate": 5.807264659401501e-07, "loss": 0.04093675, "memory(GiB)": 13.7, "step": 90965, "train_speed(iter/s)": 1.527958 }, { "acc": 0.97302084, "epoch": 42.638856339348486, "grad_norm": 2.0764403343200684, "learning_rate": 5.803642278281879e-07, "loss": 0.04418862, "memory(GiB)": 13.7, "step": 90970, "train_speed(iter/s)": 1.527959 }, { "acc": 0.99571428, "epoch": 42.64119990625732, "grad_norm": 3.300689697265625, "learning_rate": 5.800020959617657e-07, "loss": 0.02397756, "memory(GiB)": 13.7, "step": 90975, "train_speed(iter/s)": 1.52796 }, { "acc": 0.96967258, "epoch": 42.64354347316616, "grad_norm": 5.2109503746032715, "learning_rate": 5.796400703495934e-07, "loss": 0.08293568, "memory(GiB)": 13.7, "step": 90980, "train_speed(iter/s)": 1.527963 }, { "acc": 0.9864584, "epoch": 42.645887040074996, "grad_norm": 0.9712616801261902, "learning_rate": 5.792781510003686e-07, "loss": 0.06373018, "memory(GiB)": 13.7, "step": 90985, "train_speed(iter/s)": 1.527968 }, { "acc": 0.990625, "epoch": 42.64823060698383, "grad_norm": 1.424691081047058, "learning_rate": 5.78916337922793e-07, "loss": 0.04419134, "memory(GiB)": 13.7, "step": 90990, "train_speed(iter/s)": 1.527972 }, { "acc": 0.97766094, "epoch": 42.650574173892664, "grad_norm": 3.4656338691711426, "learning_rate": 5.78554631125561e-07, "loss": 0.06002784, "memory(GiB)": 13.7, "step": 90995, "train_speed(iter/s)": 1.527976 }, { "acc": 0.97956238, "epoch": 42.6529177408015, "grad_norm": 2.5504534244537354, "learning_rate": 5.781930306173704e-07, "loss": 0.06284185, "memory(GiB)": 13.7, "step": 91000, "train_speed(iter/s)": 1.527983 }, { "acc": 0.97758932, "epoch": 42.65526130771033, "grad_norm": 4.7634406089782715, "learning_rate": 5.77831536406911e-07, "loss": 0.03895314, "memory(GiB)": 13.7, "step": 91005, "train_speed(iter/s)": 1.527986 }, { "acc": 0.98103628, "epoch": 42.65760487461917, "grad_norm": 2.2080960273742676, "learning_rate": 5.774701485028707e-07, "loss": 0.03402262, "memory(GiB)": 13.7, "step": 91010, "train_speed(iter/s)": 1.527988 }, { "acc": 0.98395834, "epoch": 42.65994844152801, "grad_norm": 0.9416411519050598, "learning_rate": 5.771088669139386e-07, "loss": 0.02751119, "memory(GiB)": 13.7, "step": 91015, "train_speed(iter/s)": 1.52799 }, { "acc": 0.98083334, "epoch": 42.66229200843684, "grad_norm": 2.4117186069488525, "learning_rate": 5.767476916487962e-07, "loss": 0.04337997, "memory(GiB)": 13.7, "step": 91020, "train_speed(iter/s)": 1.527991 }, { "acc": 0.98113098, "epoch": 42.66463557534568, "grad_norm": 3.1398766040802, "learning_rate": 5.763866227161271e-07, "loss": 0.04746192, "memory(GiB)": 13.7, "step": 91025, "train_speed(iter/s)": 1.527999 }, { "acc": 0.9895834, "epoch": 42.66697914225451, "grad_norm": 2.69413685798645, "learning_rate": 5.760256601246068e-07, "loss": 0.0322594, "memory(GiB)": 13.7, "step": 91030, "train_speed(iter/s)": 1.528001 }, { "acc": 0.99375, "epoch": 42.669322709163346, "grad_norm": 4.128241539001465, "learning_rate": 5.756648038829166e-07, "loss": 0.02122595, "memory(GiB)": 13.7, "step": 91035, "train_speed(iter/s)": 1.528002 }, { "acc": 0.98812504, "epoch": 42.67166627607218, "grad_norm": 1.4915924072265625, "learning_rate": 5.753040539997267e-07, "loss": 0.02336826, "memory(GiB)": 13.7, "step": 91040, "train_speed(iter/s)": 1.528002 }, { "acc": 0.97904758, "epoch": 42.674009842981015, "grad_norm": 4.619710445404053, "learning_rate": 5.749434104837107e-07, "loss": 0.03916251, "memory(GiB)": 13.7, "step": 91045, "train_speed(iter/s)": 1.528005 }, { "acc": 0.99682541, "epoch": 42.67635340988985, "grad_norm": 0.9360238909721375, "learning_rate": 5.745828733435355e-07, "loss": 0.02085365, "memory(GiB)": 13.7, "step": 91050, "train_speed(iter/s)": 1.528007 }, { "acc": 0.99050598, "epoch": 42.67869697679869, "grad_norm": 0.12101637572050095, "learning_rate": 5.742224425878665e-07, "loss": 0.03445428, "memory(GiB)": 13.7, "step": 91055, "train_speed(iter/s)": 1.528009 }, { "acc": 0.98465271, "epoch": 42.681040543707525, "grad_norm": 1.8668122291564941, "learning_rate": 5.7386211822537e-07, "loss": 0.03749935, "memory(GiB)": 13.7, "step": 91060, "train_speed(iter/s)": 1.528013 }, { "acc": 0.9864583, "epoch": 42.68338411061636, "grad_norm": 3.0583279132843018, "learning_rate": 5.735019002647049e-07, "loss": 0.0350215, "memory(GiB)": 13.7, "step": 91065, "train_speed(iter/s)": 1.528022 }, { "acc": 0.98326397, "epoch": 42.68572767752519, "grad_norm": 2.58426570892334, "learning_rate": 5.731417887145308e-07, "loss": 0.05115011, "memory(GiB)": 13.7, "step": 91070, "train_speed(iter/s)": 1.528025 }, { "acc": 0.98076382, "epoch": 42.68807124443403, "grad_norm": 0.0007006904925219715, "learning_rate": 5.727817835835025e-07, "loss": 0.03899485, "memory(GiB)": 13.7, "step": 91075, "train_speed(iter/s)": 1.528032 }, { "acc": 0.98249998, "epoch": 42.69041481134286, "grad_norm": 2.6015446186065674, "learning_rate": 5.724218848802745e-07, "loss": 0.04632936, "memory(GiB)": 13.7, "step": 91080, "train_speed(iter/s)": 1.52803 }, { "acc": 0.98145828, "epoch": 42.692758378251696, "grad_norm": 3.7456557750701904, "learning_rate": 5.720620926134975e-07, "loss": 0.04588877, "memory(GiB)": 13.7, "step": 91085, "train_speed(iter/s)": 1.528033 }, { "acc": 0.98177481, "epoch": 42.69510194516054, "grad_norm": 3.947373867034912, "learning_rate": 5.717024067918169e-07, "loss": 0.04534504, "memory(GiB)": 13.7, "step": 91090, "train_speed(iter/s)": 1.528038 }, { "acc": 0.97875004, "epoch": 42.69744551206937, "grad_norm": 2.924438953399658, "learning_rate": 5.713428274238827e-07, "loss": 0.03154969, "memory(GiB)": 13.7, "step": 91095, "train_speed(iter/s)": 1.528037 }, { "acc": 0.97956848, "epoch": 42.699789078978206, "grad_norm": 5.583041667938232, "learning_rate": 5.709833545183354e-07, "loss": 0.03038812, "memory(GiB)": 13.7, "step": 91100, "train_speed(iter/s)": 1.528036 }, { "acc": 0.97833328, "epoch": 42.70213264588704, "grad_norm": 6.5543646812438965, "learning_rate": 5.706239880838168e-07, "loss": 0.04959104, "memory(GiB)": 13.7, "step": 91105, "train_speed(iter/s)": 1.528034 }, { "acc": 0.98770828, "epoch": 42.704476212795875, "grad_norm": 1.378166913986206, "learning_rate": 5.702647281289635e-07, "loss": 0.02128982, "memory(GiB)": 13.7, "step": 91110, "train_speed(iter/s)": 1.528032 }, { "acc": 0.9866415, "epoch": 42.70681977970471, "grad_norm": 4.035738468170166, "learning_rate": 5.699055746624118e-07, "loss": 0.04471759, "memory(GiB)": 13.7, "step": 91115, "train_speed(iter/s)": 1.528031 }, { "acc": 0.98041668, "epoch": 42.70916334661354, "grad_norm": 3.882237434387207, "learning_rate": 5.695465276927941e-07, "loss": 0.05524346, "memory(GiB)": 13.7, "step": 91120, "train_speed(iter/s)": 1.528031 }, { "acc": 0.97979164, "epoch": 42.71150691352238, "grad_norm": 2.7483127117156982, "learning_rate": 5.691875872287423e-07, "loss": 0.04724602, "memory(GiB)": 13.7, "step": 91125, "train_speed(iter/s)": 1.528034 }, { "acc": 0.97963543, "epoch": 42.71385048043122, "grad_norm": 0.011046442203223705, "learning_rate": 5.688287532788826e-07, "loss": 0.03131299, "memory(GiB)": 13.7, "step": 91130, "train_speed(iter/s)": 1.528035 }, { "acc": 0.98874998, "epoch": 42.71619404734005, "grad_norm": 1.1726107597351074, "learning_rate": 5.684700258518399e-07, "loss": 0.03282857, "memory(GiB)": 13.7, "step": 91135, "train_speed(iter/s)": 1.528032 }, { "acc": 0.9822917, "epoch": 42.71853761424889, "grad_norm": 4.661524772644043, "learning_rate": 5.681114049562381e-07, "loss": 0.04892934, "memory(GiB)": 13.7, "step": 91140, "train_speed(iter/s)": 1.528033 }, { "acc": 0.98195515, "epoch": 42.72088118115772, "grad_norm": 0.9132405519485474, "learning_rate": 5.677528906006946e-07, "loss": 0.02695344, "memory(GiB)": 13.7, "step": 91145, "train_speed(iter/s)": 1.528036 }, { "acc": 0.97729168, "epoch": 42.723224748066556, "grad_norm": 4.01990270614624, "learning_rate": 5.673944827938314e-07, "loss": 0.0698232, "memory(GiB)": 13.7, "step": 91150, "train_speed(iter/s)": 1.528038 }, { "acc": 0.98291664, "epoch": 42.72556831497539, "grad_norm": 3.507721185684204, "learning_rate": 5.670361815442592e-07, "loss": 0.02503581, "memory(GiB)": 13.7, "step": 91155, "train_speed(iter/s)": 1.528046 }, { "acc": 0.99229164, "epoch": 42.727911881884225, "grad_norm": 3.290452480316162, "learning_rate": 5.666779868605937e-07, "loss": 0.03375859, "memory(GiB)": 13.7, "step": 91160, "train_speed(iter/s)": 1.528049 }, { "acc": 0.984375, "epoch": 42.730255448793066, "grad_norm": 0.7541316151618958, "learning_rate": 5.66319898751442e-07, "loss": 0.0358858, "memory(GiB)": 13.7, "step": 91165, "train_speed(iter/s)": 1.528052 }, { "acc": 0.97922039, "epoch": 42.7325990157019, "grad_norm": 4.084616184234619, "learning_rate": 5.659619172254136e-07, "loss": 0.05961039, "memory(GiB)": 13.7, "step": 91170, "train_speed(iter/s)": 1.528057 }, { "acc": 0.9921875, "epoch": 42.734942582610735, "grad_norm": 2.589308261871338, "learning_rate": 5.656040422911124e-07, "loss": 0.02516204, "memory(GiB)": 13.7, "step": 91175, "train_speed(iter/s)": 1.528056 }, { "acc": 0.98576927, "epoch": 42.73728614951957, "grad_norm": 3.4273433685302734, "learning_rate": 5.652462739571373e-07, "loss": 0.03152525, "memory(GiB)": 13.7, "step": 91180, "train_speed(iter/s)": 1.528061 }, { "acc": 0.99333324, "epoch": 42.739629716428404, "grad_norm": 0.01904045231640339, "learning_rate": 5.648886122320936e-07, "loss": 0.0212154, "memory(GiB)": 13.7, "step": 91185, "train_speed(iter/s)": 1.528063 }, { "acc": 0.98154764, "epoch": 42.74197328333724, "grad_norm": 6.000064373016357, "learning_rate": 5.645310571245748e-07, "loss": 0.03325431, "memory(GiB)": 13.7, "step": 91190, "train_speed(iter/s)": 1.528069 }, { "acc": 0.97666664, "epoch": 42.74431685024607, "grad_norm": 3.5222156047821045, "learning_rate": 5.641736086431763e-07, "loss": 0.03091723, "memory(GiB)": 13.7, "step": 91195, "train_speed(iter/s)": 1.528073 }, { "acc": 0.98840866, "epoch": 42.74666041715491, "grad_norm": 2.1321051120758057, "learning_rate": 5.638162667964885e-07, "loss": 0.03768466, "memory(GiB)": 13.7, "step": 91200, "train_speed(iter/s)": 1.52808 }, { "acc": 0.99682541, "epoch": 42.74900398406375, "grad_norm": 1.251259446144104, "learning_rate": 5.634590315931032e-07, "loss": 0.05816164, "memory(GiB)": 13.7, "step": 91205, "train_speed(iter/s)": 1.528079 }, { "acc": 0.97579079, "epoch": 42.75134755097258, "grad_norm": 4.3185319900512695, "learning_rate": 5.631019030416055e-07, "loss": 0.0505017, "memory(GiB)": 13.7, "step": 91210, "train_speed(iter/s)": 1.528085 }, { "acc": 0.9802083, "epoch": 42.75369111788142, "grad_norm": 3.6518166065216064, "learning_rate": 5.627448811505815e-07, "loss": 0.06264387, "memory(GiB)": 13.7, "step": 91215, "train_speed(iter/s)": 1.528086 }, { "acc": 0.98395834, "epoch": 42.75603468479025, "grad_norm": 7.078396320343018, "learning_rate": 5.623879659286106e-07, "loss": 0.03288114, "memory(GiB)": 13.7, "step": 91220, "train_speed(iter/s)": 1.528089 }, { "acc": 0.98761368, "epoch": 42.758378251699085, "grad_norm": 0.01978130079805851, "learning_rate": 5.620311573842719e-07, "loss": 0.03259911, "memory(GiB)": 13.7, "step": 91225, "train_speed(iter/s)": 1.528095 }, { "acc": 0.98395834, "epoch": 42.76072181860792, "grad_norm": 2.9334323406219482, "learning_rate": 5.616744555261443e-07, "loss": 0.05312378, "memory(GiB)": 13.7, "step": 91230, "train_speed(iter/s)": 1.528098 }, { "acc": 0.98187504, "epoch": 42.763065385516754, "grad_norm": 7.210912704467773, "learning_rate": 5.613178603627973e-07, "loss": 0.04148308, "memory(GiB)": 13.7, "step": 91235, "train_speed(iter/s)": 1.528102 }, { "acc": 0.98872032, "epoch": 42.765408952425595, "grad_norm": 4.024394512176514, "learning_rate": 5.609613719028075e-07, "loss": 0.02524513, "memory(GiB)": 13.7, "step": 91240, "train_speed(iter/s)": 1.528103 }, { "acc": 0.98244047, "epoch": 42.76775251933443, "grad_norm": 2.580404281616211, "learning_rate": 5.606049901547402e-07, "loss": 0.0492427, "memory(GiB)": 13.7, "step": 91245, "train_speed(iter/s)": 1.528105 }, { "acc": 0.98812504, "epoch": 42.770096086243264, "grad_norm": 3.642045736312866, "learning_rate": 5.602487151271637e-07, "loss": 0.02331418, "memory(GiB)": 13.7, "step": 91250, "train_speed(iter/s)": 1.528105 }, { "acc": 0.98500004, "epoch": 42.7724396531521, "grad_norm": 2.369680166244507, "learning_rate": 5.598925468286408e-07, "loss": 0.05109817, "memory(GiB)": 13.7, "step": 91255, "train_speed(iter/s)": 1.528108 }, { "acc": 0.98114586, "epoch": 42.77478322006093, "grad_norm": 2.029879093170166, "learning_rate": 5.595364852677305e-07, "loss": 0.04991348, "memory(GiB)": 13.7, "step": 91260, "train_speed(iter/s)": 1.528109 }, { "acc": 0.97675591, "epoch": 42.77712678696977, "grad_norm": 2.6217758655548096, "learning_rate": 5.591805304529951e-07, "loss": 0.03684753, "memory(GiB)": 13.7, "step": 91265, "train_speed(iter/s)": 1.528114 }, { "acc": 0.97624998, "epoch": 42.7794703538786, "grad_norm": 3.2674925327301025, "learning_rate": 5.588246823929886e-07, "loss": 0.03630198, "memory(GiB)": 13.7, "step": 91270, "train_speed(iter/s)": 1.528118 }, { "acc": 0.98041668, "epoch": 42.781813920787435, "grad_norm": 2.5611791610717773, "learning_rate": 5.584689410962649e-07, "loss": 0.03741205, "memory(GiB)": 13.7, "step": 91275, "train_speed(iter/s)": 1.528119 }, { "acc": 0.99248514, "epoch": 42.78415748769628, "grad_norm": 3.140165328979492, "learning_rate": 5.581133065713742e-07, "loss": 0.04410228, "memory(GiB)": 13.7, "step": 91280, "train_speed(iter/s)": 1.528122 }, { "acc": 0.9947916, "epoch": 42.78650105460511, "grad_norm": 2.4551939964294434, "learning_rate": 5.577577788268658e-07, "loss": 0.01731515, "memory(GiB)": 13.7, "step": 91285, "train_speed(iter/s)": 1.528126 }, { "acc": 0.9866457, "epoch": 42.788844621513945, "grad_norm": 0.009202335961163044, "learning_rate": 5.574023578712834e-07, "loss": 0.05751966, "memory(GiB)": 13.7, "step": 91290, "train_speed(iter/s)": 1.528125 }, { "acc": 0.984375, "epoch": 42.79118818842278, "grad_norm": 2.630295991897583, "learning_rate": 5.570470437131735e-07, "loss": 0.02826355, "memory(GiB)": 13.7, "step": 91295, "train_speed(iter/s)": 1.528128 }, { "acc": 0.99508934, "epoch": 42.793531755331614, "grad_norm": 4.252237319946289, "learning_rate": 5.566918363610747e-07, "loss": 0.04402966, "memory(GiB)": 13.7, "step": 91300, "train_speed(iter/s)": 1.52813 }, { "acc": 0.97520828, "epoch": 42.79587532224045, "grad_norm": 5.3322038650512695, "learning_rate": 5.563367358235237e-07, "loss": 0.06030866, "memory(GiB)": 13.7, "step": 91305, "train_speed(iter/s)": 1.528129 }, { "acc": 0.98779755, "epoch": 42.79821888914928, "grad_norm": 3.2408483028411865, "learning_rate": 5.55981742109059e-07, "loss": 0.05074702, "memory(GiB)": 13.7, "step": 91310, "train_speed(iter/s)": 1.528127 }, { "acc": 0.99375, "epoch": 42.800562456058124, "grad_norm": 2.7819395065307617, "learning_rate": 5.556268552262105e-07, "loss": 0.03478598, "memory(GiB)": 13.7, "step": 91315, "train_speed(iter/s)": 1.52813 }, { "acc": 0.99187498, "epoch": 42.80290602296696, "grad_norm": 3.2263145446777344, "learning_rate": 5.552720751835107e-07, "loss": 0.02338517, "memory(GiB)": 13.7, "step": 91320, "train_speed(iter/s)": 1.528135 }, { "acc": 0.99375, "epoch": 42.80524958987579, "grad_norm": 5.8357014656066895, "learning_rate": 5.549174019894847e-07, "loss": 0.05325532, "memory(GiB)": 13.7, "step": 91325, "train_speed(iter/s)": 1.528137 }, { "acc": 0.9822917, "epoch": 42.80759315678463, "grad_norm": 3.0732996463775635, "learning_rate": 5.545628356526615e-07, "loss": 0.03032398, "memory(GiB)": 13.7, "step": 91330, "train_speed(iter/s)": 1.528135 }, { "acc": 0.9864109, "epoch": 42.80993672369346, "grad_norm": 4.482316493988037, "learning_rate": 5.54208376181561e-07, "loss": 0.0544704, "memory(GiB)": 13.7, "step": 91335, "train_speed(iter/s)": 1.528139 }, { "acc": 0.99294643, "epoch": 42.812280290602295, "grad_norm": 1.0944639444351196, "learning_rate": 5.538540235847047e-07, "loss": 0.02232144, "memory(GiB)": 13.7, "step": 91340, "train_speed(iter/s)": 1.528145 }, { "acc": 0.98716345, "epoch": 42.81462385751113, "grad_norm": 3.6189827919006348, "learning_rate": 5.534997778706093e-07, "loss": 0.04212043, "memory(GiB)": 13.7, "step": 91345, "train_speed(iter/s)": 1.528144 }, { "acc": 0.9738636, "epoch": 42.816967424419964, "grad_norm": 11.392765045166016, "learning_rate": 5.531456390477875e-07, "loss": 0.06270304, "memory(GiB)": 13.7, "step": 91350, "train_speed(iter/s)": 1.528147 }, { "acc": 0.99125004, "epoch": 42.819310991328805, "grad_norm": 0.9261868596076965, "learning_rate": 5.527916071247557e-07, "loss": 0.02305595, "memory(GiB)": 13.7, "step": 91355, "train_speed(iter/s)": 1.528152 }, { "acc": 0.98565474, "epoch": 42.82165455823764, "grad_norm": 2.756901741027832, "learning_rate": 5.524376821100213e-07, "loss": 0.02718877, "memory(GiB)": 13.7, "step": 91360, "train_speed(iter/s)": 1.528155 }, { "acc": 0.98351288, "epoch": 42.823998125146474, "grad_norm": 3.564857006072998, "learning_rate": 5.520838640120927e-07, "loss": 0.04640065, "memory(GiB)": 13.7, "step": 91365, "train_speed(iter/s)": 1.528159 }, { "acc": 0.99404755, "epoch": 42.82634169205531, "grad_norm": 1.6149097681045532, "learning_rate": 5.517301528394732e-07, "loss": 0.02385229, "memory(GiB)": 13.7, "step": 91370, "train_speed(iter/s)": 1.52816 }, { "acc": 0.9885416, "epoch": 42.82868525896414, "grad_norm": 2.487518310546875, "learning_rate": 5.513765486006659e-07, "loss": 0.03556056, "memory(GiB)": 13.7, "step": 91375, "train_speed(iter/s)": 1.528162 }, { "acc": 0.9875, "epoch": 42.83102882587298, "grad_norm": 0.0028284620493650436, "learning_rate": 5.510230513041703e-07, "loss": 0.04044811, "memory(GiB)": 13.7, "step": 91380, "train_speed(iter/s)": 1.528162 }, { "acc": 0.98178034, "epoch": 42.83337239278181, "grad_norm": 1.7798354625701904, "learning_rate": 5.50669660958481e-07, "loss": 0.05732898, "memory(GiB)": 13.7, "step": 91385, "train_speed(iter/s)": 1.528162 }, { "acc": 0.98946428, "epoch": 42.835715959690646, "grad_norm": 0.008494673296809196, "learning_rate": 5.503163775720959e-07, "loss": 0.02988726, "memory(GiB)": 13.7, "step": 91390, "train_speed(iter/s)": 1.528164 }, { "acc": 0.9880208, "epoch": 42.83805952659949, "grad_norm": 3.5779659748077393, "learning_rate": 5.499632011535039e-07, "loss": 0.02087055, "memory(GiB)": 13.7, "step": 91395, "train_speed(iter/s)": 1.528169 }, { "acc": 0.99092264, "epoch": 42.84040309350832, "grad_norm": 0.004254813306033611, "learning_rate": 5.496101317111971e-07, "loss": 0.01894659, "memory(GiB)": 13.7, "step": 91400, "train_speed(iter/s)": 1.528172 }, { "acc": 0.98547354, "epoch": 42.842746660417156, "grad_norm": 3.2790982723236084, "learning_rate": 5.492571692536581e-07, "loss": 0.04097951, "memory(GiB)": 13.7, "step": 91405, "train_speed(iter/s)": 1.528175 }, { "acc": 0.9895833, "epoch": 42.84509022732599, "grad_norm": 5.075161933898926, "learning_rate": 5.489043137893751e-07, "loss": 0.02373933, "memory(GiB)": 13.7, "step": 91410, "train_speed(iter/s)": 1.528179 }, { "acc": 0.98273811, "epoch": 42.847433794234824, "grad_norm": 3.247500419616699, "learning_rate": 5.485515653268268e-07, "loss": 0.03724343, "memory(GiB)": 13.7, "step": 91415, "train_speed(iter/s)": 1.528187 }, { "acc": 0.9916667, "epoch": 42.84977736114366, "grad_norm": 1.1894266605377197, "learning_rate": 5.481989238744943e-07, "loss": 0.01846387, "memory(GiB)": 13.7, "step": 91420, "train_speed(iter/s)": 1.52819 }, { "acc": 0.98937502, "epoch": 42.85212092805249, "grad_norm": 2.6357994079589844, "learning_rate": 5.478463894408524e-07, "loss": 0.04747612, "memory(GiB)": 13.7, "step": 91425, "train_speed(iter/s)": 1.528191 }, { "acc": 0.97258015, "epoch": 42.854464494961334, "grad_norm": 1.6969510316848755, "learning_rate": 5.474939620343741e-07, "loss": 0.04491771, "memory(GiB)": 13.7, "step": 91430, "train_speed(iter/s)": 1.528197 }, { "acc": 0.98487186, "epoch": 42.85680806187017, "grad_norm": 1.8478459119796753, "learning_rate": 5.471416416635331e-07, "loss": 0.06940119, "memory(GiB)": 13.7, "step": 91435, "train_speed(iter/s)": 1.528198 }, { "acc": 0.9890399, "epoch": 42.859151628779, "grad_norm": 4.986227512359619, "learning_rate": 5.467894283367945e-07, "loss": 0.0271647, "memory(GiB)": 13.7, "step": 91440, "train_speed(iter/s)": 1.528198 }, { "acc": 0.99092264, "epoch": 42.86149519568784, "grad_norm": 1.7216984033584595, "learning_rate": 5.464373220626287e-07, "loss": 0.06329803, "memory(GiB)": 13.7, "step": 91445, "train_speed(iter/s)": 1.528202 }, { "acc": 0.98495045, "epoch": 42.86383876259667, "grad_norm": 1.5602129697799683, "learning_rate": 5.460853228494957e-07, "loss": 0.05023473, "memory(GiB)": 13.7, "step": 91450, "train_speed(iter/s)": 1.528207 }, { "acc": 0.98393726, "epoch": 42.866182329505506, "grad_norm": 0.010974257253110409, "learning_rate": 5.457334307058588e-07, "loss": 0.03770571, "memory(GiB)": 13.7, "step": 91455, "train_speed(iter/s)": 1.52821 }, { "acc": 0.98125, "epoch": 42.86852589641434, "grad_norm": 3.516792058944702, "learning_rate": 5.453816456401736e-07, "loss": 0.04140583, "memory(GiB)": 13.7, "step": 91460, "train_speed(iter/s)": 1.528219 }, { "acc": 0.99211311, "epoch": 42.870869463323174, "grad_norm": 3.419647216796875, "learning_rate": 5.450299676608989e-07, "loss": 0.02769451, "memory(GiB)": 13.7, "step": 91465, "train_speed(iter/s)": 1.528219 }, { "acc": 0.98548613, "epoch": 42.873213030232016, "grad_norm": 3.9837629795074463, "learning_rate": 5.446783967764857e-07, "loss": 0.0647002, "memory(GiB)": 13.7, "step": 91470, "train_speed(iter/s)": 1.528219 }, { "acc": 0.99613094, "epoch": 42.87555659714085, "grad_norm": 3.7936649322509766, "learning_rate": 5.44326932995385e-07, "loss": 0.01694753, "memory(GiB)": 13.7, "step": 91475, "train_speed(iter/s)": 1.528215 }, { "acc": 0.99020834, "epoch": 42.877900164049684, "grad_norm": 0.8566932678222656, "learning_rate": 5.439755763260463e-07, "loss": 0.02498029, "memory(GiB)": 13.7, "step": 91480, "train_speed(iter/s)": 1.528214 }, { "acc": 0.9890625, "epoch": 42.88024373095852, "grad_norm": 5.968908309936523, "learning_rate": 5.436243267769124e-07, "loss": 0.0790844, "memory(GiB)": 13.7, "step": 91485, "train_speed(iter/s)": 1.528219 }, { "acc": 0.9927083, "epoch": 42.88258729786735, "grad_norm": 1.6091530323028564, "learning_rate": 5.432731843564293e-07, "loss": 0.04132939, "memory(GiB)": 13.7, "step": 91490, "train_speed(iter/s)": 1.528222 }, { "acc": 0.9770834, "epoch": 42.88493086477619, "grad_norm": 2.7661759853363037, "learning_rate": 5.429221490730336e-07, "loss": 0.03077494, "memory(GiB)": 13.7, "step": 91495, "train_speed(iter/s)": 1.52822 }, { "acc": 0.9885417, "epoch": 42.88727443168502, "grad_norm": 0.28414490818977356, "learning_rate": 5.425712209351674e-07, "loss": 0.01753392, "memory(GiB)": 13.7, "step": 91500, "train_speed(iter/s)": 1.528222 }, { "acc": 0.97988091, "epoch": 42.88961799859386, "grad_norm": 2.666459560394287, "learning_rate": 5.422203999512629e-07, "loss": 0.05338144, "memory(GiB)": 13.7, "step": 91505, "train_speed(iter/s)": 1.528223 }, { "acc": 0.99082794, "epoch": 42.8919615655027, "grad_norm": 1.129539132118225, "learning_rate": 5.418696861297531e-07, "loss": 0.02449308, "memory(GiB)": 13.7, "step": 91510, "train_speed(iter/s)": 1.528225 }, { "acc": 0.98354168, "epoch": 42.89430513241153, "grad_norm": 0.9342901706695557, "learning_rate": 5.415190794790688e-07, "loss": 0.02067699, "memory(GiB)": 13.7, "step": 91515, "train_speed(iter/s)": 1.528226 }, { "acc": 0.9919445, "epoch": 42.896648699320366, "grad_norm": 1.8344975709915161, "learning_rate": 5.411685800076363e-07, "loss": 0.03119934, "memory(GiB)": 13.7, "step": 91520, "train_speed(iter/s)": 1.52823 }, { "acc": 0.99229164, "epoch": 42.8989922662292, "grad_norm": 2.4167661666870117, "learning_rate": 5.408181877238819e-07, "loss": 0.02399449, "memory(GiB)": 13.7, "step": 91525, "train_speed(iter/s)": 1.528236 }, { "acc": 0.98645287, "epoch": 42.901335833138035, "grad_norm": 3.7137250900268555, "learning_rate": 5.404679026362252e-07, "loss": 0.04507757, "memory(GiB)": 13.7, "step": 91530, "train_speed(iter/s)": 1.52824 }, { "acc": 0.98209381, "epoch": 42.90367940004687, "grad_norm": 3.901901960372925, "learning_rate": 5.401177247530898e-07, "loss": 0.04168649, "memory(GiB)": 13.7, "step": 91535, "train_speed(iter/s)": 1.528235 }, { "acc": 0.9947917, "epoch": 42.9060229669557, "grad_norm": 1.0538346767425537, "learning_rate": 5.397676540828899e-07, "loss": 0.02631593, "memory(GiB)": 13.7, "step": 91540, "train_speed(iter/s)": 1.528236 }, { "acc": 0.98811016, "epoch": 42.908366533864545, "grad_norm": 2.723745822906494, "learning_rate": 5.394176906340415e-07, "loss": 0.03534026, "memory(GiB)": 13.7, "step": 91545, "train_speed(iter/s)": 1.528238 }, { "acc": 0.98869057, "epoch": 42.91071010077338, "grad_norm": 1.8071945905685425, "learning_rate": 5.390678344149567e-07, "loss": 0.03238497, "memory(GiB)": 13.7, "step": 91550, "train_speed(iter/s)": 1.528241 }, { "acc": 0.98604164, "epoch": 42.91305366768221, "grad_norm": 0.007408682722598314, "learning_rate": 5.387180854340418e-07, "loss": 0.05754777, "memory(GiB)": 13.7, "step": 91555, "train_speed(iter/s)": 1.528243 }, { "acc": 0.98351192, "epoch": 42.91539723459105, "grad_norm": 5.113324165344238, "learning_rate": 5.383684436997081e-07, "loss": 0.05657762, "memory(GiB)": 13.7, "step": 91560, "train_speed(iter/s)": 1.528243 }, { "acc": 0.9875, "epoch": 42.91774080149988, "grad_norm": 2.4554996490478516, "learning_rate": 5.380189092203563e-07, "loss": 0.02734309, "memory(GiB)": 13.7, "step": 91565, "train_speed(iter/s)": 1.528246 }, { "acc": 0.99624996, "epoch": 42.920084368408716, "grad_norm": 0.03051823563873768, "learning_rate": 5.376694820043911e-07, "loss": 0.02003209, "memory(GiB)": 13.7, "step": 91570, "train_speed(iter/s)": 1.528251 }, { "acc": 0.98604174, "epoch": 42.92242793531755, "grad_norm": 1.094063401222229, "learning_rate": 5.37320162060209e-07, "loss": 0.05823461, "memory(GiB)": 13.7, "step": 91575, "train_speed(iter/s)": 1.528252 }, { "acc": 0.98760414, "epoch": 42.92477150222639, "grad_norm": 1.0759339332580566, "learning_rate": 5.369709493962084e-07, "loss": 0.04246302, "memory(GiB)": 13.7, "step": 91580, "train_speed(iter/s)": 1.528256 }, { "acc": 0.97383938, "epoch": 42.927115069135226, "grad_norm": 0.004528458695858717, "learning_rate": 5.366218440207805e-07, "loss": 0.04736205, "memory(GiB)": 13.7, "step": 91585, "train_speed(iter/s)": 1.528256 }, { "acc": 0.98166666, "epoch": 42.92945863604406, "grad_norm": 2.067680835723877, "learning_rate": 5.362728459423203e-07, "loss": 0.03424856, "memory(GiB)": 13.7, "step": 91590, "train_speed(iter/s)": 1.528258 }, { "acc": 0.97674961, "epoch": 42.931802202952895, "grad_norm": 5.6847004890441895, "learning_rate": 5.359239551692149e-07, "loss": 0.06920304, "memory(GiB)": 13.7, "step": 91595, "train_speed(iter/s)": 1.528259 }, { "acc": 0.99499998, "epoch": 42.93414576986173, "grad_norm": 0.011953121051192284, "learning_rate": 5.355751717098491e-07, "loss": 0.01965785, "memory(GiB)": 13.7, "step": 91600, "train_speed(iter/s)": 1.528261 }, { "acc": 0.99187498, "epoch": 42.93648933677056, "grad_norm": 0.25099170207977295, "learning_rate": 5.352264955726089e-07, "loss": 0.01366274, "memory(GiB)": 13.7, "step": 91605, "train_speed(iter/s)": 1.52826 }, { "acc": 0.98363094, "epoch": 42.9388329036794, "grad_norm": 2.3493402004241943, "learning_rate": 5.348779267658731e-07, "loss": 0.03725627, "memory(GiB)": 13.7, "step": 91610, "train_speed(iter/s)": 1.528262 }, { "acc": 0.98770828, "epoch": 42.94117647058823, "grad_norm": 2.6167550086975098, "learning_rate": 5.345294652980217e-07, "loss": 0.02482405, "memory(GiB)": 13.7, "step": 91615, "train_speed(iter/s)": 1.528263 }, { "acc": 0.98302488, "epoch": 42.94352003749707, "grad_norm": 3.753413438796997, "learning_rate": 5.341811111774297e-07, "loss": 0.04281976, "memory(GiB)": 13.7, "step": 91620, "train_speed(iter/s)": 1.528265 }, { "acc": 0.97635422, "epoch": 42.94586360440591, "grad_norm": 1.9378254413604736, "learning_rate": 5.338328644124719e-07, "loss": 0.07453595, "memory(GiB)": 13.7, "step": 91625, "train_speed(iter/s)": 1.528271 }, { "acc": 0.98611107, "epoch": 42.94820717131474, "grad_norm": 2.7539708614349365, "learning_rate": 5.334847250115179e-07, "loss": 0.02538807, "memory(GiB)": 13.7, "step": 91630, "train_speed(iter/s)": 1.528277 }, { "acc": 0.9885416, "epoch": 42.950550738223576, "grad_norm": 0.0012964693596586585, "learning_rate": 5.331366929829346e-07, "loss": 0.03346364, "memory(GiB)": 13.7, "step": 91635, "train_speed(iter/s)": 1.528281 }, { "acc": 0.98104172, "epoch": 42.95289430513241, "grad_norm": 2.4632208347320557, "learning_rate": 5.327887683350901e-07, "loss": 0.04373017, "memory(GiB)": 13.7, "step": 91640, "train_speed(iter/s)": 1.528284 }, { "acc": 0.98264103, "epoch": 42.955237872041245, "grad_norm": 2.101256847381592, "learning_rate": 5.324409510763436e-07, "loss": 0.04583535, "memory(GiB)": 13.7, "step": 91645, "train_speed(iter/s)": 1.528287 }, { "acc": 0.98298607, "epoch": 42.95758143895008, "grad_norm": 5.249329566955566, "learning_rate": 5.320932412150602e-07, "loss": 0.03306344, "memory(GiB)": 13.7, "step": 91650, "train_speed(iter/s)": 1.52829 }, { "acc": 0.99229164, "epoch": 42.95992500585892, "grad_norm": 2.0869224071502686, "learning_rate": 5.317456387595947e-07, "loss": 0.02886498, "memory(GiB)": 13.7, "step": 91655, "train_speed(iter/s)": 1.52829 }, { "acc": 0.98125, "epoch": 42.962268572767755, "grad_norm": 3.11232328414917, "learning_rate": 5.313981437183035e-07, "loss": 0.02699462, "memory(GiB)": 13.7, "step": 91660, "train_speed(iter/s)": 1.528294 }, { "acc": 0.990625, "epoch": 42.96461213967659, "grad_norm": 1.1871947050094604, "learning_rate": 5.310507560995384e-07, "loss": 0.03497688, "memory(GiB)": 13.7, "step": 91665, "train_speed(iter/s)": 1.528295 }, { "acc": 0.99125004, "epoch": 42.96695570658542, "grad_norm": 3.691159963607788, "learning_rate": 5.307034759116503e-07, "loss": 0.03043069, "memory(GiB)": 13.7, "step": 91670, "train_speed(iter/s)": 1.528296 }, { "acc": 0.98208332, "epoch": 42.96929927349426, "grad_norm": 2.264564037322998, "learning_rate": 5.303563031629862e-07, "loss": 0.04002448, "memory(GiB)": 13.7, "step": 91675, "train_speed(iter/s)": 1.528297 }, { "acc": 0.97738094, "epoch": 42.97164284040309, "grad_norm": 1.9818270206451416, "learning_rate": 5.300092378618895e-07, "loss": 0.04694332, "memory(GiB)": 13.7, "step": 91680, "train_speed(iter/s)": 1.5283 }, { "acc": 0.98791676, "epoch": 42.973986407311926, "grad_norm": 0.6072431802749634, "learning_rate": 5.296622800167058e-07, "loss": 0.0230475, "memory(GiB)": 13.7, "step": 91685, "train_speed(iter/s)": 1.528303 }, { "acc": 0.9895833, "epoch": 42.97632997422076, "grad_norm": 3.1910154819488525, "learning_rate": 5.293154296357717e-07, "loss": 0.03329097, "memory(GiB)": 13.7, "step": 91690, "train_speed(iter/s)": 1.528303 }, { "acc": 0.98604164, "epoch": 42.9786735411296, "grad_norm": 3.147325277328491, "learning_rate": 5.289686867274271e-07, "loss": 0.02942241, "memory(GiB)": 13.7, "step": 91695, "train_speed(iter/s)": 1.528301 }, { "acc": 0.99125004, "epoch": 42.981017108038436, "grad_norm": 3.6315951347351074, "learning_rate": 5.286220513000032e-07, "loss": 0.01320208, "memory(GiB)": 13.7, "step": 91700, "train_speed(iter/s)": 1.528304 }, { "acc": 0.98458328, "epoch": 42.98336067494727, "grad_norm": 0.4487149715423584, "learning_rate": 5.282755233618356e-07, "loss": 0.04304297, "memory(GiB)": 13.7, "step": 91705, "train_speed(iter/s)": 1.528307 }, { "acc": 0.98907204, "epoch": 42.985704241856105, "grad_norm": 0.14199772477149963, "learning_rate": 5.279291029212515e-07, "loss": 0.02333794, "memory(GiB)": 13.7, "step": 91710, "train_speed(iter/s)": 1.52831 }, { "acc": 0.98273811, "epoch": 42.98804780876494, "grad_norm": 2.5468406677246094, "learning_rate": 5.275827899865786e-07, "loss": 0.06599679, "memory(GiB)": 13.7, "step": 91715, "train_speed(iter/s)": 1.528309 }, { "acc": 0.9833334, "epoch": 42.990391375673774, "grad_norm": 5.730862617492676, "learning_rate": 5.272365845661411e-07, "loss": 0.03983583, "memory(GiB)": 13.7, "step": 91720, "train_speed(iter/s)": 1.528309 }, { "acc": 0.98874998, "epoch": 42.99273494258261, "grad_norm": 2.579554796218872, "learning_rate": 5.268904866682595e-07, "loss": 0.0308365, "memory(GiB)": 13.7, "step": 91725, "train_speed(iter/s)": 1.528313 }, { "acc": 0.990625, "epoch": 42.99507850949145, "grad_norm": 2.2890894412994385, "learning_rate": 5.265444963012546e-07, "loss": 0.01939347, "memory(GiB)": 13.7, "step": 91730, "train_speed(iter/s)": 1.528315 }, { "acc": 0.991572, "epoch": 42.997422076400284, "grad_norm": 0.013386407867074013, "learning_rate": 5.261986134734395e-07, "loss": 0.02633958, "memory(GiB)": 13.7, "step": 91735, "train_speed(iter/s)": 1.52832 }, { "acc": 0.996875, "epoch": 42.99976564330912, "grad_norm": 1.7495334148406982, "learning_rate": 5.258528381931327e-07, "loss": 0.01632685, "memory(GiB)": 13.7, "step": 91740, "train_speed(iter/s)": 1.528322 }, { "acc": 0.99027777, "epoch": 43.00210921021795, "grad_norm": 2.295635461807251, "learning_rate": 5.255071704686427e-07, "loss": 0.03037072, "memory(GiB)": 13.7, "step": 91745, "train_speed(iter/s)": 1.528304 }, { "acc": 0.9736805, "epoch": 43.00445277712679, "grad_norm": 1.8268648386001587, "learning_rate": 5.251616103082792e-07, "loss": 0.04414425, "memory(GiB)": 13.7, "step": 91750, "train_speed(iter/s)": 1.528307 }, { "acc": 0.99321432, "epoch": 43.00679634403562, "grad_norm": 0.0008704282809048891, "learning_rate": 5.248161577203474e-07, "loss": 0.01753658, "memory(GiB)": 13.7, "step": 91755, "train_speed(iter/s)": 1.528304 }, { "acc": 0.98571434, "epoch": 43.009139910944455, "grad_norm": 0.07870914041996002, "learning_rate": 5.244708127131511e-07, "loss": 0.04460868, "memory(GiB)": 13.7, "step": 91760, "train_speed(iter/s)": 1.528307 }, { "acc": 0.98270836, "epoch": 43.01148347785329, "grad_norm": 0.08599638193845749, "learning_rate": 5.241255752949928e-07, "loss": 0.02731362, "memory(GiB)": 13.7, "step": 91765, "train_speed(iter/s)": 1.528314 }, { "acc": 0.9911458, "epoch": 43.01382704476213, "grad_norm": 0.0011829999275505543, "learning_rate": 5.237804454741689e-07, "loss": 0.02493783, "memory(GiB)": 13.7, "step": 91770, "train_speed(iter/s)": 1.528315 }, { "acc": 0.98604164, "epoch": 43.016170611670965, "grad_norm": 3.117095947265625, "learning_rate": 5.234354232589764e-07, "loss": 0.02861784, "memory(GiB)": 13.7, "step": 91775, "train_speed(iter/s)": 1.528323 }, { "acc": 0.98258934, "epoch": 43.0185141785798, "grad_norm": 5.922943115234375, "learning_rate": 5.230905086577072e-07, "loss": 0.03605608, "memory(GiB)": 13.7, "step": 91780, "train_speed(iter/s)": 1.528323 }, { "acc": 0.98111115, "epoch": 43.020857745488634, "grad_norm": 2.7993004322052, "learning_rate": 5.22745701678654e-07, "loss": 0.0372997, "memory(GiB)": 13.7, "step": 91785, "train_speed(iter/s)": 1.528329 }, { "acc": 0.9885416, "epoch": 43.02320131239747, "grad_norm": 2.646322250366211, "learning_rate": 5.224010023301014e-07, "loss": 0.02159333, "memory(GiB)": 13.7, "step": 91790, "train_speed(iter/s)": 1.528331 }, { "acc": 0.97406254, "epoch": 43.0255448793063, "grad_norm": 3.6546590328216553, "learning_rate": 5.220564106203388e-07, "loss": 0.06158078, "memory(GiB)": 13.7, "step": 91795, "train_speed(iter/s)": 1.528335 }, { "acc": 0.97696438, "epoch": 43.02788844621514, "grad_norm": 1.0451709032058716, "learning_rate": 5.217119265576473e-07, "loss": 0.03574376, "memory(GiB)": 13.7, "step": 91800, "train_speed(iter/s)": 1.528337 }, { "acc": 0.98282194, "epoch": 43.03023201312398, "grad_norm": 4.887043476104736, "learning_rate": 5.213675501503064e-07, "loss": 0.04338027, "memory(GiB)": 13.7, "step": 91805, "train_speed(iter/s)": 1.52834 }, { "acc": 0.98380203, "epoch": 43.03257558003281, "grad_norm": 1.9040729999542236, "learning_rate": 5.210232814065952e-07, "loss": 0.03647836, "memory(GiB)": 13.7, "step": 91810, "train_speed(iter/s)": 1.528342 }, { "acc": 0.99229164, "epoch": 43.03491914694165, "grad_norm": 2.7616658210754395, "learning_rate": 5.206791203347875e-07, "loss": 0.0154544, "memory(GiB)": 13.7, "step": 91815, "train_speed(iter/s)": 1.528346 }, { "acc": 0.9833333, "epoch": 43.03726271385048, "grad_norm": 2.193270683288574, "learning_rate": 5.203350669431567e-07, "loss": 0.06062189, "memory(GiB)": 13.7, "step": 91820, "train_speed(iter/s)": 1.528349 }, { "acc": 0.9880208, "epoch": 43.039606280759315, "grad_norm": 3.0102620124816895, "learning_rate": 5.199911212399713e-07, "loss": 0.02392465, "memory(GiB)": 13.7, "step": 91825, "train_speed(iter/s)": 1.528351 }, { "acc": 0.98715277, "epoch": 43.04194984766815, "grad_norm": 1.6274065971374512, "learning_rate": 5.196472832335008e-07, "loss": 0.0402486, "memory(GiB)": 13.7, "step": 91830, "train_speed(iter/s)": 1.528356 }, { "acc": 0.98993053, "epoch": 43.044293414576984, "grad_norm": 0.019141819328069687, "learning_rate": 5.193035529320083e-07, "loss": 0.04634118, "memory(GiB)": 13.7, "step": 91835, "train_speed(iter/s)": 1.528356 }, { "acc": 0.98946428, "epoch": 43.04663698148582, "grad_norm": 3.297445297241211, "learning_rate": 5.189599303437575e-07, "loss": 0.03392884, "memory(GiB)": 13.7, "step": 91840, "train_speed(iter/s)": 1.528358 }, { "acc": 0.98392859, "epoch": 43.04898054839466, "grad_norm": 2.3207764625549316, "learning_rate": 5.186164154770072e-07, "loss": 0.03821517, "memory(GiB)": 13.7, "step": 91845, "train_speed(iter/s)": 1.528363 }, { "acc": 0.97579441, "epoch": 43.051324115303494, "grad_norm": 3.7003114223480225, "learning_rate": 5.182730083400122e-07, "loss": 0.06953842, "memory(GiB)": 13.7, "step": 91850, "train_speed(iter/s)": 1.528364 }, { "acc": 0.99452381, "epoch": 43.05366768221233, "grad_norm": 3.3012070655822754, "learning_rate": 5.179297089410304e-07, "loss": 0.03242837, "memory(GiB)": 13.7, "step": 91855, "train_speed(iter/s)": 1.528366 }, { "acc": 0.9904356, "epoch": 43.05601124912116, "grad_norm": 0.0002807657001540065, "learning_rate": 5.175865172883113e-07, "loss": 0.04201613, "memory(GiB)": 13.7, "step": 91860, "train_speed(iter/s)": 1.528373 }, { "acc": 0.97696428, "epoch": 43.05835481603, "grad_norm": 4.391465663909912, "learning_rate": 5.172434333901057e-07, "loss": 0.04112403, "memory(GiB)": 13.7, "step": 91865, "train_speed(iter/s)": 1.528377 }, { "acc": 0.99385414, "epoch": 43.06069838293883, "grad_norm": 2.320563316345215, "learning_rate": 5.169004572546584e-07, "loss": 0.03235955, "memory(GiB)": 13.7, "step": 91870, "train_speed(iter/s)": 1.528383 }, { "acc": 0.9842804, "epoch": 43.063041949847666, "grad_norm": 3.071650266647339, "learning_rate": 5.165575888902153e-07, "loss": 0.0318943, "memory(GiB)": 13.7, "step": 91875, "train_speed(iter/s)": 1.528388 }, { "acc": 0.99048615, "epoch": 43.06538551675651, "grad_norm": 4.353085041046143, "learning_rate": 5.16214828305015e-07, "loss": 0.02933779, "memory(GiB)": 13.7, "step": 91880, "train_speed(iter/s)": 1.528392 }, { "acc": 0.99412775, "epoch": 43.06772908366534, "grad_norm": 0.9853548407554626, "learning_rate": 5.158721755072997e-07, "loss": 0.01920556, "memory(GiB)": 13.7, "step": 91885, "train_speed(iter/s)": 1.528392 }, { "acc": 0.99131947, "epoch": 43.070072650574176, "grad_norm": 4.282679557800293, "learning_rate": 5.155296305053047e-07, "loss": 0.05543711, "memory(GiB)": 13.7, "step": 91890, "train_speed(iter/s)": 1.52839 }, { "acc": 0.98249998, "epoch": 43.07241621748301, "grad_norm": 3.8201003074645996, "learning_rate": 5.15187193307262e-07, "loss": 0.05045494, "memory(GiB)": 13.7, "step": 91895, "train_speed(iter/s)": 1.528389 }, { "acc": 0.97979164, "epoch": 43.074759784391844, "grad_norm": 2.6771955490112305, "learning_rate": 5.148448639214043e-07, "loss": 0.05377415, "memory(GiB)": 13.7, "step": 91900, "train_speed(iter/s)": 1.52839 }, { "acc": 0.97822914, "epoch": 43.07710335130068, "grad_norm": 4.81719970703125, "learning_rate": 5.14502642355958e-07, "loss": 0.05607989, "memory(GiB)": 13.7, "step": 91905, "train_speed(iter/s)": 1.528394 }, { "acc": 0.98458328, "epoch": 43.07944691820951, "grad_norm": 3.8466782569885254, "learning_rate": 5.141605286191523e-07, "loss": 0.02097425, "memory(GiB)": 13.7, "step": 91910, "train_speed(iter/s)": 1.528393 }, { "acc": 0.99250002, "epoch": 43.08179048511835, "grad_norm": 2.712345600128174, "learning_rate": 5.138185227192083e-07, "loss": 0.01563827, "memory(GiB)": 13.7, "step": 91915, "train_speed(iter/s)": 1.528396 }, { "acc": 0.99055557, "epoch": 43.08413405202719, "grad_norm": 1.5446289777755737, "learning_rate": 5.134766246643479e-07, "loss": 0.02327679, "memory(GiB)": 13.7, "step": 91920, "train_speed(iter/s)": 1.528402 }, { "acc": 0.99861107, "epoch": 43.08647761893602, "grad_norm": 0.6779011487960815, "learning_rate": 5.131348344627883e-07, "loss": 0.01770637, "memory(GiB)": 13.7, "step": 91925, "train_speed(iter/s)": 1.528407 }, { "acc": 0.98787775, "epoch": 43.08882118584486, "grad_norm": 0.8684507608413696, "learning_rate": 5.12793152122744e-07, "loss": 0.02524949, "memory(GiB)": 13.7, "step": 91930, "train_speed(iter/s)": 1.528408 }, { "acc": 0.98562498, "epoch": 43.09116475275369, "grad_norm": 3.135833263397217, "learning_rate": 5.124515776524303e-07, "loss": 0.03759134, "memory(GiB)": 13.7, "step": 91935, "train_speed(iter/s)": 1.528409 }, { "acc": 0.99020824, "epoch": 43.093508319662526, "grad_norm": 2.675584554672241, "learning_rate": 5.121101110600539e-07, "loss": 0.03685386, "memory(GiB)": 13.7, "step": 91940, "train_speed(iter/s)": 1.528414 }, { "acc": 0.98081226, "epoch": 43.09585188657136, "grad_norm": 5.227777004241943, "learning_rate": 5.117687523538269e-07, "loss": 0.04252027, "memory(GiB)": 13.7, "step": 91945, "train_speed(iter/s)": 1.52842 }, { "acc": 0.9885417, "epoch": 43.098195453480194, "grad_norm": 2.0965564250946045, "learning_rate": 5.11427501541952e-07, "loss": 0.02842016, "memory(GiB)": 13.7, "step": 91950, "train_speed(iter/s)": 1.528425 }, { "acc": 0.98083334, "epoch": 43.10053902038903, "grad_norm": 3.9675796031951904, "learning_rate": 5.11086358632633e-07, "loss": 0.0578347, "memory(GiB)": 13.7, "step": 91955, "train_speed(iter/s)": 1.528429 }, { "acc": 0.9927083, "epoch": 43.10288258729787, "grad_norm": 3.383392572402954, "learning_rate": 5.107453236340679e-07, "loss": 0.01317235, "memory(GiB)": 13.7, "step": 91960, "train_speed(iter/s)": 1.528432 }, { "acc": 0.98395834, "epoch": 43.105226154206704, "grad_norm": 1.9984549283981323, "learning_rate": 5.104043965544562e-07, "loss": 0.02881887, "memory(GiB)": 13.7, "step": 91965, "train_speed(iter/s)": 1.528433 }, { "acc": 0.979072, "epoch": 43.10756972111554, "grad_norm": 4.555700778961182, "learning_rate": 5.100635774019908e-07, "loss": 0.04962495, "memory(GiB)": 13.7, "step": 91970, "train_speed(iter/s)": 1.528437 }, { "acc": 0.97958336, "epoch": 43.10991328802437, "grad_norm": 0.002235221676528454, "learning_rate": 5.097228661848652e-07, "loss": 0.06152224, "memory(GiB)": 13.7, "step": 91975, "train_speed(iter/s)": 1.528443 }, { "acc": 0.9770834, "epoch": 43.11225685493321, "grad_norm": 6.7622175216674805, "learning_rate": 5.093822629112689e-07, "loss": 0.04494452, "memory(GiB)": 13.7, "step": 91980, "train_speed(iter/s)": 1.528442 }, { "acc": 0.9822917, "epoch": 43.11460042184204, "grad_norm": 3.509608507156372, "learning_rate": 5.090417675893882e-07, "loss": 0.04123229, "memory(GiB)": 13.7, "step": 91985, "train_speed(iter/s)": 1.528447 }, { "acc": 0.97624998, "epoch": 43.116943988750876, "grad_norm": 0.002102776663377881, "learning_rate": 5.087013802274084e-07, "loss": 0.02337622, "memory(GiB)": 13.7, "step": 91990, "train_speed(iter/s)": 1.528445 }, { "acc": 0.9926136, "epoch": 43.11928755565972, "grad_norm": 2.1863930225372314, "learning_rate": 5.083611008335087e-07, "loss": 0.04924709, "memory(GiB)": 13.7, "step": 91995, "train_speed(iter/s)": 1.528449 }, { "acc": 0.98383923, "epoch": 43.12163112256855, "grad_norm": 6.898436546325684, "learning_rate": 5.080209294158724e-07, "loss": 0.02121129, "memory(GiB)": 13.7, "step": 92000, "train_speed(iter/s)": 1.528453 }, { "acc": 0.97145834, "epoch": 43.123974689477386, "grad_norm": 5.7973127365112305, "learning_rate": 5.076808659826726e-07, "loss": 0.04672627, "memory(GiB)": 13.7, "step": 92005, "train_speed(iter/s)": 1.528457 }, { "acc": 0.98698864, "epoch": 43.12631825638622, "grad_norm": 2.96234130859375, "learning_rate": 5.073409105420865e-07, "loss": 0.02033498, "memory(GiB)": 13.7, "step": 92010, "train_speed(iter/s)": 1.528457 }, { "acc": 0.98562498, "epoch": 43.128661823295054, "grad_norm": 2.350867748260498, "learning_rate": 5.070010631022834e-07, "loss": 0.04804071, "memory(GiB)": 13.7, "step": 92015, "train_speed(iter/s)": 1.528458 }, { "acc": 0.99750004, "epoch": 43.13100539020389, "grad_norm": 0.0020083195995539427, "learning_rate": 5.066613236714313e-07, "loss": 0.02362323, "memory(GiB)": 13.7, "step": 92020, "train_speed(iter/s)": 1.528463 }, { "acc": 0.99041672, "epoch": 43.13334895711272, "grad_norm": 0.8798569440841675, "learning_rate": 5.063216922576988e-07, "loss": 0.01232566, "memory(GiB)": 13.7, "step": 92025, "train_speed(iter/s)": 1.528464 }, { "acc": 0.98419647, "epoch": 43.13569252402156, "grad_norm": 0.787728488445282, "learning_rate": 5.05982168869246e-07, "loss": 0.02467614, "memory(GiB)": 13.7, "step": 92030, "train_speed(iter/s)": 1.528464 }, { "acc": 0.97267895, "epoch": 43.1380360909304, "grad_norm": 1.119900107383728, "learning_rate": 5.056427535142384e-07, "loss": 0.08120561, "memory(GiB)": 13.7, "step": 92035, "train_speed(iter/s)": 1.528465 }, { "acc": 0.98988094, "epoch": 43.14037965783923, "grad_norm": 0.0015958002768456936, "learning_rate": 5.053034462008315e-07, "loss": 0.01393195, "memory(GiB)": 13.7, "step": 92040, "train_speed(iter/s)": 1.528467 }, { "acc": 0.97333336, "epoch": 43.14272322474807, "grad_norm": 3.400052309036255, "learning_rate": 5.049642469371824e-07, "loss": 0.06490474, "memory(GiB)": 13.7, "step": 92045, "train_speed(iter/s)": 1.528473 }, { "acc": 0.99092255, "epoch": 43.1450667916569, "grad_norm": 1.161001205444336, "learning_rate": 5.046251557314443e-07, "loss": 0.06306932, "memory(GiB)": 13.7, "step": 92050, "train_speed(iter/s)": 1.528474 }, { "acc": 0.98467264, "epoch": 43.147410358565736, "grad_norm": 2.120574474334717, "learning_rate": 5.042861725917648e-07, "loss": 0.04058205, "memory(GiB)": 13.7, "step": 92055, "train_speed(iter/s)": 1.528475 }, { "acc": 0.99333334, "epoch": 43.14975392547457, "grad_norm": 2.178598165512085, "learning_rate": 5.039472975262971e-07, "loss": 0.02196116, "memory(GiB)": 13.7, "step": 92060, "train_speed(iter/s)": 1.528477 }, { "acc": 0.99312496, "epoch": 43.152097492383405, "grad_norm": 0.000863495864905417, "learning_rate": 5.036085305431824e-07, "loss": 0.05216955, "memory(GiB)": 13.7, "step": 92065, "train_speed(iter/s)": 1.528481 }, { "acc": 0.98687496, "epoch": 43.154441059292246, "grad_norm": 1.6721625328063965, "learning_rate": 5.032698716505666e-07, "loss": 0.05589873, "memory(GiB)": 13.7, "step": 92070, "train_speed(iter/s)": 1.52848 }, { "acc": 0.9864584, "epoch": 43.15678462620108, "grad_norm": 1.16170334815979, "learning_rate": 5.029313208565879e-07, "loss": 0.04088135, "memory(GiB)": 13.7, "step": 92075, "train_speed(iter/s)": 1.528484 }, { "acc": 0.98687496, "epoch": 43.159128193109915, "grad_norm": 2.853637456893921, "learning_rate": 5.025928781693851e-07, "loss": 0.01787615, "memory(GiB)": 13.7, "step": 92080, "train_speed(iter/s)": 1.528483 }, { "acc": 0.98125, "epoch": 43.16147176001875, "grad_norm": 8.202008247375488, "learning_rate": 5.022545435970908e-07, "loss": 0.06055461, "memory(GiB)": 13.7, "step": 92085, "train_speed(iter/s)": 1.528489 }, { "acc": 0.99125004, "epoch": 43.16381532692758, "grad_norm": 1.2583926916122437, "learning_rate": 5.019163171478411e-07, "loss": 0.02047369, "memory(GiB)": 13.7, "step": 92090, "train_speed(iter/s)": 1.528486 }, { "acc": 0.9864583, "epoch": 43.16615889383642, "grad_norm": 1.469693899154663, "learning_rate": 5.015781988297644e-07, "loss": 0.02944123, "memory(GiB)": 13.7, "step": 92095, "train_speed(iter/s)": 1.528489 }, { "acc": 0.97436008, "epoch": 43.16850246074525, "grad_norm": 6.406725883483887, "learning_rate": 5.01240188650987e-07, "loss": 0.06625814, "memory(GiB)": 13.7, "step": 92100, "train_speed(iter/s)": 1.528494 }, { "acc": 0.98445654, "epoch": 43.170846027654086, "grad_norm": 3.717681884765625, "learning_rate": 5.009022866196351e-07, "loss": 0.03778685, "memory(GiB)": 13.7, "step": 92105, "train_speed(iter/s)": 1.528499 }, { "acc": 0.9875, "epoch": 43.17318959456293, "grad_norm": 3.6681227684020996, "learning_rate": 5.005644927438289e-07, "loss": 0.04661437, "memory(GiB)": 13.7, "step": 92110, "train_speed(iter/s)": 1.528501 }, { "acc": 0.98770828, "epoch": 43.17553316147176, "grad_norm": 1.3819677829742432, "learning_rate": 5.002268070316887e-07, "loss": 0.03865481, "memory(GiB)": 13.7, "step": 92115, "train_speed(iter/s)": 1.528502 }, { "acc": 0.99250002, "epoch": 43.177876728380596, "grad_norm": 1.4913098812103271, "learning_rate": 4.998892294913314e-07, "loss": 0.05987658, "memory(GiB)": 13.7, "step": 92120, "train_speed(iter/s)": 1.528502 }, { "acc": 0.9854167, "epoch": 43.18022029528943, "grad_norm": 2.5124945640563965, "learning_rate": 4.995517601308725e-07, "loss": 0.02896051, "memory(GiB)": 13.7, "step": 92125, "train_speed(iter/s)": 1.528505 }, { "acc": 0.99250002, "epoch": 43.182563862198265, "grad_norm": 3.5167665481567383, "learning_rate": 4.99214398958422e-07, "loss": 0.02708268, "memory(GiB)": 13.7, "step": 92130, "train_speed(iter/s)": 1.528505 }, { "acc": 0.975, "epoch": 43.1849074291071, "grad_norm": 5.14773416519165, "learning_rate": 4.988771459820897e-07, "loss": 0.04895098, "memory(GiB)": 13.7, "step": 92135, "train_speed(iter/s)": 1.528512 }, { "acc": 0.9958334, "epoch": 43.18725099601593, "grad_norm": 5.498568534851074, "learning_rate": 4.985400012099822e-07, "loss": 0.05099276, "memory(GiB)": 13.7, "step": 92140, "train_speed(iter/s)": 1.528517 }, { "acc": 0.99404764, "epoch": 43.189594562924775, "grad_norm": 9.669554128777236e-05, "learning_rate": 4.982029646502013e-07, "loss": 0.01370303, "memory(GiB)": 13.7, "step": 92145, "train_speed(iter/s)": 1.528521 }, { "acc": 0.98604164, "epoch": 43.19193812983361, "grad_norm": 1.9085681438446045, "learning_rate": 4.978660363108512e-07, "loss": 0.06280259, "memory(GiB)": 13.7, "step": 92150, "train_speed(iter/s)": 1.52852 }, { "acc": 0.97437496, "epoch": 43.19428169674244, "grad_norm": 7.708376407623291, "learning_rate": 4.97529216200029e-07, "loss": 0.07038472, "memory(GiB)": 13.7, "step": 92155, "train_speed(iter/s)": 1.528522 }, { "acc": 0.97677078, "epoch": 43.19662526365128, "grad_norm": 5.484060764312744, "learning_rate": 4.971925043258315e-07, "loss": 0.05001157, "memory(GiB)": 13.7, "step": 92160, "train_speed(iter/s)": 1.528527 }, { "acc": 0.99125004, "epoch": 43.19896883056011, "grad_norm": 3.3158020973205566, "learning_rate": 4.968559006963507e-07, "loss": 0.05454849, "memory(GiB)": 13.7, "step": 92165, "train_speed(iter/s)": 1.528532 }, { "acc": 0.9833334, "epoch": 43.201312397468946, "grad_norm": 4.328392505645752, "learning_rate": 4.9651940531968e-07, "loss": 0.02664743, "memory(GiB)": 13.7, "step": 92170, "train_speed(iter/s)": 1.528533 }, { "acc": 0.99437504, "epoch": 43.20365596437778, "grad_norm": 0.8327778577804565, "learning_rate": 4.961830182039048e-07, "loss": 0.03398691, "memory(GiB)": 13.7, "step": 92175, "train_speed(iter/s)": 1.528535 }, { "acc": 0.99732151, "epoch": 43.205999531286615, "grad_norm": 2.6106410026550293, "learning_rate": 4.958467393571116e-07, "loss": 0.01714489, "memory(GiB)": 13.7, "step": 92180, "train_speed(iter/s)": 1.528539 }, { "acc": 0.9848485, "epoch": 43.208343098195456, "grad_norm": 2.1208536624908447, "learning_rate": 4.955105687873852e-07, "loss": 0.04142299, "memory(GiB)": 13.7, "step": 92185, "train_speed(iter/s)": 1.52854 }, { "acc": 0.97994118, "epoch": 43.21068666510429, "grad_norm": 6.171951770782471, "learning_rate": 4.951745065028039e-07, "loss": 0.06166994, "memory(GiB)": 13.7, "step": 92190, "train_speed(iter/s)": 1.528542 }, { "acc": 0.98279762, "epoch": 43.213030232013125, "grad_norm": 0.16633905470371246, "learning_rate": 4.94838552511447e-07, "loss": 0.04227246, "memory(GiB)": 13.7, "step": 92195, "train_speed(iter/s)": 1.528545 }, { "acc": 0.9927084, "epoch": 43.21537379892196, "grad_norm": 3.46492600440979, "learning_rate": 4.94502706821387e-07, "loss": 0.02450759, "memory(GiB)": 13.7, "step": 92200, "train_speed(iter/s)": 1.528545 }, { "acc": 0.98654766, "epoch": 43.217717365830794, "grad_norm": 3.890610456466675, "learning_rate": 4.94166969440701e-07, "loss": 0.02086306, "memory(GiB)": 13.7, "step": 92205, "train_speed(iter/s)": 1.528545 }, { "acc": 0.975, "epoch": 43.22006093273963, "grad_norm": 4.794285774230957, "learning_rate": 4.938313403774553e-07, "loss": 0.04263023, "memory(GiB)": 13.7, "step": 92210, "train_speed(iter/s)": 1.528549 }, { "acc": 0.98008928, "epoch": 43.22240449964846, "grad_norm": 1.0285441875457764, "learning_rate": 4.934958196397197e-07, "loss": 0.03622336, "memory(GiB)": 13.7, "step": 92215, "train_speed(iter/s)": 1.528551 }, { "acc": 0.97517853, "epoch": 43.224748066557304, "grad_norm": 3.212052822113037, "learning_rate": 4.931604072355576e-07, "loss": 0.05162977, "memory(GiB)": 13.7, "step": 92220, "train_speed(iter/s)": 1.528552 }, { "acc": 0.97875004, "epoch": 43.22709163346614, "grad_norm": 9.113041877746582, "learning_rate": 4.92825103173031e-07, "loss": 0.04951939, "memory(GiB)": 13.7, "step": 92225, "train_speed(iter/s)": 1.528557 }, { "acc": 0.97759218, "epoch": 43.22943520037497, "grad_norm": 1.667004108428955, "learning_rate": 4.924899074602008e-07, "loss": 0.05320784, "memory(GiB)": 13.7, "step": 92230, "train_speed(iter/s)": 1.528559 }, { "acc": 0.98238096, "epoch": 43.23177876728381, "grad_norm": 3.6518378257751465, "learning_rate": 4.921548201051216e-07, "loss": 0.0746244, "memory(GiB)": 13.7, "step": 92235, "train_speed(iter/s)": 1.528559 }, { "acc": 0.98133154, "epoch": 43.23412233419264, "grad_norm": 1.459943175315857, "learning_rate": 4.918198411158512e-07, "loss": 0.04763296, "memory(GiB)": 13.7, "step": 92240, "train_speed(iter/s)": 1.528563 }, { "acc": 0.98166122, "epoch": 43.236465901101475, "grad_norm": 3.98498272895813, "learning_rate": 4.914849705004386e-07, "loss": 0.05211696, "memory(GiB)": 13.7, "step": 92245, "train_speed(iter/s)": 1.528564 }, { "acc": 0.99145832, "epoch": 43.23880946801031, "grad_norm": 2.6110668182373047, "learning_rate": 4.911502082669349e-07, "loss": 0.02190531, "memory(GiB)": 13.7, "step": 92250, "train_speed(iter/s)": 1.528565 }, { "acc": 0.99499998, "epoch": 43.241153034919144, "grad_norm": 3.8392515182495117, "learning_rate": 4.908155544233845e-07, "loss": 0.02655093, "memory(GiB)": 13.7, "step": 92255, "train_speed(iter/s)": 1.528566 }, { "acc": 0.98624458, "epoch": 43.243496601827985, "grad_norm": 3.037496328353882, "learning_rate": 4.904810089778339e-07, "loss": 0.04013353, "memory(GiB)": 13.7, "step": 92260, "train_speed(iter/s)": 1.528565 }, { "acc": 0.973769, "epoch": 43.24584016873682, "grad_norm": 7.584731578826904, "learning_rate": 4.901465719383238e-07, "loss": 0.08778961, "memory(GiB)": 13.7, "step": 92265, "train_speed(iter/s)": 1.528568 }, { "acc": 0.9869565, "epoch": 43.248183735645654, "grad_norm": 2.2239248752593994, "learning_rate": 4.898122433128913e-07, "loss": 0.04010521, "memory(GiB)": 13.7, "step": 92270, "train_speed(iter/s)": 1.528573 }, { "acc": 0.9838541, "epoch": 43.25052730255449, "grad_norm": 3.262625217437744, "learning_rate": 4.894780231095749e-07, "loss": 0.05947154, "memory(GiB)": 13.7, "step": 92275, "train_speed(iter/s)": 1.528575 }, { "acc": 0.9802084, "epoch": 43.25287086946332, "grad_norm": 1.0418775081634521, "learning_rate": 4.891439113364055e-07, "loss": 0.04287497, "memory(GiB)": 13.7, "step": 92280, "train_speed(iter/s)": 1.528574 }, { "acc": 0.98819447, "epoch": 43.25521443637216, "grad_norm": 2.5873095989227295, "learning_rate": 4.88809908001417e-07, "loss": 0.0282053, "memory(GiB)": 13.7, "step": 92285, "train_speed(iter/s)": 1.528578 }, { "acc": 0.97979164, "epoch": 43.25755800328099, "grad_norm": 4.647253513336182, "learning_rate": 4.884760131126341e-07, "loss": 0.04187067, "memory(GiB)": 13.7, "step": 92290, "train_speed(iter/s)": 1.52858 }, { "acc": 0.99541664, "epoch": 43.25990157018983, "grad_norm": 3.033224582672119, "learning_rate": 4.881422266780864e-07, "loss": 0.04402511, "memory(GiB)": 13.7, "step": 92295, "train_speed(iter/s)": 1.528584 }, { "acc": 0.98604164, "epoch": 43.26224513709867, "grad_norm": 2.9435930252075195, "learning_rate": 4.878085487057948e-07, "loss": 0.02145366, "memory(GiB)": 13.7, "step": 92300, "train_speed(iter/s)": 1.528587 }, { "acc": 0.9958333, "epoch": 43.2645887040075, "grad_norm": 2.8255839347839355, "learning_rate": 4.874749792037808e-07, "loss": 0.01705192, "memory(GiB)": 13.7, "step": 92305, "train_speed(iter/s)": 1.528591 }, { "acc": 0.98708334, "epoch": 43.266932270916335, "grad_norm": 3.187171459197998, "learning_rate": 4.871415181800623e-07, "loss": 0.02516423, "memory(GiB)": 13.7, "step": 92310, "train_speed(iter/s)": 1.528591 }, { "acc": 0.9916667, "epoch": 43.26927583782517, "grad_norm": 4.169547080993652, "learning_rate": 4.86808165642653e-07, "loss": 0.04262822, "memory(GiB)": 13.7, "step": 92315, "train_speed(iter/s)": 1.528591 }, { "acc": 0.98010416, "epoch": 43.271619404734004, "grad_norm": 2.595590353012085, "learning_rate": 4.86474921599567e-07, "loss": 0.03344302, "memory(GiB)": 13.7, "step": 92320, "train_speed(iter/s)": 1.528591 }, { "acc": 0.98833332, "epoch": 43.27396297164284, "grad_norm": 3.033790349960327, "learning_rate": 4.861417860588142e-07, "loss": 0.03018287, "memory(GiB)": 13.7, "step": 92325, "train_speed(iter/s)": 1.528595 }, { "acc": 0.984375, "epoch": 43.27630653855167, "grad_norm": 2.59795880317688, "learning_rate": 4.858087590284024e-07, "loss": 0.03924703, "memory(GiB)": 13.7, "step": 92330, "train_speed(iter/s)": 1.5286 }, { "acc": 0.9791667, "epoch": 43.278650105460514, "grad_norm": 3.6926615238189697, "learning_rate": 4.854758405163355e-07, "loss": 0.03250705, "memory(GiB)": 13.7, "step": 92335, "train_speed(iter/s)": 1.5286 }, { "acc": 0.9729166, "epoch": 43.28099367236935, "grad_norm": 3.9525275230407715, "learning_rate": 4.851430305306176e-07, "loss": 0.0526552, "memory(GiB)": 13.7, "step": 92340, "train_speed(iter/s)": 1.528603 }, { "acc": 0.99300594, "epoch": 43.28333723927818, "grad_norm": 1.9894942045211792, "learning_rate": 4.848103290792467e-07, "loss": 0.02142581, "memory(GiB)": 13.7, "step": 92345, "train_speed(iter/s)": 1.528607 }, { "acc": 0.98078375, "epoch": 43.28568080618702, "grad_norm": 0.0014180561993271112, "learning_rate": 4.844777361702187e-07, "loss": 0.03408494, "memory(GiB)": 13.7, "step": 92350, "train_speed(iter/s)": 1.528615 }, { "acc": 0.98761368, "epoch": 43.28802437309585, "grad_norm": 2.314462184906006, "learning_rate": 4.841452518115318e-07, "loss": 0.02546442, "memory(GiB)": 13.7, "step": 92355, "train_speed(iter/s)": 1.528618 }, { "acc": 0.98031254, "epoch": 43.290367940004685, "grad_norm": 1.757204294204712, "learning_rate": 4.838128760111744e-07, "loss": 0.04335914, "memory(GiB)": 13.7, "step": 92360, "train_speed(iter/s)": 1.528618 }, { "acc": 0.97987013, "epoch": 43.29271150691352, "grad_norm": 6.727389812469482, "learning_rate": 4.834806087771382e-07, "loss": 0.05332277, "memory(GiB)": 13.7, "step": 92365, "train_speed(iter/s)": 1.528619 }, { "acc": 0.98500004, "epoch": 43.295055073822354, "grad_norm": 1.7384284734725952, "learning_rate": 4.831484501174071e-07, "loss": 0.02631136, "memory(GiB)": 13.7, "step": 92370, "train_speed(iter/s)": 1.528621 }, { "acc": 0.9827919, "epoch": 43.297398640731195, "grad_norm": 3.0599257946014404, "learning_rate": 4.828164000399678e-07, "loss": 0.04680275, "memory(GiB)": 13.7, "step": 92375, "train_speed(iter/s)": 1.528622 }, { "acc": 0.98672075, "epoch": 43.29974220764003, "grad_norm": 3.9479026794433594, "learning_rate": 4.824844585527988e-07, "loss": 0.02349131, "memory(GiB)": 13.7, "step": 92380, "train_speed(iter/s)": 1.528627 }, { "acc": 0.99104176, "epoch": 43.302085774548864, "grad_norm": 1.0576694011688232, "learning_rate": 4.821526256638822e-07, "loss": 0.0164746, "memory(GiB)": 13.7, "step": 92385, "train_speed(iter/s)": 1.528628 }, { "acc": 0.97520828, "epoch": 43.3044293414577, "grad_norm": 5.577282905578613, "learning_rate": 4.818209013811931e-07, "loss": 0.052274, "memory(GiB)": 13.7, "step": 92390, "train_speed(iter/s)": 1.528626 }, { "acc": 0.97555561, "epoch": 43.30677290836653, "grad_norm": 8.303065299987793, "learning_rate": 4.814892857127032e-07, "loss": 0.09233218, "memory(GiB)": 13.7, "step": 92395, "train_speed(iter/s)": 1.528623 }, { "acc": 0.9963542, "epoch": 43.30911647527537, "grad_norm": 2.7889082431793213, "learning_rate": 4.811577786663854e-07, "loss": 0.0169777, "memory(GiB)": 13.7, "step": 92400, "train_speed(iter/s)": 1.528624 }, { "acc": 0.97185516, "epoch": 43.3114600421842, "grad_norm": 2.5329627990722656, "learning_rate": 4.808263802502057e-07, "loss": 0.0596613, "memory(GiB)": 13.7, "step": 92405, "train_speed(iter/s)": 1.528632 }, { "acc": 0.98604164, "epoch": 43.31380360909304, "grad_norm": 3.5342519283294678, "learning_rate": 4.804950904721338e-07, "loss": 0.0535619, "memory(GiB)": 13.7, "step": 92410, "train_speed(iter/s)": 1.52864 }, { "acc": 0.96341267, "epoch": 43.31614717600188, "grad_norm": 4.018168926239014, "learning_rate": 4.801639093401291e-07, "loss": 0.08615996, "memory(GiB)": 13.7, "step": 92415, "train_speed(iter/s)": 1.528644 }, { "acc": 0.98675594, "epoch": 43.31849074291071, "grad_norm": 4.917690277099609, "learning_rate": 4.798328368621549e-07, "loss": 0.03109786, "memory(GiB)": 13.7, "step": 92420, "train_speed(iter/s)": 1.528643 }, { "acc": 0.98875008, "epoch": 43.320834309819546, "grad_norm": 2.9411892890930176, "learning_rate": 4.795018730461669e-07, "loss": 0.05071129, "memory(GiB)": 13.7, "step": 92425, "train_speed(iter/s)": 1.528644 }, { "acc": 0.98249998, "epoch": 43.32317787672838, "grad_norm": 5.10586404800415, "learning_rate": 4.791710179001219e-07, "loss": 0.04404002, "memory(GiB)": 13.7, "step": 92430, "train_speed(iter/s)": 1.528647 }, { "acc": 0.9854166, "epoch": 43.325521443637214, "grad_norm": 2.4269213676452637, "learning_rate": 4.788402714319721e-07, "loss": 0.03251009, "memory(GiB)": 13.7, "step": 92435, "train_speed(iter/s)": 1.528652 }, { "acc": 0.9697917, "epoch": 43.32786501054605, "grad_norm": 5.010873794555664, "learning_rate": 4.785096336496655e-07, "loss": 0.04607523, "memory(GiB)": 13.7, "step": 92440, "train_speed(iter/s)": 1.528654 }, { "acc": 0.97946434, "epoch": 43.33020857745488, "grad_norm": 5.42571496963501, "learning_rate": 4.78179104561153e-07, "loss": 0.03460509, "memory(GiB)": 13.7, "step": 92445, "train_speed(iter/s)": 1.528654 }, { "acc": 0.97976198, "epoch": 43.332552144363724, "grad_norm": 4.946987628936768, "learning_rate": 4.778486841743771e-07, "loss": 0.05271927, "memory(GiB)": 13.7, "step": 92450, "train_speed(iter/s)": 1.528655 }, { "acc": 0.984375, "epoch": 43.33489571127256, "grad_norm": 4.761857032775879, "learning_rate": 4.775183724972817e-07, "loss": 0.03377699, "memory(GiB)": 13.7, "step": 92455, "train_speed(iter/s)": 1.528657 }, { "acc": 0.97437496, "epoch": 43.33723927818139, "grad_norm": 4.769044876098633, "learning_rate": 4.771881695378041e-07, "loss": 0.049268, "memory(GiB)": 13.7, "step": 92460, "train_speed(iter/s)": 1.528661 }, { "acc": 0.98258934, "epoch": 43.33958284509023, "grad_norm": 3.422922372817993, "learning_rate": 4.768580753038832e-07, "loss": 0.04322662, "memory(GiB)": 13.7, "step": 92465, "train_speed(iter/s)": 1.52866 }, { "acc": 0.98753853, "epoch": 43.34192641199906, "grad_norm": 2.9403345584869385, "learning_rate": 4.765280898034537e-07, "loss": 0.05248004, "memory(GiB)": 13.7, "step": 92470, "train_speed(iter/s)": 1.52866 }, { "acc": 0.98705359, "epoch": 43.344269978907896, "grad_norm": 3.816322088241577, "learning_rate": 4.761982130444456e-07, "loss": 0.05583786, "memory(GiB)": 13.7, "step": 92475, "train_speed(iter/s)": 1.528663 }, { "acc": 0.99375, "epoch": 43.34661354581673, "grad_norm": 4.035456657409668, "learning_rate": 4.7586844503479003e-07, "loss": 0.02272427, "memory(GiB)": 13.7, "step": 92480, "train_speed(iter/s)": 1.528664 }, { "acc": 0.99352684, "epoch": 43.34895711272557, "grad_norm": 2.132741689682007, "learning_rate": 4.7553878578241155e-07, "loss": 0.02513762, "memory(GiB)": 13.7, "step": 92485, "train_speed(iter/s)": 1.528662 }, { "acc": 0.98395834, "epoch": 43.351300679634406, "grad_norm": 2.2973392009735107, "learning_rate": 4.7520923529523583e-07, "loss": 0.03497161, "memory(GiB)": 13.7, "step": 92490, "train_speed(iter/s)": 1.528669 }, { "acc": 0.99607143, "epoch": 43.35364424654324, "grad_norm": 2.0926146507263184, "learning_rate": 4.748797935811813e-07, "loss": 0.03340803, "memory(GiB)": 13.7, "step": 92495, "train_speed(iter/s)": 1.528673 }, { "acc": 0.984375, "epoch": 43.355987813452074, "grad_norm": 3.514248847961426, "learning_rate": 4.745504606481709e-07, "loss": 0.03931003, "memory(GiB)": 13.7, "step": 92500, "train_speed(iter/s)": 1.528669 }, { "acc": 0.99229164, "epoch": 43.35833138036091, "grad_norm": 2.5615503787994385, "learning_rate": 4.7422123650411733e-07, "loss": 0.02975706, "memory(GiB)": 13.7, "step": 92505, "train_speed(iter/s)": 1.528666 }, { "acc": 0.9895834, "epoch": 43.36067494726974, "grad_norm": 4.271101474761963, "learning_rate": 4.738921211569364e-07, "loss": 0.02050307, "memory(GiB)": 13.7, "step": 92510, "train_speed(iter/s)": 1.528668 }, { "acc": 0.99020824, "epoch": 43.36301851417858, "grad_norm": 1.027105450630188, "learning_rate": 4.735631146145376e-07, "loss": 0.04807939, "memory(GiB)": 13.7, "step": 92515, "train_speed(iter/s)": 1.528672 }, { "acc": 0.98458328, "epoch": 43.36536208108741, "grad_norm": 2.0517117977142334, "learning_rate": 4.732342168848283e-07, "loss": 0.03251966, "memory(GiB)": 13.7, "step": 92520, "train_speed(iter/s)": 1.528674 }, { "acc": 0.98864574, "epoch": 43.36770564799625, "grad_norm": 2.214407205581665, "learning_rate": 4.729054279757163e-07, "loss": 0.02920985, "memory(GiB)": 13.7, "step": 92525, "train_speed(iter/s)": 1.528673 }, { "acc": 0.9953125, "epoch": 43.37004921490509, "grad_norm": 3.2894043922424316, "learning_rate": 4.7257674789510177e-07, "loss": 0.02747208, "memory(GiB)": 13.7, "step": 92530, "train_speed(iter/s)": 1.528676 }, { "acc": 0.9875, "epoch": 43.37239278181392, "grad_norm": 2.386423349380493, "learning_rate": 4.7224817665088866e-07, "loss": 0.07183074, "memory(GiB)": 13.7, "step": 92535, "train_speed(iter/s)": 1.52868 }, { "acc": 0.99187498, "epoch": 43.374736348722756, "grad_norm": 3.4285876750946045, "learning_rate": 4.7191971425097213e-07, "loss": 0.01443383, "memory(GiB)": 13.7, "step": 92540, "train_speed(iter/s)": 1.528681 }, { "acc": 0.97541666, "epoch": 43.37707991563159, "grad_norm": 2.8097333908081055, "learning_rate": 4.71591360703249e-07, "loss": 0.03707113, "memory(GiB)": 13.7, "step": 92545, "train_speed(iter/s)": 1.528687 }, { "acc": 0.98041668, "epoch": 43.379423482540425, "grad_norm": 3.845939874649048, "learning_rate": 4.712631160156088e-07, "loss": 0.03228655, "memory(GiB)": 13.7, "step": 92550, "train_speed(iter/s)": 1.528689 }, { "acc": 0.97764416, "epoch": 43.38176704944926, "grad_norm": 3.7727792263031006, "learning_rate": 4.70934980195946e-07, "loss": 0.0612064, "memory(GiB)": 13.7, "step": 92555, "train_speed(iter/s)": 1.528692 }, { "acc": 0.95708332, "epoch": 43.3841106163581, "grad_norm": 8.106840133666992, "learning_rate": 4.706069532521448e-07, "loss": 0.06166392, "memory(GiB)": 13.7, "step": 92560, "train_speed(iter/s)": 1.528696 }, { "acc": 0.99229164, "epoch": 43.386454183266935, "grad_norm": 0.6938636898994446, "learning_rate": 4.7027903519209007e-07, "loss": 0.02216914, "memory(GiB)": 13.7, "step": 92565, "train_speed(iter/s)": 1.528693 }, { "acc": 0.9822917, "epoch": 43.38879775017577, "grad_norm": 4.893443584442139, "learning_rate": 4.6995122602366557e-07, "loss": 0.04410684, "memory(GiB)": 13.7, "step": 92570, "train_speed(iter/s)": 1.528692 }, { "acc": 0.99174671, "epoch": 43.3911413170846, "grad_norm": 0.15371139347553253, "learning_rate": 4.6962352575474846e-07, "loss": 0.02346559, "memory(GiB)": 13.7, "step": 92575, "train_speed(iter/s)": 1.528692 }, { "acc": 0.9916667, "epoch": 43.39348488399344, "grad_norm": 1.9442819356918335, "learning_rate": 4.692959343932173e-07, "loss": 0.02408223, "memory(GiB)": 13.7, "step": 92580, "train_speed(iter/s)": 1.528693 }, { "acc": 0.98298607, "epoch": 43.39582845090227, "grad_norm": 7.320917129516602, "learning_rate": 4.689684519469443e-07, "loss": 0.03111641, "memory(GiB)": 13.7, "step": 92585, "train_speed(iter/s)": 1.528689 }, { "acc": 0.98670635, "epoch": 43.398172017811106, "grad_norm": 3.6029844284057617, "learning_rate": 4.6864107842380424e-07, "loss": 0.06574379, "memory(GiB)": 13.7, "step": 92590, "train_speed(iter/s)": 1.52869 }, { "acc": 0.98796701, "epoch": 43.40051558471994, "grad_norm": 2.625267267227173, "learning_rate": 4.6831381383166375e-07, "loss": 0.03739893, "memory(GiB)": 13.7, "step": 92595, "train_speed(iter/s)": 1.528695 }, { "acc": 0.98344698, "epoch": 43.40285915162878, "grad_norm": 4.1260833740234375, "learning_rate": 4.679866581783897e-07, "loss": 0.0485367, "memory(GiB)": 13.7, "step": 92600, "train_speed(iter/s)": 1.528697 }, { "acc": 0.98249998, "epoch": 43.405202718537616, "grad_norm": 3.6075806617736816, "learning_rate": 4.6765961147184623e-07, "loss": 0.03180328, "memory(GiB)": 13.7, "step": 92605, "train_speed(iter/s)": 1.5287 }, { "acc": 0.98552074, "epoch": 43.40754628544645, "grad_norm": 0.19084718823432922, "learning_rate": 4.6733267371989277e-07, "loss": 0.03704447, "memory(GiB)": 13.7, "step": 92610, "train_speed(iter/s)": 1.528702 }, { "acc": 0.97666664, "epoch": 43.409889852355285, "grad_norm": 4.405466556549072, "learning_rate": 4.6700584493039074e-07, "loss": 0.04010262, "memory(GiB)": 13.7, "step": 92615, "train_speed(iter/s)": 1.528701 }, { "acc": 0.99050598, "epoch": 43.41223341926412, "grad_norm": 4.004683017730713, "learning_rate": 4.666791251111941e-07, "loss": 0.03551689, "memory(GiB)": 13.7, "step": 92620, "train_speed(iter/s)": 1.528705 }, { "acc": 0.99419641, "epoch": 43.41457698617295, "grad_norm": 1.6056177616119385, "learning_rate": 4.6635251427015677e-07, "loss": 0.03674474, "memory(GiB)": 13.7, "step": 92625, "train_speed(iter/s)": 1.528709 }, { "acc": 0.9717803, "epoch": 43.41692055308179, "grad_norm": 2.660125255584717, "learning_rate": 4.6602601241512856e-07, "loss": 0.09862903, "memory(GiB)": 13.7, "step": 92630, "train_speed(iter/s)": 1.528709 }, { "acc": 0.99541664, "epoch": 43.41926411999063, "grad_norm": 1.5732696056365967, "learning_rate": 4.656996195539595e-07, "loss": 0.02149867, "memory(GiB)": 13.7, "step": 92635, "train_speed(iter/s)": 1.528707 }, { "acc": 0.9895833, "epoch": 43.42160768689946, "grad_norm": 2.4942123889923096, "learning_rate": 4.6537333569449313e-07, "loss": 0.01796847, "memory(GiB)": 13.7, "step": 92640, "train_speed(iter/s)": 1.528716 }, { "acc": 0.996875, "epoch": 43.4239512538083, "grad_norm": 0.1691756248474121, "learning_rate": 4.650471608445707e-07, "loss": 0.01702729, "memory(GiB)": 13.7, "step": 92645, "train_speed(iter/s)": 1.528717 }, { "acc": 0.98583336, "epoch": 43.42629482071713, "grad_norm": 2.814490556716919, "learning_rate": 4.6472109501203633e-07, "loss": 0.033656, "memory(GiB)": 13.7, "step": 92650, "train_speed(iter/s)": 1.528714 }, { "acc": 0.99300594, "epoch": 43.428638387625966, "grad_norm": 4.188967704772949, "learning_rate": 4.6439513820472516e-07, "loss": 0.03374915, "memory(GiB)": 13.7, "step": 92655, "train_speed(iter/s)": 1.528717 }, { "acc": 0.98946428, "epoch": 43.4309819545348, "grad_norm": 2.334409713745117, "learning_rate": 4.640692904304734e-07, "loss": 0.02729085, "memory(GiB)": 13.7, "step": 92660, "train_speed(iter/s)": 1.528718 }, { "acc": 0.98642864, "epoch": 43.433325521443635, "grad_norm": 4.009350776672363, "learning_rate": 4.6374355169711125e-07, "loss": 0.04267479, "memory(GiB)": 13.7, "step": 92665, "train_speed(iter/s)": 1.528719 }, { "acc": 0.98928576, "epoch": 43.43566908835247, "grad_norm": 2.90002179145813, "learning_rate": 4.6341792201247064e-07, "loss": 0.03735598, "memory(GiB)": 13.7, "step": 92670, "train_speed(iter/s)": 1.528722 }, { "acc": 0.98648815, "epoch": 43.43801265526131, "grad_norm": 3.7804174423217773, "learning_rate": 4.6309240138437606e-07, "loss": 0.03968754, "memory(GiB)": 13.7, "step": 92675, "train_speed(iter/s)": 1.528724 }, { "acc": 0.99196434, "epoch": 43.440356222170145, "grad_norm": 0.010085836984217167, "learning_rate": 4.6276698982065564e-07, "loss": 0.0361567, "memory(GiB)": 13.7, "step": 92680, "train_speed(iter/s)": 1.528726 }, { "acc": 0.99821434, "epoch": 43.44269978907898, "grad_norm": 2.4293458461761475, "learning_rate": 4.624416873291288e-07, "loss": 0.01883046, "memory(GiB)": 13.7, "step": 92685, "train_speed(iter/s)": 1.528731 }, { "acc": 0.97905636, "epoch": 43.44504335598781, "grad_norm": 3.061737537384033, "learning_rate": 4.621164939176148e-07, "loss": 0.03994459, "memory(GiB)": 13.7, "step": 92690, "train_speed(iter/s)": 1.528732 }, { "acc": 0.98343754, "epoch": 43.44738692289665, "grad_norm": 4.038377285003662, "learning_rate": 4.617914095939309e-07, "loss": 0.03112387, "memory(GiB)": 13.7, "step": 92695, "train_speed(iter/s)": 1.528733 }, { "acc": 0.99281254, "epoch": 43.44973048980548, "grad_norm": 2.2995057106018066, "learning_rate": 4.614664343658891e-07, "loss": 0.00781856, "memory(GiB)": 13.7, "step": 92700, "train_speed(iter/s)": 1.528734 }, { "acc": 0.99278851, "epoch": 43.452074056714316, "grad_norm": 3.3330936431884766, "learning_rate": 4.611415682413039e-07, "loss": 0.04331645, "memory(GiB)": 13.7, "step": 92705, "train_speed(iter/s)": 1.528738 }, { "acc": 0.9958333, "epoch": 43.45441762362316, "grad_norm": 0.19238893687725067, "learning_rate": 4.608168112279817e-07, "loss": 0.00611365, "memory(GiB)": 13.7, "step": 92710, "train_speed(iter/s)": 1.528737 }, { "acc": 0.98094158, "epoch": 43.45676119053199, "grad_norm": 4.521691799163818, "learning_rate": 4.604921633337304e-07, "loss": 0.04813594, "memory(GiB)": 13.7, "step": 92715, "train_speed(iter/s)": 1.52874 }, { "acc": 0.9916666, "epoch": 43.45910475744083, "grad_norm": 0.7119148373603821, "learning_rate": 4.601676245663525e-07, "loss": 0.02836115, "memory(GiB)": 13.7, "step": 92720, "train_speed(iter/s)": 1.528744 }, { "acc": 0.98604164, "epoch": 43.46144832434966, "grad_norm": 3.3684067726135254, "learning_rate": 4.5984319493364764e-07, "loss": 0.02314111, "memory(GiB)": 13.7, "step": 92725, "train_speed(iter/s)": 1.528748 }, { "acc": 0.990625, "epoch": 43.463791891258495, "grad_norm": 5.233774662017822, "learning_rate": 4.5951887444341594e-07, "loss": 0.03344901, "memory(GiB)": 13.7, "step": 92730, "train_speed(iter/s)": 1.528748 }, { "acc": 0.99187498, "epoch": 43.46613545816733, "grad_norm": 2.947927951812744, "learning_rate": 4.5919466310345046e-07, "loss": 0.03451037, "memory(GiB)": 13.7, "step": 92735, "train_speed(iter/s)": 1.528748 }, { "acc": 0.9927083, "epoch": 43.468479025076164, "grad_norm": 2.212261438369751, "learning_rate": 4.58870560921548e-07, "loss": 0.02175993, "memory(GiB)": 13.7, "step": 92740, "train_speed(iter/s)": 1.528749 }, { "acc": 0.98529758, "epoch": 43.470822591985, "grad_norm": 2.9153406620025635, "learning_rate": 4.585465679054961e-07, "loss": 0.04299035, "memory(GiB)": 13.7, "step": 92745, "train_speed(iter/s)": 1.52875 }, { "acc": 0.9864584, "epoch": 43.47316615889384, "grad_norm": 1.234195590019226, "learning_rate": 4.5822268406308376e-07, "loss": 0.03538724, "memory(GiB)": 13.7, "step": 92750, "train_speed(iter/s)": 1.528752 }, { "acc": 0.99020834, "epoch": 43.475509725802674, "grad_norm": 2.341297149658203, "learning_rate": 4.5789890940209353e-07, "loss": 0.01169024, "memory(GiB)": 13.7, "step": 92755, "train_speed(iter/s)": 1.528754 }, { "acc": 0.98552074, "epoch": 43.47785329271151, "grad_norm": 2.7833502292633057, "learning_rate": 4.5757524393031217e-07, "loss": 0.04128537, "memory(GiB)": 13.7, "step": 92760, "train_speed(iter/s)": 1.528752 }, { "acc": 0.98395834, "epoch": 43.48019685962034, "grad_norm": 6.320006370544434, "learning_rate": 4.572516876555172e-07, "loss": 0.06209333, "memory(GiB)": 13.7, "step": 92765, "train_speed(iter/s)": 1.528757 }, { "acc": 0.99020834, "epoch": 43.48254042652918, "grad_norm": 4.336346626281738, "learning_rate": 4.5692824058548497e-07, "loss": 0.02976446, "memory(GiB)": 13.7, "step": 92770, "train_speed(iter/s)": 1.528761 }, { "acc": 0.9864584, "epoch": 43.48488399343801, "grad_norm": 4.590580463409424, "learning_rate": 4.566049027279917e-07, "loss": 0.03274175, "memory(GiB)": 13.7, "step": 92775, "train_speed(iter/s)": 1.528765 }, { "acc": 0.98249998, "epoch": 43.487227560346845, "grad_norm": 0.0038123310077935457, "learning_rate": 4.5628167409080775e-07, "loss": 0.06462539, "memory(GiB)": 13.7, "step": 92780, "train_speed(iter/s)": 1.528766 }, { "acc": 0.99750004, "epoch": 43.48957112725569, "grad_norm": 2.3768134117126465, "learning_rate": 4.55958554681705e-07, "loss": 0.01812812, "memory(GiB)": 13.7, "step": 92785, "train_speed(iter/s)": 1.52877 }, { "acc": 0.99069023, "epoch": 43.49191469416452, "grad_norm": 1.8023152351379395, "learning_rate": 4.5563554450844635e-07, "loss": 0.01481755, "memory(GiB)": 13.7, "step": 92790, "train_speed(iter/s)": 1.528769 }, { "acc": 0.9942708, "epoch": 43.494258261073355, "grad_norm": 1.5051175355911255, "learning_rate": 4.553126435788e-07, "loss": 0.02913074, "memory(GiB)": 13.7, "step": 92795, "train_speed(iter/s)": 1.528773 }, { "acc": 0.99020824, "epoch": 43.49660182798219, "grad_norm": 5.097294807434082, "learning_rate": 4.5498985190052484e-07, "loss": 0.04089141, "memory(GiB)": 13.7, "step": 92800, "train_speed(iter/s)": 1.528778 }, { "acc": 0.98508015, "epoch": 43.498945394891024, "grad_norm": 4.434678077697754, "learning_rate": 4.5466716948138125e-07, "loss": 0.04065742, "memory(GiB)": 13.7, "step": 92805, "train_speed(iter/s)": 1.528783 }, { "acc": 0.99389887, "epoch": 43.50128896179986, "grad_norm": 0.9966340661048889, "learning_rate": 4.543445963291245e-07, "loss": 0.02694085, "memory(GiB)": 13.7, "step": 92810, "train_speed(iter/s)": 1.528782 }, { "acc": 0.97840271, "epoch": 43.50363252870869, "grad_norm": 3.8545243740081787, "learning_rate": 4.540221324515081e-07, "loss": 0.04137216, "memory(GiB)": 13.7, "step": 92815, "train_speed(iter/s)": 1.528785 }, { "acc": 0.96715279, "epoch": 43.50597609561753, "grad_norm": 4.060520648956299, "learning_rate": 4.536997778562829e-07, "loss": 0.08032299, "memory(GiB)": 13.7, "step": 92820, "train_speed(iter/s)": 1.528782 }, { "acc": 0.99154758, "epoch": 43.50831966252637, "grad_norm": 2.312135934829712, "learning_rate": 4.533775325511974e-07, "loss": 0.03719954, "memory(GiB)": 13.7, "step": 92825, "train_speed(iter/s)": 1.528788 }, { "acc": 0.97666674, "epoch": 43.5106632294352, "grad_norm": 6.640080451965332, "learning_rate": 4.5305539654399864e-07, "loss": 0.05582982, "memory(GiB)": 13.7, "step": 92830, "train_speed(iter/s)": 1.528791 }, { "acc": 0.98654766, "epoch": 43.51300679634404, "grad_norm": 3.238044500350952, "learning_rate": 4.5273336984242784e-07, "loss": 0.03798214, "memory(GiB)": 13.7, "step": 92835, "train_speed(iter/s)": 1.528797 }, { "acc": 0.98166666, "epoch": 43.51535036325287, "grad_norm": 2.990962505340576, "learning_rate": 4.52411452454227e-07, "loss": 0.03678268, "memory(GiB)": 13.7, "step": 92840, "train_speed(iter/s)": 1.528799 }, { "acc": 0.98885422, "epoch": 43.517693930161705, "grad_norm": 2.1881957054138184, "learning_rate": 4.5208964438713196e-07, "loss": 0.04295871, "memory(GiB)": 13.7, "step": 92845, "train_speed(iter/s)": 1.528804 }, { "acc": 0.98604164, "epoch": 43.52003749707054, "grad_norm": 0.6490619778633118, "learning_rate": 4.517679456488802e-07, "loss": 0.02680344, "memory(GiB)": 13.7, "step": 92850, "train_speed(iter/s)": 1.528809 }, { "acc": 0.98239584, "epoch": 43.522381063979374, "grad_norm": 3.618898868560791, "learning_rate": 4.5144635624720365e-07, "loss": 0.03752201, "memory(GiB)": 13.7, "step": 92855, "train_speed(iter/s)": 1.528809 }, { "acc": 0.98654766, "epoch": 43.524724630888215, "grad_norm": 2.1530706882476807, "learning_rate": 4.511248761898314e-07, "loss": 0.02811049, "memory(GiB)": 13.7, "step": 92860, "train_speed(iter/s)": 1.528815 }, { "acc": 0.9864584, "epoch": 43.52706819779705, "grad_norm": 0.0011958953691646457, "learning_rate": 4.5080350548449157e-07, "loss": 0.02704172, "memory(GiB)": 13.7, "step": 92865, "train_speed(iter/s)": 1.528822 }, { "acc": 0.98862181, "epoch": 43.529411764705884, "grad_norm": 2.6703624725341797, "learning_rate": 4.504822441389076e-07, "loss": 0.0361562, "memory(GiB)": 13.7, "step": 92870, "train_speed(iter/s)": 1.528823 }, { "acc": 0.97833328, "epoch": 43.53175533161472, "grad_norm": 0.4203185737133026, "learning_rate": 4.501610921608039e-07, "loss": 0.02808379, "memory(GiB)": 13.7, "step": 92875, "train_speed(iter/s)": 1.528828 }, { "acc": 0.99333334, "epoch": 43.53409889852355, "grad_norm": 0.0035641405265778303, "learning_rate": 4.4984004955789616e-07, "loss": 0.02143457, "memory(GiB)": 13.7, "step": 92880, "train_speed(iter/s)": 1.528836 }, { "acc": 0.99298611, "epoch": 43.53644246543239, "grad_norm": 3.9913241863250732, "learning_rate": 4.495191163379058e-07, "loss": 0.038334, "memory(GiB)": 13.7, "step": 92885, "train_speed(iter/s)": 1.528836 }, { "acc": 0.9848958, "epoch": 43.53878603234122, "grad_norm": 4.030055046081543, "learning_rate": 4.4919829250854473e-07, "loss": 0.04310822, "memory(GiB)": 13.7, "step": 92890, "train_speed(iter/s)": 1.528835 }, { "acc": 0.9729166, "epoch": 43.541129599250056, "grad_norm": 4.822890758514404, "learning_rate": 4.4887757807752377e-07, "loss": 0.07443514, "memory(GiB)": 13.7, "step": 92895, "train_speed(iter/s)": 1.528835 }, { "acc": 0.98794641, "epoch": 43.5434731661589, "grad_norm": 2.1511619091033936, "learning_rate": 4.4855697305255333e-07, "loss": 0.04912793, "memory(GiB)": 13.7, "step": 92900, "train_speed(iter/s)": 1.528835 }, { "acc": 0.98520832, "epoch": 43.54581673306773, "grad_norm": 5.155420303344727, "learning_rate": 4.482364774413369e-07, "loss": 0.02694151, "memory(GiB)": 13.7, "step": 92905, "train_speed(iter/s)": 1.528836 }, { "acc": 0.98520832, "epoch": 43.548160299976566, "grad_norm": 2.5990641117095947, "learning_rate": 4.4791609125158203e-07, "loss": 0.04037355, "memory(GiB)": 13.7, "step": 92910, "train_speed(iter/s)": 1.528837 }, { "acc": 0.9958334, "epoch": 43.5505038668854, "grad_norm": 1.9278781414031982, "learning_rate": 4.4759581449098733e-07, "loss": 0.0142948, "memory(GiB)": 13.7, "step": 92915, "train_speed(iter/s)": 1.528837 }, { "acc": 0.99256954, "epoch": 43.552847433794234, "grad_norm": 2.4069137573242188, "learning_rate": 4.472756471672526e-07, "loss": 0.0232592, "memory(GiB)": 13.7, "step": 92920, "train_speed(iter/s)": 1.528837 }, { "acc": 0.98781567, "epoch": 43.55519100070307, "grad_norm": 2.1734158992767334, "learning_rate": 4.469555892880714e-07, "loss": 0.0649782, "memory(GiB)": 13.7, "step": 92925, "train_speed(iter/s)": 1.528842 }, { "acc": 0.9888195, "epoch": 43.5575345676119, "grad_norm": 0.9866616725921631, "learning_rate": 4.4663564086113964e-07, "loss": 0.04272782, "memory(GiB)": 13.7, "step": 92930, "train_speed(iter/s)": 1.528848 }, { "acc": 0.9822917, "epoch": 43.55987813452074, "grad_norm": 5.319065093994141, "learning_rate": 4.46315801894147e-07, "loss": 0.03170187, "memory(GiB)": 13.7, "step": 92935, "train_speed(iter/s)": 1.528849 }, { "acc": 0.9911459, "epoch": 43.56222170142958, "grad_norm": 3.9040608406066895, "learning_rate": 4.459960723947783e-07, "loss": 0.02025761, "memory(GiB)": 13.7, "step": 92940, "train_speed(iter/s)": 1.528855 }, { "acc": 0.98041668, "epoch": 43.56456526833841, "grad_norm": 4.232197284698486, "learning_rate": 4.456764523707232e-07, "loss": 0.05751722, "memory(GiB)": 13.7, "step": 92945, "train_speed(iter/s)": 1.528856 }, { "acc": 0.99622478, "epoch": 43.56690883524725, "grad_norm": 1.3689790964126587, "learning_rate": 4.4535694182966155e-07, "loss": 0.0264715, "memory(GiB)": 13.7, "step": 92950, "train_speed(iter/s)": 1.528856 }, { "acc": 0.9864584, "epoch": 43.56925240215608, "grad_norm": 4.122289180755615, "learning_rate": 4.450375407792753e-07, "loss": 0.0421512, "memory(GiB)": 13.7, "step": 92955, "train_speed(iter/s)": 1.52886 }, { "acc": 0.98604164, "epoch": 43.571595969064916, "grad_norm": 0.001713992445729673, "learning_rate": 4.447182492272397e-07, "loss": 0.03815337, "memory(GiB)": 13.7, "step": 92960, "train_speed(iter/s)": 1.528863 }, { "acc": 0.98999386, "epoch": 43.57393953597375, "grad_norm": 0.6003832817077637, "learning_rate": 4.4439906718123073e-07, "loss": 0.04049963, "memory(GiB)": 13.7, "step": 92965, "train_speed(iter/s)": 1.528867 }, { "acc": 0.99548607, "epoch": 43.576283102882584, "grad_norm": 1.7601343393325806, "learning_rate": 4.4407999464891964e-07, "loss": 0.02296404, "memory(GiB)": 13.7, "step": 92970, "train_speed(iter/s)": 1.528871 }, { "acc": 0.97599211, "epoch": 43.578626669791426, "grad_norm": 0.0008496630471199751, "learning_rate": 4.437610316379781e-07, "loss": 0.0707496, "memory(GiB)": 13.7, "step": 92975, "train_speed(iter/s)": 1.528875 }, { "acc": 0.98351192, "epoch": 43.58097023670026, "grad_norm": 2.7599549293518066, "learning_rate": 4.434421781560707e-07, "loss": 0.04425952, "memory(GiB)": 13.7, "step": 92980, "train_speed(iter/s)": 1.528877 }, { "acc": 0.98343754, "epoch": 43.583313803609094, "grad_norm": 2.949651002883911, "learning_rate": 4.431234342108617e-07, "loss": 0.07056738, "memory(GiB)": 13.7, "step": 92985, "train_speed(iter/s)": 1.528879 }, { "acc": 0.98571434, "epoch": 43.58565737051793, "grad_norm": 2.932048797607422, "learning_rate": 4.4280479981001365e-07, "loss": 0.02728774, "memory(GiB)": 13.7, "step": 92990, "train_speed(iter/s)": 1.528882 }, { "acc": 0.98848953, "epoch": 43.58800093742676, "grad_norm": 1.7842686176300049, "learning_rate": 4.424862749611836e-07, "loss": 0.02826486, "memory(GiB)": 13.7, "step": 92995, "train_speed(iter/s)": 1.528879 }, { "acc": 0.98270836, "epoch": 43.5903445043356, "grad_norm": 1.247338056564331, "learning_rate": 4.4216785967203124e-07, "loss": 0.03737819, "memory(GiB)": 13.7, "step": 93000, "train_speed(iter/s)": 1.52888 }, { "acc": 0.99048615, "epoch": 43.59268807124443, "grad_norm": 1.8320521116256714, "learning_rate": 4.4184955395020693e-07, "loss": 0.02100507, "memory(GiB)": 13.7, "step": 93005, "train_speed(iter/s)": 1.528879 }, { "acc": 0.9764286, "epoch": 43.595031638153266, "grad_norm": 3.735095262527466, "learning_rate": 4.4153135780336433e-07, "loss": 0.06375607, "memory(GiB)": 13.7, "step": 93010, "train_speed(iter/s)": 1.528881 }, { "acc": 0.99125004, "epoch": 43.59737520506211, "grad_norm": 5.422021865844727, "learning_rate": 4.412132712391505e-07, "loss": 0.03202897, "memory(GiB)": 13.7, "step": 93015, "train_speed(iter/s)": 1.528884 }, { "acc": 0.97729168, "epoch": 43.59971877197094, "grad_norm": 4.4092278480529785, "learning_rate": 4.4089529426520963e-07, "loss": 0.03708466, "memory(GiB)": 13.7, "step": 93020, "train_speed(iter/s)": 1.528885 }, { "acc": 0.98819447, "epoch": 43.602062338879776, "grad_norm": 3.6240904331207275, "learning_rate": 4.4057742688918816e-07, "loss": 0.01857114, "memory(GiB)": 13.7, "step": 93025, "train_speed(iter/s)": 1.528887 }, { "acc": 0.9927084, "epoch": 43.60440590578861, "grad_norm": 2.2596077919006348, "learning_rate": 4.4025966911872254e-07, "loss": 0.01859464, "memory(GiB)": 13.7, "step": 93030, "train_speed(iter/s)": 1.528893 }, { "acc": 0.97145834, "epoch": 43.606749472697445, "grad_norm": 0.10816026479005814, "learning_rate": 4.3994202096145485e-07, "loss": 0.05440365, "memory(GiB)": 13.7, "step": 93035, "train_speed(iter/s)": 1.528895 }, { "acc": 0.99236107, "epoch": 43.60909303960628, "grad_norm": 2.6892049312591553, "learning_rate": 4.3962448242501765e-07, "loss": 0.02950258, "memory(GiB)": 13.7, "step": 93040, "train_speed(iter/s)": 1.5289 }, { "acc": 0.98583336, "epoch": 43.61143660651511, "grad_norm": 0.0032449145801365376, "learning_rate": 4.3930705351704457e-07, "loss": 0.03621168, "memory(GiB)": 13.7, "step": 93045, "train_speed(iter/s)": 1.528904 }, { "acc": 1.0, "epoch": 43.613780173423955, "grad_norm": 0.004780858755111694, "learning_rate": 4.3898973424516435e-07, "loss": 0.00990027, "memory(GiB)": 13.7, "step": 93050, "train_speed(iter/s)": 1.528909 }, { "acc": 0.9802083, "epoch": 43.61612374033279, "grad_norm": 3.3153131008148193, "learning_rate": 4.386725246170062e-07, "loss": 0.03862237, "memory(GiB)": 13.7, "step": 93055, "train_speed(iter/s)": 1.528908 }, { "acc": 0.9854166, "epoch": 43.61846730724162, "grad_norm": 4.781196594238281, "learning_rate": 4.3835542464019437e-07, "loss": 0.04876083, "memory(GiB)": 13.7, "step": 93060, "train_speed(iter/s)": 1.528908 }, { "acc": 0.99375, "epoch": 43.62081087415046, "grad_norm": 4.06174898147583, "learning_rate": 4.380384343223492e-07, "loss": 0.05728421, "memory(GiB)": 13.7, "step": 93065, "train_speed(iter/s)": 1.528907 }, { "acc": 0.98500004, "epoch": 43.62315444105929, "grad_norm": 0.19645729660987854, "learning_rate": 4.377215536710917e-07, "loss": 0.02933623, "memory(GiB)": 13.7, "step": 93070, "train_speed(iter/s)": 1.528908 }, { "acc": 0.98031254, "epoch": 43.625498007968126, "grad_norm": 3.6350114345550537, "learning_rate": 4.3740478269403775e-07, "loss": 0.05065004, "memory(GiB)": 13.7, "step": 93075, "train_speed(iter/s)": 1.528912 }, { "acc": 0.98395834, "epoch": 43.62784157487696, "grad_norm": 4.014238357543945, "learning_rate": 4.370881213988027e-07, "loss": 0.02500657, "memory(GiB)": 13.7, "step": 93080, "train_speed(iter/s)": 1.528913 }, { "acc": 0.98425598, "epoch": 43.630185141785795, "grad_norm": 0.920538067817688, "learning_rate": 4.367715697929952e-07, "loss": 0.03059666, "memory(GiB)": 13.7, "step": 93085, "train_speed(iter/s)": 1.528915 }, { "acc": 0.99020834, "epoch": 43.632528708694636, "grad_norm": 2.99239444732666, "learning_rate": 4.364551278842286e-07, "loss": 0.0400818, "memory(GiB)": 13.7, "step": 93090, "train_speed(iter/s)": 1.528912 }, { "acc": 0.99186954, "epoch": 43.63487227560347, "grad_norm": 2.1998846530914307, "learning_rate": 4.3613879568010526e-07, "loss": 0.04421959, "memory(GiB)": 13.7, "step": 93095, "train_speed(iter/s)": 1.528917 }, { "acc": 0.99090281, "epoch": 43.637215842512305, "grad_norm": 0.6803686022758484, "learning_rate": 4.358225731882312e-07, "loss": 0.05622841, "memory(GiB)": 13.7, "step": 93100, "train_speed(iter/s)": 1.52892 }, { "acc": 0.98979168, "epoch": 43.63955940942114, "grad_norm": 0.08071093261241913, "learning_rate": 4.3550646041620635e-07, "loss": 0.02884898, "memory(GiB)": 13.7, "step": 93105, "train_speed(iter/s)": 1.52892 }, { "acc": 0.97696428, "epoch": 43.64190297632997, "grad_norm": 4.283988952636719, "learning_rate": 4.35190457371627e-07, "loss": 0.04436437, "memory(GiB)": 13.7, "step": 93110, "train_speed(iter/s)": 1.528924 }, { "acc": 0.990625, "epoch": 43.64424654323881, "grad_norm": 2.8247182369232178, "learning_rate": 4.348745640620932e-07, "loss": 0.02891672, "memory(GiB)": 13.7, "step": 93115, "train_speed(iter/s)": 1.528926 }, { "acc": 0.98268852, "epoch": 43.64659011014764, "grad_norm": 3.2186217308044434, "learning_rate": 4.345587804951941e-07, "loss": 0.0696201, "memory(GiB)": 13.7, "step": 93120, "train_speed(iter/s)": 1.528927 }, { "acc": 0.98562498, "epoch": 43.64893367705648, "grad_norm": 2.2709596157073975, "learning_rate": 4.3424310667852294e-07, "loss": 0.04790065, "memory(GiB)": 13.7, "step": 93125, "train_speed(iter/s)": 1.528928 }, { "acc": 0.98298616, "epoch": 43.65127724396532, "grad_norm": 0.0009373554494231939, "learning_rate": 4.33927542619665e-07, "loss": 0.09098896, "memory(GiB)": 13.7, "step": 93130, "train_speed(iter/s)": 1.52893 }, { "acc": 0.97432003, "epoch": 43.65362081087415, "grad_norm": 1.6183527708053589, "learning_rate": 4.3361208832620793e-07, "loss": 0.05511147, "memory(GiB)": 13.7, "step": 93135, "train_speed(iter/s)": 1.528931 }, { "acc": 0.97833328, "epoch": 43.655964377782986, "grad_norm": 3.9419445991516113, "learning_rate": 4.332967438057322e-07, "loss": 0.03356034, "memory(GiB)": 13.7, "step": 93140, "train_speed(iter/s)": 1.528936 }, { "acc": 0.98438492, "epoch": 43.65830794469182, "grad_norm": 4.622076034545898, "learning_rate": 4.329815090658171e-07, "loss": 0.05880132, "memory(GiB)": 13.7, "step": 93145, "train_speed(iter/s)": 1.528934 }, { "acc": 0.9880209, "epoch": 43.660651511600655, "grad_norm": 2.146867036819458, "learning_rate": 4.3266638411404243e-07, "loss": 0.05564436, "memory(GiB)": 13.7, "step": 93150, "train_speed(iter/s)": 1.528934 }, { "acc": 0.99291668, "epoch": 43.66299507850949, "grad_norm": 0.3263731300830841, "learning_rate": 4.3235136895798083e-07, "loss": 0.00923146, "memory(GiB)": 13.7, "step": 93155, "train_speed(iter/s)": 1.528932 }, { "acc": 0.97624998, "epoch": 43.66533864541832, "grad_norm": 4.687374591827393, "learning_rate": 4.32036463605206e-07, "loss": 0.04353138, "memory(GiB)": 13.7, "step": 93160, "train_speed(iter/s)": 1.528933 }, { "acc": 0.98166122, "epoch": 43.667682212327165, "grad_norm": 0.29051920771598816, "learning_rate": 4.3172166806328455e-07, "loss": 0.04548381, "memory(GiB)": 13.7, "step": 93165, "train_speed(iter/s)": 1.528934 }, { "acc": 0.97000246, "epoch": 43.670025779236, "grad_norm": 4.681553840637207, "learning_rate": 4.3140698233978577e-07, "loss": 0.06204767, "memory(GiB)": 13.7, "step": 93170, "train_speed(iter/s)": 1.528933 }, { "acc": 0.990625, "epoch": 43.67236934614483, "grad_norm": 2.741001844406128, "learning_rate": 4.310924064422705e-07, "loss": 0.02024903, "memory(GiB)": 13.7, "step": 93175, "train_speed(iter/s)": 1.528935 }, { "acc": 0.98488102, "epoch": 43.67471291305367, "grad_norm": 5.954095840454102, "learning_rate": 4.307779403783037e-07, "loss": 0.04061947, "memory(GiB)": 13.7, "step": 93180, "train_speed(iter/s)": 1.528934 }, { "acc": 0.9927084, "epoch": 43.6770564799625, "grad_norm": 0.3883320391178131, "learning_rate": 4.30463584155443e-07, "loss": 0.04487664, "memory(GiB)": 13.7, "step": 93185, "train_speed(iter/s)": 1.528936 }, { "acc": 0.996875, "epoch": 43.679400046871336, "grad_norm": 0.016924217343330383, "learning_rate": 4.3014933778124284e-07, "loss": 0.00874247, "memory(GiB)": 13.7, "step": 93190, "train_speed(iter/s)": 1.528939 }, { "acc": 0.98104172, "epoch": 43.68174361378017, "grad_norm": 3.721151113510132, "learning_rate": 4.2983520126325895e-07, "loss": 0.06871176, "memory(GiB)": 13.7, "step": 93195, "train_speed(iter/s)": 1.528938 }, { "acc": 0.98151045, "epoch": 43.68408718068901, "grad_norm": 7.621411323547363, "learning_rate": 4.2952117460903905e-07, "loss": 0.0365628, "memory(GiB)": 13.7, "step": 93200, "train_speed(iter/s)": 1.528941 }, { "acc": 0.9927084, "epoch": 43.686430747597846, "grad_norm": 0.9351298809051514, "learning_rate": 4.2920725782613486e-07, "loss": 0.02991897, "memory(GiB)": 13.7, "step": 93205, "train_speed(iter/s)": 1.528941 }, { "acc": 0.99437504, "epoch": 43.68877431450668, "grad_norm": 3.0508930683135986, "learning_rate": 4.2889345092208996e-07, "loss": 0.01016959, "memory(GiB)": 13.7, "step": 93210, "train_speed(iter/s)": 1.528943 }, { "acc": 0.98666668, "epoch": 43.691117881415515, "grad_norm": 0.9471524953842163, "learning_rate": 4.2857975390444817e-07, "loss": 0.07104445, "memory(GiB)": 13.7, "step": 93215, "train_speed(iter/s)": 1.528945 }, { "acc": 0.98031244, "epoch": 43.69346144832435, "grad_norm": 3.5309154987335205, "learning_rate": 4.2826616678074825e-07, "loss": 0.05411186, "memory(GiB)": 13.7, "step": 93220, "train_speed(iter/s)": 1.528947 }, { "acc": 0.98145828, "epoch": 43.695805015233184, "grad_norm": 3.525581121444702, "learning_rate": 4.279526895585296e-07, "loss": 0.04024668, "memory(GiB)": 13.7, "step": 93225, "train_speed(iter/s)": 1.528951 }, { "acc": 0.98916664, "epoch": 43.69814858214202, "grad_norm": 3.8473942279815674, "learning_rate": 4.2763932224532643e-07, "loss": 0.02764853, "memory(GiB)": 13.7, "step": 93230, "train_speed(iter/s)": 1.528956 }, { "acc": 0.98291664, "epoch": 43.70049214905085, "grad_norm": 0.10560110211372375, "learning_rate": 4.273260648486693e-07, "loss": 0.03430322, "memory(GiB)": 13.7, "step": 93235, "train_speed(iter/s)": 1.528961 }, { "acc": 0.98660717, "epoch": 43.702835715959694, "grad_norm": 3.720524787902832, "learning_rate": 4.270129173760908e-07, "loss": 0.04015618, "memory(GiB)": 13.7, "step": 93240, "train_speed(iter/s)": 1.528967 }, { "acc": 0.9963541, "epoch": 43.70517928286853, "grad_norm": 3.6779255867004395, "learning_rate": 4.2669987983511637e-07, "loss": 0.0238726, "memory(GiB)": 13.7, "step": 93245, "train_speed(iter/s)": 1.528968 }, { "acc": 0.98675594, "epoch": 43.70752284977736, "grad_norm": 2.8182640075683594, "learning_rate": 4.263869522332715e-07, "loss": 0.01900397, "memory(GiB)": 13.7, "step": 93250, "train_speed(iter/s)": 1.528969 }, { "acc": 0.98916664, "epoch": 43.7098664166862, "grad_norm": 3.7292733192443848, "learning_rate": 4.26074134578075e-07, "loss": 0.02977968, "memory(GiB)": 13.7, "step": 93255, "train_speed(iter/s)": 1.528969 }, { "acc": 0.99249992, "epoch": 43.71220998359503, "grad_norm": 0.001327776932157576, "learning_rate": 4.257614268770506e-07, "loss": 0.02930778, "memory(GiB)": 13.7, "step": 93260, "train_speed(iter/s)": 1.528975 }, { "acc": 0.9802084, "epoch": 43.714553550503865, "grad_norm": 3.8452396392822266, "learning_rate": 4.254488291377115e-07, "loss": 0.04550269, "memory(GiB)": 13.7, "step": 93265, "train_speed(iter/s)": 1.528974 }, { "acc": 0.97666664, "epoch": 43.7168971174127, "grad_norm": 3.5333526134490967, "learning_rate": 4.2513634136757214e-07, "loss": 0.02739093, "memory(GiB)": 13.7, "step": 93270, "train_speed(iter/s)": 1.528978 }, { "acc": 0.98281107, "epoch": 43.71924068432154, "grad_norm": 2.961838960647583, "learning_rate": 4.248239635741447e-07, "loss": 0.07258478, "memory(GiB)": 13.7, "step": 93275, "train_speed(iter/s)": 1.52898 }, { "acc": 0.99562502, "epoch": 43.721584251230375, "grad_norm": 3.2511305809020996, "learning_rate": 4.2451169576493554e-07, "loss": 0.02611618, "memory(GiB)": 13.7, "step": 93280, "train_speed(iter/s)": 1.528981 }, { "acc": 0.98696423, "epoch": 43.72392781813921, "grad_norm": 3.115037441253662, "learning_rate": 4.241995379474531e-07, "loss": 0.04002132, "memory(GiB)": 13.7, "step": 93285, "train_speed(iter/s)": 1.528988 }, { "acc": 0.98008938, "epoch": 43.726271385048044, "grad_norm": 0.8200996518135071, "learning_rate": 4.2388749012919785e-07, "loss": 0.03640952, "memory(GiB)": 13.7, "step": 93290, "train_speed(iter/s)": 1.528993 }, { "acc": 0.98458328, "epoch": 43.72861495195688, "grad_norm": 0.0019853319972753525, "learning_rate": 4.235755523176734e-07, "loss": 0.02345509, "memory(GiB)": 13.7, "step": 93295, "train_speed(iter/s)": 1.528995 }, { "acc": 0.98395834, "epoch": 43.73095851886571, "grad_norm": 0.0506216362118721, "learning_rate": 4.23263724520376e-07, "loss": 0.03536677, "memory(GiB)": 13.7, "step": 93300, "train_speed(iter/s)": 1.528996 }, { "acc": 0.97516403, "epoch": 43.73330208577455, "grad_norm": 5.2536444664001465, "learning_rate": 4.229520067448016e-07, "loss": 0.04760908, "memory(GiB)": 13.7, "step": 93305, "train_speed(iter/s)": 1.529001 }, { "acc": 0.97562504, "epoch": 43.73564565268338, "grad_norm": 3.6456332206726074, "learning_rate": 4.2264039899844283e-07, "loss": 0.06373109, "memory(GiB)": 13.7, "step": 93310, "train_speed(iter/s)": 1.529001 }, { "acc": 0.99083328, "epoch": 43.73798921959222, "grad_norm": 3.4473609924316406, "learning_rate": 4.2232890128878745e-07, "loss": 0.02337348, "memory(GiB)": 13.7, "step": 93315, "train_speed(iter/s)": 1.529002 }, { "acc": 0.9822917, "epoch": 43.74033278650106, "grad_norm": 3.5050289630889893, "learning_rate": 4.220175136233271e-07, "loss": 0.03637693, "memory(GiB)": 13.7, "step": 93320, "train_speed(iter/s)": 1.529003 }, { "acc": 0.98770828, "epoch": 43.74267635340989, "grad_norm": 2.086467742919922, "learning_rate": 4.217062360095428e-07, "loss": 0.03016433, "memory(GiB)": 13.7, "step": 93325, "train_speed(iter/s)": 1.529006 }, { "acc": 0.98342266, "epoch": 43.745019920318725, "grad_norm": 1.1035088300704956, "learning_rate": 4.2139506845491943e-07, "loss": 0.04360623, "memory(GiB)": 13.7, "step": 93330, "train_speed(iter/s)": 1.529007 }, { "acc": 0.98083334, "epoch": 43.74736348722756, "grad_norm": 0.19608166813850403, "learning_rate": 4.210840109669337e-07, "loss": 0.04227166, "memory(GiB)": 13.7, "step": 93335, "train_speed(iter/s)": 1.529011 }, { "acc": 0.98552074, "epoch": 43.749707054136394, "grad_norm": 3.253519058227539, "learning_rate": 4.2077306355306484e-07, "loss": 0.04725808, "memory(GiB)": 13.7, "step": 93340, "train_speed(iter/s)": 1.52901 }, { "acc": 0.9805357, "epoch": 43.75205062104523, "grad_norm": 0.01036878488957882, "learning_rate": 4.204622262207845e-07, "loss": 0.04905613, "memory(GiB)": 13.7, "step": 93345, "train_speed(iter/s)": 1.529012 }, { "acc": 0.98407745, "epoch": 43.75439418795406, "grad_norm": 6.3747663497924805, "learning_rate": 4.201514989775671e-07, "loss": 0.05019172, "memory(GiB)": 13.7, "step": 93350, "train_speed(iter/s)": 1.52901 }, { "acc": 0.98937502, "epoch": 43.756737754862904, "grad_norm": 2.9616899490356445, "learning_rate": 4.1984088183088e-07, "loss": 0.03171943, "memory(GiB)": 13.7, "step": 93355, "train_speed(iter/s)": 1.529012 }, { "acc": 0.9916667, "epoch": 43.75908132177174, "grad_norm": 4.120055675506592, "learning_rate": 4.195303747881889e-07, "loss": 0.02093822, "memory(GiB)": 13.7, "step": 93360, "train_speed(iter/s)": 1.529012 }, { "acc": 0.98198862, "epoch": 43.76142488868057, "grad_norm": 3.093242883682251, "learning_rate": 4.1921997785695836e-07, "loss": 0.04303629, "memory(GiB)": 13.7, "step": 93365, "train_speed(iter/s)": 1.529014 }, { "acc": 0.99750004, "epoch": 43.76376845558941, "grad_norm": 2.870664596557617, "learning_rate": 4.1890969104464835e-07, "loss": 0.04707935, "memory(GiB)": 13.7, "step": 93370, "train_speed(iter/s)": 1.529015 }, { "acc": 0.99375, "epoch": 43.76611202249824, "grad_norm": 1.3197519779205322, "learning_rate": 4.185995143587188e-07, "loss": 0.01739649, "memory(GiB)": 13.7, "step": 93375, "train_speed(iter/s)": 1.529018 }, { "acc": 1.0, "epoch": 43.768455589407075, "grad_norm": 3.69024658203125, "learning_rate": 4.1828944780662193e-07, "loss": 0.02869569, "memory(GiB)": 13.7, "step": 93380, "train_speed(iter/s)": 1.529018 }, { "acc": 0.98842258, "epoch": 43.77079915631591, "grad_norm": 2.2019853591918945, "learning_rate": 4.179794913958149e-07, "loss": 0.03695612, "memory(GiB)": 13.7, "step": 93385, "train_speed(iter/s)": 1.529019 }, { "acc": 0.98737183, "epoch": 43.77314272322475, "grad_norm": 1.3664993047714233, "learning_rate": 4.176696451337454e-07, "loss": 0.02293877, "memory(GiB)": 13.7, "step": 93390, "train_speed(iter/s)": 1.529022 }, { "acc": 0.984375, "epoch": 43.775486290133585, "grad_norm": 2.846534490585327, "learning_rate": 4.173599090278629e-07, "loss": 0.02214155, "memory(GiB)": 13.7, "step": 93395, "train_speed(iter/s)": 1.529023 }, { "acc": 0.98299675, "epoch": 43.77782985704242, "grad_norm": 1.2458322048187256, "learning_rate": 4.1705028308561117e-07, "loss": 0.03325967, "memory(GiB)": 13.7, "step": 93400, "train_speed(iter/s)": 1.529026 }, { "acc": 0.9885417, "epoch": 43.780173423951254, "grad_norm": 0.9515756964683533, "learning_rate": 4.167407673144308e-07, "loss": 0.0396125, "memory(GiB)": 13.7, "step": 93405, "train_speed(iter/s)": 1.52903 }, { "acc": 0.9885417, "epoch": 43.78251699086009, "grad_norm": 3.7366764545440674, "learning_rate": 4.1643136172176566e-07, "loss": 0.03926786, "memory(GiB)": 13.7, "step": 93410, "train_speed(iter/s)": 1.529033 }, { "acc": 0.97344704, "epoch": 43.78486055776892, "grad_norm": 7.816678047180176, "learning_rate": 4.1612206631505017e-07, "loss": 0.04817374, "memory(GiB)": 13.7, "step": 93415, "train_speed(iter/s)": 1.529034 }, { "acc": 0.98488102, "epoch": 43.78720412467776, "grad_norm": 2.5383455753326416, "learning_rate": 4.158128811017198e-07, "loss": 0.05056198, "memory(GiB)": 13.7, "step": 93420, "train_speed(iter/s)": 1.529035 }, { "acc": 0.99246111, "epoch": 43.78954769158659, "grad_norm": 2.2576725482940674, "learning_rate": 4.155038060892052e-07, "loss": 0.01376302, "memory(GiB)": 13.7, "step": 93425, "train_speed(iter/s)": 1.529028 }, { "acc": 0.98395834, "epoch": 43.79189125849543, "grad_norm": 0.6923702359199524, "learning_rate": 4.151948412849368e-07, "loss": 0.03459169, "memory(GiB)": 13.7, "step": 93430, "train_speed(iter/s)": 1.529033 }, { "acc": 0.98467255, "epoch": 43.79423482540427, "grad_norm": 0.005201764404773712, "learning_rate": 4.1488598669634126e-07, "loss": 0.04722008, "memory(GiB)": 13.7, "step": 93435, "train_speed(iter/s)": 1.529039 }, { "acc": 0.9967804, "epoch": 43.7965783923131, "grad_norm": 0.7994614243507385, "learning_rate": 4.145772423308391e-07, "loss": 0.02936689, "memory(GiB)": 13.7, "step": 93440, "train_speed(iter/s)": 1.529043 }, { "acc": 0.99125004, "epoch": 43.798921959221936, "grad_norm": 3.8654541969299316, "learning_rate": 4.14268608195856e-07, "loss": 0.01457386, "memory(GiB)": 13.7, "step": 93445, "train_speed(iter/s)": 1.529047 }, { "acc": 0.98395834, "epoch": 43.80126552613077, "grad_norm": 2.8606467247009277, "learning_rate": 4.1396008429880784e-07, "loss": 0.01855749, "memory(GiB)": 13.7, "step": 93450, "train_speed(iter/s)": 1.529047 }, { "acc": 0.99229164, "epoch": 43.803609093039604, "grad_norm": 2.3200504779815674, "learning_rate": 4.136516706471126e-07, "loss": 0.03148877, "memory(GiB)": 13.7, "step": 93455, "train_speed(iter/s)": 1.529049 }, { "acc": 0.98249998, "epoch": 43.80595265994844, "grad_norm": 4.020597457885742, "learning_rate": 4.133433672481802e-07, "loss": 0.06018571, "memory(GiB)": 13.7, "step": 93460, "train_speed(iter/s)": 1.529057 }, { "acc": 0.9770833, "epoch": 43.80829622685728, "grad_norm": 4.43211030960083, "learning_rate": 4.13035174109425e-07, "loss": 0.05059025, "memory(GiB)": 13.7, "step": 93465, "train_speed(iter/s)": 1.529059 }, { "acc": 0.98833332, "epoch": 43.810639793766114, "grad_norm": 5.021761417388916, "learning_rate": 4.1272709123825216e-07, "loss": 0.04289019, "memory(GiB)": 13.7, "step": 93470, "train_speed(iter/s)": 1.529062 }, { "acc": 0.98675594, "epoch": 43.81298336067495, "grad_norm": 2.602334976196289, "learning_rate": 4.1241911864206936e-07, "loss": 0.04780179, "memory(GiB)": 13.7, "step": 93475, "train_speed(iter/s)": 1.529068 }, { "acc": 0.98170509, "epoch": 43.81532692758378, "grad_norm": 2.034325361251831, "learning_rate": 4.121112563282777e-07, "loss": 0.04709513, "memory(GiB)": 13.7, "step": 93480, "train_speed(iter/s)": 1.529071 }, { "acc": 0.98291664, "epoch": 43.81767049449262, "grad_norm": 4.519815444946289, "learning_rate": 4.118035043042761e-07, "loss": 0.03141707, "memory(GiB)": 13.7, "step": 93485, "train_speed(iter/s)": 1.529069 }, { "acc": 0.99437504, "epoch": 43.82001406140145, "grad_norm": 3.7089169025421143, "learning_rate": 4.114958625774645e-07, "loss": 0.06774132, "memory(GiB)": 13.7, "step": 93490, "train_speed(iter/s)": 1.529068 }, { "acc": 0.97841349, "epoch": 43.822357628310286, "grad_norm": 0.08313081413507462, "learning_rate": 4.111883311552348e-07, "loss": 0.04079765, "memory(GiB)": 13.7, "step": 93495, "train_speed(iter/s)": 1.529068 }, { "acc": 0.98458328, "epoch": 43.82470119521912, "grad_norm": 5.519235134124756, "learning_rate": 4.1088091004498174e-07, "loss": 0.02590065, "memory(GiB)": 13.7, "step": 93500, "train_speed(iter/s)": 1.529071 }, { "acc": 0.98291664, "epoch": 43.82704476212796, "grad_norm": 1.6668131351470947, "learning_rate": 4.105735992540933e-07, "loss": 0.04104756, "memory(GiB)": 13.7, "step": 93505, "train_speed(iter/s)": 1.529073 }, { "acc": 0.98862553, "epoch": 43.829388329036796, "grad_norm": 2.653963088989258, "learning_rate": 4.1026639878995716e-07, "loss": 0.03503139, "memory(GiB)": 13.7, "step": 93510, "train_speed(iter/s)": 1.529074 }, { "acc": 0.98770828, "epoch": 43.83173189594563, "grad_norm": 0.8221614360809326, "learning_rate": 4.0995930865995564e-07, "loss": 0.03053568, "memory(GiB)": 13.7, "step": 93515, "train_speed(iter/s)": 1.529074 }, { "acc": 0.9927084, "epoch": 43.834075462854464, "grad_norm": 2.5424187183380127, "learning_rate": 4.09652328871472e-07, "loss": 0.01997747, "memory(GiB)": 13.7, "step": 93520, "train_speed(iter/s)": 1.529075 }, { "acc": 0.9885417, "epoch": 43.8364190297633, "grad_norm": 2.253983736038208, "learning_rate": 4.0934545943188366e-07, "loss": 0.02232053, "memory(GiB)": 13.7, "step": 93525, "train_speed(iter/s)": 1.52908 }, { "acc": 0.9796875, "epoch": 43.83876259667213, "grad_norm": 2.7695114612579346, "learning_rate": 4.090387003485672e-07, "loss": 0.03455576, "memory(GiB)": 13.7, "step": 93530, "train_speed(iter/s)": 1.529083 }, { "acc": 0.99125004, "epoch": 43.84110616358097, "grad_norm": 3.63913631439209, "learning_rate": 4.0873205162889714e-07, "loss": 0.02035987, "memory(GiB)": 13.7, "step": 93535, "train_speed(iter/s)": 1.529083 }, { "acc": 0.99092264, "epoch": 43.84344973048981, "grad_norm": 2.6264026165008545, "learning_rate": 4.084255132802423e-07, "loss": 0.02311293, "memory(GiB)": 13.7, "step": 93540, "train_speed(iter/s)": 1.529083 }, { "acc": 0.96333332, "epoch": 43.84579329739864, "grad_norm": 5.001298427581787, "learning_rate": 4.081190853099734e-07, "loss": 0.05250713, "memory(GiB)": 13.7, "step": 93545, "train_speed(iter/s)": 1.529085 }, { "acc": 0.96868057, "epoch": 43.84813686430748, "grad_norm": 2.0290937423706055, "learning_rate": 4.0781276772545257e-07, "loss": 0.0547312, "memory(GiB)": 13.7, "step": 93550, "train_speed(iter/s)": 1.52909 }, { "acc": 0.9791667, "epoch": 43.85048043121631, "grad_norm": 6.201390743255615, "learning_rate": 4.0750656053404664e-07, "loss": 0.04757349, "memory(GiB)": 13.7, "step": 93555, "train_speed(iter/s)": 1.529093 }, { "acc": 0.99243469, "epoch": 43.852823998125146, "grad_norm": 0.011264446191489697, "learning_rate": 4.0720046374311337e-07, "loss": 0.02905718, "memory(GiB)": 13.7, "step": 93560, "train_speed(iter/s)": 1.529097 }, { "acc": 0.9875, "epoch": 43.85516756503398, "grad_norm": 0.005750552285462618, "learning_rate": 4.0689447736001e-07, "loss": 0.01879076, "memory(GiB)": 13.7, "step": 93565, "train_speed(iter/s)": 1.529101 }, { "acc": 0.9895834, "epoch": 43.857511131942815, "grad_norm": 3.332232713699341, "learning_rate": 4.0658860139209335e-07, "loss": 0.02494442, "memory(GiB)": 13.7, "step": 93570, "train_speed(iter/s)": 1.529101 }, { "acc": 0.98874998, "epoch": 43.85985469885165, "grad_norm": 2.2992281913757324, "learning_rate": 4.062828358467139e-07, "loss": 0.02331135, "memory(GiB)": 13.7, "step": 93575, "train_speed(iter/s)": 1.529102 }, { "acc": 0.99333334, "epoch": 43.86219826576049, "grad_norm": 5.344273567199707, "learning_rate": 4.0597718073122234e-07, "loss": 0.01978325, "memory(GiB)": 13.7, "step": 93580, "train_speed(iter/s)": 1.529105 }, { "acc": 0.98145828, "epoch": 43.864541832669325, "grad_norm": 0.017193999141454697, "learning_rate": 4.0567163605296366e-07, "loss": 0.04299063, "memory(GiB)": 13.7, "step": 93585, "train_speed(iter/s)": 1.529111 }, { "acc": 0.9895834, "epoch": 43.86688539957816, "grad_norm": 2.672445297241211, "learning_rate": 4.053662018192853e-07, "loss": 0.02923233, "memory(GiB)": 13.7, "step": 93590, "train_speed(iter/s)": 1.529111 }, { "acc": 0.98154764, "epoch": 43.86922896648699, "grad_norm": 2.1773364543914795, "learning_rate": 4.050608780375265e-07, "loss": 0.03817869, "memory(GiB)": 13.7, "step": 93595, "train_speed(iter/s)": 1.529112 }, { "acc": 0.98298616, "epoch": 43.87157253339583, "grad_norm": 2.753105878829956, "learning_rate": 4.047556647150275e-07, "loss": 0.05164794, "memory(GiB)": 13.7, "step": 93600, "train_speed(iter/s)": 1.529115 }, { "acc": 0.99319439, "epoch": 43.87391610030466, "grad_norm": 1.786324381828308, "learning_rate": 4.044505618591244e-07, "loss": 0.03058251, "memory(GiB)": 13.7, "step": 93605, "train_speed(iter/s)": 1.529117 }, { "acc": 0.98258934, "epoch": 43.876259667213496, "grad_norm": 0.9858768582344055, "learning_rate": 4.0414556947714906e-07, "loss": 0.05638731, "memory(GiB)": 13.7, "step": 93610, "train_speed(iter/s)": 1.52912 }, { "acc": 0.9673357, "epoch": 43.87860323412234, "grad_norm": 3.0083627700805664, "learning_rate": 4.038406875764353e-07, "loss": 0.05281227, "memory(GiB)": 13.7, "step": 93615, "train_speed(iter/s)": 1.529123 }, { "acc": 0.99541664, "epoch": 43.88094680103117, "grad_norm": 2.7379162311553955, "learning_rate": 4.0353591616430987e-07, "loss": 0.01299834, "memory(GiB)": 13.7, "step": 93620, "train_speed(iter/s)": 1.529129 }, { "acc": 0.98907204, "epoch": 43.883290367940006, "grad_norm": 5.198049068450928, "learning_rate": 4.03231255248099e-07, "loss": 0.02731989, "memory(GiB)": 13.7, "step": 93625, "train_speed(iter/s)": 1.529131 }, { "acc": 0.98374996, "epoch": 43.88563393484884, "grad_norm": 4.146547794342041, "learning_rate": 4.0292670483512555e-07, "loss": 0.04755732, "memory(GiB)": 13.7, "step": 93630, "train_speed(iter/s)": 1.52913 }, { "acc": 0.98927078, "epoch": 43.887977501757675, "grad_norm": 2.7557876110076904, "learning_rate": 4.0262226493271017e-07, "loss": 0.02366114, "memory(GiB)": 13.7, "step": 93635, "train_speed(iter/s)": 1.529131 }, { "acc": 0.99083328, "epoch": 43.89032106866651, "grad_norm": 2.2562174797058105, "learning_rate": 4.02317935548169e-07, "loss": 0.0358688, "memory(GiB)": 13.7, "step": 93640, "train_speed(iter/s)": 1.529133 }, { "acc": 0.9895833, "epoch": 43.89266463557534, "grad_norm": 0.007961279712617397, "learning_rate": 4.0201371668881986e-07, "loss": 0.03168537, "memory(GiB)": 13.7, "step": 93645, "train_speed(iter/s)": 1.529137 }, { "acc": 0.98916664, "epoch": 43.89500820248418, "grad_norm": 0.01656974107027054, "learning_rate": 4.0170960836197405e-07, "loss": 0.03159191, "memory(GiB)": 13.7, "step": 93650, "train_speed(iter/s)": 1.529136 }, { "acc": 0.99778843, "epoch": 43.89735176939302, "grad_norm": 2.0702788829803467, "learning_rate": 4.0140561057494e-07, "loss": 0.01218512, "memory(GiB)": 13.7, "step": 93655, "train_speed(iter/s)": 1.529139 }, { "acc": 0.9822916, "epoch": 43.89969533630185, "grad_norm": 2.1233181953430176, "learning_rate": 4.0110172333502653e-07, "loss": 0.03360243, "memory(GiB)": 13.7, "step": 93660, "train_speed(iter/s)": 1.529137 }, { "acc": 0.98812504, "epoch": 43.90203890321069, "grad_norm": 7.783823013305664, "learning_rate": 4.007979466495367e-07, "loss": 0.05429473, "memory(GiB)": 13.7, "step": 93665, "train_speed(iter/s)": 1.529137 }, { "acc": 0.9979167, "epoch": 43.90438247011952, "grad_norm": 0.0011050169123336673, "learning_rate": 4.004942805257733e-07, "loss": 0.00750777, "memory(GiB)": 13.7, "step": 93670, "train_speed(iter/s)": 1.529142 }, { "acc": 0.97895832, "epoch": 43.906726037028356, "grad_norm": 5.055652618408203, "learning_rate": 4.0019072497103525e-07, "loss": 0.05190786, "memory(GiB)": 13.7, "step": 93675, "train_speed(iter/s)": 1.529145 }, { "acc": 0.97936964, "epoch": 43.90906960393719, "grad_norm": 1.7538269758224487, "learning_rate": 3.9988727999261943e-07, "loss": 0.05868425, "memory(GiB)": 13.7, "step": 93680, "train_speed(iter/s)": 1.529151 }, { "acc": 0.9864584, "epoch": 43.911413170846025, "grad_norm": 1.6750603914260864, "learning_rate": 3.9958394559781976e-07, "loss": 0.02274428, "memory(GiB)": 13.7, "step": 93685, "train_speed(iter/s)": 1.529155 }, { "acc": 0.96406956, "epoch": 43.913756737754866, "grad_norm": 8.448089599609375, "learning_rate": 3.992807217939252e-07, "loss": 0.05600552, "memory(GiB)": 13.7, "step": 93690, "train_speed(iter/s)": 1.529158 }, { "acc": 0.97372026, "epoch": 43.9161003046637, "grad_norm": 5.321609973907471, "learning_rate": 3.9897760858822764e-07, "loss": 0.0420857, "memory(GiB)": 13.7, "step": 93695, "train_speed(iter/s)": 1.52916 }, { "acc": 0.99875002, "epoch": 43.918443871572535, "grad_norm": 2.9797940254211426, "learning_rate": 3.986746059880088e-07, "loss": 0.01790224, "memory(GiB)": 13.7, "step": 93700, "train_speed(iter/s)": 1.529162 }, { "acc": 0.97967262, "epoch": 43.92078743848137, "grad_norm": 1.9147565364837646, "learning_rate": 3.9837171400055656e-07, "loss": 0.04640709, "memory(GiB)": 13.7, "step": 93705, "train_speed(iter/s)": 1.529168 }, { "acc": 0.98154764, "epoch": 43.9231310053902, "grad_norm": 4.607367038726807, "learning_rate": 3.980689326331477e-07, "loss": 0.0269745, "memory(GiB)": 13.7, "step": 93710, "train_speed(iter/s)": 1.529169 }, { "acc": 0.98760414, "epoch": 43.92547457229904, "grad_norm": 2.339779853820801, "learning_rate": 3.977662618930629e-07, "loss": 0.06513612, "memory(GiB)": 13.7, "step": 93715, "train_speed(iter/s)": 1.529174 }, { "acc": 0.97833328, "epoch": 43.92781813920787, "grad_norm": 4.02403450012207, "learning_rate": 3.974637017875751e-07, "loss": 0.03382193, "memory(GiB)": 13.7, "step": 93720, "train_speed(iter/s)": 1.52918 }, { "acc": 0.98594704, "epoch": 43.93016170611671, "grad_norm": 2.923862934112549, "learning_rate": 3.971612523239582e-07, "loss": 0.04927278, "memory(GiB)": 13.7, "step": 93725, "train_speed(iter/s)": 1.529183 }, { "acc": 0.96342268, "epoch": 43.93250527302555, "grad_norm": 3.9380149841308594, "learning_rate": 3.968589135094819e-07, "loss": 0.07144916, "memory(GiB)": 13.7, "step": 93730, "train_speed(iter/s)": 1.529186 }, { "acc": 0.98696423, "epoch": 43.93484883993438, "grad_norm": 1.6547263860702515, "learning_rate": 3.965566853514118e-07, "loss": 0.02392546, "memory(GiB)": 13.7, "step": 93735, "train_speed(iter/s)": 1.529189 }, { "acc": 0.98529758, "epoch": 43.93719240684322, "grad_norm": 2.031644582748413, "learning_rate": 3.962545678570152e-07, "loss": 0.03657221, "memory(GiB)": 13.7, "step": 93740, "train_speed(iter/s)": 1.52919 }, { "acc": 0.9979167, "epoch": 43.93953597375205, "grad_norm": 2.5076584815979004, "learning_rate": 3.959525610335518e-07, "loss": 0.05532116, "memory(GiB)": 13.7, "step": 93745, "train_speed(iter/s)": 1.529188 }, { "acc": 0.9958334, "epoch": 43.941879540660885, "grad_norm": 0.00012389426410663873, "learning_rate": 3.9565066488828276e-07, "loss": 0.01834408, "memory(GiB)": 13.7, "step": 93750, "train_speed(iter/s)": 1.529195 }, { "acc": 0.97555065, "epoch": 43.94422310756972, "grad_norm": 3.553344964981079, "learning_rate": 3.9534887942846157e-07, "loss": 0.06065355, "memory(GiB)": 13.7, "step": 93755, "train_speed(iter/s)": 1.529198 }, { "acc": 0.98725195, "epoch": 43.946566674478554, "grad_norm": 2.5294175148010254, "learning_rate": 3.950472046613461e-07, "loss": 0.05754848, "memory(GiB)": 13.7, "step": 93760, "train_speed(iter/s)": 1.529206 }, { "acc": 0.9895834, "epoch": 43.948910241387395, "grad_norm": 1.4377455711364746, "learning_rate": 3.9474564059418495e-07, "loss": 0.03254627, "memory(GiB)": 13.7, "step": 93765, "train_speed(iter/s)": 1.529208 }, { "acc": 0.97550602, "epoch": 43.95125380829623, "grad_norm": 3.2233850955963135, "learning_rate": 3.944441872342288e-07, "loss": 0.03970354, "memory(GiB)": 13.7, "step": 93770, "train_speed(iter/s)": 1.529216 }, { "acc": 0.9916667, "epoch": 43.953597375205064, "grad_norm": 2.750098466873169, "learning_rate": 3.941428445887215e-07, "loss": 0.05941312, "memory(GiB)": 13.7, "step": 93775, "train_speed(iter/s)": 1.529221 }, { "acc": 0.99375, "epoch": 43.9559409421139, "grad_norm": 2.8265726566314697, "learning_rate": 3.9384161266490674e-07, "loss": 0.02404131, "memory(GiB)": 13.7, "step": 93780, "train_speed(iter/s)": 1.529221 }, { "acc": 0.97907743, "epoch": 43.95828450902273, "grad_norm": 6.641178607940674, "learning_rate": 3.935404914700268e-07, "loss": 0.06898123, "memory(GiB)": 13.7, "step": 93785, "train_speed(iter/s)": 1.529221 }, { "acc": 0.97872028, "epoch": 43.96062807593157, "grad_norm": 5.623429298400879, "learning_rate": 3.9323948101131627e-07, "loss": 0.05474243, "memory(GiB)": 13.7, "step": 93790, "train_speed(iter/s)": 1.52922 }, { "acc": 0.95988102, "epoch": 43.9629716428404, "grad_norm": 7.119783401489258, "learning_rate": 3.9293858129601415e-07, "loss": 0.04966514, "memory(GiB)": 13.7, "step": 93795, "train_speed(iter/s)": 1.529224 }, { "acc": 0.98402777, "epoch": 43.965315209749235, "grad_norm": 1.9565062522888184, "learning_rate": 3.9263779233135075e-07, "loss": 0.0267047, "memory(GiB)": 13.7, "step": 93800, "train_speed(iter/s)": 1.529225 }, { "acc": 0.98716345, "epoch": 43.96765877665808, "grad_norm": 2.472658634185791, "learning_rate": 3.923371141245578e-07, "loss": 0.041623, "memory(GiB)": 13.7, "step": 93805, "train_speed(iter/s)": 1.529229 }, { "acc": 0.98159714, "epoch": 43.97000234356691, "grad_norm": 3.391958475112915, "learning_rate": 3.9203654668286147e-07, "loss": 0.07759374, "memory(GiB)": 13.7, "step": 93810, "train_speed(iter/s)": 1.529228 }, { "acc": 0.97979164, "epoch": 43.972345910475745, "grad_norm": 0.004819312132894993, "learning_rate": 3.917360900134855e-07, "loss": 0.05825117, "memory(GiB)": 13.7, "step": 93815, "train_speed(iter/s)": 1.529233 }, { "acc": 0.96831303, "epoch": 43.97468947738458, "grad_norm": 1.8224706649780273, "learning_rate": 3.9143574412365377e-07, "loss": 0.04849905, "memory(GiB)": 13.7, "step": 93820, "train_speed(iter/s)": 1.529235 }, { "acc": 0.98395824, "epoch": 43.977033044293414, "grad_norm": 0.029752478003501892, "learning_rate": 3.9113550902058476e-07, "loss": 0.0275496, "memory(GiB)": 13.7, "step": 93825, "train_speed(iter/s)": 1.529237 }, { "acc": 0.9825695, "epoch": 43.97937661120225, "grad_norm": 0.0016055209562182426, "learning_rate": 3.908353847114954e-07, "loss": 0.06111858, "memory(GiB)": 13.7, "step": 93830, "train_speed(iter/s)": 1.529238 }, { "acc": 0.99437504, "epoch": 43.98172017811108, "grad_norm": 2.5157968997955322, "learning_rate": 3.9053537120359865e-07, "loss": 0.01132399, "memory(GiB)": 13.7, "step": 93835, "train_speed(iter/s)": 1.529242 }, { "acc": 0.99011364, "epoch": 43.984063745019924, "grad_norm": 3.279916763305664, "learning_rate": 3.90235468504108e-07, "loss": 0.04202625, "memory(GiB)": 13.7, "step": 93840, "train_speed(iter/s)": 1.529241 }, { "acc": 0.98979168, "epoch": 43.98640731192876, "grad_norm": 2.13525390625, "learning_rate": 3.8993567662022854e-07, "loss": 0.02124992, "memory(GiB)": 13.7, "step": 93845, "train_speed(iter/s)": 1.529243 }, { "acc": 0.98535709, "epoch": 43.98875087883759, "grad_norm": 3.114227294921875, "learning_rate": 3.896359955591706e-07, "loss": 0.03250408, "memory(GiB)": 13.7, "step": 93850, "train_speed(iter/s)": 1.529246 }, { "acc": 0.98460321, "epoch": 43.99109444574643, "grad_norm": 4.822378158569336, "learning_rate": 3.893364253281353e-07, "loss": 0.03087972, "memory(GiB)": 13.7, "step": 93855, "train_speed(iter/s)": 1.529247 }, { "acc": 0.98874998, "epoch": 43.99343801265526, "grad_norm": 2.061082363128662, "learning_rate": 3.89036965934323e-07, "loss": 0.02437051, "memory(GiB)": 13.7, "step": 93860, "train_speed(iter/s)": 1.529251 }, { "acc": 0.9916667, "epoch": 43.995781579564095, "grad_norm": 2.640458345413208, "learning_rate": 3.887376173849327e-07, "loss": 0.03048401, "memory(GiB)": 13.7, "step": 93865, "train_speed(iter/s)": 1.529253 }, { "acc": 0.99125004, "epoch": 43.99812514647293, "grad_norm": 1.8885643482208252, "learning_rate": 3.8843837968715844e-07, "loss": 0.02415459, "memory(GiB)": 13.7, "step": 93870, "train_speed(iter/s)": 1.529256 }, { "acc": 0.99437504, "epoch": 44.000468713381764, "grad_norm": 1.4889812469482422, "learning_rate": 3.881392528481949e-07, "loss": 0.02528934, "memory(GiB)": 13.7, "step": 93875, "train_speed(iter/s)": 1.52925 }, { "acc": 0.97520838, "epoch": 44.002812280290605, "grad_norm": 8.179841041564941, "learning_rate": 3.8784023687522895e-07, "loss": 0.05736115, "memory(GiB)": 13.7, "step": 93880, "train_speed(iter/s)": 1.529256 }, { "acc": 0.98386364, "epoch": 44.00515584719944, "grad_norm": 0.7946971654891968, "learning_rate": 3.8754133177545136e-07, "loss": 0.03569015, "memory(GiB)": 13.7, "step": 93885, "train_speed(iter/s)": 1.529259 }, { "acc": 0.98625002, "epoch": 44.007499414108274, "grad_norm": 1.0141521692276, "learning_rate": 3.8724253755604506e-07, "loss": 0.05490727, "memory(GiB)": 13.7, "step": 93890, "train_speed(iter/s)": 1.529257 }, { "acc": 0.98833332, "epoch": 44.00984298101711, "grad_norm": 2.586554765701294, "learning_rate": 3.869438542241931e-07, "loss": 0.03649164, "memory(GiB)": 13.7, "step": 93895, "train_speed(iter/s)": 1.529261 }, { "acc": 0.99913788, "epoch": 44.01218654792594, "grad_norm": 4.284502983093262, "learning_rate": 3.866452817870739e-07, "loss": 0.02554003, "memory(GiB)": 13.7, "step": 93900, "train_speed(iter/s)": 1.529265 }, { "acc": 0.99229164, "epoch": 44.01453011483478, "grad_norm": 2.666059970855713, "learning_rate": 3.863468202518633e-07, "loss": 0.02653568, "memory(GiB)": 13.7, "step": 93905, "train_speed(iter/s)": 1.529267 }, { "acc": 0.98357954, "epoch": 44.01687368174361, "grad_norm": 2.9563770294189453, "learning_rate": 3.8604846962573765e-07, "loss": 0.07148929, "memory(GiB)": 13.7, "step": 93910, "train_speed(iter/s)": 1.529268 }, { "acc": 0.9932292, "epoch": 44.019217248652446, "grad_norm": 2.9393386840820312, "learning_rate": 3.8575022991586656e-07, "loss": 0.03406308, "memory(GiB)": 13.7, "step": 93915, "train_speed(iter/s)": 1.529265 }, { "acc": 0.98604164, "epoch": 44.02156081556129, "grad_norm": 2.688567876815796, "learning_rate": 3.854521011294203e-07, "loss": 0.04137557, "memory(GiB)": 13.7, "step": 93920, "train_speed(iter/s)": 1.529268 }, { "acc": 0.98125, "epoch": 44.02390438247012, "grad_norm": 0.00135540752671659, "learning_rate": 3.851540832735629e-07, "loss": 0.03237545, "memory(GiB)": 13.7, "step": 93925, "train_speed(iter/s)": 1.529271 }, { "acc": 0.9833333, "epoch": 44.026247949378956, "grad_norm": 3.29837965965271, "learning_rate": 3.8485617635545957e-07, "loss": 0.02884029, "memory(GiB)": 13.7, "step": 93930, "train_speed(iter/s)": 1.529272 }, { "acc": 0.99513893, "epoch": 44.02859151628779, "grad_norm": 1.7585580348968506, "learning_rate": 3.845583803822684e-07, "loss": 0.01730523, "memory(GiB)": 13.7, "step": 93935, "train_speed(iter/s)": 1.529274 }, { "acc": 0.98363094, "epoch": 44.030935083196624, "grad_norm": 1.7607451677322388, "learning_rate": 3.8426069536115066e-07, "loss": 0.02550735, "memory(GiB)": 13.7, "step": 93940, "train_speed(iter/s)": 1.529274 }, { "acc": 0.9848959, "epoch": 44.03327865010546, "grad_norm": 4.419894218444824, "learning_rate": 3.839631212992611e-07, "loss": 0.04089559, "memory(GiB)": 13.7, "step": 93945, "train_speed(iter/s)": 1.529273 }, { "acc": 0.98874998, "epoch": 44.03562221701429, "grad_norm": 2.047982931137085, "learning_rate": 3.8366565820375035e-07, "loss": 0.02241847, "memory(GiB)": 13.7, "step": 93950, "train_speed(iter/s)": 1.529273 }, { "acc": 0.98652782, "epoch": 44.037965783923134, "grad_norm": 1.9154080152511597, "learning_rate": 3.833683060817704e-07, "loss": 0.02419882, "memory(GiB)": 13.7, "step": 93955, "train_speed(iter/s)": 1.529276 }, { "acc": 0.97006941, "epoch": 44.04030935083197, "grad_norm": 2.027747392654419, "learning_rate": 3.830710649404661e-07, "loss": 0.05589211, "memory(GiB)": 13.7, "step": 93960, "train_speed(iter/s)": 1.529282 }, { "acc": 0.9854167, "epoch": 44.0426529177408, "grad_norm": 5.420487880706787, "learning_rate": 3.827739347869862e-07, "loss": 0.02269833, "memory(GiB)": 13.7, "step": 93965, "train_speed(iter/s)": 1.529288 }, { "acc": 0.97979164, "epoch": 44.04499648464964, "grad_norm": 5.131839275360107, "learning_rate": 3.8247691562846896e-07, "loss": 0.05229113, "memory(GiB)": 13.7, "step": 93970, "train_speed(iter/s)": 1.529285 }, { "acc": 0.97788696, "epoch": 44.04734005155847, "grad_norm": 3.6604905128479004, "learning_rate": 3.8218000747205694e-07, "loss": 0.05941329, "memory(GiB)": 13.7, "step": 93975, "train_speed(iter/s)": 1.529288 }, { "acc": 0.9916667, "epoch": 44.049683618467306, "grad_norm": 0.0790095403790474, "learning_rate": 3.8188321032488454e-07, "loss": 0.02044971, "memory(GiB)": 13.7, "step": 93980, "train_speed(iter/s)": 1.529293 }, { "acc": 0.98075399, "epoch": 44.05202718537614, "grad_norm": 2.771806001663208, "learning_rate": 3.8158652419408554e-07, "loss": 0.05315815, "memory(GiB)": 13.7, "step": 93985, "train_speed(iter/s)": 1.529297 }, { "acc": 0.97854166, "epoch": 44.054370752284974, "grad_norm": 3.451554298400879, "learning_rate": 3.812899490867935e-07, "loss": 0.04534849, "memory(GiB)": 13.7, "step": 93990, "train_speed(iter/s)": 1.529299 }, { "acc": 0.98976192, "epoch": 44.056714319193816, "grad_norm": 4.390688419342041, "learning_rate": 3.8099348501013365e-07, "loss": 0.03451136, "memory(GiB)": 13.7, "step": 93995, "train_speed(iter/s)": 1.529306 }, { "acc": 0.9916667, "epoch": 44.05905788610265, "grad_norm": 2.9542877674102783, "learning_rate": 3.8069713197123623e-07, "loss": 0.04238153, "memory(GiB)": 13.7, "step": 94000, "train_speed(iter/s)": 1.529312 }, { "acc": 0.98291664, "epoch": 44.061401453011484, "grad_norm": 6.148192882537842, "learning_rate": 3.8040088997722153e-07, "loss": 0.03173381, "memory(GiB)": 13.7, "step": 94005, "train_speed(iter/s)": 1.529319 }, { "acc": 0.97904758, "epoch": 44.06374501992032, "grad_norm": 5.743030071258545, "learning_rate": 3.801047590352125e-07, "loss": 0.04915451, "memory(GiB)": 13.7, "step": 94010, "train_speed(iter/s)": 1.52932 }, { "acc": 0.978125, "epoch": 44.06608858682915, "grad_norm": 4.604579925537109, "learning_rate": 3.7980873915232447e-07, "loss": 0.04179878, "memory(GiB)": 13.7, "step": 94015, "train_speed(iter/s)": 1.529324 }, { "acc": 0.9885417, "epoch": 44.06843215373799, "grad_norm": 3.087841749191284, "learning_rate": 3.7951283033567496e-07, "loss": 0.0241462, "memory(GiB)": 13.7, "step": 94020, "train_speed(iter/s)": 1.529326 }, { "acc": 0.98604164, "epoch": 44.07077572064682, "grad_norm": 0.0047020199708640575, "learning_rate": 3.792170325923753e-07, "loss": 0.02073997, "memory(GiB)": 13.7, "step": 94025, "train_speed(iter/s)": 1.529325 }, { "acc": 0.99048615, "epoch": 44.07311928755566, "grad_norm": 0.00234846118837595, "learning_rate": 3.789213459295363e-07, "loss": 0.02721693, "memory(GiB)": 13.7, "step": 94030, "train_speed(iter/s)": 1.529327 }, { "acc": 0.97729168, "epoch": 44.0754628544645, "grad_norm": 5.738698482513428, "learning_rate": 3.78625770354266e-07, "loss": 0.04133846, "memory(GiB)": 13.7, "step": 94035, "train_speed(iter/s)": 1.529326 }, { "acc": 0.98395824, "epoch": 44.07780642137333, "grad_norm": 1.6414347887039185, "learning_rate": 3.7833030587366697e-07, "loss": 0.02532148, "memory(GiB)": 13.7, "step": 94040, "train_speed(iter/s)": 1.529325 }, { "acc": 0.98139133, "epoch": 44.080149988282166, "grad_norm": 2.8673386573791504, "learning_rate": 3.7803495249484323e-07, "loss": 0.02926442, "memory(GiB)": 13.7, "step": 94045, "train_speed(iter/s)": 1.529329 }, { "acc": 0.98874998, "epoch": 44.082493555191, "grad_norm": 3.278411626815796, "learning_rate": 3.777397102248914e-07, "loss": 0.0238118, "memory(GiB)": 13.7, "step": 94050, "train_speed(iter/s)": 1.529331 }, { "acc": 0.99258928, "epoch": 44.084837122099835, "grad_norm": 2.857654094696045, "learning_rate": 3.774445790709121e-07, "loss": 0.02908379, "memory(GiB)": 13.7, "step": 94055, "train_speed(iter/s)": 1.529335 }, { "acc": 0.98121719, "epoch": 44.08718068900867, "grad_norm": 3.823063850402832, "learning_rate": 3.7714955903999616e-07, "loss": 0.03317908, "memory(GiB)": 13.7, "step": 94060, "train_speed(iter/s)": 1.529334 }, { "acc": 0.97785797, "epoch": 44.0895242559175, "grad_norm": 4.269452095031738, "learning_rate": 3.768546501392368e-07, "loss": 0.04095821, "memory(GiB)": 13.7, "step": 94065, "train_speed(iter/s)": 1.52934 }, { "acc": 0.98093748, "epoch": 44.091867822826345, "grad_norm": 4.131237983703613, "learning_rate": 3.765598523757215e-07, "loss": 0.04026772, "memory(GiB)": 13.7, "step": 94070, "train_speed(iter/s)": 1.529341 }, { "acc": 0.99096584, "epoch": 44.09421138973518, "grad_norm": 0.4914775490760803, "learning_rate": 3.7626516575653595e-07, "loss": 0.03163563, "memory(GiB)": 13.7, "step": 94075, "train_speed(iter/s)": 1.529342 }, { "acc": 0.98604164, "epoch": 44.09655495664401, "grad_norm": 6.081310749053955, "learning_rate": 3.759705902887651e-07, "loss": 0.026301, "memory(GiB)": 13.7, "step": 94080, "train_speed(iter/s)": 1.529347 }, { "acc": 0.98604164, "epoch": 44.09889852355285, "grad_norm": 3.8855719566345215, "learning_rate": 3.756761259794863e-07, "loss": 0.0501415, "memory(GiB)": 13.7, "step": 94085, "train_speed(iter/s)": 1.52935 }, { "acc": 0.98059521, "epoch": 44.10124209046168, "grad_norm": 4.724174499511719, "learning_rate": 3.753817728357821e-07, "loss": 0.03961391, "memory(GiB)": 13.7, "step": 94090, "train_speed(iter/s)": 1.529349 }, { "acc": 0.9854167, "epoch": 44.103585657370516, "grad_norm": 0.5903568863868713, "learning_rate": 3.75087530864724e-07, "loss": 0.03058059, "memory(GiB)": 13.7, "step": 94095, "train_speed(iter/s)": 1.529353 }, { "acc": 0.98812504, "epoch": 44.10592922427935, "grad_norm": 0.8679466843605042, "learning_rate": 3.7479340007338676e-07, "loss": 0.02499133, "memory(GiB)": 13.7, "step": 94100, "train_speed(iter/s)": 1.529358 }, { "acc": 0.978125, "epoch": 44.10827279118819, "grad_norm": 5.816662311553955, "learning_rate": 3.7449938046883994e-07, "loss": 0.04929393, "memory(GiB)": 13.7, "step": 94105, "train_speed(iter/s)": 1.529361 }, { "acc": 0.9885417, "epoch": 44.110616358097026, "grad_norm": 5.392491340637207, "learning_rate": 3.742054720581484e-07, "loss": 0.03787256, "memory(GiB)": 13.7, "step": 94110, "train_speed(iter/s)": 1.529366 }, { "acc": 0.99125004, "epoch": 44.11295992500586, "grad_norm": 3.9040017127990723, "learning_rate": 3.7391167484838087e-07, "loss": 0.01696693, "memory(GiB)": 13.7, "step": 94115, "train_speed(iter/s)": 1.529367 }, { "acc": 0.98395834, "epoch": 44.115303491914695, "grad_norm": 1.9460762739181519, "learning_rate": 3.736179888465958e-07, "loss": 0.05088728, "memory(GiB)": 13.7, "step": 94120, "train_speed(iter/s)": 1.529367 }, { "acc": 0.9890625, "epoch": 44.11764705882353, "grad_norm": 0.0016487599350512028, "learning_rate": 3.7332441405985477e-07, "loss": 0.0185279, "memory(GiB)": 13.7, "step": 94125, "train_speed(iter/s)": 1.529371 }, { "acc": 0.97208328, "epoch": 44.11999062573236, "grad_norm": 6.771445274353027, "learning_rate": 3.730309504952131e-07, "loss": 0.04725394, "memory(GiB)": 13.7, "step": 94130, "train_speed(iter/s)": 1.529377 }, { "acc": 0.97639427, "epoch": 44.1223341926412, "grad_norm": 4.016270160675049, "learning_rate": 3.7273759815972544e-07, "loss": 0.04761771, "memory(GiB)": 13.7, "step": 94135, "train_speed(iter/s)": 1.52938 }, { "acc": 0.99020834, "epoch": 44.12467775955003, "grad_norm": 3.2160186767578125, "learning_rate": 3.724443570604405e-07, "loss": 0.04580548, "memory(GiB)": 13.7, "step": 94140, "train_speed(iter/s)": 1.529385 }, { "acc": 0.98736115, "epoch": 44.12702132645887, "grad_norm": 1.5112197399139404, "learning_rate": 3.7215122720441087e-07, "loss": 0.03879538, "memory(GiB)": 13.7, "step": 94145, "train_speed(iter/s)": 1.529391 }, { "acc": 0.979354, "epoch": 44.12936489336771, "grad_norm": 4.533644676208496, "learning_rate": 3.718582085986802e-07, "loss": 0.03826707, "memory(GiB)": 13.7, "step": 94150, "train_speed(iter/s)": 1.529391 }, { "acc": 0.9791667, "epoch": 44.13170846027654, "grad_norm": 6.326825141906738, "learning_rate": 3.715653012502915e-07, "loss": 0.05512764, "memory(GiB)": 13.7, "step": 94155, "train_speed(iter/s)": 1.529392 }, { "acc": 0.98180923, "epoch": 44.134052027185376, "grad_norm": 4.799668312072754, "learning_rate": 3.7127250516628633e-07, "loss": 0.08621293, "memory(GiB)": 13.7, "step": 94160, "train_speed(iter/s)": 1.529393 }, { "acc": 0.98249998, "epoch": 44.13639559409421, "grad_norm": 3.8188204765319824, "learning_rate": 3.709798203537011e-07, "loss": 0.03546062, "memory(GiB)": 13.7, "step": 94165, "train_speed(iter/s)": 1.529398 }, { "acc": 0.99571428, "epoch": 44.138739161003045, "grad_norm": 0.23996314406394958, "learning_rate": 3.7068724681957156e-07, "loss": 0.03802973, "memory(GiB)": 13.7, "step": 94170, "train_speed(iter/s)": 1.529399 }, { "acc": 0.99312496, "epoch": 44.14108272791188, "grad_norm": 2.1138980388641357, "learning_rate": 3.7039478457093103e-07, "loss": 0.02317355, "memory(GiB)": 13.7, "step": 94175, "train_speed(iter/s)": 1.529402 }, { "acc": 0.9875, "epoch": 44.14342629482072, "grad_norm": 3.034759044647217, "learning_rate": 3.7010243361480923e-07, "loss": 0.03381024, "memory(GiB)": 13.7, "step": 94180, "train_speed(iter/s)": 1.529404 }, { "acc": 0.98790178, "epoch": 44.145769861729555, "grad_norm": 3.809518575668335, "learning_rate": 3.6981019395823256e-07, "loss": 0.03400925, "memory(GiB)": 13.7, "step": 94185, "train_speed(iter/s)": 1.529411 }, { "acc": 0.984375, "epoch": 44.14811342863839, "grad_norm": 3.463254690170288, "learning_rate": 3.6951806560822637e-07, "loss": 0.02697931, "memory(GiB)": 13.7, "step": 94190, "train_speed(iter/s)": 1.529416 }, { "acc": 0.96508389, "epoch": 44.15045699554722, "grad_norm": 4.687368392944336, "learning_rate": 3.6922604857181166e-07, "loss": 0.05138117, "memory(GiB)": 13.7, "step": 94195, "train_speed(iter/s)": 1.529415 }, { "acc": 0.97479172, "epoch": 44.15280056245606, "grad_norm": 0.2537374496459961, "learning_rate": 3.689341428560065e-07, "loss": 0.0457459, "memory(GiB)": 13.7, "step": 94200, "train_speed(iter/s)": 1.529416 }, { "acc": 0.98633928, "epoch": 44.15514412936489, "grad_norm": 3.364055871963501, "learning_rate": 3.6864234846783005e-07, "loss": 0.03855581, "memory(GiB)": 13.7, "step": 94205, "train_speed(iter/s)": 1.52942 }, { "acc": 0.99084682, "epoch": 44.157487696273726, "grad_norm": 1.2604737281799316, "learning_rate": 3.6835066541429437e-07, "loss": 0.01522531, "memory(GiB)": 13.7, "step": 94210, "train_speed(iter/s)": 1.529418 }, { "acc": 0.98499994, "epoch": 44.15983126318256, "grad_norm": 2.653766632080078, "learning_rate": 3.680590937024111e-07, "loss": 0.03536707, "memory(GiB)": 13.7, "step": 94215, "train_speed(iter/s)": 1.529417 }, { "acc": 0.9791667, "epoch": 44.1621748300914, "grad_norm": 4.184018135070801, "learning_rate": 3.677676333391881e-07, "loss": 0.02437101, "memory(GiB)": 13.7, "step": 94220, "train_speed(iter/s)": 1.529419 }, { "acc": 0.98868065, "epoch": 44.164518397000236, "grad_norm": 2.48511004447937, "learning_rate": 3.67476284331632e-07, "loss": 0.0624022, "memory(GiB)": 13.7, "step": 94225, "train_speed(iter/s)": 1.52943 }, { "acc": 0.984375, "epoch": 44.16686196390907, "grad_norm": 4.112236499786377, "learning_rate": 3.6718504668674596e-07, "loss": 0.03148179, "memory(GiB)": 13.7, "step": 94230, "train_speed(iter/s)": 1.52943 }, { "acc": 0.98604164, "epoch": 44.169205530817905, "grad_norm": 4.460312843322754, "learning_rate": 3.668939204115275e-07, "loss": 0.02561366, "memory(GiB)": 13.7, "step": 94235, "train_speed(iter/s)": 1.529434 }, { "acc": 0.98738098, "epoch": 44.17154909772674, "grad_norm": 2.334271192550659, "learning_rate": 3.6660290551297815e-07, "loss": 0.03571594, "memory(GiB)": 13.7, "step": 94240, "train_speed(iter/s)": 1.529435 }, { "acc": 0.98968143, "epoch": 44.173892664635574, "grad_norm": 2.324841022491455, "learning_rate": 3.6631200199809106e-07, "loss": 0.02368596, "memory(GiB)": 13.7, "step": 94245, "train_speed(iter/s)": 1.529436 }, { "acc": 0.98175058, "epoch": 44.17623623154441, "grad_norm": 3.8655948638916016, "learning_rate": 3.6602120987385984e-07, "loss": 0.05103084, "memory(GiB)": 13.7, "step": 94250, "train_speed(iter/s)": 1.529437 }, { "acc": 0.97875004, "epoch": 44.17857979845325, "grad_norm": 2.4937591552734375, "learning_rate": 3.657305291472711e-07, "loss": 0.03297524, "memory(GiB)": 13.7, "step": 94255, "train_speed(iter/s)": 1.529441 }, { "acc": 0.99812498, "epoch": 44.180923365362084, "grad_norm": 2.7998015880584717, "learning_rate": 3.654399598253162e-07, "loss": 0.02943768, "memory(GiB)": 13.7, "step": 94260, "train_speed(iter/s)": 1.529444 }, { "acc": 0.98354168, "epoch": 44.18326693227092, "grad_norm": 2.5109663009643555, "learning_rate": 3.651495019149763e-07, "loss": 0.02311769, "memory(GiB)": 13.7, "step": 94265, "train_speed(iter/s)": 1.529443 }, { "acc": 0.98853626, "epoch": 44.18561049917975, "grad_norm": 3.5986850261688232, "learning_rate": 3.648591554232349e-07, "loss": 0.0555748, "memory(GiB)": 13.7, "step": 94270, "train_speed(iter/s)": 1.529445 }, { "acc": 0.98738098, "epoch": 44.18795406608859, "grad_norm": 1.0045851469039917, "learning_rate": 3.645689203570702e-07, "loss": 0.047618, "memory(GiB)": 13.7, "step": 94275, "train_speed(iter/s)": 1.529447 }, { "acc": 0.9854167, "epoch": 44.19029763299742, "grad_norm": 8.439184188842773, "learning_rate": 3.642787967234576e-07, "loss": 0.04080404, "memory(GiB)": 13.7, "step": 94280, "train_speed(iter/s)": 1.529446 }, { "acc": 0.98746109, "epoch": 44.192641199906255, "grad_norm": 3.128053665161133, "learning_rate": 3.639887845293721e-07, "loss": 0.030323, "memory(GiB)": 13.7, "step": 94285, "train_speed(iter/s)": 1.52945 }, { "acc": 0.98648815, "epoch": 44.19498476681509, "grad_norm": 2.7376365661621094, "learning_rate": 3.6369888378178264e-07, "loss": 0.0263865, "memory(GiB)": 13.7, "step": 94290, "train_speed(iter/s)": 1.529449 }, { "acc": 0.99764957, "epoch": 44.19732833372393, "grad_norm": 1.1229602098464966, "learning_rate": 3.634090944876611e-07, "loss": 0.01544005, "memory(GiB)": 13.7, "step": 94295, "train_speed(iter/s)": 1.529454 }, { "acc": 0.99250002, "epoch": 44.199671900632765, "grad_norm": 1.7623822689056396, "learning_rate": 3.631194166539693e-07, "loss": 0.02777893, "memory(GiB)": 13.7, "step": 94300, "train_speed(iter/s)": 1.529456 }, { "acc": 0.98819447, "epoch": 44.2020154675416, "grad_norm": 8.211854934692383, "learning_rate": 3.628298502876732e-07, "loss": 0.02842297, "memory(GiB)": 13.7, "step": 94305, "train_speed(iter/s)": 1.529458 }, { "acc": 0.99155636, "epoch": 44.204359034450434, "grad_norm": 3.688706874847412, "learning_rate": 3.6254039539573e-07, "loss": 0.03733446, "memory(GiB)": 13.7, "step": 94310, "train_speed(iter/s)": 1.529465 }, { "acc": 0.9916666, "epoch": 44.20670260135927, "grad_norm": 3.962625503540039, "learning_rate": 3.6225105198509953e-07, "loss": 0.01191826, "memory(GiB)": 13.7, "step": 94315, "train_speed(iter/s)": 1.529465 }, { "acc": 0.98916664, "epoch": 44.2090461682681, "grad_norm": 2.3924922943115234, "learning_rate": 3.6196182006273653e-07, "loss": 0.03, "memory(GiB)": 13.7, "step": 94320, "train_speed(iter/s)": 1.529467 }, { "acc": 0.97312508, "epoch": 44.21138973517694, "grad_norm": 1.4550672769546509, "learning_rate": 3.6167269963559153e-07, "loss": 0.0474602, "memory(GiB)": 13.7, "step": 94325, "train_speed(iter/s)": 1.529471 }, { "acc": 0.98128853, "epoch": 44.21373330208577, "grad_norm": 4.365407466888428, "learning_rate": 3.613836907106159e-07, "loss": 0.08445349, "memory(GiB)": 13.7, "step": 94330, "train_speed(iter/s)": 1.529474 }, { "acc": 0.9822917, "epoch": 44.21607686899461, "grad_norm": 2.2575597763061523, "learning_rate": 3.6109479329475527e-07, "loss": 0.03481137, "memory(GiB)": 13.7, "step": 94335, "train_speed(iter/s)": 1.529477 }, { "acc": 0.98594694, "epoch": 44.21842043590345, "grad_norm": 2.999363422393799, "learning_rate": 3.608060073949549e-07, "loss": 0.02638789, "memory(GiB)": 13.7, "step": 94340, "train_speed(iter/s)": 1.529476 }, { "acc": 0.98916664, "epoch": 44.22076400281228, "grad_norm": 0.0016284050652757287, "learning_rate": 3.6051733301815357e-07, "loss": 0.02624052, "memory(GiB)": 13.7, "step": 94345, "train_speed(iter/s)": 1.529478 }, { "acc": 0.9802084, "epoch": 44.223107569721115, "grad_norm": 5.159966468811035, "learning_rate": 3.602287701712939e-07, "loss": 0.05344378, "memory(GiB)": 13.7, "step": 94350, "train_speed(iter/s)": 1.529479 }, { "acc": 0.9958334, "epoch": 44.22545113662995, "grad_norm": 3.3283400535583496, "learning_rate": 3.599403188613103e-07, "loss": 0.02274072, "memory(GiB)": 13.7, "step": 94355, "train_speed(iter/s)": 1.529482 }, { "acc": 0.98383932, "epoch": 44.227794703538784, "grad_norm": 2.0848941802978516, "learning_rate": 3.5965197909513477e-07, "loss": 0.06704166, "memory(GiB)": 13.7, "step": 94360, "train_speed(iter/s)": 1.529484 }, { "acc": 0.97875004, "epoch": 44.23013827044762, "grad_norm": 3.6237924098968506, "learning_rate": 3.593637508797006e-07, "loss": 0.05409787, "memory(GiB)": 13.7, "step": 94365, "train_speed(iter/s)": 1.529485 }, { "acc": 0.98083334, "epoch": 44.23248183735646, "grad_norm": 5.22118616104126, "learning_rate": 3.5907563422193364e-07, "loss": 0.04891486, "memory(GiB)": 13.7, "step": 94370, "train_speed(iter/s)": 1.529486 }, { "acc": 0.98529758, "epoch": 44.234825404265294, "grad_norm": 2.870424747467041, "learning_rate": 3.587876291287606e-07, "loss": 0.04873106, "memory(GiB)": 13.7, "step": 94375, "train_speed(iter/s)": 1.529492 }, { "acc": 0.98770828, "epoch": 44.23716897117413, "grad_norm": 0.688467264175415, "learning_rate": 3.584997356071023e-07, "loss": 0.03343541, "memory(GiB)": 13.7, "step": 94380, "train_speed(iter/s)": 1.529495 }, { "acc": 0.98080349, "epoch": 44.23951253808296, "grad_norm": 4.970933437347412, "learning_rate": 3.5821195366388216e-07, "loss": 0.03951771, "memory(GiB)": 13.7, "step": 94385, "train_speed(iter/s)": 1.529498 }, { "acc": 0.99258928, "epoch": 44.2418561049918, "grad_norm": 2.7302956581115723, "learning_rate": 3.579242833060143e-07, "loss": 0.04756821, "memory(GiB)": 13.7, "step": 94390, "train_speed(iter/s)": 1.529503 }, { "acc": 0.99080353, "epoch": 44.24419967190063, "grad_norm": 3.3612382411956787, "learning_rate": 3.576367245404154e-07, "loss": 0.01917813, "memory(GiB)": 13.7, "step": 94395, "train_speed(iter/s)": 1.529504 }, { "acc": 0.98212738, "epoch": 44.246543238809465, "grad_norm": 0.6795884966850281, "learning_rate": 3.57349277373997e-07, "loss": 0.03982707, "memory(GiB)": 13.7, "step": 94400, "train_speed(iter/s)": 1.529506 }, { "acc": 0.98666668, "epoch": 44.2488868057183, "grad_norm": 1.615253210067749, "learning_rate": 3.5706194181366625e-07, "loss": 0.03025323, "memory(GiB)": 13.7, "step": 94405, "train_speed(iter/s)": 1.529506 }, { "acc": 0.98698864, "epoch": 44.25123037262714, "grad_norm": 3.502697229385376, "learning_rate": 3.5677471786633296e-07, "loss": 0.02425587, "memory(GiB)": 13.7, "step": 94410, "train_speed(iter/s)": 1.529508 }, { "acc": 0.99028845, "epoch": 44.253573939535976, "grad_norm": 0.010385549627244473, "learning_rate": 3.5648760553889933e-07, "loss": 0.01700868, "memory(GiB)": 13.7, "step": 94415, "train_speed(iter/s)": 1.529511 }, { "acc": 0.984375, "epoch": 44.25591750644481, "grad_norm": 0.6843733191490173, "learning_rate": 3.562006048382673e-07, "loss": 0.03436162, "memory(GiB)": 13.7, "step": 94420, "train_speed(iter/s)": 1.529513 }, { "acc": 0.98291664, "epoch": 44.258261073353644, "grad_norm": 4.219893455505371, "learning_rate": 3.559137157713343e-07, "loss": 0.03666671, "memory(GiB)": 13.7, "step": 94425, "train_speed(iter/s)": 1.529517 }, { "acc": 0.97562504, "epoch": 44.26060464026248, "grad_norm": 2.675799608230591, "learning_rate": 3.5562693834499776e-07, "loss": 0.04477022, "memory(GiB)": 13.7, "step": 94430, "train_speed(iter/s)": 1.529516 }, { "acc": 0.99298611, "epoch": 44.26294820717131, "grad_norm": 1.333319067955017, "learning_rate": 3.5534027256614834e-07, "loss": 0.02209183, "memory(GiB)": 13.7, "step": 94435, "train_speed(iter/s)": 1.529514 }, { "acc": 0.98779764, "epoch": 44.26529177408015, "grad_norm": 0.2064920961856842, "learning_rate": 3.550537184416802e-07, "loss": 0.04968343, "memory(GiB)": 13.7, "step": 94440, "train_speed(iter/s)": 1.529515 }, { "acc": 0.98863096, "epoch": 44.26763534098899, "grad_norm": 3.4640376567840576, "learning_rate": 3.5476727597847886e-07, "loss": 0.04920375, "memory(GiB)": 13.7, "step": 94445, "train_speed(iter/s)": 1.529516 }, { "acc": 0.97279758, "epoch": 44.26997890789782, "grad_norm": 0.10423672199249268, "learning_rate": 3.5448094518342877e-07, "loss": 0.03839413, "memory(GiB)": 13.7, "step": 94450, "train_speed(iter/s)": 1.529521 }, { "acc": 0.97796087, "epoch": 44.27232247480666, "grad_norm": 7.282250881195068, "learning_rate": 3.541947260634148e-07, "loss": 0.05010365, "memory(GiB)": 13.7, "step": 94455, "train_speed(iter/s)": 1.52952 }, { "acc": 0.97494049, "epoch": 44.27466604171549, "grad_norm": 3.0419397354125977, "learning_rate": 3.539086186253135e-07, "loss": 0.04631182, "memory(GiB)": 13.7, "step": 94460, "train_speed(iter/s)": 1.529523 }, { "acc": 0.97458334, "epoch": 44.277009608624326, "grad_norm": 9.332836151123047, "learning_rate": 3.536226228760048e-07, "loss": 0.0436559, "memory(GiB)": 13.7, "step": 94465, "train_speed(iter/s)": 1.529527 }, { "acc": 0.99375, "epoch": 44.27935317553316, "grad_norm": 3.552281379699707, "learning_rate": 3.53336738822362e-07, "loss": 0.03274943, "memory(GiB)": 13.7, "step": 94470, "train_speed(iter/s)": 1.529528 }, { "acc": 0.9822916, "epoch": 44.281696742441994, "grad_norm": 8.338123321533203, "learning_rate": 3.530509664712573e-07, "loss": 0.03148138, "memory(GiB)": 13.7, "step": 94475, "train_speed(iter/s)": 1.529527 }, { "acc": 0.97696428, "epoch": 44.28404030935083, "grad_norm": 0.13915656507015228, "learning_rate": 3.5276530582955873e-07, "loss": 0.06975163, "memory(GiB)": 13.7, "step": 94480, "train_speed(iter/s)": 1.529527 }, { "acc": 0.97354164, "epoch": 44.28638387625967, "grad_norm": 4.267388820648193, "learning_rate": 3.5247975690413367e-07, "loss": 0.05560647, "memory(GiB)": 13.7, "step": 94485, "train_speed(iter/s)": 1.529535 }, { "acc": 0.9822917, "epoch": 44.288727443168504, "grad_norm": 4.4458537101745605, "learning_rate": 3.521943197018459e-07, "loss": 0.0426953, "memory(GiB)": 13.7, "step": 94490, "train_speed(iter/s)": 1.529537 }, { "acc": 0.97962456, "epoch": 44.29107101007734, "grad_norm": 4.494089603424072, "learning_rate": 3.5190899422955364e-07, "loss": 0.06005566, "memory(GiB)": 13.7, "step": 94495, "train_speed(iter/s)": 1.529541 }, { "acc": 0.98904762, "epoch": 44.29341457698617, "grad_norm": 2.868894577026367, "learning_rate": 3.516237804941191e-07, "loss": 0.02011302, "memory(GiB)": 13.7, "step": 94500, "train_speed(iter/s)": 1.529543 }, { "acc": 0.97497025, "epoch": 44.29575814389501, "grad_norm": 4.610448837280273, "learning_rate": 3.5133867850239545e-07, "loss": 0.04857683, "memory(GiB)": 13.7, "step": 94505, "train_speed(iter/s)": 1.529543 }, { "acc": 0.98389874, "epoch": 44.29810171080384, "grad_norm": 3.9821815490722656, "learning_rate": 3.510536882612372e-07, "loss": 0.03819844, "memory(GiB)": 13.7, "step": 94510, "train_speed(iter/s)": 1.529544 }, { "acc": 0.9833334, "epoch": 44.300445277712676, "grad_norm": 4.948826789855957, "learning_rate": 3.5076880977749263e-07, "loss": 0.02380248, "memory(GiB)": 13.7, "step": 94515, "train_speed(iter/s)": 1.529545 }, { "acc": 0.98256941, "epoch": 44.30278884462152, "grad_norm": 1.1343011856079102, "learning_rate": 3.5048404305801043e-07, "loss": 0.0411652, "memory(GiB)": 13.7, "step": 94520, "train_speed(iter/s)": 1.529548 }, { "acc": 0.9802084, "epoch": 44.30513241153035, "grad_norm": 3.8390650749206543, "learning_rate": 3.5019938810963635e-07, "loss": 0.03562024, "memory(GiB)": 13.7, "step": 94525, "train_speed(iter/s)": 1.529549 }, { "acc": 0.98988094, "epoch": 44.307475978439186, "grad_norm": 4.2257771492004395, "learning_rate": 3.499148449392106e-07, "loss": 0.0377876, "memory(GiB)": 13.7, "step": 94530, "train_speed(iter/s)": 1.529553 }, { "acc": 0.98994122, "epoch": 44.30981954534802, "grad_norm": 3.3331000804901123, "learning_rate": 3.4963041355357447e-07, "loss": 0.02554676, "memory(GiB)": 13.7, "step": 94535, "train_speed(iter/s)": 1.529552 }, { "acc": 0.99229164, "epoch": 44.312163112256854, "grad_norm": 2.842787504196167, "learning_rate": 3.493460939595629e-07, "loss": 0.016541, "memory(GiB)": 13.7, "step": 94540, "train_speed(iter/s)": 1.529553 }, { "acc": 0.97737179, "epoch": 44.31450667916569, "grad_norm": 4.321038246154785, "learning_rate": 3.490618861640119e-07, "loss": 0.05331922, "memory(GiB)": 13.7, "step": 94545, "train_speed(iter/s)": 1.529555 }, { "acc": 0.97863092, "epoch": 44.31685024607452, "grad_norm": 1.9642019271850586, "learning_rate": 3.4877779017375023e-07, "loss": 0.03996702, "memory(GiB)": 13.7, "step": 94550, "train_speed(iter/s)": 1.529557 }, { "acc": 0.99375, "epoch": 44.31919381298336, "grad_norm": 3.996734142303467, "learning_rate": 3.4849380599561026e-07, "loss": 0.04883134, "memory(GiB)": 13.7, "step": 94555, "train_speed(iter/s)": 1.52956 }, { "acc": 0.99333334, "epoch": 44.3215373798922, "grad_norm": 0.9796675443649292, "learning_rate": 3.4820993363641523e-07, "loss": 0.02037805, "memory(GiB)": 13.7, "step": 94560, "train_speed(iter/s)": 1.529562 }, { "acc": 0.98770828, "epoch": 44.32388094680103, "grad_norm": 2.847381353378296, "learning_rate": 3.4792617310299e-07, "loss": 0.0241547, "memory(GiB)": 13.7, "step": 94565, "train_speed(iter/s)": 1.529563 }, { "acc": 0.98819447, "epoch": 44.32622451370987, "grad_norm": 4.047420978546143, "learning_rate": 3.476425244021553e-07, "loss": 0.04769945, "memory(GiB)": 13.7, "step": 94570, "train_speed(iter/s)": 1.529565 }, { "acc": 0.97729168, "epoch": 44.3285680806187, "grad_norm": 5.949577331542969, "learning_rate": 3.473589875407276e-07, "loss": 0.06948159, "memory(GiB)": 13.7, "step": 94575, "train_speed(iter/s)": 1.529567 }, { "acc": 0.98291664, "epoch": 44.330911647527536, "grad_norm": 3.3363161087036133, "learning_rate": 3.4707556252552365e-07, "loss": 0.0459716, "memory(GiB)": 13.7, "step": 94580, "train_speed(iter/s)": 1.52957 }, { "acc": 0.99083328, "epoch": 44.33325521443637, "grad_norm": 0.002625197172164917, "learning_rate": 3.467922493633533e-07, "loss": 0.01465549, "memory(GiB)": 13.7, "step": 94585, "train_speed(iter/s)": 1.529571 }, { "acc": 0.98583336, "epoch": 44.335598781345205, "grad_norm": 2.498208522796631, "learning_rate": 3.465090480610312e-07, "loss": 0.03193786, "memory(GiB)": 13.7, "step": 94590, "train_speed(iter/s)": 1.529573 }, { "acc": 0.98258934, "epoch": 44.337942348254046, "grad_norm": 4.355530261993408, "learning_rate": 3.46225958625361e-07, "loss": 0.03311779, "memory(GiB)": 13.7, "step": 94595, "train_speed(iter/s)": 1.529575 }, { "acc": 0.9770833, "epoch": 44.34028591516288, "grad_norm": 5.737451076507568, "learning_rate": 3.4594298106314885e-07, "loss": 0.03234835, "memory(GiB)": 13.7, "step": 94600, "train_speed(iter/s)": 1.529576 }, { "acc": 0.9713953, "epoch": 44.342629482071715, "grad_norm": 2.5900344848632812, "learning_rate": 3.456601153811943e-07, "loss": 0.05432292, "memory(GiB)": 13.7, "step": 94605, "train_speed(iter/s)": 1.529578 }, { "acc": 0.9895834, "epoch": 44.34497304898055, "grad_norm": 3.3294310569763184, "learning_rate": 3.4537736158630053e-07, "loss": 0.05666016, "memory(GiB)": 13.7, "step": 94610, "train_speed(iter/s)": 1.529579 }, { "acc": 0.98739586, "epoch": 44.34731661588938, "grad_norm": 3.4830775260925293, "learning_rate": 3.45094719685261e-07, "loss": 0.0188261, "memory(GiB)": 13.7, "step": 94615, "train_speed(iter/s)": 1.529579 }, { "acc": 0.9895834, "epoch": 44.34966018279822, "grad_norm": 4.828782081604004, "learning_rate": 3.4481218968486995e-07, "loss": 0.03223973, "memory(GiB)": 13.7, "step": 94620, "train_speed(iter/s)": 1.529581 }, { "acc": 0.98611107, "epoch": 44.35200374970705, "grad_norm": 0.8876803517341614, "learning_rate": 3.4452977159191927e-07, "loss": 0.04419402, "memory(GiB)": 13.7, "step": 94625, "train_speed(iter/s)": 1.529583 }, { "acc": 0.97905636, "epoch": 44.354347316615886, "grad_norm": 2.7026870250701904, "learning_rate": 3.4424746541319553e-07, "loss": 0.04322793, "memory(GiB)": 13.7, "step": 94630, "train_speed(iter/s)": 1.529586 }, { "acc": 0.984375, "epoch": 44.35669088352473, "grad_norm": 3.1598904132843018, "learning_rate": 3.4396527115548706e-07, "loss": 0.02149816, "memory(GiB)": 13.7, "step": 94635, "train_speed(iter/s)": 1.529587 }, { "acc": 0.99229164, "epoch": 44.35903445043356, "grad_norm": 0.018530145287513733, "learning_rate": 3.436831888255744e-07, "loss": 0.01881857, "memory(GiB)": 13.7, "step": 94640, "train_speed(iter/s)": 1.529586 }, { "acc": 0.9829546, "epoch": 44.361378017342396, "grad_norm": 3.8641929626464844, "learning_rate": 3.434012184302399e-07, "loss": 0.05290434, "memory(GiB)": 13.7, "step": 94645, "train_speed(iter/s)": 1.529592 }, { "acc": 0.9885416, "epoch": 44.36372158425123, "grad_norm": 0.6546693444252014, "learning_rate": 3.431193599762606e-07, "loss": 0.01708764, "memory(GiB)": 13.7, "step": 94650, "train_speed(iter/s)": 1.529596 }, { "acc": 0.98869047, "epoch": 44.366065151160065, "grad_norm": 0.31554803252220154, "learning_rate": 3.428376134704106e-07, "loss": 0.03387588, "memory(GiB)": 13.7, "step": 94655, "train_speed(iter/s)": 1.529597 }, { "acc": 0.98946428, "epoch": 44.3684087180689, "grad_norm": 2.0568466186523438, "learning_rate": 3.4255597891946373e-07, "loss": 0.01373073, "memory(GiB)": 13.7, "step": 94660, "train_speed(iter/s)": 1.5296 }, { "acc": 0.98487186, "epoch": 44.37075228497773, "grad_norm": 3.309126138687134, "learning_rate": 3.4227445633018665e-07, "loss": 0.05574652, "memory(GiB)": 13.7, "step": 94665, "train_speed(iter/s)": 1.529602 }, { "acc": 0.9911459, "epoch": 44.373095851886575, "grad_norm": 4.085381984710693, "learning_rate": 3.419930457093499e-07, "loss": 0.04880893, "memory(GiB)": 13.7, "step": 94670, "train_speed(iter/s)": 1.529602 }, { "acc": 0.98529758, "epoch": 44.37543941879541, "grad_norm": 0.005172176286578178, "learning_rate": 3.417117470637146e-07, "loss": 0.02848078, "memory(GiB)": 13.7, "step": 94675, "train_speed(iter/s)": 1.529602 }, { "acc": 0.97686014, "epoch": 44.37778298570424, "grad_norm": 3.8837857246398926, "learning_rate": 3.4143056040004485e-07, "loss": 0.05663813, "memory(GiB)": 13.7, "step": 94680, "train_speed(iter/s)": 1.529606 }, { "acc": 0.99125004, "epoch": 44.38012655261308, "grad_norm": 3.2566330432891846, "learning_rate": 3.411494857250978e-07, "loss": 0.03173012, "memory(GiB)": 13.7, "step": 94685, "train_speed(iter/s)": 1.529609 }, { "acc": 0.98666668, "epoch": 44.38247011952191, "grad_norm": 0.021225368604063988, "learning_rate": 3.408685230456306e-07, "loss": 0.02642838, "memory(GiB)": 13.7, "step": 94690, "train_speed(iter/s)": 1.529611 }, { "acc": 0.98849201, "epoch": 44.384813686430746, "grad_norm": 1.6934340000152588, "learning_rate": 3.405876723683955e-07, "loss": 0.02749631, "memory(GiB)": 13.7, "step": 94695, "train_speed(iter/s)": 1.529614 }, { "acc": 0.98125, "epoch": 44.38715725333958, "grad_norm": 2.309900999069214, "learning_rate": 3.4030693370014224e-07, "loss": 0.03935255, "memory(GiB)": 13.7, "step": 94700, "train_speed(iter/s)": 1.529617 }, { "acc": 0.98386364, "epoch": 44.389500820248415, "grad_norm": 3.7507307529449463, "learning_rate": 3.400263070476222e-07, "loss": 0.03075734, "memory(GiB)": 13.7, "step": 94705, "train_speed(iter/s)": 1.529618 }, { "acc": 0.99750004, "epoch": 44.391844387157256, "grad_norm": 0.7952784895896912, "learning_rate": 3.3974579241757845e-07, "loss": 0.02304886, "memory(GiB)": 13.7, "step": 94710, "train_speed(iter/s)": 1.529621 }, { "acc": 0.996875, "epoch": 44.39418795406609, "grad_norm": 3.3828928470611572, "learning_rate": 3.3946538981675423e-07, "loss": 0.03334271, "memory(GiB)": 13.7, "step": 94715, "train_speed(iter/s)": 1.529622 }, { "acc": 0.97738094, "epoch": 44.396531520974925, "grad_norm": 2.8698275089263916, "learning_rate": 3.391850992518891e-07, "loss": 0.05325085, "memory(GiB)": 13.7, "step": 94720, "train_speed(iter/s)": 1.529622 }, { "acc": 0.97950754, "epoch": 44.39887508788376, "grad_norm": 4.193221569061279, "learning_rate": 3.389049207297208e-07, "loss": 0.04795579, "memory(GiB)": 13.7, "step": 94725, "train_speed(iter/s)": 1.529622 }, { "acc": 0.98723211, "epoch": 44.401218654792594, "grad_norm": 3.2291746139526367, "learning_rate": 3.3862485425698273e-07, "loss": 0.05325191, "memory(GiB)": 13.7, "step": 94730, "train_speed(iter/s)": 1.529627 }, { "acc": 0.9958333, "epoch": 44.40356222170143, "grad_norm": 2.1515190601348877, "learning_rate": 3.3834489984040897e-07, "loss": 0.02220295, "memory(GiB)": 13.7, "step": 94735, "train_speed(iter/s)": 1.529629 }, { "acc": 0.99300594, "epoch": 44.40590578861026, "grad_norm": 1.3160737752914429, "learning_rate": 3.380650574867272e-07, "loss": 0.0304104, "memory(GiB)": 13.7, "step": 94740, "train_speed(iter/s)": 1.529631 }, { "acc": 0.978125, "epoch": 44.408249355519104, "grad_norm": 3.6793160438537598, "learning_rate": 3.377853272026641e-07, "loss": 0.06250795, "memory(GiB)": 13.7, "step": 94745, "train_speed(iter/s)": 1.529635 }, { "acc": 0.98374996, "epoch": 44.41059292242794, "grad_norm": 3.0544979572296143, "learning_rate": 3.375057089949438e-07, "loss": 0.04658945, "memory(GiB)": 13.7, "step": 94750, "train_speed(iter/s)": 1.529635 }, { "acc": 0.99340277, "epoch": 44.41293648933677, "grad_norm": 0.6661921739578247, "learning_rate": 3.3722620287028516e-07, "loss": 0.0286126, "memory(GiB)": 13.7, "step": 94755, "train_speed(iter/s)": 1.529633 }, { "acc": 0.98500004, "epoch": 44.41528005624561, "grad_norm": 6.300545692443848, "learning_rate": 3.3694680883541104e-07, "loss": 0.02462218, "memory(GiB)": 13.7, "step": 94760, "train_speed(iter/s)": 1.529633 }, { "acc": 0.98937502, "epoch": 44.41762362315444, "grad_norm": 1.830019235610962, "learning_rate": 3.3666752689703307e-07, "loss": 0.06702828, "memory(GiB)": 13.7, "step": 94765, "train_speed(iter/s)": 1.529634 }, { "acc": 0.98663692, "epoch": 44.419967190063275, "grad_norm": 3.1353976726531982, "learning_rate": 3.36388357061867e-07, "loss": 0.02353999, "memory(GiB)": 13.7, "step": 94770, "train_speed(iter/s)": 1.529633 }, { "acc": 0.99125004, "epoch": 44.42231075697211, "grad_norm": 3.4694061279296875, "learning_rate": 3.3610929933662234e-07, "loss": 0.0227485, "memory(GiB)": 13.7, "step": 94775, "train_speed(iter/s)": 1.52964 }, { "acc": 0.9895834, "epoch": 44.424654323880944, "grad_norm": 4.153651237487793, "learning_rate": 3.358303537280052e-07, "loss": 0.03933468, "memory(GiB)": 13.7, "step": 94780, "train_speed(iter/s)": 1.529642 }, { "acc": 0.9854167, "epoch": 44.426997890789785, "grad_norm": 5.201973915100098, "learning_rate": 3.3555152024272287e-07, "loss": 0.05201267, "memory(GiB)": 13.7, "step": 94785, "train_speed(iter/s)": 1.529644 }, { "acc": 0.98842258, "epoch": 44.42934145769862, "grad_norm": 2.9002163410186768, "learning_rate": 3.352727988874745e-07, "loss": 0.02142301, "memory(GiB)": 13.7, "step": 94790, "train_speed(iter/s)": 1.529647 }, { "acc": 0.99375, "epoch": 44.431685024607454, "grad_norm": 4.033644199371338, "learning_rate": 3.3499418966896323e-07, "loss": 0.01246772, "memory(GiB)": 13.7, "step": 94795, "train_speed(iter/s)": 1.529651 }, { "acc": 0.9895834, "epoch": 44.43402859151629, "grad_norm": 4.246560573577881, "learning_rate": 3.347156925938838e-07, "loss": 0.02837577, "memory(GiB)": 13.7, "step": 94800, "train_speed(iter/s)": 1.529653 }, { "acc": 0.9958334, "epoch": 44.43637215842512, "grad_norm": 4.117731094360352, "learning_rate": 3.3443730766893124e-07, "loss": 0.01394545, "memory(GiB)": 13.7, "step": 94805, "train_speed(iter/s)": 1.529655 }, { "acc": 0.9885417, "epoch": 44.43871572533396, "grad_norm": 3.575566053390503, "learning_rate": 3.3415903490079504e-07, "loss": 0.02613811, "memory(GiB)": 13.7, "step": 94810, "train_speed(iter/s)": 1.529657 }, { "acc": 0.97568455, "epoch": 44.44105929224279, "grad_norm": 3.5428073406219482, "learning_rate": 3.338808742961676e-07, "loss": 0.03981594, "memory(GiB)": 13.7, "step": 94815, "train_speed(iter/s)": 1.529661 }, { "acc": 0.98946438, "epoch": 44.44340285915163, "grad_norm": 2.723862648010254, "learning_rate": 3.336028258617322e-07, "loss": 0.02799498, "memory(GiB)": 13.7, "step": 94820, "train_speed(iter/s)": 1.529661 }, { "acc": 0.99125004, "epoch": 44.44574642606047, "grad_norm": 0.12230676412582397, "learning_rate": 3.333248896041724e-07, "loss": 0.02938566, "memory(GiB)": 13.7, "step": 94825, "train_speed(iter/s)": 1.529659 }, { "acc": 0.9801136, "epoch": 44.4480899929693, "grad_norm": 7.454625129699707, "learning_rate": 3.3304706553017055e-07, "loss": 0.02824211, "memory(GiB)": 13.7, "step": 94830, "train_speed(iter/s)": 1.529663 }, { "acc": 0.98125, "epoch": 44.450433559878135, "grad_norm": 5.5325822830200195, "learning_rate": 3.3276935364640215e-07, "loss": 0.02535114, "memory(GiB)": 13.7, "step": 94835, "train_speed(iter/s)": 1.529666 }, { "acc": 0.9819643, "epoch": 44.45277712678697, "grad_norm": 6.837161540985107, "learning_rate": 3.324917539595452e-07, "loss": 0.04453427, "memory(GiB)": 13.7, "step": 94840, "train_speed(iter/s)": 1.52967 }, { "acc": 0.9927083, "epoch": 44.455120693695804, "grad_norm": 3.180480718612671, "learning_rate": 3.322142664762687e-07, "loss": 0.03180023, "memory(GiB)": 13.7, "step": 94845, "train_speed(iter/s)": 1.529668 }, { "acc": 0.98106155, "epoch": 44.45746426060464, "grad_norm": 4.712084770202637, "learning_rate": 3.3193689120324655e-07, "loss": 0.05009626, "memory(GiB)": 13.7, "step": 94850, "train_speed(iter/s)": 1.529669 }, { "acc": 0.98512497, "epoch": 44.45980782751347, "grad_norm": 2.5675244331359863, "learning_rate": 3.316596281471434e-07, "loss": 0.02918019, "memory(GiB)": 13.7, "step": 94855, "train_speed(iter/s)": 1.529669 }, { "acc": 0.98635416, "epoch": 44.462151394422314, "grad_norm": 1.5024731159210205, "learning_rate": 3.3138247731462486e-07, "loss": 0.03497404, "memory(GiB)": 13.7, "step": 94860, "train_speed(iter/s)": 1.529672 }, { "acc": 0.97583332, "epoch": 44.46449496133115, "grad_norm": 2.8905222415924072, "learning_rate": 3.311054387123527e-07, "loss": 0.03420726, "memory(GiB)": 13.7, "step": 94865, "train_speed(iter/s)": 1.529678 }, { "acc": 0.98208332, "epoch": 44.46683852823998, "grad_norm": 3.6233365535736084, "learning_rate": 3.3082851234698494e-07, "loss": 0.04334249, "memory(GiB)": 13.7, "step": 94870, "train_speed(iter/s)": 1.529682 }, { "acc": 0.99087296, "epoch": 44.46918209514882, "grad_norm": 3.734954357147217, "learning_rate": 3.3055169822517823e-07, "loss": 0.03039401, "memory(GiB)": 13.7, "step": 94875, "train_speed(iter/s)": 1.529683 }, { "acc": 0.9958334, "epoch": 44.47152566205765, "grad_norm": 1.9699286222457886, "learning_rate": 3.3027499635358715e-07, "loss": 0.01948703, "memory(GiB)": 13.7, "step": 94880, "train_speed(iter/s)": 1.529683 }, { "acc": 0.97979164, "epoch": 44.473869228966485, "grad_norm": 1.6493797302246094, "learning_rate": 3.299984067388631e-07, "loss": 0.06051842, "memory(GiB)": 13.7, "step": 94885, "train_speed(iter/s)": 1.529688 }, { "acc": 0.99333334, "epoch": 44.47621279587532, "grad_norm": 1.230965495109558, "learning_rate": 3.297219293876526e-07, "loss": 0.03292431, "memory(GiB)": 13.7, "step": 94890, "train_speed(iter/s)": 1.52969 }, { "acc": 0.98328371, "epoch": 44.478556362784154, "grad_norm": 3.24042010307312, "learning_rate": 3.2944556430660325e-07, "loss": 0.04984471, "memory(GiB)": 13.7, "step": 94895, "train_speed(iter/s)": 1.529697 }, { "acc": 0.98078375, "epoch": 44.480899929692995, "grad_norm": 3.420413017272949, "learning_rate": 3.291693115023566e-07, "loss": 0.04674913, "memory(GiB)": 13.7, "step": 94900, "train_speed(iter/s)": 1.529704 }, { "acc": 0.97807379, "epoch": 44.48324349660183, "grad_norm": 0.0002408139407634735, "learning_rate": 3.288931709815514e-07, "loss": 0.04315081, "memory(GiB)": 13.7, "step": 94905, "train_speed(iter/s)": 1.529709 }, { "acc": 0.99258928, "epoch": 44.485587063510664, "grad_norm": 1.7958455085754395, "learning_rate": 3.2861714275082874e-07, "loss": 0.02537383, "memory(GiB)": 13.7, "step": 94910, "train_speed(iter/s)": 1.529711 }, { "acc": 0.97687502, "epoch": 44.4879306304195, "grad_norm": 4.836267471313477, "learning_rate": 3.28341226816821e-07, "loss": 0.04130685, "memory(GiB)": 13.7, "step": 94915, "train_speed(iter/s)": 1.529716 }, { "acc": 0.98395834, "epoch": 44.49027419732833, "grad_norm": 2.7867515087127686, "learning_rate": 3.2806542318616166e-07, "loss": 0.03963491, "memory(GiB)": 13.7, "step": 94920, "train_speed(iter/s)": 1.529719 }, { "acc": 0.97504845, "epoch": 44.49261776423717, "grad_norm": 3.7797231674194336, "learning_rate": 3.2778973186547865e-07, "loss": 0.06018128, "memory(GiB)": 13.7, "step": 94925, "train_speed(iter/s)": 1.529717 }, { "acc": 0.98425598, "epoch": 44.494961331146, "grad_norm": 4.120838642120361, "learning_rate": 3.2751415286140045e-07, "loss": 0.05109652, "memory(GiB)": 13.7, "step": 94930, "train_speed(iter/s)": 1.52972 }, { "acc": 0.98103628, "epoch": 44.49730489805484, "grad_norm": 4.200203895568848, "learning_rate": 3.272386861805483e-07, "loss": 0.02947414, "memory(GiB)": 13.7, "step": 94935, "train_speed(iter/s)": 1.529726 }, { "acc": 0.98988094, "epoch": 44.49964846496368, "grad_norm": 0.00393520575016737, "learning_rate": 3.269633318295468e-07, "loss": 0.01948853, "memory(GiB)": 13.7, "step": 94940, "train_speed(iter/s)": 1.529731 }, { "acc": 0.97979164, "epoch": 44.50199203187251, "grad_norm": 0.002155574271455407, "learning_rate": 3.266880898150128e-07, "loss": 0.02908838, "memory(GiB)": 13.7, "step": 94945, "train_speed(iter/s)": 1.52973 }, { "acc": 0.98596592, "epoch": 44.504335598781346, "grad_norm": 3.1656718254089355, "learning_rate": 3.26412960143562e-07, "loss": 0.04442469, "memory(GiB)": 13.7, "step": 94950, "train_speed(iter/s)": 1.529732 }, { "acc": 0.97872028, "epoch": 44.50667916569018, "grad_norm": 3.1307923793792725, "learning_rate": 3.261379428218089e-07, "loss": 0.06731523, "memory(GiB)": 13.7, "step": 94955, "train_speed(iter/s)": 1.529736 }, { "acc": 0.97411709, "epoch": 44.509022732599014, "grad_norm": 2.3943898677825928, "learning_rate": 3.258630378563617e-07, "loss": 0.05027391, "memory(GiB)": 13.7, "step": 94960, "train_speed(iter/s)": 1.529735 }, { "acc": 0.990625, "epoch": 44.51136629950785, "grad_norm": 4.995022773742676, "learning_rate": 3.2558824525383087e-07, "loss": 0.01824977, "memory(GiB)": 13.7, "step": 94965, "train_speed(iter/s)": 1.529736 }, { "acc": 0.99008932, "epoch": 44.51370986641668, "grad_norm": 3.070350408554077, "learning_rate": 3.2531356502082e-07, "loss": 0.04371352, "memory(GiB)": 13.7, "step": 94970, "train_speed(iter/s)": 1.529739 }, { "acc": 0.99154758, "epoch": 44.516053433325524, "grad_norm": 0.3864154815673828, "learning_rate": 3.250389971639321e-07, "loss": 0.01683696, "memory(GiB)": 13.7, "step": 94975, "train_speed(iter/s)": 1.529742 }, { "acc": 0.98916664, "epoch": 44.51839700023436, "grad_norm": 4.191812992095947, "learning_rate": 3.247645416897661e-07, "loss": 0.05849037, "memory(GiB)": 13.7, "step": 94980, "train_speed(iter/s)": 1.529746 }, { "acc": 0.9895833, "epoch": 44.52074056714319, "grad_norm": 0.0017004723194986582, "learning_rate": 3.244901986049205e-07, "loss": 0.04992872, "memory(GiB)": 13.7, "step": 94985, "train_speed(iter/s)": 1.529749 }, { "acc": 0.98059025, "epoch": 44.52308413405203, "grad_norm": 3.8065991401672363, "learning_rate": 3.242159679159884e-07, "loss": 0.08344433, "memory(GiB)": 13.7, "step": 94990, "train_speed(iter/s)": 1.529751 }, { "acc": 0.97270832, "epoch": 44.52542770096086, "grad_norm": 3.873404026031494, "learning_rate": 3.239418496295605e-07, "loss": 0.03261493, "memory(GiB)": 13.7, "step": 94995, "train_speed(iter/s)": 1.529755 }, { "acc": 0.98348217, "epoch": 44.527771267869696, "grad_norm": 2.361212968826294, "learning_rate": 3.2366784375222804e-07, "loss": 0.03307203, "memory(GiB)": 13.7, "step": 95000, "train_speed(iter/s)": 1.529756 }, { "acc": 0.9979167, "epoch": 44.53011483477853, "grad_norm": 0.011804735288023949, "learning_rate": 3.2339395029057504e-07, "loss": 0.01402333, "memory(GiB)": 13.7, "step": 95005, "train_speed(iter/s)": 1.529758 }, { "acc": 0.98571434, "epoch": 44.53245840168737, "grad_norm": 2.0390448570251465, "learning_rate": 3.2312016925118733e-07, "loss": 0.0287219, "memory(GiB)": 13.7, "step": 95010, "train_speed(iter/s)": 1.529753 }, { "acc": 0.98529758, "epoch": 44.534801968596206, "grad_norm": 2.761396884918213, "learning_rate": 3.228465006406428e-07, "loss": 0.03648016, "memory(GiB)": 13.7, "step": 95015, "train_speed(iter/s)": 1.529753 }, { "acc": 0.97354164, "epoch": 44.53714553550504, "grad_norm": 8.320852279663086, "learning_rate": 3.225729444655218e-07, "loss": 0.06669747, "memory(GiB)": 13.7, "step": 95020, "train_speed(iter/s)": 1.529753 }, { "acc": 0.97935095, "epoch": 44.539489102413874, "grad_norm": 1.7013394832611084, "learning_rate": 3.222995007323988e-07, "loss": 0.05634347, "memory(GiB)": 13.7, "step": 95025, "train_speed(iter/s)": 1.529755 }, { "acc": 0.9864584, "epoch": 44.54183266932271, "grad_norm": 3.4400367736816406, "learning_rate": 3.2202616944784745e-07, "loss": 0.02419944, "memory(GiB)": 13.7, "step": 95030, "train_speed(iter/s)": 1.529758 }, { "acc": 0.98187504, "epoch": 44.54417623623154, "grad_norm": 2.484346389770508, "learning_rate": 3.2175295061843727e-07, "loss": 0.02694394, "memory(GiB)": 13.7, "step": 95035, "train_speed(iter/s)": 1.529761 }, { "acc": 0.98247023, "epoch": 44.54651980314038, "grad_norm": 1.5256383419036865, "learning_rate": 3.214798442507346e-07, "loss": 0.03843118, "memory(GiB)": 13.7, "step": 95040, "train_speed(iter/s)": 1.529764 }, { "acc": 0.98683758, "epoch": 44.54886337004921, "grad_norm": 1.9180622100830078, "learning_rate": 3.2120685035130476e-07, "loss": 0.02942411, "memory(GiB)": 13.7, "step": 95045, "train_speed(iter/s)": 1.529762 }, { "acc": 0.98395834, "epoch": 44.55120693695805, "grad_norm": 2.9142658710479736, "learning_rate": 3.209339689267089e-07, "loss": 0.03047532, "memory(GiB)": 13.7, "step": 95050, "train_speed(iter/s)": 1.529769 }, { "acc": 0.98604164, "epoch": 44.55355050386689, "grad_norm": 4.522912979125977, "learning_rate": 3.206611999835079e-07, "loss": 0.03285089, "memory(GiB)": 13.7, "step": 95055, "train_speed(iter/s)": 1.529774 }, { "acc": 0.98041668, "epoch": 44.55589407077572, "grad_norm": 5.409082889556885, "learning_rate": 3.2038854352825637e-07, "loss": 0.03066042, "memory(GiB)": 13.7, "step": 95060, "train_speed(iter/s)": 1.52978 }, { "acc": 0.98187504, "epoch": 44.558237637684556, "grad_norm": 2.9586997032165527, "learning_rate": 3.2011599956750956e-07, "loss": 0.04925492, "memory(GiB)": 13.7, "step": 95065, "train_speed(iter/s)": 1.529783 }, { "acc": 0.98145838, "epoch": 44.56058120459339, "grad_norm": 5.3159332275390625, "learning_rate": 3.198435681078182e-07, "loss": 0.04286514, "memory(GiB)": 13.7, "step": 95070, "train_speed(iter/s)": 1.529789 }, { "acc": 0.99444447, "epoch": 44.562924771502225, "grad_norm": 0.22280775010585785, "learning_rate": 3.195712491557292e-07, "loss": 0.04747811, "memory(GiB)": 13.7, "step": 95075, "train_speed(iter/s)": 1.529791 }, { "acc": 0.99340277, "epoch": 44.56526833841106, "grad_norm": 3.69675874710083, "learning_rate": 3.1929904271779057e-07, "loss": 0.02169065, "memory(GiB)": 13.7, "step": 95080, "train_speed(iter/s)": 1.529795 }, { "acc": 0.99196434, "epoch": 44.5676119053199, "grad_norm": 0.0015881198924034834, "learning_rate": 3.1902694880054244e-07, "loss": 0.04200954, "memory(GiB)": 13.7, "step": 95085, "train_speed(iter/s)": 1.529796 }, { "acc": 0.9916667, "epoch": 44.569955472228735, "grad_norm": 0.16646313667297363, "learning_rate": 3.187549674105279e-07, "loss": 0.01627605, "memory(GiB)": 13.7, "step": 95090, "train_speed(iter/s)": 1.529797 }, { "acc": 0.98883934, "epoch": 44.57229903913757, "grad_norm": 6.570644378662109, "learning_rate": 3.1848309855428215e-07, "loss": 0.03021264, "memory(GiB)": 13.7, "step": 95095, "train_speed(iter/s)": 1.529804 }, { "acc": 0.97333336, "epoch": 44.5746426060464, "grad_norm": 3.811326503753662, "learning_rate": 3.1821134223834267e-07, "loss": 0.06856839, "memory(GiB)": 13.7, "step": 95100, "train_speed(iter/s)": 1.529808 }, { "acc": 0.98536701, "epoch": 44.57698617295524, "grad_norm": 4.258288860321045, "learning_rate": 3.17939698469238e-07, "loss": 0.02530822, "memory(GiB)": 13.7, "step": 95105, "train_speed(iter/s)": 1.529814 }, { "acc": 0.97279758, "epoch": 44.57932973986407, "grad_norm": 0.0019762697629630566, "learning_rate": 3.176681672535012e-07, "loss": 0.06711615, "memory(GiB)": 13.7, "step": 95110, "train_speed(iter/s)": 1.529813 }, { "acc": 0.98698864, "epoch": 44.581673306772906, "grad_norm": 3.576376438140869, "learning_rate": 3.173967485976568e-07, "loss": 0.04867026, "memory(GiB)": 13.7, "step": 95115, "train_speed(iter/s)": 1.529812 }, { "acc": 0.98895836, "epoch": 44.58401687368174, "grad_norm": 3.701603889465332, "learning_rate": 3.1712544250822916e-07, "loss": 0.02578427, "memory(GiB)": 13.7, "step": 95120, "train_speed(iter/s)": 1.529812 }, { "acc": 0.9822917, "epoch": 44.58636044059058, "grad_norm": 3.2653586864471436, "learning_rate": 3.168542489917405e-07, "loss": 0.03924904, "memory(GiB)": 13.7, "step": 95125, "train_speed(iter/s)": 1.529814 }, { "acc": 0.98298607, "epoch": 44.588704007499416, "grad_norm": 1.790472388267517, "learning_rate": 3.1658316805470795e-07, "loss": 0.03960875, "memory(GiB)": 13.7, "step": 95130, "train_speed(iter/s)": 1.529817 }, { "acc": 0.98125, "epoch": 44.59104757440825, "grad_norm": 4.069317817687988, "learning_rate": 3.1631219970364937e-07, "loss": 0.04335078, "memory(GiB)": 13.7, "step": 95135, "train_speed(iter/s)": 1.529821 }, { "acc": 0.97208328, "epoch": 44.593391141317085, "grad_norm": 3.603717565536499, "learning_rate": 3.1604134394507465e-07, "loss": 0.04967516, "memory(GiB)": 13.7, "step": 95140, "train_speed(iter/s)": 1.529828 }, { "acc": 0.98869047, "epoch": 44.59573470822592, "grad_norm": 2.216214656829834, "learning_rate": 3.157706007854982e-07, "loss": 0.02629491, "memory(GiB)": 13.7, "step": 95145, "train_speed(iter/s)": 1.529833 }, { "acc": 0.97863102, "epoch": 44.59807827513475, "grad_norm": 4.512246131896973, "learning_rate": 3.1549997023142495e-07, "loss": 0.05777738, "memory(GiB)": 13.7, "step": 95150, "train_speed(iter/s)": 1.529835 }, { "acc": 0.99020834, "epoch": 44.60042184204359, "grad_norm": 4.418561935424805, "learning_rate": 3.152294522893617e-07, "loss": 0.03996582, "memory(GiB)": 13.7, "step": 95155, "train_speed(iter/s)": 1.529837 }, { "acc": 0.98946428, "epoch": 44.60276540895243, "grad_norm": 1.8202165365219116, "learning_rate": 3.149590469658105e-07, "loss": 0.04340973, "memory(GiB)": 13.7, "step": 95160, "train_speed(iter/s)": 1.529836 }, { "acc": 0.98140869, "epoch": 44.60510897586126, "grad_norm": 4.241236686706543, "learning_rate": 3.146887542672687e-07, "loss": 0.04326894, "memory(GiB)": 13.7, "step": 95165, "train_speed(iter/s)": 1.529839 }, { "acc": 0.98395834, "epoch": 44.6074525427701, "grad_norm": 0.004690536297857761, "learning_rate": 3.1441857420023726e-07, "loss": 0.03332574, "memory(GiB)": 13.7, "step": 95170, "train_speed(iter/s)": 1.529846 }, { "acc": 0.97562494, "epoch": 44.60979610967893, "grad_norm": 3.5085456371307373, "learning_rate": 3.141485067712074e-07, "loss": 0.03356352, "memory(GiB)": 13.7, "step": 95175, "train_speed(iter/s)": 1.529852 }, { "acc": 0.98924675, "epoch": 44.612139676587766, "grad_norm": 2.417513370513916, "learning_rate": 3.1387855198667166e-07, "loss": 0.06653804, "memory(GiB)": 13.7, "step": 95180, "train_speed(iter/s)": 1.529854 }, { "acc": 0.98859539, "epoch": 44.6144832434966, "grad_norm": 2.811807632446289, "learning_rate": 3.136087098531187e-07, "loss": 0.04496839, "memory(GiB)": 13.7, "step": 95185, "train_speed(iter/s)": 1.529854 }, { "acc": 0.98562498, "epoch": 44.616826810405435, "grad_norm": 0.04421839490532875, "learning_rate": 3.133389803770354e-07, "loss": 0.02423658, "memory(GiB)": 13.7, "step": 95190, "train_speed(iter/s)": 1.529856 }, { "acc": 0.98291664, "epoch": 44.61917037731427, "grad_norm": 0.08892997354269028, "learning_rate": 3.1306936356490433e-07, "loss": 0.02219529, "memory(GiB)": 13.7, "step": 95195, "train_speed(iter/s)": 1.52986 }, { "acc": 0.98708334, "epoch": 44.62151394422311, "grad_norm": 3.195735454559326, "learning_rate": 3.1279985942320457e-07, "loss": 0.02939017, "memory(GiB)": 13.7, "step": 95200, "train_speed(iter/s)": 1.529865 }, { "acc": 0.99613094, "epoch": 44.623857511131945, "grad_norm": 3.322638511657715, "learning_rate": 3.1253046795841816e-07, "loss": 0.02778896, "memory(GiB)": 13.7, "step": 95205, "train_speed(iter/s)": 1.529861 }, { "acc": 0.97815475, "epoch": 44.62620107804078, "grad_norm": 2.836984872817993, "learning_rate": 3.12261189177017e-07, "loss": 0.05129585, "memory(GiB)": 13.7, "step": 95210, "train_speed(iter/s)": 1.529866 }, { "acc": 0.97449999, "epoch": 44.62854464494961, "grad_norm": 5.442674160003662, "learning_rate": 3.1199202308547527e-07, "loss": 0.05270063, "memory(GiB)": 13.7, "step": 95215, "train_speed(iter/s)": 1.529868 }, { "acc": 0.97875004, "epoch": 44.63088821185845, "grad_norm": 3.9138376712799072, "learning_rate": 3.1172296969026156e-07, "loss": 0.03050598, "memory(GiB)": 13.7, "step": 95220, "train_speed(iter/s)": 1.529872 }, { "acc": 0.97104168, "epoch": 44.63323177876728, "grad_norm": 3.185560703277588, "learning_rate": 3.114540289978451e-07, "loss": 0.04747234, "memory(GiB)": 13.7, "step": 95225, "train_speed(iter/s)": 1.529874 }, { "acc": 0.98720245, "epoch": 44.635575345676116, "grad_norm": 3.2743937969207764, "learning_rate": 3.1118520101468726e-07, "loss": 0.04489362, "memory(GiB)": 13.7, "step": 95230, "train_speed(iter/s)": 1.529879 }, { "acc": 0.99375, "epoch": 44.63791891258495, "grad_norm": 2.9468865394592285, "learning_rate": 3.1091648574725335e-07, "loss": 0.03159264, "memory(GiB)": 13.7, "step": 95235, "train_speed(iter/s)": 1.529879 }, { "acc": 0.97624998, "epoch": 44.64026247949379, "grad_norm": 0.001474512624554336, "learning_rate": 3.106478832020003e-07, "loss": 0.0688236, "memory(GiB)": 13.7, "step": 95240, "train_speed(iter/s)": 1.52988 }, { "acc": 0.99174681, "epoch": 44.642606046402626, "grad_norm": 3.258899450302124, "learning_rate": 3.1037939338538403e-07, "loss": 0.03627988, "memory(GiB)": 13.7, "step": 95245, "train_speed(iter/s)": 1.529882 }, { "acc": 0.98812504, "epoch": 44.64494961331146, "grad_norm": 0.002393514383584261, "learning_rate": 3.1011101630385983e-07, "loss": 0.03031366, "memory(GiB)": 13.7, "step": 95250, "train_speed(iter/s)": 1.529885 }, { "acc": 0.98467264, "epoch": 44.647293180220295, "grad_norm": 3.587698459625244, "learning_rate": 3.098427519638763e-07, "loss": 0.03245483, "memory(GiB)": 13.7, "step": 95255, "train_speed(iter/s)": 1.529888 }, { "acc": 0.98874998, "epoch": 44.64963674712913, "grad_norm": 2.3059613704681396, "learning_rate": 3.0957460037188436e-07, "loss": 0.01684934, "memory(GiB)": 13.7, "step": 95260, "train_speed(iter/s)": 1.529892 }, { "acc": 0.99187498, "epoch": 44.651980314037964, "grad_norm": 2.9661900997161865, "learning_rate": 3.093065615343276e-07, "loss": 0.02712201, "memory(GiB)": 13.7, "step": 95265, "train_speed(iter/s)": 1.529895 }, { "acc": 0.97705355, "epoch": 44.6543238809468, "grad_norm": 4.227821350097656, "learning_rate": 3.0903863545764967e-07, "loss": 0.05079198, "memory(GiB)": 13.7, "step": 95270, "train_speed(iter/s)": 1.529898 }, { "acc": 0.98467274, "epoch": 44.65666744785564, "grad_norm": 1.887494444847107, "learning_rate": 3.087708221482898e-07, "loss": 0.03369754, "memory(GiB)": 13.7, "step": 95275, "train_speed(iter/s)": 1.529905 }, { "acc": 0.99333334, "epoch": 44.659011014764474, "grad_norm": 2.5988869667053223, "learning_rate": 3.085031216126866e-07, "loss": 0.03037168, "memory(GiB)": 13.7, "step": 95280, "train_speed(iter/s)": 1.529907 }, { "acc": 0.9979166, "epoch": 44.66135458167331, "grad_norm": 2.893543243408203, "learning_rate": 3.0823553385727374e-07, "loss": 0.01862658, "memory(GiB)": 13.7, "step": 95285, "train_speed(iter/s)": 1.529909 }, { "acc": 0.9905798, "epoch": 44.66369814858214, "grad_norm": 2.372469902038574, "learning_rate": 3.0796805888848215e-07, "loss": 0.02961291, "memory(GiB)": 13.7, "step": 95290, "train_speed(iter/s)": 1.529909 }, { "acc": 0.98175602, "epoch": 44.66604171549098, "grad_norm": 0.0022268248721957207, "learning_rate": 3.077006967127438e-07, "loss": 0.04265726, "memory(GiB)": 13.7, "step": 95295, "train_speed(iter/s)": 1.529913 }, { "acc": 0.98454857, "epoch": 44.66838528239981, "grad_norm": 2.2265217304229736, "learning_rate": 3.0743344733648225e-07, "loss": 0.04823143, "memory(GiB)": 13.7, "step": 95300, "train_speed(iter/s)": 1.529914 }, { "acc": 0.98718748, "epoch": 44.670728849308645, "grad_norm": 2.604424476623535, "learning_rate": 3.0716631076612407e-07, "loss": 0.04924449, "memory(GiB)": 13.7, "step": 95305, "train_speed(iter/s)": 1.52992 }, { "acc": 0.990625, "epoch": 44.67307241621748, "grad_norm": 2.269296646118164, "learning_rate": 3.068992870080873e-07, "loss": 0.02298915, "memory(GiB)": 13.7, "step": 95310, "train_speed(iter/s)": 1.529922 }, { "acc": 0.98916664, "epoch": 44.67541598312632, "grad_norm": 2.843771457672119, "learning_rate": 3.06632376068794e-07, "loss": 0.03713361, "memory(GiB)": 13.7, "step": 95315, "train_speed(iter/s)": 1.529924 }, { "acc": 0.9760417, "epoch": 44.677759550035155, "grad_norm": 0.1378229409456253, "learning_rate": 3.063655779546572e-07, "loss": 0.03313131, "memory(GiB)": 13.7, "step": 95320, "train_speed(iter/s)": 1.529927 }, { "acc": 0.9811553, "epoch": 44.68010311694399, "grad_norm": 4.550017356872559, "learning_rate": 3.0609889267208954e-07, "loss": 0.04464392, "memory(GiB)": 13.7, "step": 95325, "train_speed(iter/s)": 1.529928 }, { "acc": 0.98395844, "epoch": 44.682446683852824, "grad_norm": 5.358265399932861, "learning_rate": 3.0583232022750355e-07, "loss": 0.0383319, "memory(GiB)": 13.7, "step": 95330, "train_speed(iter/s)": 1.529928 }, { "acc": 0.99591351, "epoch": 44.68479025076166, "grad_norm": 4.916204452514648, "learning_rate": 3.0556586062730457e-07, "loss": 0.03081745, "memory(GiB)": 13.7, "step": 95335, "train_speed(iter/s)": 1.529934 }, { "acc": 0.978125, "epoch": 44.68713381767049, "grad_norm": 3.7309088706970215, "learning_rate": 3.052995138778985e-07, "loss": 0.04681342, "memory(GiB)": 13.7, "step": 95340, "train_speed(iter/s)": 1.529941 }, { "acc": 0.97711315, "epoch": 44.68947738457933, "grad_norm": 4.122840881347656, "learning_rate": 3.0503327998568627e-07, "loss": 0.04438496, "memory(GiB)": 13.7, "step": 95345, "train_speed(iter/s)": 1.529941 }, { "acc": 0.99020834, "epoch": 44.69182095148817, "grad_norm": 5.269372940063477, "learning_rate": 3.047671589570693e-07, "loss": 0.04024213, "memory(GiB)": 13.7, "step": 95350, "train_speed(iter/s)": 1.529944 }, { "acc": 0.971875, "epoch": 44.694164518397, "grad_norm": 1.6133348941802979, "learning_rate": 3.045011507984424e-07, "loss": 0.0495648, "memory(GiB)": 13.7, "step": 95355, "train_speed(iter/s)": 1.529948 }, { "acc": 0.99187498, "epoch": 44.69650808530584, "grad_norm": 0.22247426211833954, "learning_rate": 3.042352555162016e-07, "loss": 0.01505826, "memory(GiB)": 13.7, "step": 95360, "train_speed(iter/s)": 1.52995 }, { "acc": 0.99020824, "epoch": 44.69885165221467, "grad_norm": 1.6649682521820068, "learning_rate": 3.0396947311673595e-07, "loss": 0.02852699, "memory(GiB)": 13.7, "step": 95365, "train_speed(iter/s)": 1.529953 }, { "acc": 0.98458328, "epoch": 44.701195219123505, "grad_norm": 3.123793363571167, "learning_rate": 3.0370380360643486e-07, "loss": 0.03017094, "memory(GiB)": 13.7, "step": 95370, "train_speed(iter/s)": 1.529961 }, { "acc": 0.98083334, "epoch": 44.70353878603234, "grad_norm": 5.892521858215332, "learning_rate": 3.034382469916836e-07, "loss": 0.03303524, "memory(GiB)": 13.7, "step": 95375, "train_speed(iter/s)": 1.529964 }, { "acc": 0.97247028, "epoch": 44.705882352941174, "grad_norm": 4.934031963348389, "learning_rate": 3.031728032788664e-07, "loss": 0.07035779, "memory(GiB)": 13.7, "step": 95380, "train_speed(iter/s)": 1.529965 }, { "acc": 0.98028851, "epoch": 44.70822591985001, "grad_norm": 2.728910207748413, "learning_rate": 3.0290747247436375e-07, "loss": 0.02785756, "memory(GiB)": 13.7, "step": 95385, "train_speed(iter/s)": 1.529966 }, { "acc": 0.97742424, "epoch": 44.71056948675885, "grad_norm": 4.221640110015869, "learning_rate": 3.026422545845515e-07, "loss": 0.05429932, "memory(GiB)": 13.7, "step": 95390, "train_speed(iter/s)": 1.529967 }, { "acc": 0.99821434, "epoch": 44.712913053667684, "grad_norm": 1.6057615280151367, "learning_rate": 3.023771496158067e-07, "loss": 0.00908443, "memory(GiB)": 13.7, "step": 95395, "train_speed(iter/s)": 1.529967 }, { "acc": 0.9833334, "epoch": 44.71525662057652, "grad_norm": 2.207068681716919, "learning_rate": 3.0211215757449913e-07, "loss": 0.04845061, "memory(GiB)": 13.7, "step": 95400, "train_speed(iter/s)": 1.529972 }, { "acc": 0.96988096, "epoch": 44.71760018748535, "grad_norm": 1.7467025518417358, "learning_rate": 3.018472784670015e-07, "loss": 0.03823211, "memory(GiB)": 13.7, "step": 95405, "train_speed(iter/s)": 1.529974 }, { "acc": 0.97142859, "epoch": 44.71994375439419, "grad_norm": 1.5400795936584473, "learning_rate": 3.015825122996791e-07, "loss": 0.07152038, "memory(GiB)": 13.7, "step": 95410, "train_speed(iter/s)": 1.529977 }, { "acc": 0.98936014, "epoch": 44.72228732130302, "grad_norm": 4.6876325607299805, "learning_rate": 3.0131785907889505e-07, "loss": 0.03878453, "memory(GiB)": 13.7, "step": 95415, "train_speed(iter/s)": 1.529979 }, { "acc": 0.978125, "epoch": 44.724630888211856, "grad_norm": 3.896007537841797, "learning_rate": 3.0105331881101264e-07, "loss": 0.0488029, "memory(GiB)": 13.7, "step": 95420, "train_speed(iter/s)": 1.52998 }, { "acc": 1.0, "epoch": 44.7269744551207, "grad_norm": 2.456869602203369, "learning_rate": 3.0078889150238883e-07, "loss": 0.02704837, "memory(GiB)": 13.7, "step": 95425, "train_speed(iter/s)": 1.529985 }, { "acc": 0.99333334, "epoch": 44.72931802202953, "grad_norm": 0.006070922128856182, "learning_rate": 3.005245771593807e-07, "loss": 0.04562919, "memory(GiB)": 13.7, "step": 95430, "train_speed(iter/s)": 1.529984 }, { "acc": 0.98104172, "epoch": 44.731661588938366, "grad_norm": 4.286855697631836, "learning_rate": 3.002603757883398e-07, "loss": 0.0403438, "memory(GiB)": 13.7, "step": 95435, "train_speed(iter/s)": 1.529983 }, { "acc": 0.9916667, "epoch": 44.7340051558472, "grad_norm": 0.01338875014334917, "learning_rate": 2.9999628739561983e-07, "loss": 0.00940297, "memory(GiB)": 13.7, "step": 95440, "train_speed(iter/s)": 1.529984 }, { "acc": 0.98947306, "epoch": 44.736348722756034, "grad_norm": 3.129513740539551, "learning_rate": 2.9973231198756616e-07, "loss": 0.02555085, "memory(GiB)": 13.7, "step": 95445, "train_speed(iter/s)": 1.52998 }, { "acc": 0.97770834, "epoch": 44.73869228966487, "grad_norm": 2.5267739295959473, "learning_rate": 2.9946844957052424e-07, "loss": 0.03714983, "memory(GiB)": 13.7, "step": 95450, "train_speed(iter/s)": 1.529978 }, { "acc": 0.99237175, "epoch": 44.7410358565737, "grad_norm": 3.125792980194092, "learning_rate": 2.992047001508372e-07, "loss": 0.03498824, "memory(GiB)": 13.7, "step": 95455, "train_speed(iter/s)": 1.52998 }, { "acc": 0.9833334, "epoch": 44.74337942348254, "grad_norm": 2.833623170852661, "learning_rate": 2.9894106373484263e-07, "loss": 0.02775083, "memory(GiB)": 13.7, "step": 95460, "train_speed(iter/s)": 1.529981 }, { "acc": 0.98988094, "epoch": 44.74572299039138, "grad_norm": 3.363053321838379, "learning_rate": 2.986775403288811e-07, "loss": 0.03939151, "memory(GiB)": 13.7, "step": 95465, "train_speed(iter/s)": 1.529984 }, { "acc": 0.990625, "epoch": 44.74806655730021, "grad_norm": 3.6396310329437256, "learning_rate": 2.984141299392834e-07, "loss": 0.04005724, "memory(GiB)": 13.7, "step": 95470, "train_speed(iter/s)": 1.529988 }, { "acc": 0.9927084, "epoch": 44.75041012420905, "grad_norm": 5.88180685043335, "learning_rate": 2.9815083257238337e-07, "loss": 0.0574527, "memory(GiB)": 13.7, "step": 95475, "train_speed(iter/s)": 1.529989 }, { "acc": 0.98606052, "epoch": 44.75275369111788, "grad_norm": 3.105656623840332, "learning_rate": 2.978876482345086e-07, "loss": 0.06547112, "memory(GiB)": 13.7, "step": 95480, "train_speed(iter/s)": 1.529995 }, { "acc": 0.98245049, "epoch": 44.755097258026716, "grad_norm": 0.9231399893760681, "learning_rate": 2.976245769319857e-07, "loss": 0.04401287, "memory(GiB)": 13.7, "step": 95485, "train_speed(iter/s)": 1.529999 }, { "acc": 0.99613094, "epoch": 44.75744082493555, "grad_norm": 0.002084370469674468, "learning_rate": 2.973616186711383e-07, "loss": 0.01004734, "memory(GiB)": 13.7, "step": 95490, "train_speed(iter/s)": 1.529999 }, { "acc": 0.9822917, "epoch": 44.759784391844384, "grad_norm": 7.880923748016357, "learning_rate": 2.9709877345828477e-07, "loss": 0.07071173, "memory(GiB)": 13.7, "step": 95495, "train_speed(iter/s)": 1.53 }, { "acc": 0.97208328, "epoch": 44.762127958753226, "grad_norm": 4.515318393707275, "learning_rate": 2.968360412997464e-07, "loss": 0.05646646, "memory(GiB)": 13.7, "step": 95500, "train_speed(iter/s)": 1.530003 }, { "acc": 0.98690472, "epoch": 44.76447152566206, "grad_norm": 3.1112194061279297, "learning_rate": 2.965734222018361e-07, "loss": 0.04113259, "memory(GiB)": 13.7, "step": 95505, "train_speed(iter/s)": 1.530009 }, { "acc": 0.98351192, "epoch": 44.766815092570894, "grad_norm": 2.2552719116210938, "learning_rate": 2.963109161708675e-07, "loss": 0.03005719, "memory(GiB)": 13.7, "step": 95510, "train_speed(iter/s)": 1.53001 }, { "acc": 0.97840776, "epoch": 44.76915865947973, "grad_norm": 0.9799111485481262, "learning_rate": 2.960485232131499e-07, "loss": 0.06717024, "memory(GiB)": 13.7, "step": 95515, "train_speed(iter/s)": 1.530015 }, { "acc": 0.99051476, "epoch": 44.77150222638856, "grad_norm": 3.9413912296295166, "learning_rate": 2.9578624333499047e-07, "loss": 0.02929801, "memory(GiB)": 13.7, "step": 95520, "train_speed(iter/s)": 1.530019 }, { "acc": 0.9947917, "epoch": 44.7738457932974, "grad_norm": 1.305051565170288, "learning_rate": 2.9552407654269295e-07, "loss": 0.02713051, "memory(GiB)": 13.7, "step": 95525, "train_speed(iter/s)": 1.530015 }, { "acc": 0.98286705, "epoch": 44.77618936020623, "grad_norm": 0.0023564244620501995, "learning_rate": 2.952620228425611e-07, "loss": 0.03630346, "memory(GiB)": 13.7, "step": 95530, "train_speed(iter/s)": 1.530017 }, { "acc": 0.98291664, "epoch": 44.778532927115066, "grad_norm": 3.422240734100342, "learning_rate": 2.950000822408915e-07, "loss": 0.0327263, "memory(GiB)": 13.7, "step": 95535, "train_speed(iter/s)": 1.53002 }, { "acc": 0.98104172, "epoch": 44.78087649402391, "grad_norm": 6.086933612823486, "learning_rate": 2.9473825474398125e-07, "loss": 0.03668654, "memory(GiB)": 13.7, "step": 95540, "train_speed(iter/s)": 1.530023 }, { "acc": 0.98931551, "epoch": 44.78322006093274, "grad_norm": 3.544956922531128, "learning_rate": 2.9447654035812407e-07, "loss": 0.05117996, "memory(GiB)": 13.7, "step": 95545, "train_speed(iter/s)": 1.530026 }, { "acc": 0.996875, "epoch": 44.785563627841576, "grad_norm": 1.2461107969284058, "learning_rate": 2.942149390896089e-07, "loss": 0.04381834, "memory(GiB)": 13.7, "step": 95550, "train_speed(iter/s)": 1.530022 }, { "acc": 0.98993053, "epoch": 44.78790719475041, "grad_norm": 5.994820594787598, "learning_rate": 2.939534509447266e-07, "loss": 0.05974158, "memory(GiB)": 13.7, "step": 95555, "train_speed(iter/s)": 1.530028 }, { "acc": 0.990625, "epoch": 44.790250761659244, "grad_norm": 2.8693909645080566, "learning_rate": 2.936920759297604e-07, "loss": 0.02374371, "memory(GiB)": 13.7, "step": 95560, "train_speed(iter/s)": 1.530027 }, { "acc": 0.98708334, "epoch": 44.79259432856808, "grad_norm": 4.233921527862549, "learning_rate": 2.9343081405099476e-07, "loss": 0.03261144, "memory(GiB)": 13.7, "step": 95565, "train_speed(iter/s)": 1.530027 }, { "acc": 0.99821434, "epoch": 44.79493789547691, "grad_norm": 0.03035874105989933, "learning_rate": 2.931696653147073e-07, "loss": 0.03748633, "memory(GiB)": 13.7, "step": 95570, "train_speed(iter/s)": 1.530029 }, { "acc": 0.9848897, "epoch": 44.797281462385754, "grad_norm": 4.168455600738525, "learning_rate": 2.929086297271773e-07, "loss": 0.03481501, "memory(GiB)": 13.7, "step": 95575, "train_speed(iter/s)": 1.530032 }, { "acc": 0.990625, "epoch": 44.79962502929459, "grad_norm": 1.0201220512390137, "learning_rate": 2.926477072946781e-07, "loss": 0.02579603, "memory(GiB)": 13.7, "step": 95580, "train_speed(iter/s)": 1.53003 }, { "acc": 0.98342266, "epoch": 44.80196859620342, "grad_norm": 0.7910313606262207, "learning_rate": 2.923868980234796e-07, "loss": 0.03497505, "memory(GiB)": 13.7, "step": 95585, "train_speed(iter/s)": 1.530031 }, { "acc": 0.97946434, "epoch": 44.80431216311226, "grad_norm": 3.302155017852783, "learning_rate": 2.9212620191985506e-07, "loss": 0.03299775, "memory(GiB)": 13.7, "step": 95590, "train_speed(iter/s)": 1.530033 }, { "acc": 0.98347225, "epoch": 44.80665573002109, "grad_norm": 2.2140491008758545, "learning_rate": 2.918656189900671e-07, "loss": 0.04918303, "memory(GiB)": 13.7, "step": 95595, "train_speed(iter/s)": 1.530033 }, { "acc": 0.98374996, "epoch": 44.808999296929926, "grad_norm": 3.6014561653137207, "learning_rate": 2.916051492403812e-07, "loss": 0.04801649, "memory(GiB)": 13.7, "step": 95600, "train_speed(iter/s)": 1.53004 }, { "acc": 0.99375, "epoch": 44.81134286383876, "grad_norm": 0.03313857316970825, "learning_rate": 2.9134479267705623e-07, "loss": 0.00930983, "memory(GiB)": 13.7, "step": 95605, "train_speed(iter/s)": 1.530042 }, { "acc": 0.99375, "epoch": 44.813686430747595, "grad_norm": 2.4582438468933105, "learning_rate": 2.910845493063538e-07, "loss": 0.03108239, "memory(GiB)": 13.7, "step": 95610, "train_speed(iter/s)": 1.530043 }, { "acc": 0.98500004, "epoch": 44.816029997656436, "grad_norm": 3.0679876804351807, "learning_rate": 2.908244191345265e-07, "loss": 0.04798585, "memory(GiB)": 13.7, "step": 95615, "train_speed(iter/s)": 1.530039 }, { "acc": 0.96841345, "epoch": 44.81837356456527, "grad_norm": 3.891688108444214, "learning_rate": 2.9056440216782715e-07, "loss": 0.05315325, "memory(GiB)": 13.7, "step": 95620, "train_speed(iter/s)": 1.530041 }, { "acc": 0.98646774, "epoch": 44.820717131474105, "grad_norm": 3.6292226314544678, "learning_rate": 2.903044984125067e-07, "loss": 0.03878661, "memory(GiB)": 13.7, "step": 95625, "train_speed(iter/s)": 1.530043 }, { "acc": 0.98979168, "epoch": 44.82306069838294, "grad_norm": 3.1860663890838623, "learning_rate": 2.900447078748113e-07, "loss": 0.02271606, "memory(GiB)": 13.7, "step": 95630, "train_speed(iter/s)": 1.530046 }, { "acc": 0.97729168, "epoch": 44.82540426529177, "grad_norm": 5.665341854095459, "learning_rate": 2.897850305609869e-07, "loss": 0.06337726, "memory(GiB)": 13.7, "step": 95635, "train_speed(iter/s)": 1.530048 }, { "acc": 0.990625, "epoch": 44.82774783220061, "grad_norm": 3.9536523818969727, "learning_rate": 2.8952546647727346e-07, "loss": 0.02796682, "memory(GiB)": 13.7, "step": 95640, "train_speed(iter/s)": 1.530053 }, { "acc": 0.97840271, "epoch": 44.83009139910944, "grad_norm": 2.5971357822418213, "learning_rate": 2.892660156299121e-07, "loss": 0.02250423, "memory(GiB)": 13.7, "step": 95645, "train_speed(iter/s)": 1.530056 }, { "acc": 0.98773251, "epoch": 44.83243496601828, "grad_norm": 2.183361291885376, "learning_rate": 2.8900667802513706e-07, "loss": 0.06325434, "memory(GiB)": 13.7, "step": 95650, "train_speed(iter/s)": 1.53006 }, { "acc": 0.98083334, "epoch": 44.83477853292712, "grad_norm": 3.464942693710327, "learning_rate": 2.88747453669184e-07, "loss": 0.03025545, "memory(GiB)": 13.7, "step": 95655, "train_speed(iter/s)": 1.530061 }, { "acc": 0.98458328, "epoch": 44.83712209983595, "grad_norm": 0.11532249301671982, "learning_rate": 2.884883425682828e-07, "loss": 0.05376658, "memory(GiB)": 13.7, "step": 95660, "train_speed(iter/s)": 1.530067 }, { "acc": 0.97456846, "epoch": 44.839465666744786, "grad_norm": 1.8447620868682861, "learning_rate": 2.882293447286606e-07, "loss": 0.0465031, "memory(GiB)": 13.7, "step": 95665, "train_speed(iter/s)": 1.530063 }, { "acc": 0.98291664, "epoch": 44.84180923365362, "grad_norm": 3.3642702102661133, "learning_rate": 2.879704601565452e-07, "loss": 0.04645878, "memory(GiB)": 13.7, "step": 95670, "train_speed(iter/s)": 1.530064 }, { "acc": 0.98729162, "epoch": 44.844152800562455, "grad_norm": 2.1424427032470703, "learning_rate": 2.877116888581571e-07, "loss": 0.01959541, "memory(GiB)": 13.7, "step": 95675, "train_speed(iter/s)": 1.530068 }, { "acc": 0.98883934, "epoch": 44.84649636747129, "grad_norm": 3.1032791137695312, "learning_rate": 2.8745303083971837e-07, "loss": 0.02636285, "memory(GiB)": 13.7, "step": 95680, "train_speed(iter/s)": 1.530068 }, { "acc": 0.98334284, "epoch": 44.84883993438012, "grad_norm": 1.6884143352508545, "learning_rate": 2.871944861074441e-07, "loss": 0.04194285, "memory(GiB)": 13.7, "step": 95685, "train_speed(iter/s)": 1.530071 }, { "acc": 0.98675594, "epoch": 44.851183501288965, "grad_norm": 1.2594622373580933, "learning_rate": 2.8693605466755083e-07, "loss": 0.0415052, "memory(GiB)": 13.7, "step": 95690, "train_speed(iter/s)": 1.530072 }, { "acc": 0.98633928, "epoch": 44.8535270681978, "grad_norm": 4.486711502075195, "learning_rate": 2.866777365262486e-07, "loss": 0.03879528, "memory(GiB)": 13.7, "step": 95695, "train_speed(iter/s)": 1.53007 }, { "acc": 0.98500004, "epoch": 44.85587063510663, "grad_norm": 2.7129132747650146, "learning_rate": 2.864195316897485e-07, "loss": 0.048936, "memory(GiB)": 13.7, "step": 95700, "train_speed(iter/s)": 1.530077 }, { "acc": 0.98923607, "epoch": 44.85821420201547, "grad_norm": 2.0584285259246826, "learning_rate": 2.86161440164256e-07, "loss": 0.02282917, "memory(GiB)": 13.7, "step": 95705, "train_speed(iter/s)": 1.530077 }, { "acc": 0.97904758, "epoch": 44.8605577689243, "grad_norm": 3.1135761737823486, "learning_rate": 2.859034619559738e-07, "loss": 0.03723292, "memory(GiB)": 13.7, "step": 95710, "train_speed(iter/s)": 1.530078 }, { "acc": 0.98760414, "epoch": 44.862901335833136, "grad_norm": 0.016746561974287033, "learning_rate": 2.856455970711042e-07, "loss": 0.02428991, "memory(GiB)": 13.7, "step": 95715, "train_speed(iter/s)": 1.530078 }, { "acc": 0.98698864, "epoch": 44.86524490274197, "grad_norm": 2.560502767562866, "learning_rate": 2.853878455158449e-07, "loss": 0.04728287, "memory(GiB)": 13.7, "step": 95720, "train_speed(iter/s)": 1.530081 }, { "acc": 0.98812504, "epoch": 44.86758846965081, "grad_norm": 2.7160143852233887, "learning_rate": 2.8513020729639093e-07, "loss": 0.03152777, "memory(GiB)": 13.7, "step": 95725, "train_speed(iter/s)": 1.530081 }, { "acc": 0.98791676, "epoch": 44.869932036559646, "grad_norm": 4.230404376983643, "learning_rate": 2.8487268241893553e-07, "loss": 0.07324064, "memory(GiB)": 13.7, "step": 95730, "train_speed(iter/s)": 1.530086 }, { "acc": 0.98279762, "epoch": 44.87227560346848, "grad_norm": 2.2345850467681885, "learning_rate": 2.8461527088966984e-07, "loss": 0.07111005, "memory(GiB)": 13.7, "step": 95735, "train_speed(iter/s)": 1.530091 }, { "acc": 0.99375, "epoch": 44.874619170377315, "grad_norm": 0.00562806474044919, "learning_rate": 2.8435797271477996e-07, "loss": 0.02034643, "memory(GiB)": 13.7, "step": 95740, "train_speed(iter/s)": 1.53009 }, { "acc": 0.96791668, "epoch": 44.87696273728615, "grad_norm": 3.6105048656463623, "learning_rate": 2.841007879004503e-07, "loss": 0.05087547, "memory(GiB)": 13.7, "step": 95745, "train_speed(iter/s)": 1.530096 }, { "acc": 0.99258928, "epoch": 44.879306304194984, "grad_norm": 2.6835553646087646, "learning_rate": 2.838437164528637e-07, "loss": 0.01370035, "memory(GiB)": 13.7, "step": 95750, "train_speed(iter/s)": 1.530098 }, { "acc": 0.99604168, "epoch": 44.88164987110382, "grad_norm": 2.7613768577575684, "learning_rate": 2.835867583781972e-07, "loss": 0.04347082, "memory(GiB)": 13.7, "step": 95755, "train_speed(iter/s)": 1.530095 }, { "acc": 0.9947917, "epoch": 44.88399343801265, "grad_norm": 0.011827806010842323, "learning_rate": 2.833299136826299e-07, "loss": 0.01506968, "memory(GiB)": 13.7, "step": 95760, "train_speed(iter/s)": 1.530097 }, { "acc": 0.98312502, "epoch": 44.886337004921494, "grad_norm": 5.171169281005859, "learning_rate": 2.8307318237233393e-07, "loss": 0.03046924, "memory(GiB)": 13.7, "step": 95765, "train_speed(iter/s)": 1.530102 }, { "acc": 0.97967262, "epoch": 44.88868057183033, "grad_norm": 2.8528552055358887, "learning_rate": 2.828165644534815e-07, "loss": 0.0553296, "memory(GiB)": 13.7, "step": 95770, "train_speed(iter/s)": 1.530107 }, { "acc": 0.99098215, "epoch": 44.89102413873916, "grad_norm": 0.8364787697792053, "learning_rate": 2.825600599322393e-07, "loss": 0.04410672, "memory(GiB)": 13.7, "step": 95775, "train_speed(iter/s)": 1.53011 }, { "acc": 0.96645832, "epoch": 44.893367705648, "grad_norm": 6.6468000411987305, "learning_rate": 2.8230366881477454e-07, "loss": 0.06159154, "memory(GiB)": 13.7, "step": 95780, "train_speed(iter/s)": 1.530114 }, { "acc": 0.98708334, "epoch": 44.89571127255683, "grad_norm": 0.2490774244070053, "learning_rate": 2.8204739110724885e-07, "loss": 0.01769108, "memory(GiB)": 13.7, "step": 95785, "train_speed(iter/s)": 1.530115 }, { "acc": 0.990625, "epoch": 44.898054839465665, "grad_norm": 3.4845333099365234, "learning_rate": 2.8179122681582125e-07, "loss": 0.04339257, "memory(GiB)": 13.7, "step": 95790, "train_speed(iter/s)": 1.530115 }, { "acc": 0.97666664, "epoch": 44.9003984063745, "grad_norm": 4.6161932945251465, "learning_rate": 2.815351759466517e-07, "loss": 0.03085962, "memory(GiB)": 13.7, "step": 95795, "train_speed(iter/s)": 1.53012 }, { "acc": 0.97624998, "epoch": 44.90274197328334, "grad_norm": 4.632983684539795, "learning_rate": 2.81279238505893e-07, "loss": 0.03591661, "memory(GiB)": 13.7, "step": 95800, "train_speed(iter/s)": 1.530119 }, { "acc": 0.99750004, "epoch": 44.905085540192175, "grad_norm": 2.7222044467926025, "learning_rate": 2.810234144996979e-07, "loss": 0.01346009, "memory(GiB)": 13.7, "step": 95805, "train_speed(iter/s)": 1.530121 }, { "acc": 0.97821426, "epoch": 44.90742910710101, "grad_norm": 1.0257635116577148, "learning_rate": 2.807677039342142e-07, "loss": 0.04639464, "memory(GiB)": 13.7, "step": 95810, "train_speed(iter/s)": 1.530117 }, { "acc": 0.97874994, "epoch": 44.909772674009844, "grad_norm": 0.0020225741900503635, "learning_rate": 2.805121068155909e-07, "loss": 0.02522267, "memory(GiB)": 13.7, "step": 95815, "train_speed(iter/s)": 1.530117 }, { "acc": 0.99624996, "epoch": 44.91211624091868, "grad_norm": 2.9282546043395996, "learning_rate": 2.8025662314996906e-07, "loss": 0.03352734, "memory(GiB)": 13.7, "step": 95820, "train_speed(iter/s)": 1.530119 }, { "acc": 0.9682291, "epoch": 44.91445980782751, "grad_norm": 2.599670171737671, "learning_rate": 2.8000125294349153e-07, "loss": 0.04759302, "memory(GiB)": 13.7, "step": 95825, "train_speed(iter/s)": 1.53012 }, { "acc": 0.97854166, "epoch": 44.91680337473635, "grad_norm": 4.769209861755371, "learning_rate": 2.7974599620229607e-07, "loss": 0.04148256, "memory(GiB)": 13.7, "step": 95830, "train_speed(iter/s)": 1.53012 }, { "acc": 0.98344698, "epoch": 44.91914694164518, "grad_norm": 4.5271148681640625, "learning_rate": 2.794908529325172e-07, "loss": 0.05231059, "memory(GiB)": 13.7, "step": 95835, "train_speed(iter/s)": 1.530119 }, { "acc": 0.98395834, "epoch": 44.92149050855402, "grad_norm": 0.00133068289142102, "learning_rate": 2.7923582314028935e-07, "loss": 0.02446018, "memory(GiB)": 13.7, "step": 95840, "train_speed(iter/s)": 1.530118 }, { "acc": 0.99125004, "epoch": 44.92383407546286, "grad_norm": 2.9934768676757812, "learning_rate": 2.7898090683174043e-07, "loss": 0.02988558, "memory(GiB)": 13.7, "step": 95845, "train_speed(iter/s)": 1.530122 }, { "acc": 0.9822917, "epoch": 44.92617764237169, "grad_norm": 7.7323455810546875, "learning_rate": 2.787261040129998e-07, "loss": 0.03985175, "memory(GiB)": 13.7, "step": 95850, "train_speed(iter/s)": 1.530121 }, { "acc": 0.979072, "epoch": 44.928521209280525, "grad_norm": 3.9724574089050293, "learning_rate": 2.784714146901915e-07, "loss": 0.05250304, "memory(GiB)": 13.7, "step": 95855, "train_speed(iter/s)": 1.530121 }, { "acc": 0.98687496, "epoch": 44.93086477618936, "grad_norm": 2.1565847396850586, "learning_rate": 2.7821683886943774e-07, "loss": 0.02796141, "memory(GiB)": 13.7, "step": 95860, "train_speed(iter/s)": 1.53012 }, { "acc": 0.97770844, "epoch": 44.933208343098194, "grad_norm": 3.0589444637298584, "learning_rate": 2.7796237655685695e-07, "loss": 0.03343214, "memory(GiB)": 13.7, "step": 95865, "train_speed(iter/s)": 1.530123 }, { "acc": 0.996875, "epoch": 44.93555191000703, "grad_norm": 2.0870492458343506, "learning_rate": 2.7770802775856523e-07, "loss": 0.01080224, "memory(GiB)": 13.7, "step": 95870, "train_speed(iter/s)": 1.530125 }, { "acc": 0.97821503, "epoch": 44.93789547691586, "grad_norm": 4.355413436889648, "learning_rate": 2.774537924806777e-07, "loss": 0.05197852, "memory(GiB)": 13.7, "step": 95875, "train_speed(iter/s)": 1.530129 }, { "acc": 0.99541664, "epoch": 44.940239043824704, "grad_norm": 0.8808971047401428, "learning_rate": 2.7719967072930436e-07, "loss": 0.01860425, "memory(GiB)": 13.7, "step": 95880, "train_speed(iter/s)": 1.530129 }, { "acc": 0.98812504, "epoch": 44.94258261073354, "grad_norm": 2.535249710083008, "learning_rate": 2.7694566251055354e-07, "loss": 0.02898941, "memory(GiB)": 13.7, "step": 95885, "train_speed(iter/s)": 1.530132 }, { "acc": 0.97843752, "epoch": 44.94492617764237, "grad_norm": 0.5080761909484863, "learning_rate": 2.7669176783053103e-07, "loss": 0.05042681, "memory(GiB)": 13.7, "step": 95890, "train_speed(iter/s)": 1.530135 }, { "acc": 0.97875004, "epoch": 44.94726974455121, "grad_norm": 5.621590614318848, "learning_rate": 2.764379866953401e-07, "loss": 0.06590211, "memory(GiB)": 13.7, "step": 95895, "train_speed(iter/s)": 1.530141 }, { "acc": 0.99187498, "epoch": 44.94961331146004, "grad_norm": 0.007477310486137867, "learning_rate": 2.7618431911107866e-07, "loss": 0.03024023, "memory(GiB)": 13.7, "step": 95900, "train_speed(iter/s)": 1.530143 }, { "acc": 0.99541664, "epoch": 44.951956878368875, "grad_norm": 0.029644230380654335, "learning_rate": 2.7593076508384724e-07, "loss": 0.01748407, "memory(GiB)": 13.7, "step": 95905, "train_speed(iter/s)": 1.530149 }, { "acc": 0.98604164, "epoch": 44.95430044527771, "grad_norm": 2.731123208999634, "learning_rate": 2.7567732461973825e-07, "loss": 0.03673034, "memory(GiB)": 13.7, "step": 95910, "train_speed(iter/s)": 1.53015 }, { "acc": 0.97375336, "epoch": 44.95664401218655, "grad_norm": 7.4333014488220215, "learning_rate": 2.7542399772484395e-07, "loss": 0.05283199, "memory(GiB)": 13.7, "step": 95915, "train_speed(iter/s)": 1.530151 }, { "acc": 0.9885416, "epoch": 44.958987579095385, "grad_norm": 0.14269782602787018, "learning_rate": 2.751707844052543e-07, "loss": 0.01762426, "memory(GiB)": 13.7, "step": 95920, "train_speed(iter/s)": 1.530157 }, { "acc": 0.98467264, "epoch": 44.96133114600422, "grad_norm": 3.7170279026031494, "learning_rate": 2.74917684667054e-07, "loss": 0.05065609, "memory(GiB)": 13.7, "step": 95925, "train_speed(iter/s)": 1.530157 }, { "acc": 0.97250004, "epoch": 44.963674712913054, "grad_norm": 5.5159783363342285, "learning_rate": 2.746646985163291e-07, "loss": 0.04444132, "memory(GiB)": 13.7, "step": 95930, "train_speed(iter/s)": 1.530159 }, { "acc": 0.99465466, "epoch": 44.96601827982189, "grad_norm": 2.1934680938720703, "learning_rate": 2.7441182595915754e-07, "loss": 0.027808, "memory(GiB)": 13.7, "step": 95935, "train_speed(iter/s)": 1.53016 }, { "acc": 0.99255199, "epoch": 44.96836184673072, "grad_norm": 4.374858856201172, "learning_rate": 2.741590670016205e-07, "loss": 0.0268649, "memory(GiB)": 13.7, "step": 95940, "train_speed(iter/s)": 1.530161 }, { "acc": 0.9875, "epoch": 44.97070541363956, "grad_norm": 1.5509629249572754, "learning_rate": 2.7390642164979194e-07, "loss": 0.04451886, "memory(GiB)": 13.7, "step": 95945, "train_speed(iter/s)": 1.530164 }, { "acc": 0.9958334, "epoch": 44.97304898054839, "grad_norm": 3.178274154663086, "learning_rate": 2.736538899097453e-07, "loss": 0.03064788, "memory(GiB)": 13.7, "step": 95950, "train_speed(iter/s)": 1.530164 }, { "acc": 0.99258928, "epoch": 44.97539254745723, "grad_norm": 1.9329253435134888, "learning_rate": 2.7340147178755063e-07, "loss": 0.01722903, "memory(GiB)": 13.7, "step": 95955, "train_speed(iter/s)": 1.530163 }, { "acc": 0.97729168, "epoch": 44.97773611436607, "grad_norm": 3.8900389671325684, "learning_rate": 2.731491672892731e-07, "loss": 0.03618575, "memory(GiB)": 13.7, "step": 95960, "train_speed(iter/s)": 1.530167 }, { "acc": 0.96479168, "epoch": 44.9800796812749, "grad_norm": 6.757897853851318, "learning_rate": 2.728969764209805e-07, "loss": 0.05759212, "memory(GiB)": 13.7, "step": 95965, "train_speed(iter/s)": 1.530169 }, { "acc": 0.98562498, "epoch": 44.982423248183736, "grad_norm": 3.1019301414489746, "learning_rate": 2.7264489918873304e-07, "loss": 0.02887602, "memory(GiB)": 13.7, "step": 95970, "train_speed(iter/s)": 1.530175 }, { "acc": 0.99187498, "epoch": 44.98476681509257, "grad_norm": 0.12979798018932343, "learning_rate": 2.7239293559859016e-07, "loss": 0.01944788, "memory(GiB)": 13.7, "step": 95975, "train_speed(iter/s)": 1.530174 }, { "acc": 0.98542652, "epoch": 44.987110382001404, "grad_norm": 4.98832368850708, "learning_rate": 2.7214108565660757e-07, "loss": 0.04294059, "memory(GiB)": 13.7, "step": 95980, "train_speed(iter/s)": 1.530173 }, { "acc": 0.97469692, "epoch": 44.98945394891024, "grad_norm": 3.1440188884735107, "learning_rate": 2.7188934936883986e-07, "loss": 0.04623133, "memory(GiB)": 13.7, "step": 95985, "train_speed(iter/s)": 1.530173 }, { "acc": 0.99187498, "epoch": 44.99179751581908, "grad_norm": 0.001755951321683824, "learning_rate": 2.7163772674133644e-07, "loss": 0.02200306, "memory(GiB)": 13.7, "step": 95990, "train_speed(iter/s)": 1.530179 }, { "acc": 0.99031258, "epoch": 44.994141082727914, "grad_norm": 4.30078125, "learning_rate": 2.713862177801476e-07, "loss": 0.0256422, "memory(GiB)": 13.7, "step": 95995, "train_speed(iter/s)": 1.530182 }, { "acc": 0.97322302, "epoch": 44.99648464963675, "grad_norm": 5.114536285400391, "learning_rate": 2.711348224913178e-07, "loss": 0.04533347, "memory(GiB)": 13.7, "step": 96000, "train_speed(iter/s)": 1.530184 }, { "acc": 0.98812504, "epoch": 44.99882821654558, "grad_norm": 4.150809288024902, "learning_rate": 2.7088354088088936e-07, "loss": 0.02336141, "memory(GiB)": 13.7, "step": 96005, "train_speed(iter/s)": 1.530187 }, { "acc": 0.9875, "epoch": 45.00117178345442, "grad_norm": 2.102778196334839, "learning_rate": 2.7063237295490303e-07, "loss": 0.02030618, "memory(GiB)": 13.7, "step": 96010, "train_speed(iter/s)": 1.530175 }, { "acc": 0.96498508, "epoch": 45.00351535036325, "grad_norm": 2.4977030754089355, "learning_rate": 2.703813187193939e-07, "loss": 0.0460431, "memory(GiB)": 13.7, "step": 96015, "train_speed(iter/s)": 1.530175 }, { "acc": 0.98354168, "epoch": 45.005858917272086, "grad_norm": 2.0267226696014404, "learning_rate": 2.7013037818040043e-07, "loss": 0.03637633, "memory(GiB)": 13.7, "step": 96020, "train_speed(iter/s)": 1.530177 }, { "acc": 0.98583336, "epoch": 45.00820248418092, "grad_norm": 4.22066068649292, "learning_rate": 2.69879551343951e-07, "loss": 0.03300101, "memory(GiB)": 13.7, "step": 96025, "train_speed(iter/s)": 1.530178 }, { "acc": 0.98902779, "epoch": 45.01054605108976, "grad_norm": 1.704278826713562, "learning_rate": 2.6962883821607644e-07, "loss": 0.01948343, "memory(GiB)": 13.7, "step": 96030, "train_speed(iter/s)": 1.530183 }, { "acc": 0.98604164, "epoch": 45.012889617998596, "grad_norm": 5.009416103363037, "learning_rate": 2.693782388028029e-07, "loss": 0.05630813, "memory(GiB)": 13.7, "step": 96035, "train_speed(iter/s)": 1.530181 }, { "acc": 0.99300594, "epoch": 45.01523318490743, "grad_norm": 0.2841332256793976, "learning_rate": 2.691277531101522e-07, "loss": 0.02244593, "memory(GiB)": 13.7, "step": 96040, "train_speed(iter/s)": 1.530188 }, { "acc": 0.9885416, "epoch": 45.017576751816264, "grad_norm": 3.349032163619995, "learning_rate": 2.688773811441479e-07, "loss": 0.03121982, "memory(GiB)": 13.7, "step": 96045, "train_speed(iter/s)": 1.530193 }, { "acc": 0.98666668, "epoch": 45.0199203187251, "grad_norm": 4.26593542098999, "learning_rate": 2.686271229108049e-07, "loss": 0.03105171, "memory(GiB)": 13.7, "step": 96050, "train_speed(iter/s)": 1.530194 }, { "acc": 0.986131, "epoch": 45.02226388563393, "grad_norm": 4.472339153289795, "learning_rate": 2.683769784161425e-07, "loss": 0.05766111, "memory(GiB)": 13.7, "step": 96055, "train_speed(iter/s)": 1.530195 }, { "acc": 0.98407192, "epoch": 45.02460745254277, "grad_norm": 1.9409770965576172, "learning_rate": 2.6812694766617013e-07, "loss": 0.04167711, "memory(GiB)": 13.7, "step": 96060, "train_speed(iter/s)": 1.530198 }, { "acc": 0.98395834, "epoch": 45.02695101945161, "grad_norm": 0.002148624276742339, "learning_rate": 2.678770306668996e-07, "loss": 0.03899674, "memory(GiB)": 13.7, "step": 96065, "train_speed(iter/s)": 1.530201 }, { "acc": 0.97989578, "epoch": 45.02929458636044, "grad_norm": 4.2733354568481445, "learning_rate": 2.6762722742433674e-07, "loss": 0.05281671, "memory(GiB)": 13.7, "step": 96070, "train_speed(iter/s)": 1.5302 }, { "acc": 0.996875, "epoch": 45.03163815326928, "grad_norm": 0.12357036024332047, "learning_rate": 2.673775379444871e-07, "loss": 0.02264889, "memory(GiB)": 13.7, "step": 96075, "train_speed(iter/s)": 1.530201 }, { "acc": 0.97765875, "epoch": 45.03398172017811, "grad_norm": 2.372673749923706, "learning_rate": 2.671279622333515e-07, "loss": 0.02591263, "memory(GiB)": 13.7, "step": 96080, "train_speed(iter/s)": 1.530206 }, { "acc": 0.98514881, "epoch": 45.036325287086946, "grad_norm": 4.241386890411377, "learning_rate": 2.668785002969295e-07, "loss": 0.03772297, "memory(GiB)": 13.7, "step": 96085, "train_speed(iter/s)": 1.530207 }, { "acc": 0.97833338, "epoch": 45.03866885399578, "grad_norm": 3.0176074504852295, "learning_rate": 2.6662915214121796e-07, "loss": 0.03877677, "memory(GiB)": 13.7, "step": 96090, "train_speed(iter/s)": 1.53021 }, { "acc": 0.98444443, "epoch": 45.041012420904615, "grad_norm": 2.2799577713012695, "learning_rate": 2.6637991777220865e-07, "loss": 0.03302976, "memory(GiB)": 13.7, "step": 96095, "train_speed(iter/s)": 1.530211 }, { "acc": 0.996875, "epoch": 45.04335598781345, "grad_norm": 0.009865937754511833, "learning_rate": 2.66130797195894e-07, "loss": 0.02054091, "memory(GiB)": 13.7, "step": 96100, "train_speed(iter/s)": 1.530211 }, { "acc": 0.98321428, "epoch": 45.04569955472229, "grad_norm": 1.5077635049819946, "learning_rate": 2.658817904182597e-07, "loss": 0.02954094, "memory(GiB)": 13.7, "step": 96105, "train_speed(iter/s)": 1.530221 }, { "acc": 0.99702387, "epoch": 45.048043121631125, "grad_norm": 0.00038126634899526834, "learning_rate": 2.656328974452948e-07, "loss": 0.01020642, "memory(GiB)": 13.7, "step": 96110, "train_speed(iter/s)": 1.530225 }, { "acc": 0.99120045, "epoch": 45.05038668853996, "grad_norm": 3.9504356384277344, "learning_rate": 2.653841182829785e-07, "loss": 0.04097703, "memory(GiB)": 13.7, "step": 96115, "train_speed(iter/s)": 1.530227 }, { "acc": 0.990625, "epoch": 45.05273025544879, "grad_norm": 1.935019850730896, "learning_rate": 2.6513545293729296e-07, "loss": 0.0189208, "memory(GiB)": 13.7, "step": 96120, "train_speed(iter/s)": 1.530229 }, { "acc": 0.9864584, "epoch": 45.05507382235763, "grad_norm": 3.8073184490203857, "learning_rate": 2.6488690141421464e-07, "loss": 0.03138092, "memory(GiB)": 13.7, "step": 96125, "train_speed(iter/s)": 1.53023 }, { "acc": 0.99665184, "epoch": 45.05741738926646, "grad_norm": 0.0016486146487295628, "learning_rate": 2.6463846371971643e-07, "loss": 0.01419424, "memory(GiB)": 13.7, "step": 96130, "train_speed(iter/s)": 1.530232 }, { "acc": 0.98529758, "epoch": 45.059760956175296, "grad_norm": 2.3376121520996094, "learning_rate": 2.643901398597713e-07, "loss": 0.03651577, "memory(GiB)": 13.7, "step": 96135, "train_speed(iter/s)": 1.530237 }, { "acc": 0.9806345, "epoch": 45.06210452308414, "grad_norm": 1.5279600620269775, "learning_rate": 2.6414192984034725e-07, "loss": 0.0330075, "memory(GiB)": 13.7, "step": 96140, "train_speed(iter/s)": 1.530238 }, { "acc": 0.9746726, "epoch": 45.06444808999297, "grad_norm": 1.1363214254379272, "learning_rate": 2.638938336674116e-07, "loss": 0.03991221, "memory(GiB)": 13.7, "step": 96145, "train_speed(iter/s)": 1.530242 }, { "acc": 0.97979164, "epoch": 45.066791656901806, "grad_norm": 2.889993190765381, "learning_rate": 2.636458513469274e-07, "loss": 0.05217384, "memory(GiB)": 13.7, "step": 96150, "train_speed(iter/s)": 1.530245 }, { "acc": 0.98611116, "epoch": 45.06913522381064, "grad_norm": 2.735398054122925, "learning_rate": 2.633979828848553e-07, "loss": 0.0298763, "memory(GiB)": 13.7, "step": 96155, "train_speed(iter/s)": 1.530248 }, { "acc": 0.97404757, "epoch": 45.071478790719475, "grad_norm": 3.200852155685425, "learning_rate": 2.6315022828715344e-07, "loss": 0.04079556, "memory(GiB)": 13.7, "step": 96160, "train_speed(iter/s)": 1.530247 }, { "acc": 0.97729168, "epoch": 45.07382235762831, "grad_norm": 3.890611171722412, "learning_rate": 2.6290258755977524e-07, "loss": 0.03575393, "memory(GiB)": 13.7, "step": 96165, "train_speed(iter/s)": 1.530247 }, { "acc": 0.9947917, "epoch": 45.07616592453714, "grad_norm": 2.099327802658081, "learning_rate": 2.626550607086758e-07, "loss": 0.01054949, "memory(GiB)": 13.7, "step": 96170, "train_speed(iter/s)": 1.53025 }, { "acc": 0.98604164, "epoch": 45.07850949144598, "grad_norm": 3.324047327041626, "learning_rate": 2.624076477398027e-07, "loss": 0.02909448, "memory(GiB)": 13.7, "step": 96175, "train_speed(iter/s)": 1.530258 }, { "acc": 0.98249998, "epoch": 45.08085305835482, "grad_norm": 0.898064136505127, "learning_rate": 2.6216034865910503e-07, "loss": 0.02403711, "memory(GiB)": 13.7, "step": 96180, "train_speed(iter/s)": 1.530264 }, { "acc": 0.97514572, "epoch": 45.08319662526365, "grad_norm": 4.607754707336426, "learning_rate": 2.6191316347252573e-07, "loss": 0.06392271, "memory(GiB)": 13.7, "step": 96185, "train_speed(iter/s)": 1.530268 }, { "acc": 0.99406252, "epoch": 45.08554019217249, "grad_norm": 2.854766845703125, "learning_rate": 2.6166609218600663e-07, "loss": 0.02856559, "memory(GiB)": 13.7, "step": 96190, "train_speed(iter/s)": 1.530273 }, { "acc": 0.98910255, "epoch": 45.08788375908132, "grad_norm": 0.6377702951431274, "learning_rate": 2.614191348054846e-07, "loss": 0.02596691, "memory(GiB)": 13.7, "step": 96195, "train_speed(iter/s)": 1.530275 }, { "acc": 0.97833328, "epoch": 45.090227325990156, "grad_norm": 2.3455862998962402, "learning_rate": 2.6117229133689936e-07, "loss": 0.03091548, "memory(GiB)": 13.7, "step": 96200, "train_speed(iter/s)": 1.530277 }, { "acc": 1.0, "epoch": 45.09257089289899, "grad_norm": 2.871004104614258, "learning_rate": 2.6092556178618224e-07, "loss": 0.01770164, "memory(GiB)": 13.7, "step": 96205, "train_speed(iter/s)": 1.530282 }, { "acc": 0.97987175, "epoch": 45.094914459807825, "grad_norm": 4.3677167892456055, "learning_rate": 2.606789461592635e-07, "loss": 0.07860622, "memory(GiB)": 13.7, "step": 96210, "train_speed(iter/s)": 1.530287 }, { "acc": 0.98166666, "epoch": 45.097258026716666, "grad_norm": 2.2644386291503906, "learning_rate": 2.6043244446207207e-07, "loss": 0.046485, "memory(GiB)": 13.7, "step": 96215, "train_speed(iter/s)": 1.530288 }, { "acc": 0.990625, "epoch": 45.0996015936255, "grad_norm": 2.2061846256256104, "learning_rate": 2.601860567005311e-07, "loss": 0.02133263, "memory(GiB)": 13.7, "step": 96220, "train_speed(iter/s)": 1.530288 }, { "acc": 0.98569441, "epoch": 45.101945160534335, "grad_norm": 2.9407761096954346, "learning_rate": 2.5993978288056475e-07, "loss": 0.03585161, "memory(GiB)": 13.7, "step": 96225, "train_speed(iter/s)": 1.530292 }, { "acc": 0.9801136, "epoch": 45.10428872744317, "grad_norm": 0.00025715501396916807, "learning_rate": 2.596936230080914e-07, "loss": 0.04331398, "memory(GiB)": 13.7, "step": 96230, "train_speed(iter/s)": 1.530294 }, { "acc": 0.97697916, "epoch": 45.106632294352, "grad_norm": 4.735722541809082, "learning_rate": 2.594475770890298e-07, "loss": 0.0427676, "memory(GiB)": 13.7, "step": 96235, "train_speed(iter/s)": 1.530298 }, { "acc": 0.98812504, "epoch": 45.10897586126084, "grad_norm": 2.9257068634033203, "learning_rate": 2.592016451292923e-07, "loss": 0.02436934, "memory(GiB)": 13.7, "step": 96240, "train_speed(iter/s)": 1.530305 }, { "acc": 0.9822917, "epoch": 45.11131942816967, "grad_norm": 5.668713092803955, "learning_rate": 2.589558271347913e-07, "loss": 0.03567849, "memory(GiB)": 13.7, "step": 96245, "train_speed(iter/s)": 1.530306 }, { "acc": 0.99375, "epoch": 45.113662995078506, "grad_norm": 0.029272083193063736, "learning_rate": 2.5871012311143495e-07, "loss": 0.01872308, "memory(GiB)": 13.7, "step": 96250, "train_speed(iter/s)": 1.53031 }, { "acc": 0.99354172, "epoch": 45.11600656198735, "grad_norm": 0.00428284564986825, "learning_rate": 2.5846453306512845e-07, "loss": 0.02147675, "memory(GiB)": 13.7, "step": 96255, "train_speed(iter/s)": 1.530312 }, { "acc": 0.98659182, "epoch": 45.11835012889618, "grad_norm": 3.367898941040039, "learning_rate": 2.582190570017765e-07, "loss": 0.03644219, "memory(GiB)": 13.7, "step": 96260, "train_speed(iter/s)": 1.530318 }, { "acc": 0.98666668, "epoch": 45.120693695805016, "grad_norm": 1.8469314575195312, "learning_rate": 2.579736949272787e-07, "loss": 0.03494903, "memory(GiB)": 13.7, "step": 96265, "train_speed(iter/s)": 1.530322 }, { "acc": 0.98488102, "epoch": 45.12303726271385, "grad_norm": 2.401008129119873, "learning_rate": 2.5772844684753317e-07, "loss": 0.02518504, "memory(GiB)": 13.7, "step": 96270, "train_speed(iter/s)": 1.530325 }, { "acc": 0.98660717, "epoch": 45.125380829622685, "grad_norm": 2.6845626831054688, "learning_rate": 2.57483312768434e-07, "loss": 0.05817097, "memory(GiB)": 13.7, "step": 96275, "train_speed(iter/s)": 1.530328 }, { "acc": 0.98467264, "epoch": 45.12772439653152, "grad_norm": 4.12512731552124, "learning_rate": 2.5723829269587424e-07, "loss": 0.02855189, "memory(GiB)": 13.7, "step": 96280, "train_speed(iter/s)": 1.53033 }, { "acc": 0.9895834, "epoch": 45.130067963440354, "grad_norm": 4.065228462219238, "learning_rate": 2.5699338663574357e-07, "loss": 0.02846524, "memory(GiB)": 13.7, "step": 96285, "train_speed(iter/s)": 1.530331 }, { "acc": 0.96931553, "epoch": 45.13241153034919, "grad_norm": 3.3364081382751465, "learning_rate": 2.5674859459392675e-07, "loss": 0.04655478, "memory(GiB)": 13.7, "step": 96290, "train_speed(iter/s)": 1.530336 }, { "acc": 0.97706852, "epoch": 45.13475509725803, "grad_norm": 4.206167697906494, "learning_rate": 2.5650391657631065e-07, "loss": 0.07341568, "memory(GiB)": 13.7, "step": 96295, "train_speed(iter/s)": 1.530339 }, { "acc": 0.9760416, "epoch": 45.137098664166864, "grad_norm": 7.147874355316162, "learning_rate": 2.562593525887744e-07, "loss": 0.04126318, "memory(GiB)": 13.7, "step": 96300, "train_speed(iter/s)": 1.530338 }, { "acc": 0.98416672, "epoch": 45.1394422310757, "grad_norm": 4.249910354614258, "learning_rate": 2.5601490263719834e-07, "loss": 0.0416967, "memory(GiB)": 13.7, "step": 96305, "train_speed(iter/s)": 1.530341 }, { "acc": 0.99187498, "epoch": 45.14178579798453, "grad_norm": 3.4442429542541504, "learning_rate": 2.557705667274554e-07, "loss": 0.02345299, "memory(GiB)": 13.7, "step": 96310, "train_speed(iter/s)": 1.530343 }, { "acc": 0.97875004, "epoch": 45.14412936489337, "grad_norm": 0.15759871900081635, "learning_rate": 2.555263448654216e-07, "loss": 0.03538567, "memory(GiB)": 13.7, "step": 96315, "train_speed(iter/s)": 1.530345 }, { "acc": 0.97995033, "epoch": 45.1464729318022, "grad_norm": 3.8317110538482666, "learning_rate": 2.552822370569659e-07, "loss": 0.04197398, "memory(GiB)": 13.7, "step": 96320, "train_speed(iter/s)": 1.530348 }, { "acc": 0.98988094, "epoch": 45.148816498711035, "grad_norm": 1.6561464071273804, "learning_rate": 2.550382433079564e-07, "loss": 0.02841935, "memory(GiB)": 13.7, "step": 96325, "train_speed(iter/s)": 1.53035 }, { "acc": 0.99188843, "epoch": 45.15116006561988, "grad_norm": 2.081662178039551, "learning_rate": 2.5479436362425737e-07, "loss": 0.03889433, "memory(GiB)": 13.7, "step": 96330, "train_speed(iter/s)": 1.530354 }, { "acc": 0.97666664, "epoch": 45.15350363252871, "grad_norm": 2.0217666625976562, "learning_rate": 2.545505980117307e-07, "loss": 0.04607221, "memory(GiB)": 13.7, "step": 96335, "train_speed(iter/s)": 1.530355 }, { "acc": 0.9958333, "epoch": 45.155847199437545, "grad_norm": 2.3294622898101807, "learning_rate": 2.5430694647623606e-07, "loss": 0.01658791, "memory(GiB)": 13.7, "step": 96340, "train_speed(iter/s)": 1.530355 }, { "acc": 0.97791672, "epoch": 45.15819076634638, "grad_norm": 4.071407794952393, "learning_rate": 2.540634090236288e-07, "loss": 0.0362724, "memory(GiB)": 13.7, "step": 96345, "train_speed(iter/s)": 1.530358 }, { "acc": 0.98812504, "epoch": 45.160534333255214, "grad_norm": 0.0246476661413908, "learning_rate": 2.5381998565976585e-07, "loss": 0.02526229, "memory(GiB)": 13.7, "step": 96350, "train_speed(iter/s)": 1.530364 }, { "acc": 0.98195515, "epoch": 45.16287790016405, "grad_norm": 4.755667686462402, "learning_rate": 2.5357667639049525e-07, "loss": 0.03956268, "memory(GiB)": 13.7, "step": 96355, "train_speed(iter/s)": 1.530362 }, { "acc": 0.9895834, "epoch": 45.16522146707288, "grad_norm": 3.4548838138580322, "learning_rate": 2.5333348122166676e-07, "loss": 0.03443557, "memory(GiB)": 13.7, "step": 96360, "train_speed(iter/s)": 1.530368 }, { "acc": 0.98050594, "epoch": 45.16756503398172, "grad_norm": 2.301701784133911, "learning_rate": 2.530904001591257e-07, "loss": 0.05067623, "memory(GiB)": 13.7, "step": 96365, "train_speed(iter/s)": 1.530368 }, { "acc": 0.98819447, "epoch": 45.16990860089056, "grad_norm": 2.7601447105407715, "learning_rate": 2.528474332087146e-07, "loss": 0.03544557, "memory(GiB)": 13.7, "step": 96370, "train_speed(iter/s)": 1.53037 }, { "acc": 0.99306011, "epoch": 45.17225216779939, "grad_norm": 2.741396427154541, "learning_rate": 2.526045803762748e-07, "loss": 0.04259761, "memory(GiB)": 13.7, "step": 96375, "train_speed(iter/s)": 1.530368 }, { "acc": 0.9875, "epoch": 45.17459573470823, "grad_norm": 3.3719818592071533, "learning_rate": 2.523618416676422e-07, "loss": 0.04183407, "memory(GiB)": 13.7, "step": 96380, "train_speed(iter/s)": 1.530372 }, { "acc": 0.98842258, "epoch": 45.17693930161706, "grad_norm": 4.596343994140625, "learning_rate": 2.5211921708865267e-07, "loss": 0.03039994, "memory(GiB)": 13.7, "step": 96385, "train_speed(iter/s)": 1.530375 }, { "acc": 0.98946428, "epoch": 45.179282868525895, "grad_norm": 3.5510149002075195, "learning_rate": 2.51876706645137e-07, "loss": 0.01796626, "memory(GiB)": 13.7, "step": 96390, "train_speed(iter/s)": 1.530378 }, { "acc": 0.99666672, "epoch": 45.18162643543473, "grad_norm": 2.154146432876587, "learning_rate": 2.5163431034292563e-07, "loss": 0.00988726, "memory(GiB)": 13.7, "step": 96395, "train_speed(iter/s)": 1.530377 }, { "acc": 0.99196434, "epoch": 45.183970002343564, "grad_norm": 2.4754140377044678, "learning_rate": 2.5139202818784266e-07, "loss": 0.02847174, "memory(GiB)": 13.7, "step": 96400, "train_speed(iter/s)": 1.530379 }, { "acc": 0.99187498, "epoch": 45.186313569252405, "grad_norm": 0.9027749300003052, "learning_rate": 2.5114986018571456e-07, "loss": 0.03833038, "memory(GiB)": 13.7, "step": 96405, "train_speed(iter/s)": 1.530375 }, { "acc": 0.98804379, "epoch": 45.18865713616124, "grad_norm": 2.0022358894348145, "learning_rate": 2.509078063423611e-07, "loss": 0.04704049, "memory(GiB)": 13.7, "step": 96410, "train_speed(iter/s)": 1.530372 }, { "acc": 0.9833333, "epoch": 45.191000703070074, "grad_norm": 3.0806336402893066, "learning_rate": 2.5066586666359975e-07, "loss": 0.02424085, "memory(GiB)": 13.7, "step": 96415, "train_speed(iter/s)": 1.530376 }, { "acc": 0.97800598, "epoch": 45.19334426997891, "grad_norm": 4.434572219848633, "learning_rate": 2.50424041155247e-07, "loss": 0.07508185, "memory(GiB)": 13.7, "step": 96420, "train_speed(iter/s)": 1.530382 }, { "acc": 0.9895834, "epoch": 45.19568783688774, "grad_norm": 1.0173388719558716, "learning_rate": 2.501823298231148e-07, "loss": 0.02781188, "memory(GiB)": 13.7, "step": 96425, "train_speed(iter/s)": 1.530388 }, { "acc": 0.97687492, "epoch": 45.19803140379658, "grad_norm": 3.0889947414398193, "learning_rate": 2.4994073267301356e-07, "loss": 0.03626403, "memory(GiB)": 13.7, "step": 96430, "train_speed(iter/s)": 1.530392 }, { "acc": 0.98567133, "epoch": 45.20037497070541, "grad_norm": 2.0354864597320557, "learning_rate": 2.496992497107491e-07, "loss": 0.05869843, "memory(GiB)": 13.7, "step": 96435, "train_speed(iter/s)": 1.530391 }, { "acc": 0.98604164, "epoch": 45.202718537614246, "grad_norm": 0.9278172850608826, "learning_rate": 2.494578809421289e-07, "loss": 0.01639534, "memory(GiB)": 13.7, "step": 96440, "train_speed(iter/s)": 1.530396 }, { "acc": 0.99020834, "epoch": 45.20506210452309, "grad_norm": 5.534091472625732, "learning_rate": 2.4921662637295183e-07, "loss": 0.04062389, "memory(GiB)": 13.7, "step": 96445, "train_speed(iter/s)": 1.530396 }, { "acc": 0.96896782, "epoch": 45.20740567143192, "grad_norm": 5.048393249511719, "learning_rate": 2.4897548600901866e-07, "loss": 0.05090443, "memory(GiB)": 13.7, "step": 96450, "train_speed(iter/s)": 1.530399 }, { "acc": 0.9947917, "epoch": 45.209749238340756, "grad_norm": 1.7900278568267822, "learning_rate": 2.487344598561247e-07, "loss": 0.01709437, "memory(GiB)": 13.7, "step": 96455, "train_speed(iter/s)": 1.530405 }, { "acc": 0.98472223, "epoch": 45.21209280524959, "grad_norm": 3.873110294342041, "learning_rate": 2.4849354792006193e-07, "loss": 0.02974406, "memory(GiB)": 13.7, "step": 96460, "train_speed(iter/s)": 1.53041 }, { "acc": 0.98444939, "epoch": 45.214436372158424, "grad_norm": 2.919938325881958, "learning_rate": 2.4825275020662473e-07, "loss": 0.03460152, "memory(GiB)": 13.7, "step": 96465, "train_speed(iter/s)": 1.53041 }, { "acc": 0.97967262, "epoch": 45.21677993906726, "grad_norm": 2.591022491455078, "learning_rate": 2.4801206672159773e-07, "loss": 0.03997649, "memory(GiB)": 13.7, "step": 96470, "train_speed(iter/s)": 1.530415 }, { "acc": 0.98046875, "epoch": 45.21912350597609, "grad_norm": 1.5608195066452026, "learning_rate": 2.477714974707686e-07, "loss": 0.04261313, "memory(GiB)": 13.7, "step": 96475, "train_speed(iter/s)": 1.530418 }, { "acc": 0.99437504, "epoch": 45.221467072884934, "grad_norm": 2.1138222217559814, "learning_rate": 2.4753104245991826e-07, "loss": 0.02469891, "memory(GiB)": 13.7, "step": 96480, "train_speed(iter/s)": 1.53042 }, { "acc": 0.98425598, "epoch": 45.22381063979377, "grad_norm": 3.1794795989990234, "learning_rate": 2.47290701694827e-07, "loss": 0.02839969, "memory(GiB)": 13.7, "step": 96485, "train_speed(iter/s)": 1.530424 }, { "acc": 0.98571434, "epoch": 45.2261542067026, "grad_norm": 3.161705493927002, "learning_rate": 2.470504751812708e-07, "loss": 0.04332067, "memory(GiB)": 13.7, "step": 96490, "train_speed(iter/s)": 1.530424 }, { "acc": 0.98999996, "epoch": 45.22849777361144, "grad_norm": 2.407468318939209, "learning_rate": 2.4681036292502654e-07, "loss": 0.04841231, "memory(GiB)": 13.7, "step": 96495, "train_speed(iter/s)": 1.530424 }, { "acc": 0.99508934, "epoch": 45.23084134052027, "grad_norm": 2.4783647060394287, "learning_rate": 2.465703649318637e-07, "loss": 0.02679892, "memory(GiB)": 13.7, "step": 96500, "train_speed(iter/s)": 1.530421 }, { "acc": 0.99169645, "epoch": 45.233184907429106, "grad_norm": 0.001776898163370788, "learning_rate": 2.4633048120755077e-07, "loss": 0.02115914, "memory(GiB)": 13.7, "step": 96505, "train_speed(iter/s)": 1.530423 }, { "acc": 0.97673607, "epoch": 45.23552847433794, "grad_norm": 7.641019344329834, "learning_rate": 2.4609071175785487e-07, "loss": 0.03327207, "memory(GiB)": 13.7, "step": 96510, "train_speed(iter/s)": 1.530421 }, { "acc": 0.99541664, "epoch": 45.237872041246774, "grad_norm": 0.004194880835711956, "learning_rate": 2.4585105658853804e-07, "loss": 0.01000409, "memory(GiB)": 13.7, "step": 96515, "train_speed(iter/s)": 1.530427 }, { "acc": 0.98249998, "epoch": 45.240215608155616, "grad_norm": 2.5626721382141113, "learning_rate": 2.456115157053623e-07, "loss": 0.04031012, "memory(GiB)": 13.7, "step": 96520, "train_speed(iter/s)": 1.530427 }, { "acc": 0.9895834, "epoch": 45.24255917506445, "grad_norm": 2.238799810409546, "learning_rate": 2.453720891140847e-07, "loss": 0.01679153, "memory(GiB)": 13.7, "step": 96525, "train_speed(iter/s)": 1.53043 }, { "acc": 0.99404764, "epoch": 45.244902741973284, "grad_norm": 1.490572452545166, "learning_rate": 2.4513277682046e-07, "loss": 0.01380079, "memory(GiB)": 13.7, "step": 96530, "train_speed(iter/s)": 1.530431 }, { "acc": 0.99548607, "epoch": 45.24724630888212, "grad_norm": 0.00162605382502079, "learning_rate": 2.4489357883024097e-07, "loss": 0.02436081, "memory(GiB)": 13.7, "step": 96535, "train_speed(iter/s)": 1.530433 }, { "acc": 0.9875, "epoch": 45.24958987579095, "grad_norm": 3.9406070709228516, "learning_rate": 2.446544951491773e-07, "loss": 0.02652654, "memory(GiB)": 13.7, "step": 96540, "train_speed(iter/s)": 1.530435 }, { "acc": 0.98656254, "epoch": 45.25193344269979, "grad_norm": 3.7878360748291016, "learning_rate": 2.4441552578301545e-07, "loss": 0.0559263, "memory(GiB)": 13.7, "step": 96545, "train_speed(iter/s)": 1.530437 }, { "acc": 0.98946428, "epoch": 45.25427700960862, "grad_norm": 0.04596744477748871, "learning_rate": 2.4417667073749756e-07, "loss": 0.01997486, "memory(GiB)": 13.7, "step": 96550, "train_speed(iter/s)": 1.530439 }, { "acc": 0.98447914, "epoch": 45.25662057651746, "grad_norm": 0.0011431548045948148, "learning_rate": 2.43937930018369e-07, "loss": 0.03604972, "memory(GiB)": 13.7, "step": 96555, "train_speed(iter/s)": 1.530442 }, { "acc": 0.98604164, "epoch": 45.2589641434263, "grad_norm": 3.063795804977417, "learning_rate": 2.4369930363136455e-07, "loss": 0.03441994, "memory(GiB)": 13.7, "step": 96560, "train_speed(iter/s)": 1.530446 }, { "acc": 0.97627983, "epoch": 45.26130771033513, "grad_norm": 3.4444901943206787, "learning_rate": 2.43460791582223e-07, "loss": 0.05631514, "memory(GiB)": 13.7, "step": 96565, "train_speed(iter/s)": 1.530446 }, { "acc": 0.98419647, "epoch": 45.263651277243966, "grad_norm": 4.065240383148193, "learning_rate": 2.4322239387667473e-07, "loss": 0.04242711, "memory(GiB)": 13.7, "step": 96570, "train_speed(iter/s)": 1.530446 }, { "acc": 0.98812504, "epoch": 45.2659948441528, "grad_norm": 3.0566561222076416, "learning_rate": 2.4298411052045183e-07, "loss": 0.0486202, "memory(GiB)": 13.7, "step": 96575, "train_speed(iter/s)": 1.530447 }, { "acc": 0.99221687, "epoch": 45.268338411061634, "grad_norm": 2.847827196121216, "learning_rate": 2.427459415192807e-07, "loss": 0.01843832, "memory(GiB)": 13.7, "step": 96580, "train_speed(iter/s)": 1.530451 }, { "acc": 0.9864584, "epoch": 45.27068197797047, "grad_norm": 3.687570810317993, "learning_rate": 2.425078868788869e-07, "loss": 0.02019487, "memory(GiB)": 13.7, "step": 96585, "train_speed(iter/s)": 1.530456 }, { "acc": 0.99020824, "epoch": 45.2730255448793, "grad_norm": 4.047172546386719, "learning_rate": 2.42269946604993e-07, "loss": 0.03349846, "memory(GiB)": 13.7, "step": 96590, "train_speed(iter/s)": 1.530461 }, { "acc": 0.98572311, "epoch": 45.275369111788144, "grad_norm": 6.171697616577148, "learning_rate": 2.4203212070331715e-07, "loss": 0.03738357, "memory(GiB)": 13.7, "step": 96595, "train_speed(iter/s)": 1.530464 }, { "acc": 0.98173075, "epoch": 45.27771267869698, "grad_norm": 6.421278953552246, "learning_rate": 2.417944091795764e-07, "loss": 0.05248606, "memory(GiB)": 13.7, "step": 96600, "train_speed(iter/s)": 1.530464 }, { "acc": 0.984375, "epoch": 45.28005624560581, "grad_norm": 2.4568819999694824, "learning_rate": 2.4155681203948346e-07, "loss": 0.04357616, "memory(GiB)": 13.7, "step": 96605, "train_speed(iter/s)": 1.530463 }, { "acc": 0.99125004, "epoch": 45.28239981251465, "grad_norm": 3.6019821166992188, "learning_rate": 2.413193292887515e-07, "loss": 0.03194605, "memory(GiB)": 13.7, "step": 96610, "train_speed(iter/s)": 1.530466 }, { "acc": 0.98249998, "epoch": 45.28474337942348, "grad_norm": 3.903195381164551, "learning_rate": 2.410819609330876e-07, "loss": 0.07401788, "memory(GiB)": 13.7, "step": 96615, "train_speed(iter/s)": 1.530467 }, { "acc": 0.99361115, "epoch": 45.287086946332316, "grad_norm": 0.0012496052077040076, "learning_rate": 2.4084470697819775e-07, "loss": 0.02557297, "memory(GiB)": 13.7, "step": 96620, "train_speed(iter/s)": 1.530472 }, { "acc": 0.98353634, "epoch": 45.28943051324115, "grad_norm": 3.5449490547180176, "learning_rate": 2.4060756742978457e-07, "loss": 0.03622452, "memory(GiB)": 13.7, "step": 96625, "train_speed(iter/s)": 1.530474 }, { "acc": 0.97946434, "epoch": 45.29177408014999, "grad_norm": 2.167842388153076, "learning_rate": 2.4037054229354677e-07, "loss": 0.05112746, "memory(GiB)": 13.7, "step": 96630, "train_speed(iter/s)": 1.530478 }, { "acc": 0.97666664, "epoch": 45.294117647058826, "grad_norm": 4.565969467163086, "learning_rate": 2.4013363157518435e-07, "loss": 0.04884865, "memory(GiB)": 13.7, "step": 96635, "train_speed(iter/s)": 1.530482 }, { "acc": 0.97777786, "epoch": 45.29646121396766, "grad_norm": 0.0010155875934287906, "learning_rate": 2.398968352803888e-07, "loss": 0.029203, "memory(GiB)": 13.7, "step": 96640, "train_speed(iter/s)": 1.530485 }, { "acc": 0.9874054, "epoch": 45.298804780876495, "grad_norm": 1.5283253192901611, "learning_rate": 2.3966015341485497e-07, "loss": 0.03505795, "memory(GiB)": 13.7, "step": 96645, "train_speed(iter/s)": 1.530483 }, { "acc": 0.98976288, "epoch": 45.30114834778533, "grad_norm": 3.7836220264434814, "learning_rate": 2.394235859842694e-07, "loss": 0.03285612, "memory(GiB)": 13.7, "step": 96650, "train_speed(iter/s)": 1.530485 }, { "acc": 0.98111115, "epoch": 45.30349191469416, "grad_norm": 0.9148851633071899, "learning_rate": 2.3918713299432084e-07, "loss": 0.03696249, "memory(GiB)": 13.7, "step": 96655, "train_speed(iter/s)": 1.53049 }, { "acc": 0.97557545, "epoch": 45.305835481603, "grad_norm": 0.2530047595500946, "learning_rate": 2.3895079445068926e-07, "loss": 0.06041031, "memory(GiB)": 13.7, "step": 96660, "train_speed(iter/s)": 1.530488 }, { "acc": 0.98395834, "epoch": 45.30817904851183, "grad_norm": 2.5191562175750732, "learning_rate": 2.387145703590595e-07, "loss": 0.02911066, "memory(GiB)": 13.7, "step": 96665, "train_speed(iter/s)": 1.530492 }, { "acc": 0.98460226, "epoch": 45.31052261542067, "grad_norm": 3.2174253463745117, "learning_rate": 2.384784607251081e-07, "loss": 0.04129097, "memory(GiB)": 13.7, "step": 96670, "train_speed(iter/s)": 1.530495 }, { "acc": 0.99340277, "epoch": 45.31286618232951, "grad_norm": 4.375946044921875, "learning_rate": 2.382424655545089e-07, "loss": 0.02074831, "memory(GiB)": 13.7, "step": 96675, "train_speed(iter/s)": 1.530498 }, { "acc": 0.9885416, "epoch": 45.31520974923834, "grad_norm": 1.524385929107666, "learning_rate": 2.380065848529367e-07, "loss": 0.02372749, "memory(GiB)": 13.7, "step": 96680, "train_speed(iter/s)": 1.530497 }, { "acc": 0.98718748, "epoch": 45.317553316147176, "grad_norm": 4.1632161140441895, "learning_rate": 2.3777081862605925e-07, "loss": 0.02716245, "memory(GiB)": 13.7, "step": 96685, "train_speed(iter/s)": 1.530499 }, { "acc": 0.9822917, "epoch": 45.31989688305601, "grad_norm": 3.7831547260284424, "learning_rate": 2.3753516687954534e-07, "loss": 0.04439039, "memory(GiB)": 13.7, "step": 96690, "train_speed(iter/s)": 1.530502 }, { "acc": 0.98883934, "epoch": 45.322240449964845, "grad_norm": 5.041736125946045, "learning_rate": 2.3729962961905704e-07, "loss": 0.02988053, "memory(GiB)": 13.7, "step": 96695, "train_speed(iter/s)": 1.530505 }, { "acc": 0.97843752, "epoch": 45.32458401687368, "grad_norm": 2.385594606399536, "learning_rate": 2.3706420685025817e-07, "loss": 0.08180616, "memory(GiB)": 13.7, "step": 96700, "train_speed(iter/s)": 1.530508 }, { "acc": 0.98633928, "epoch": 45.32692758378252, "grad_norm": 3.087954521179199, "learning_rate": 2.3682889857880756e-07, "loss": 0.02576852, "memory(GiB)": 13.7, "step": 96705, "train_speed(iter/s)": 1.53051 }, { "acc": 0.97979164, "epoch": 45.329271150691355, "grad_norm": 8.479289054870605, "learning_rate": 2.3659370481035895e-07, "loss": 0.03007466, "memory(GiB)": 13.7, "step": 96710, "train_speed(iter/s)": 1.530519 }, { "acc": 0.98946428, "epoch": 45.33161471760019, "grad_norm": 4.838992595672607, "learning_rate": 2.3635862555056842e-07, "loss": 0.04223493, "memory(GiB)": 13.7, "step": 96715, "train_speed(iter/s)": 1.530518 }, { "acc": 0.97874994, "epoch": 45.33395828450902, "grad_norm": 4.780587673187256, "learning_rate": 2.3612366080508307e-07, "loss": 0.02862215, "memory(GiB)": 13.7, "step": 96720, "train_speed(iter/s)": 1.530521 }, { "acc": 0.99296389, "epoch": 45.33630185141786, "grad_norm": 2.777233839035034, "learning_rate": 2.3588881057955448e-07, "loss": 0.04460308, "memory(GiB)": 13.7, "step": 96725, "train_speed(iter/s)": 1.530524 }, { "acc": 0.98375988, "epoch": 45.33864541832669, "grad_norm": 0.011615152470767498, "learning_rate": 2.3565407487962482e-07, "loss": 0.02177733, "memory(GiB)": 13.7, "step": 96730, "train_speed(iter/s)": 1.530523 }, { "acc": 0.9859375, "epoch": 45.340988985235526, "grad_norm": 3.2041053771972656, "learning_rate": 2.3541945371093844e-07, "loss": 0.03101174, "memory(GiB)": 13.7, "step": 96735, "train_speed(iter/s)": 1.530523 }, { "acc": 0.9809226, "epoch": 45.34333255214436, "grad_norm": 4.093991756439209, "learning_rate": 2.3518494707913357e-07, "loss": 0.03989226, "memory(GiB)": 13.7, "step": 96740, "train_speed(iter/s)": 1.530527 }, { "acc": 0.98571434, "epoch": 45.3456761190532, "grad_norm": 0.30825284123420715, "learning_rate": 2.349505549898474e-07, "loss": 0.02775286, "memory(GiB)": 13.7, "step": 96745, "train_speed(iter/s)": 1.53053 }, { "acc": 0.9895833, "epoch": 45.348019685962036, "grad_norm": 4.793787479400635, "learning_rate": 2.347162774487143e-07, "loss": 0.02866734, "memory(GiB)": 13.7, "step": 96750, "train_speed(iter/s)": 1.530527 }, { "acc": 0.98495941, "epoch": 45.35036325287087, "grad_norm": 0.9273124933242798, "learning_rate": 2.3448211446136308e-07, "loss": 0.05767782, "memory(GiB)": 13.7, "step": 96755, "train_speed(iter/s)": 1.530528 }, { "acc": 0.98291674, "epoch": 45.352706819779705, "grad_norm": 0.9866018295288086, "learning_rate": 2.3424806603342646e-07, "loss": 0.0354524, "memory(GiB)": 13.7, "step": 96760, "train_speed(iter/s)": 1.530534 }, { "acc": 0.97817116, "epoch": 45.35505038668854, "grad_norm": 1.4265375137329102, "learning_rate": 2.3401413217052657e-07, "loss": 0.05417143, "memory(GiB)": 13.7, "step": 96765, "train_speed(iter/s)": 1.530536 }, { "acc": 0.99282198, "epoch": 45.357393953597374, "grad_norm": 2.982553005218506, "learning_rate": 2.3378031287828895e-07, "loss": 0.03051467, "memory(GiB)": 13.7, "step": 96770, "train_speed(iter/s)": 1.530542 }, { "acc": 0.97979164, "epoch": 45.35973752050621, "grad_norm": 2.682431697845459, "learning_rate": 2.3354660816233183e-07, "loss": 0.05050509, "memory(GiB)": 13.7, "step": 96775, "train_speed(iter/s)": 1.530541 }, { "acc": 0.97904758, "epoch": 45.36208108741504, "grad_norm": 2.7977328300476074, "learning_rate": 2.333130180282741e-07, "loss": 0.05341022, "memory(GiB)": 13.7, "step": 96780, "train_speed(iter/s)": 1.530544 }, { "acc": 0.9864584, "epoch": 45.364424654323884, "grad_norm": 3.8173635005950928, "learning_rate": 2.3307954248172897e-07, "loss": 0.04345983, "memory(GiB)": 13.7, "step": 96785, "train_speed(iter/s)": 1.530544 }, { "acc": 0.98708334, "epoch": 45.36676822123272, "grad_norm": 0.027123037725687027, "learning_rate": 2.3284618152831087e-07, "loss": 0.02441466, "memory(GiB)": 13.7, "step": 96790, "train_speed(iter/s)": 1.530548 }, { "acc": 0.98083334, "epoch": 45.36911178814155, "grad_norm": 4.88455867767334, "learning_rate": 2.3261293517362754e-07, "loss": 0.02039655, "memory(GiB)": 13.7, "step": 96795, "train_speed(iter/s)": 1.530549 }, { "acc": 0.97645836, "epoch": 45.37145535505039, "grad_norm": 0.0007223146385513246, "learning_rate": 2.323798034232845e-07, "loss": 0.03255846, "memory(GiB)": 13.7, "step": 96800, "train_speed(iter/s)": 1.530546 }, { "acc": 0.9875, "epoch": 45.37379892195922, "grad_norm": 0.09543117880821228, "learning_rate": 2.321467862828872e-07, "loss": 0.01863146, "memory(GiB)": 13.7, "step": 96805, "train_speed(iter/s)": 1.530547 }, { "acc": 0.984375, "epoch": 45.376142488868055, "grad_norm": 1.1587899923324585, "learning_rate": 2.319138837580351e-07, "loss": 0.03372415, "memory(GiB)": 13.7, "step": 96810, "train_speed(iter/s)": 1.530547 }, { "acc": 0.9916667, "epoch": 45.37848605577689, "grad_norm": 0.000913990312255919, "learning_rate": 2.3168109585432817e-07, "loss": 0.02340139, "memory(GiB)": 13.7, "step": 96815, "train_speed(iter/s)": 1.530551 }, { "acc": 0.98988972, "epoch": 45.38082962268573, "grad_norm": 0.5965102910995483, "learning_rate": 2.314484225773602e-07, "loss": 0.01769937, "memory(GiB)": 13.7, "step": 96820, "train_speed(iter/s)": 1.530549 }, { "acc": 0.98979168, "epoch": 45.383173189594565, "grad_norm": 0.08246682584285736, "learning_rate": 2.3121586393272513e-07, "loss": 0.01955636, "memory(GiB)": 13.7, "step": 96825, "train_speed(iter/s)": 1.53055 }, { "acc": 0.98175602, "epoch": 45.3855167565034, "grad_norm": 2.718369722366333, "learning_rate": 2.3098341992601285e-07, "loss": 0.04935791, "memory(GiB)": 13.7, "step": 96830, "train_speed(iter/s)": 1.530549 }, { "acc": 0.98638897, "epoch": 45.387860323412234, "grad_norm": 3.7235665321350098, "learning_rate": 2.307510905628089e-07, "loss": 0.047471, "memory(GiB)": 13.7, "step": 96835, "train_speed(iter/s)": 1.530556 }, { "acc": 0.9963315, "epoch": 45.39020389032107, "grad_norm": 3.8456006050109863, "learning_rate": 2.3051887584869999e-07, "loss": 0.03145561, "memory(GiB)": 13.7, "step": 96840, "train_speed(iter/s)": 1.530562 }, { "acc": 0.9885416, "epoch": 45.3925474572299, "grad_norm": 3.365147829055786, "learning_rate": 2.302867757892655e-07, "loss": 0.0571578, "memory(GiB)": 13.7, "step": 96845, "train_speed(iter/s)": 1.530567 }, { "acc": 0.98562498, "epoch": 45.39489102413874, "grad_norm": 2.423469066619873, "learning_rate": 2.3005479039008645e-07, "loss": 0.02325665, "memory(GiB)": 13.7, "step": 96850, "train_speed(iter/s)": 1.53057 }, { "acc": 0.97715282, "epoch": 45.39723459104757, "grad_norm": 4.270088195800781, "learning_rate": 2.2982291965673736e-07, "loss": 0.06693815, "memory(GiB)": 13.7, "step": 96855, "train_speed(iter/s)": 1.530573 }, { "acc": 0.9780797, "epoch": 45.39957815795641, "grad_norm": 3.341440200805664, "learning_rate": 2.2959116359479317e-07, "loss": 0.05748683, "memory(GiB)": 13.7, "step": 96860, "train_speed(iter/s)": 1.530576 }, { "acc": 0.99080353, "epoch": 45.40192172486525, "grad_norm": 0.6819918751716614, "learning_rate": 2.293595222098228e-07, "loss": 0.04004458, "memory(GiB)": 13.7, "step": 96865, "train_speed(iter/s)": 1.530577 }, { "acc": 0.98291664, "epoch": 45.40426529177408, "grad_norm": 4.008728981018066, "learning_rate": 2.2912799550739615e-07, "loss": 0.05072585, "memory(GiB)": 13.7, "step": 96870, "train_speed(iter/s)": 1.530577 }, { "acc": 0.98680553, "epoch": 45.406608858682915, "grad_norm": 4.106185436248779, "learning_rate": 2.288965834930772e-07, "loss": 0.0468665, "memory(GiB)": 13.7, "step": 96875, "train_speed(iter/s)": 1.530579 }, { "acc": 0.98611107, "epoch": 45.40895242559175, "grad_norm": 4.177638530731201, "learning_rate": 2.286652861724281e-07, "loss": 0.0338838, "memory(GiB)": 13.7, "step": 96880, "train_speed(iter/s)": 1.530578 }, { "acc": 0.98187504, "epoch": 45.411295992500584, "grad_norm": 1.6698617935180664, "learning_rate": 2.2843410355100946e-07, "loss": 0.04327694, "memory(GiB)": 13.7, "step": 96885, "train_speed(iter/s)": 1.530584 }, { "acc": 0.98113098, "epoch": 45.41363955940942, "grad_norm": 4.443790435791016, "learning_rate": 2.2820303563437674e-07, "loss": 0.02035468, "memory(GiB)": 13.7, "step": 96890, "train_speed(iter/s)": 1.530584 }, { "acc": 0.98979168, "epoch": 45.41598312631826, "grad_norm": 3.72521710395813, "learning_rate": 2.2797208242808503e-07, "loss": 0.02834503, "memory(GiB)": 13.7, "step": 96895, "train_speed(iter/s)": 1.530582 }, { "acc": 0.97791662, "epoch": 45.418326693227094, "grad_norm": 2.323131799697876, "learning_rate": 2.2774124393768486e-07, "loss": 0.03901754, "memory(GiB)": 13.7, "step": 96900, "train_speed(iter/s)": 1.53058 }, { "acc": 0.99434528, "epoch": 45.42067026013593, "grad_norm": 2.4817585945129395, "learning_rate": 2.2751052016872625e-07, "loss": 0.03326358, "memory(GiB)": 13.7, "step": 96905, "train_speed(iter/s)": 1.530584 }, { "acc": 0.9916667, "epoch": 45.42301382704476, "grad_norm": 2.325517416000366, "learning_rate": 2.2727991112675414e-07, "loss": 0.01415855, "memory(GiB)": 13.7, "step": 96910, "train_speed(iter/s)": 1.530584 }, { "acc": 0.99050598, "epoch": 45.4253573939536, "grad_norm": 4.501628398895264, "learning_rate": 2.2704941681731197e-07, "loss": 0.0390894, "memory(GiB)": 13.7, "step": 96915, "train_speed(iter/s)": 1.530584 }, { "acc": 0.9926136, "epoch": 45.42770096086243, "grad_norm": 1.2185618877410889, "learning_rate": 2.2681903724593969e-07, "loss": 0.02515096, "memory(GiB)": 13.7, "step": 96920, "train_speed(iter/s)": 1.530585 }, { "acc": 0.9921875, "epoch": 45.430044527771265, "grad_norm": 2.2065703868865967, "learning_rate": 2.2658877241817462e-07, "loss": 0.0378456, "memory(GiB)": 13.7, "step": 96925, "train_speed(iter/s)": 1.530587 }, { "acc": 0.98241806, "epoch": 45.4323880946801, "grad_norm": 0.6920132637023926, "learning_rate": 2.2635862233955222e-07, "loss": 0.04814616, "memory(GiB)": 13.7, "step": 96930, "train_speed(iter/s)": 1.530589 }, { "acc": 0.97180557, "epoch": 45.43473166158894, "grad_norm": 6.147344589233398, "learning_rate": 2.2612858701560368e-07, "loss": 0.05509441, "memory(GiB)": 13.7, "step": 96935, "train_speed(iter/s)": 1.530595 }, { "acc": 0.98583336, "epoch": 45.437075228497775, "grad_norm": 0.017296727746725082, "learning_rate": 2.2589866645186014e-07, "loss": 0.03816775, "memory(GiB)": 13.7, "step": 96940, "train_speed(iter/s)": 1.530597 }, { "acc": 0.99437504, "epoch": 45.43941879540661, "grad_norm": 4.807565689086914, "learning_rate": 2.2566886065384604e-07, "loss": 0.03701537, "memory(GiB)": 13.7, "step": 96945, "train_speed(iter/s)": 1.530602 }, { "acc": 0.98907204, "epoch": 45.441762362315444, "grad_norm": 3.8618407249450684, "learning_rate": 2.2543916962708643e-07, "loss": 0.02598018, "memory(GiB)": 13.7, "step": 96950, "train_speed(iter/s)": 1.530604 }, { "acc": 0.97250004, "epoch": 45.44410592922428, "grad_norm": 3.4117307662963867, "learning_rate": 2.2520959337710195e-07, "loss": 0.03519165, "memory(GiB)": 13.7, "step": 96955, "train_speed(iter/s)": 1.530605 }, { "acc": 0.99219704, "epoch": 45.44644949613311, "grad_norm": 2.0510528087615967, "learning_rate": 2.249801319094092e-07, "loss": 0.03005979, "memory(GiB)": 13.7, "step": 96960, "train_speed(iter/s)": 1.530602 }, { "acc": 0.97145834, "epoch": 45.44879306304195, "grad_norm": 6.367698669433594, "learning_rate": 2.2475078522952717e-07, "loss": 0.0532473, "memory(GiB)": 13.7, "step": 96965, "train_speed(iter/s)": 1.530604 }, { "acc": 0.98083334, "epoch": 45.45113662995079, "grad_norm": 4.766864776611328, "learning_rate": 2.2452155334296586e-07, "loss": 0.04432473, "memory(GiB)": 13.7, "step": 96970, "train_speed(iter/s)": 1.530606 }, { "acc": 0.97666664, "epoch": 45.45348019685962, "grad_norm": 3.874516010284424, "learning_rate": 2.2429243625523647e-07, "loss": 0.03352657, "memory(GiB)": 13.7, "step": 96975, "train_speed(iter/s)": 1.530608 }, { "acc": 0.98217258, "epoch": 45.45582376376846, "grad_norm": 2.5152111053466797, "learning_rate": 2.2406343397184568e-07, "loss": 0.03151162, "memory(GiB)": 13.7, "step": 96980, "train_speed(iter/s)": 1.530611 }, { "acc": 0.98708334, "epoch": 45.45816733067729, "grad_norm": 0.08391059190034866, "learning_rate": 2.2383454649829848e-07, "loss": 0.03592406, "memory(GiB)": 13.7, "step": 96985, "train_speed(iter/s)": 1.530614 }, { "acc": 0.98833332, "epoch": 45.460510897586126, "grad_norm": 2.9993419647216797, "learning_rate": 2.236057738400945e-07, "loss": 0.03201382, "memory(GiB)": 13.7, "step": 96990, "train_speed(iter/s)": 1.530616 }, { "acc": 0.9885416, "epoch": 45.46285446449496, "grad_norm": 3.075824022293091, "learning_rate": 2.2337711600273645e-07, "loss": 0.02307542, "memory(GiB)": 13.7, "step": 96995, "train_speed(iter/s)": 1.530618 }, { "acc": 0.97843752, "epoch": 45.465198031403794, "grad_norm": 4.506887912750244, "learning_rate": 2.2314857299171832e-07, "loss": 0.04481501, "memory(GiB)": 13.7, "step": 97000, "train_speed(iter/s)": 1.530622 }, { "acc": 0.99541664, "epoch": 45.46754159831263, "grad_norm": 2.7967309951782227, "learning_rate": 2.229201448125324e-07, "loss": 0.01499536, "memory(GiB)": 13.7, "step": 97005, "train_speed(iter/s)": 1.53062 }, { "acc": 0.97729168, "epoch": 45.46988516522147, "grad_norm": 4.01070499420166, "learning_rate": 2.2269183147067203e-07, "loss": 0.03329146, "memory(GiB)": 13.7, "step": 97010, "train_speed(iter/s)": 1.530624 }, { "acc": 0.99375, "epoch": 45.472228732130304, "grad_norm": 1.2216687202453613, "learning_rate": 2.2246363297162232e-07, "loss": 0.02959062, "memory(GiB)": 13.7, "step": 97015, "train_speed(iter/s)": 1.530627 }, { "acc": 0.98321428, "epoch": 45.47457229903914, "grad_norm": 0.007444014307111502, "learning_rate": 2.2223554932087052e-07, "loss": 0.04965551, "memory(GiB)": 13.7, "step": 97020, "train_speed(iter/s)": 1.530629 }, { "acc": 0.98803024, "epoch": 45.47691586594797, "grad_norm": 2.9180045127868652, "learning_rate": 2.2200758052389837e-07, "loss": 0.06062844, "memory(GiB)": 13.7, "step": 97025, "train_speed(iter/s)": 1.530632 }, { "acc": 0.99100275, "epoch": 45.47925943285681, "grad_norm": 2.380568265914917, "learning_rate": 2.2177972658618545e-07, "loss": 0.02165585, "memory(GiB)": 13.7, "step": 97030, "train_speed(iter/s)": 1.530632 }, { "acc": 0.98145828, "epoch": 45.48160299976564, "grad_norm": 5.696325302124023, "learning_rate": 2.2155198751320843e-07, "loss": 0.02581534, "memory(GiB)": 13.7, "step": 97035, "train_speed(iter/s)": 1.530635 }, { "acc": 0.99196434, "epoch": 45.483946566674476, "grad_norm": 3.400343418121338, "learning_rate": 2.2132436331044178e-07, "loss": 0.02780324, "memory(GiB)": 13.7, "step": 97040, "train_speed(iter/s)": 1.530637 }, { "acc": 0.97937508, "epoch": 45.48629013358332, "grad_norm": 3.5268049240112305, "learning_rate": 2.2109685398335735e-07, "loss": 0.03915346, "memory(GiB)": 13.7, "step": 97045, "train_speed(iter/s)": 1.530641 }, { "acc": 0.99125004, "epoch": 45.48863370049215, "grad_norm": 3.5874006748199463, "learning_rate": 2.2086945953742074e-07, "loss": 0.02858528, "memory(GiB)": 13.7, "step": 97050, "train_speed(iter/s)": 1.530642 }, { "acc": 0.98291664, "epoch": 45.490977267400986, "grad_norm": 0.9827322959899902, "learning_rate": 2.20642179978102e-07, "loss": 0.04970046, "memory(GiB)": 13.7, "step": 97055, "train_speed(iter/s)": 1.530638 }, { "acc": 0.99008923, "epoch": 45.49332083430982, "grad_norm": 2.505091667175293, "learning_rate": 2.204150153108612e-07, "loss": 0.02820626, "memory(GiB)": 13.7, "step": 97060, "train_speed(iter/s)": 1.530641 }, { "acc": 0.99050598, "epoch": 45.495664401218654, "grad_norm": 3.2155702114105225, "learning_rate": 2.2018796554116069e-07, "loss": 0.0406527, "memory(GiB)": 13.7, "step": 97065, "train_speed(iter/s)": 1.530647 }, { "acc": 0.98633928, "epoch": 45.49800796812749, "grad_norm": 2.667964458465576, "learning_rate": 2.199610306744561e-07, "loss": 0.02582051, "memory(GiB)": 13.7, "step": 97070, "train_speed(iter/s)": 1.530647 }, { "acc": 0.98745041, "epoch": 45.50035153503632, "grad_norm": 3.385221004486084, "learning_rate": 2.197342107162031e-07, "loss": 0.03832601, "memory(GiB)": 13.7, "step": 97075, "train_speed(iter/s)": 1.530647 }, { "acc": 0.98479166, "epoch": 45.50269510194516, "grad_norm": 1.922516942024231, "learning_rate": 2.195075056718534e-07, "loss": 0.03354259, "memory(GiB)": 13.7, "step": 97080, "train_speed(iter/s)": 1.530648 }, { "acc": 0.98500004, "epoch": 45.505038668854, "grad_norm": 2.622504234313965, "learning_rate": 2.1928091554685712e-07, "loss": 0.02846354, "memory(GiB)": 13.7, "step": 97085, "train_speed(iter/s)": 1.530653 }, { "acc": 0.97946434, "epoch": 45.50738223576283, "grad_norm": 6.709734916687012, "learning_rate": 2.190544403466605e-07, "loss": 0.03066798, "memory(GiB)": 13.7, "step": 97090, "train_speed(iter/s)": 1.530661 }, { "acc": 0.99375, "epoch": 45.50972580267167, "grad_norm": 4.2390971183776855, "learning_rate": 2.1882808007670583e-07, "loss": 0.0153599, "memory(GiB)": 13.7, "step": 97095, "train_speed(iter/s)": 1.530662 }, { "acc": 0.96906242, "epoch": 45.5120693695805, "grad_norm": 3.9649298191070557, "learning_rate": 2.18601834742436e-07, "loss": 0.0800761, "memory(GiB)": 13.7, "step": 97100, "train_speed(iter/s)": 1.530666 }, { "acc": 0.98812504, "epoch": 45.514412936489336, "grad_norm": 0.2120145708322525, "learning_rate": 2.1837570434928664e-07, "loss": 0.03986602, "memory(GiB)": 13.7, "step": 97105, "train_speed(iter/s)": 1.530667 }, { "acc": 0.97820511, "epoch": 45.51675650339817, "grad_norm": 2.777536153793335, "learning_rate": 2.1814968890269623e-07, "loss": 0.04501395, "memory(GiB)": 13.7, "step": 97110, "train_speed(iter/s)": 1.530666 }, { "acc": 0.9833334, "epoch": 45.519100070307005, "grad_norm": 2.5933022499084473, "learning_rate": 2.1792378840809599e-07, "loss": 0.0528855, "memory(GiB)": 13.7, "step": 97115, "train_speed(iter/s)": 1.53067 }, { "acc": 0.98625002, "epoch": 45.521443637215846, "grad_norm": 3.2335610389709473, "learning_rate": 2.1769800287091602e-07, "loss": 0.05517855, "memory(GiB)": 13.7, "step": 97120, "train_speed(iter/s)": 1.530671 }, { "acc": 0.99020834, "epoch": 45.52378720412468, "grad_norm": 5.619510650634766, "learning_rate": 2.1747233229658307e-07, "loss": 0.04482812, "memory(GiB)": 13.7, "step": 97125, "train_speed(iter/s)": 1.530674 }, { "acc": 0.9697917, "epoch": 45.526130771033515, "grad_norm": 3.915792465209961, "learning_rate": 2.1724677669052177e-07, "loss": 0.05093644, "memory(GiB)": 13.7, "step": 97130, "train_speed(iter/s)": 1.530675 }, { "acc": 0.9957386, "epoch": 45.52847433794235, "grad_norm": 0.013526706956326962, "learning_rate": 2.1702133605815442e-07, "loss": 0.03176544, "memory(GiB)": 13.7, "step": 97135, "train_speed(iter/s)": 1.530678 }, { "acc": 1.0, "epoch": 45.53081790485118, "grad_norm": 0.6719520092010498, "learning_rate": 2.1679601040489727e-07, "loss": 0.05411453, "memory(GiB)": 13.7, "step": 97140, "train_speed(iter/s)": 1.530681 }, { "acc": 0.9864584, "epoch": 45.53316147176002, "grad_norm": 3.0080199241638184, "learning_rate": 2.165707997361699e-07, "loss": 0.01904201, "memory(GiB)": 13.7, "step": 97145, "train_speed(iter/s)": 1.530681 }, { "acc": 0.97854166, "epoch": 45.53550503866885, "grad_norm": 2.4732844829559326, "learning_rate": 2.1634570405738406e-07, "loss": 0.08464139, "memory(GiB)": 13.7, "step": 97150, "train_speed(iter/s)": 1.530682 }, { "acc": 0.97988091, "epoch": 45.537848605577686, "grad_norm": 4.529921531677246, "learning_rate": 2.1612072337395047e-07, "loss": 0.04912063, "memory(GiB)": 13.7, "step": 97155, "train_speed(iter/s)": 1.530686 }, { "acc": 0.98447914, "epoch": 45.54019217248653, "grad_norm": 2.1461310386657715, "learning_rate": 2.158958576912754e-07, "loss": 0.03667257, "memory(GiB)": 13.7, "step": 97160, "train_speed(iter/s)": 1.530688 }, { "acc": 0.990625, "epoch": 45.54253573939536, "grad_norm": 0.0017947547603398561, "learning_rate": 2.1567110701476666e-07, "loss": 0.02395636, "memory(GiB)": 13.7, "step": 97165, "train_speed(iter/s)": 1.530693 }, { "acc": 0.98550596, "epoch": 45.544879306304196, "grad_norm": 3.1446635723114014, "learning_rate": 2.154464713498251e-07, "loss": 0.03995582, "memory(GiB)": 13.7, "step": 97170, "train_speed(iter/s)": 1.530696 }, { "acc": 0.99484844, "epoch": 45.54722287321303, "grad_norm": 0.05431070551276207, "learning_rate": 2.1522195070184962e-07, "loss": 0.01379523, "memory(GiB)": 13.7, "step": 97175, "train_speed(iter/s)": 1.530698 }, { "acc": 0.9854167, "epoch": 45.549566440121865, "grad_norm": 0.004634136334061623, "learning_rate": 2.149975450762382e-07, "loss": 0.04257413, "memory(GiB)": 13.7, "step": 97180, "train_speed(iter/s)": 1.530702 }, { "acc": 0.97654762, "epoch": 45.5519100070307, "grad_norm": 2.9596803188323975, "learning_rate": 2.147732544783832e-07, "loss": 0.05056562, "memory(GiB)": 13.7, "step": 97185, "train_speed(iter/s)": 1.530708 }, { "acc": 0.98395834, "epoch": 45.55425357393953, "grad_norm": 0.311581552028656, "learning_rate": 2.1454907891367807e-07, "loss": 0.03536097, "memory(GiB)": 13.7, "step": 97190, "train_speed(iter/s)": 1.53071 }, { "acc": 0.984375, "epoch": 45.55659714084837, "grad_norm": 7.1548357009887695, "learning_rate": 2.1432501838750851e-07, "loss": 0.037672, "memory(GiB)": 13.7, "step": 97195, "train_speed(iter/s)": 1.530709 }, { "acc": 0.97994051, "epoch": 45.55894070775721, "grad_norm": 4.119283676147461, "learning_rate": 2.1410107290526312e-07, "loss": 0.03798528, "memory(GiB)": 13.7, "step": 97200, "train_speed(iter/s)": 1.530708 }, { "acc": 0.98570518, "epoch": 45.56128427466604, "grad_norm": 3.925405263900757, "learning_rate": 2.1387724247232247e-07, "loss": 0.03622254, "memory(GiB)": 13.7, "step": 97205, "train_speed(iter/s)": 1.530706 }, { "acc": 0.98227673, "epoch": 45.56362784157488, "grad_norm": 1.9493775367736816, "learning_rate": 2.1365352709406845e-07, "loss": 0.06465499, "memory(GiB)": 13.7, "step": 97210, "train_speed(iter/s)": 1.530705 }, { "acc": 0.98988094, "epoch": 45.56597140848371, "grad_norm": 3.7697110176086426, "learning_rate": 2.1342992677587733e-07, "loss": 0.03837691, "memory(GiB)": 13.7, "step": 97215, "train_speed(iter/s)": 1.530706 }, { "acc": 0.99383926, "epoch": 45.568314975392546, "grad_norm": 3.2280614376068115, "learning_rate": 2.1320644152312312e-07, "loss": 0.02779487, "memory(GiB)": 13.7, "step": 97220, "train_speed(iter/s)": 1.530714 }, { "acc": 0.98916664, "epoch": 45.57065854230138, "grad_norm": 0.00045751038123853505, "learning_rate": 2.1298307134117996e-07, "loss": 0.02436562, "memory(GiB)": 13.7, "step": 97225, "train_speed(iter/s)": 1.530716 }, { "acc": 0.97374992, "epoch": 45.573002109210215, "grad_norm": 6.893582820892334, "learning_rate": 2.1275981623541462e-07, "loss": 0.05027423, "memory(GiB)": 13.7, "step": 97230, "train_speed(iter/s)": 1.530724 }, { "acc": 0.98718748, "epoch": 45.575345676119056, "grad_norm": 2.891101360321045, "learning_rate": 2.1253667621119505e-07, "loss": 0.02928273, "memory(GiB)": 13.7, "step": 97235, "train_speed(iter/s)": 1.530727 }, { "acc": 1.0, "epoch": 45.57768924302789, "grad_norm": 0.6986368298530579, "learning_rate": 2.1231365127388367e-07, "loss": 0.00662932, "memory(GiB)": 13.7, "step": 97240, "train_speed(iter/s)": 1.530729 }, { "acc": 0.9802084, "epoch": 45.580032809936725, "grad_norm": 3.5846455097198486, "learning_rate": 2.1209074142884233e-07, "loss": 0.04411116, "memory(GiB)": 13.7, "step": 97245, "train_speed(iter/s)": 1.530731 }, { "acc": 0.9885417, "epoch": 45.58237637684556, "grad_norm": 0.006006249226629734, "learning_rate": 2.118679466814278e-07, "loss": 0.04388879, "memory(GiB)": 13.7, "step": 97250, "train_speed(iter/s)": 1.530733 }, { "acc": 0.97458334, "epoch": 45.58471994375439, "grad_norm": 3.7063708305358887, "learning_rate": 2.1164526703699533e-07, "loss": 0.0619235, "memory(GiB)": 13.7, "step": 97255, "train_speed(iter/s)": 1.530732 }, { "acc": 0.96748505, "epoch": 45.58706351066323, "grad_norm": 3.9563283920288086, "learning_rate": 2.11422702500899e-07, "loss": 0.04378806, "memory(GiB)": 13.7, "step": 97260, "train_speed(iter/s)": 1.530733 }, { "acc": 0.98696423, "epoch": 45.58940707757206, "grad_norm": 2.289950370788574, "learning_rate": 2.1120025307848727e-07, "loss": 0.03746432, "memory(GiB)": 13.7, "step": 97265, "train_speed(iter/s)": 1.530738 }, { "acc": 0.98968754, "epoch": 45.591750644480896, "grad_norm": 2.4983174800872803, "learning_rate": 2.109779187751076e-07, "loss": 0.03466141, "memory(GiB)": 13.7, "step": 97270, "train_speed(iter/s)": 1.530741 }, { "acc": 0.98966351, "epoch": 45.59409421138974, "grad_norm": 3.472280502319336, "learning_rate": 2.107556995961035e-07, "loss": 0.03473809, "memory(GiB)": 13.7, "step": 97275, "train_speed(iter/s)": 1.530743 }, { "acc": 0.98291664, "epoch": 45.59643777829857, "grad_norm": 0.0014208884676918387, "learning_rate": 2.1053359554681738e-07, "loss": 0.02808143, "memory(GiB)": 13.7, "step": 97280, "train_speed(iter/s)": 1.530745 }, { "acc": 0.9731945, "epoch": 45.598781345207406, "grad_norm": 1.2471883296966553, "learning_rate": 2.1031160663258666e-07, "loss": 0.07195908, "memory(GiB)": 13.7, "step": 97285, "train_speed(iter/s)": 1.530745 }, { "acc": 0.98249998, "epoch": 45.60112491211624, "grad_norm": 3.5321316719055176, "learning_rate": 2.1008973285874874e-07, "loss": 0.06683698, "memory(GiB)": 13.7, "step": 97290, "train_speed(iter/s)": 1.530749 }, { "acc": 0.98832798, "epoch": 45.603468479025075, "grad_norm": 3.283376693725586, "learning_rate": 2.0986797423063553e-07, "loss": 0.01793914, "memory(GiB)": 13.7, "step": 97295, "train_speed(iter/s)": 1.53075 }, { "acc": 0.96870041, "epoch": 45.60581204593391, "grad_norm": 6.891825199127197, "learning_rate": 2.0964633075357774e-07, "loss": 0.05857009, "memory(GiB)": 13.7, "step": 97300, "train_speed(iter/s)": 1.530757 }, { "acc": 0.97770824, "epoch": 45.608155612842744, "grad_norm": 2.3286986351013184, "learning_rate": 2.094248024329034e-07, "loss": 0.03716829, "memory(GiB)": 13.7, "step": 97305, "train_speed(iter/s)": 1.530757 }, { "acc": 0.98490534, "epoch": 45.610499179751585, "grad_norm": 0.374676376581192, "learning_rate": 2.0920338927393602e-07, "loss": 0.04180026, "memory(GiB)": 13.7, "step": 97310, "train_speed(iter/s)": 1.530758 }, { "acc": 0.9979167, "epoch": 45.61284274666042, "grad_norm": 0.00013956421753391623, "learning_rate": 2.0898209128199916e-07, "loss": 0.02191546, "memory(GiB)": 13.7, "step": 97315, "train_speed(iter/s)": 1.530756 }, { "acc": 0.99125004, "epoch": 45.615186313569254, "grad_norm": 1.9389828443527222, "learning_rate": 2.0876090846241134e-07, "loss": 0.0181732, "memory(GiB)": 13.7, "step": 97320, "train_speed(iter/s)": 1.530761 }, { "acc": 0.98113976, "epoch": 45.61752988047809, "grad_norm": 2.323241949081421, "learning_rate": 2.085398408204895e-07, "loss": 0.02973999, "memory(GiB)": 13.7, "step": 97325, "train_speed(iter/s)": 1.53076 }, { "acc": 0.99330359, "epoch": 45.61987344738692, "grad_norm": 0.002057554665952921, "learning_rate": 2.0831888836154653e-07, "loss": 0.01702214, "memory(GiB)": 13.7, "step": 97330, "train_speed(iter/s)": 1.530762 }, { "acc": 0.98916664, "epoch": 45.62221701429576, "grad_norm": 2.6855390071868896, "learning_rate": 2.080980510908944e-07, "loss": 0.02311482, "memory(GiB)": 13.7, "step": 97335, "train_speed(iter/s)": 1.530767 }, { "acc": 0.9760417, "epoch": 45.62456058120459, "grad_norm": 0.001976459054276347, "learning_rate": 2.078773290138411e-07, "loss": 0.03812302, "memory(GiB)": 13.7, "step": 97340, "train_speed(iter/s)": 1.530768 }, { "acc": 0.98698778, "epoch": 45.626904148113425, "grad_norm": 1.833300232887268, "learning_rate": 2.0765672213569075e-07, "loss": 0.04561085, "memory(GiB)": 13.7, "step": 97345, "train_speed(iter/s)": 1.530766 }, { "acc": 0.99508934, "epoch": 45.62924771502227, "grad_norm": 1.0206851959228516, "learning_rate": 2.07436230461748e-07, "loss": 0.03056583, "memory(GiB)": 13.7, "step": 97350, "train_speed(iter/s)": 1.53077 }, { "acc": 0.96615086, "epoch": 45.6315912819311, "grad_norm": 7.262584209442139, "learning_rate": 2.0721585399731143e-07, "loss": 0.07814631, "memory(GiB)": 13.7, "step": 97355, "train_speed(iter/s)": 1.530773 }, { "acc": 0.96958332, "epoch": 45.633934848839935, "grad_norm": 3.3949246406555176, "learning_rate": 2.06995592747679e-07, "loss": 0.05041586, "memory(GiB)": 13.7, "step": 97360, "train_speed(iter/s)": 1.53077 }, { "acc": 0.99875002, "epoch": 45.63627841574877, "grad_norm": 1.955566167831421, "learning_rate": 2.0677544671814323e-07, "loss": 0.0206824, "memory(GiB)": 13.7, "step": 97365, "train_speed(iter/s)": 1.530771 }, { "acc": 0.98686008, "epoch": 45.638621982657604, "grad_norm": 1.5886497497558594, "learning_rate": 2.0655541591399883e-07, "loss": 0.03040304, "memory(GiB)": 13.7, "step": 97370, "train_speed(iter/s)": 1.53077 }, { "acc": 0.97729168, "epoch": 45.64096554956644, "grad_norm": 3.7338192462921143, "learning_rate": 2.063355003405326e-07, "loss": 0.031105, "memory(GiB)": 13.7, "step": 97375, "train_speed(iter/s)": 1.530774 }, { "acc": 0.97453127, "epoch": 45.64330911647527, "grad_norm": 4.942114353179932, "learning_rate": 2.061157000030304e-07, "loss": 0.09803642, "memory(GiB)": 13.7, "step": 97380, "train_speed(iter/s)": 1.530773 }, { "acc": 0.98557463, "epoch": 45.645652683384114, "grad_norm": 1.442783236503601, "learning_rate": 2.0589601490677638e-07, "loss": 0.04412949, "memory(GiB)": 13.7, "step": 97385, "train_speed(iter/s)": 1.530776 }, { "acc": 0.98299675, "epoch": 45.64799625029295, "grad_norm": 3.66900634765625, "learning_rate": 2.0567644505705073e-07, "loss": 0.04049645, "memory(GiB)": 13.7, "step": 97390, "train_speed(iter/s)": 1.530783 }, { "acc": 0.990625, "epoch": 45.65033981720178, "grad_norm": 0.0051107751205563545, "learning_rate": 2.0545699045913098e-07, "loss": 0.03824035, "memory(GiB)": 13.7, "step": 97395, "train_speed(iter/s)": 1.530782 }, { "acc": 0.9927084, "epoch": 45.65268338411062, "grad_norm": 1.1024322509765625, "learning_rate": 2.0523765111829128e-07, "loss": 0.02911901, "memory(GiB)": 13.7, "step": 97400, "train_speed(iter/s)": 1.530787 }, { "acc": 0.97727718, "epoch": 45.65502695101945, "grad_norm": 0.0009542433544993401, "learning_rate": 2.0501842703980624e-07, "loss": 0.05314477, "memory(GiB)": 13.7, "step": 97405, "train_speed(iter/s)": 1.530794 }, { "acc": 0.9875, "epoch": 45.657370517928285, "grad_norm": 2.351433038711548, "learning_rate": 2.047993182289429e-07, "loss": 0.04378338, "memory(GiB)": 13.7, "step": 97410, "train_speed(iter/s)": 1.530797 }, { "acc": 0.98312492, "epoch": 45.65971408483712, "grad_norm": 2.410888910293579, "learning_rate": 2.045803246909692e-07, "loss": 0.03287536, "memory(GiB)": 13.7, "step": 97415, "train_speed(iter/s)": 1.530801 }, { "acc": 0.99055557, "epoch": 45.662057651745954, "grad_norm": 3.0055110454559326, "learning_rate": 2.0436144643114937e-07, "loss": 0.02156555, "memory(GiB)": 13.7, "step": 97420, "train_speed(iter/s)": 1.530804 }, { "acc": 0.98270836, "epoch": 45.664401218654795, "grad_norm": 3.8490238189697266, "learning_rate": 2.0414268345474308e-07, "loss": 0.02598356, "memory(GiB)": 13.7, "step": 97425, "train_speed(iter/s)": 1.530803 }, { "acc": 0.99187498, "epoch": 45.66674478556363, "grad_norm": 1.4043012857437134, "learning_rate": 2.039240357670089e-07, "loss": 0.02425814, "memory(GiB)": 13.7, "step": 97430, "train_speed(iter/s)": 1.530799 }, { "acc": 0.98145828, "epoch": 45.669088352472464, "grad_norm": 3.1894452571868896, "learning_rate": 2.0370550337320328e-07, "loss": 0.03815085, "memory(GiB)": 13.7, "step": 97435, "train_speed(iter/s)": 1.5308 }, { "acc": 0.97277775, "epoch": 45.6714319193813, "grad_norm": 2.863405704498291, "learning_rate": 2.034870862785792e-07, "loss": 0.08812842, "memory(GiB)": 13.7, "step": 97440, "train_speed(iter/s)": 1.530807 }, { "acc": 0.98250008, "epoch": 45.67377548629013, "grad_norm": 0.0014134490629658103, "learning_rate": 2.0326878448838586e-07, "loss": 0.0262763, "memory(GiB)": 13.7, "step": 97445, "train_speed(iter/s)": 1.530811 }, { "acc": 0.97624998, "epoch": 45.67611905319897, "grad_norm": 2.485142707824707, "learning_rate": 2.0305059800787077e-07, "loss": 0.05385044, "memory(GiB)": 13.7, "step": 97450, "train_speed(iter/s)": 1.530815 }, { "acc": 0.98041668, "epoch": 45.6784626201078, "grad_norm": 6.667825698852539, "learning_rate": 2.028325268422781e-07, "loss": 0.03119805, "memory(GiB)": 13.7, "step": 97455, "train_speed(iter/s)": 1.530818 }, { "acc": 0.99020824, "epoch": 45.68080618701664, "grad_norm": 2.3295986652374268, "learning_rate": 2.0261457099685033e-07, "loss": 0.04257066, "memory(GiB)": 13.7, "step": 97460, "train_speed(iter/s)": 1.530819 }, { "acc": 0.9927083, "epoch": 45.68314975392548, "grad_norm": 4.557787895202637, "learning_rate": 2.0239673047682665e-07, "loss": 0.03357995, "memory(GiB)": 13.7, "step": 97465, "train_speed(iter/s)": 1.530815 }, { "acc": 0.98936014, "epoch": 45.68549332083431, "grad_norm": 3.0308735370635986, "learning_rate": 2.021790052874412e-07, "loss": 0.0668999, "memory(GiB)": 13.7, "step": 97470, "train_speed(iter/s)": 1.530816 }, { "acc": 0.98008928, "epoch": 45.687836887743146, "grad_norm": 4.633060932159424, "learning_rate": 2.019613954339299e-07, "loss": 0.04286796, "memory(GiB)": 13.7, "step": 97475, "train_speed(iter/s)": 1.53082 }, { "acc": 0.9916667, "epoch": 45.69018045465198, "grad_norm": 3.438209056854248, "learning_rate": 2.0174390092152186e-07, "loss": 0.05302086, "memory(GiB)": 13.7, "step": 97480, "train_speed(iter/s)": 1.530822 }, { "acc": 0.99333334, "epoch": 45.692524021560814, "grad_norm": 1.7554430961608887, "learning_rate": 2.0152652175544575e-07, "loss": 0.01611291, "memory(GiB)": 13.7, "step": 97485, "train_speed(iter/s)": 1.530824 }, { "acc": 0.99821434, "epoch": 45.69486758846965, "grad_norm": 0.0010475809685885906, "learning_rate": 2.0130925794092462e-07, "loss": 0.00565485, "memory(GiB)": 13.7, "step": 97490, "train_speed(iter/s)": 1.530823 }, { "acc": 0.98083334, "epoch": 45.69721115537848, "grad_norm": 2.102076530456543, "learning_rate": 2.010921094831838e-07, "loss": 0.05082207, "memory(GiB)": 13.7, "step": 97495, "train_speed(iter/s)": 1.530827 }, { "acc": 0.98842258, "epoch": 45.699554722287324, "grad_norm": 0.20330695807933807, "learning_rate": 2.0087507638744133e-07, "loss": 0.01915809, "memory(GiB)": 13.7, "step": 97500, "train_speed(iter/s)": 1.530835 }, { "acc": 0.97895832, "epoch": 45.70189828919616, "grad_norm": 3.3269124031066895, "learning_rate": 2.0065815865891364e-07, "loss": 0.05825062, "memory(GiB)": 13.7, "step": 97505, "train_speed(iter/s)": 1.530839 }, { "acc": 0.99020824, "epoch": 45.70424185610499, "grad_norm": 0.0022802911698818207, "learning_rate": 2.0044135630281553e-07, "loss": 0.03131019, "memory(GiB)": 13.7, "step": 97510, "train_speed(iter/s)": 1.530838 }, { "acc": 0.98604164, "epoch": 45.70658542301383, "grad_norm": 0.7818379402160645, "learning_rate": 2.0022466932435668e-07, "loss": 0.04417897, "memory(GiB)": 13.7, "step": 97515, "train_speed(iter/s)": 1.530838 }, { "acc": 0.98354168, "epoch": 45.70892898992266, "grad_norm": 4.189886093139648, "learning_rate": 2.0000809772874745e-07, "loss": 0.03238859, "memory(GiB)": 13.7, "step": 97520, "train_speed(iter/s)": 1.530835 }, { "acc": 0.99375, "epoch": 45.711272556831496, "grad_norm": 2.2772018909454346, "learning_rate": 1.9979164152119257e-07, "loss": 0.01968269, "memory(GiB)": 13.7, "step": 97525, "train_speed(iter/s)": 1.530837 }, { "acc": 0.98604164, "epoch": 45.71361612374033, "grad_norm": 5.549528121948242, "learning_rate": 1.995753007068957e-07, "loss": 0.02361779, "memory(GiB)": 13.7, "step": 97530, "train_speed(iter/s)": 1.530836 }, { "acc": 0.98881941, "epoch": 45.71595969064917, "grad_norm": 2.218432903289795, "learning_rate": 1.9935907529105554e-07, "loss": 0.02363461, "memory(GiB)": 13.7, "step": 97535, "train_speed(iter/s)": 1.53084 }, { "acc": 0.96916676, "epoch": 45.718303257558006, "grad_norm": 2.2398617267608643, "learning_rate": 1.9914296527887126e-07, "loss": 0.05100356, "memory(GiB)": 13.7, "step": 97540, "train_speed(iter/s)": 1.530844 }, { "acc": 0.99437504, "epoch": 45.72064682446684, "grad_norm": 6.199828147888184, "learning_rate": 1.9892697067553595e-07, "loss": 0.02288853, "memory(GiB)": 13.7, "step": 97545, "train_speed(iter/s)": 1.530846 }, { "acc": 0.98298616, "epoch": 45.722990391375674, "grad_norm": 4.926069259643555, "learning_rate": 1.9871109148624048e-07, "loss": 0.03523546, "memory(GiB)": 13.7, "step": 97550, "train_speed(iter/s)": 1.530847 }, { "acc": 0.97270832, "epoch": 45.72533395828451, "grad_norm": 3.23594331741333, "learning_rate": 1.9849532771617687e-07, "loss": 0.04442794, "memory(GiB)": 13.7, "step": 97555, "train_speed(iter/s)": 1.53085 }, { "acc": 0.9895834, "epoch": 45.72767752519334, "grad_norm": 0.0025222869589924812, "learning_rate": 1.982796793705294e-07, "loss": 0.01635991, "memory(GiB)": 13.7, "step": 97560, "train_speed(iter/s)": 1.530851 }, { "acc": 0.98374996, "epoch": 45.73002109210218, "grad_norm": 3.281183958053589, "learning_rate": 1.980641464544822e-07, "loss": 0.04142545, "memory(GiB)": 13.7, "step": 97565, "train_speed(iter/s)": 1.530849 }, { "acc": 0.97791672, "epoch": 45.73236465901101, "grad_norm": 3.4282801151275635, "learning_rate": 1.9784872897321505e-07, "loss": 0.03185276, "memory(GiB)": 13.7, "step": 97570, "train_speed(iter/s)": 1.530852 }, { "acc": 0.98090782, "epoch": 45.73470822591985, "grad_norm": 6.897849082946777, "learning_rate": 1.9763342693190614e-07, "loss": 0.05549445, "memory(GiB)": 13.7, "step": 97575, "train_speed(iter/s)": 1.530858 }, { "acc": 0.98583336, "epoch": 45.73705179282869, "grad_norm": 7.580881118774414, "learning_rate": 1.9741824033573194e-07, "loss": 0.02643643, "memory(GiB)": 13.7, "step": 97580, "train_speed(iter/s)": 1.530861 }, { "acc": 0.98383923, "epoch": 45.73939535973752, "grad_norm": 1.7572404146194458, "learning_rate": 1.9720316918986379e-07, "loss": 0.04155736, "memory(GiB)": 13.7, "step": 97585, "train_speed(iter/s)": 1.530863 }, { "acc": 0.98291664, "epoch": 45.741738926646356, "grad_norm": 2.7988524436950684, "learning_rate": 1.9698821349947155e-07, "loss": 0.06355772, "memory(GiB)": 13.7, "step": 97590, "train_speed(iter/s)": 1.530866 }, { "acc": 0.99508934, "epoch": 45.74408249355519, "grad_norm": 0.01697535440325737, "learning_rate": 1.9677337326972114e-07, "loss": 0.02175488, "memory(GiB)": 13.7, "step": 97595, "train_speed(iter/s)": 1.53087 }, { "acc": 0.98321428, "epoch": 45.746426060464024, "grad_norm": 5.808244228363037, "learning_rate": 1.965586485057785e-07, "loss": 0.035043, "memory(GiB)": 13.7, "step": 97600, "train_speed(iter/s)": 1.530869 }, { "acc": 0.98937502, "epoch": 45.74876962737286, "grad_norm": 2.141977310180664, "learning_rate": 1.963440392128022e-07, "loss": 0.02191424, "memory(GiB)": 13.7, "step": 97605, "train_speed(iter/s)": 1.53087 }, { "acc": 0.98673611, "epoch": 45.7511131942817, "grad_norm": 4.776391506195068, "learning_rate": 1.961295453959538e-07, "loss": 0.04001401, "memory(GiB)": 13.7, "step": 97610, "train_speed(iter/s)": 1.530873 }, { "acc": 0.97865534, "epoch": 45.753456761190535, "grad_norm": 2.7991485595703125, "learning_rate": 1.9591516706038642e-07, "loss": 0.0373662, "memory(GiB)": 13.7, "step": 97615, "train_speed(iter/s)": 1.530873 }, { "acc": 0.98439903, "epoch": 45.75580032809937, "grad_norm": 0.9850583076477051, "learning_rate": 1.9570090421125484e-07, "loss": 0.05285743, "memory(GiB)": 13.7, "step": 97620, "train_speed(iter/s)": 1.530874 }, { "acc": 0.97875004, "epoch": 45.7581438950082, "grad_norm": 4.41440486907959, "learning_rate": 1.9548675685370836e-07, "loss": 0.03596112, "memory(GiB)": 13.7, "step": 97625, "train_speed(iter/s)": 1.530876 }, { "acc": 0.98696423, "epoch": 45.76048746191704, "grad_norm": 2.0576565265655518, "learning_rate": 1.952727249928951e-07, "loss": 0.03814202, "memory(GiB)": 13.7, "step": 97630, "train_speed(iter/s)": 1.530879 }, { "acc": 0.98395834, "epoch": 45.76283102882587, "grad_norm": 3.8917157649993896, "learning_rate": 1.9505880863395876e-07, "loss": 0.03541648, "memory(GiB)": 13.7, "step": 97635, "train_speed(iter/s)": 1.530882 }, { "acc": 0.98684626, "epoch": 45.765174595734706, "grad_norm": 3.724532127380371, "learning_rate": 1.9484500778204082e-07, "loss": 0.04017606, "memory(GiB)": 13.7, "step": 97640, "train_speed(iter/s)": 1.530887 }, { "acc": 0.97381935, "epoch": 45.76751816264354, "grad_norm": 3.4443764686584473, "learning_rate": 1.946313224422817e-07, "loss": 0.05651549, "memory(GiB)": 13.7, "step": 97645, "train_speed(iter/s)": 1.53089 }, { "acc": 0.98778677, "epoch": 45.76986172955238, "grad_norm": 4.002894401550293, "learning_rate": 1.944177526198167e-07, "loss": 0.04933403, "memory(GiB)": 13.7, "step": 97650, "train_speed(iter/s)": 1.530894 }, { "acc": 0.98800602, "epoch": 45.772205296461216, "grad_norm": 2.6974709033966064, "learning_rate": 1.9420429831978067e-07, "loss": 0.03044989, "memory(GiB)": 13.7, "step": 97655, "train_speed(iter/s)": 1.530894 }, { "acc": 0.98351641, "epoch": 45.77454886337005, "grad_norm": 2.1837589740753174, "learning_rate": 1.939909595473018e-07, "loss": 0.0322933, "memory(GiB)": 13.7, "step": 97660, "train_speed(iter/s)": 1.530893 }, { "acc": 0.96999998, "epoch": 45.776892430278885, "grad_norm": 3.6773476600646973, "learning_rate": 1.9377773630751102e-07, "loss": 0.04660841, "memory(GiB)": 13.7, "step": 97665, "train_speed(iter/s)": 1.530892 }, { "acc": 0.97989588, "epoch": 45.77923599718772, "grad_norm": 0.003538406454026699, "learning_rate": 1.935646286055326e-07, "loss": 0.03875861, "memory(GiB)": 13.7, "step": 97670, "train_speed(iter/s)": 1.530897 }, { "acc": 0.9895834, "epoch": 45.78157956409655, "grad_norm": 4.467829704284668, "learning_rate": 1.9335163644648748e-07, "loss": 0.02221686, "memory(GiB)": 13.7, "step": 97675, "train_speed(iter/s)": 1.530896 }, { "acc": 0.96765327, "epoch": 45.78392313100539, "grad_norm": 1.8297358751296997, "learning_rate": 1.9313875983549662e-07, "loss": 0.06336153, "memory(GiB)": 13.7, "step": 97680, "train_speed(iter/s)": 1.5309 }, { "acc": 0.97103853, "epoch": 45.78626669791423, "grad_norm": 5.0108513832092285, "learning_rate": 1.9292599877767652e-07, "loss": 0.08574526, "memory(GiB)": 13.7, "step": 97685, "train_speed(iter/s)": 1.5309 }, { "acc": 0.97386904, "epoch": 45.78861026482306, "grad_norm": 5.253608703613281, "learning_rate": 1.9271335327814145e-07, "loss": 0.07453503, "memory(GiB)": 13.7, "step": 97690, "train_speed(iter/s)": 1.530905 }, { "acc": 0.9859375, "epoch": 45.7909538317319, "grad_norm": 2.1680409908294678, "learning_rate": 1.9250082334200128e-07, "loss": 0.02158912, "memory(GiB)": 13.7, "step": 97695, "train_speed(iter/s)": 1.530907 }, { "acc": 0.98362179, "epoch": 45.79329739864073, "grad_norm": 5.353482246398926, "learning_rate": 1.9228840897436752e-07, "loss": 0.03307554, "memory(GiB)": 13.7, "step": 97700, "train_speed(iter/s)": 1.530912 }, { "acc": 0.97486115, "epoch": 45.795640965549566, "grad_norm": 3.681687355041504, "learning_rate": 1.9207611018034335e-07, "loss": 0.0474099, "memory(GiB)": 13.7, "step": 97705, "train_speed(iter/s)": 1.530914 }, { "acc": 0.98416672, "epoch": 45.7979845324584, "grad_norm": 4.93786096572876, "learning_rate": 1.9186392696503308e-07, "loss": 0.03670967, "memory(GiB)": 13.7, "step": 97710, "train_speed(iter/s)": 1.530913 }, { "acc": 0.98738098, "epoch": 45.800328099367235, "grad_norm": 5.049280643463135, "learning_rate": 1.916518593335365e-07, "loss": 0.0470427, "memory(GiB)": 13.7, "step": 97715, "train_speed(iter/s)": 1.530914 }, { "acc": 0.979072, "epoch": 45.80267166627607, "grad_norm": 5.570816516876221, "learning_rate": 1.9143990729094966e-07, "loss": 0.06189871, "memory(GiB)": 13.7, "step": 97720, "train_speed(iter/s)": 1.530918 }, { "acc": 0.98395834, "epoch": 45.80501523318491, "grad_norm": 3.4284732341766357, "learning_rate": 1.9122807084237012e-07, "loss": 0.03042518, "memory(GiB)": 13.7, "step": 97725, "train_speed(iter/s)": 1.530925 }, { "acc": 0.98703127, "epoch": 45.807358800093745, "grad_norm": 1.8941560983657837, "learning_rate": 1.9101634999288725e-07, "loss": 0.04357409, "memory(GiB)": 13.7, "step": 97730, "train_speed(iter/s)": 1.53093 }, { "acc": 0.99769344, "epoch": 45.80970236700258, "grad_norm": 0.4257853329181671, "learning_rate": 1.908047447475909e-07, "loss": 0.01143768, "memory(GiB)": 13.7, "step": 97735, "train_speed(iter/s)": 1.530934 }, { "acc": 0.99330359, "epoch": 45.81204593391141, "grad_norm": 4.0231428146362305, "learning_rate": 1.905932551115676e-07, "loss": 0.03500276, "memory(GiB)": 13.7, "step": 97740, "train_speed(iter/s)": 1.530935 }, { "acc": 0.97580357, "epoch": 45.81438950082025, "grad_norm": 4.29015588760376, "learning_rate": 1.9038188108990112e-07, "loss": 0.05040912, "memory(GiB)": 13.7, "step": 97745, "train_speed(iter/s)": 1.530937 }, { "acc": 0.97022724, "epoch": 45.81673306772908, "grad_norm": 3.058603525161743, "learning_rate": 1.9017062268767015e-07, "loss": 0.06575499, "memory(GiB)": 13.7, "step": 97750, "train_speed(iter/s)": 1.530937 }, { "acc": 0.98738098, "epoch": 45.819076634637916, "grad_norm": 3.178401231765747, "learning_rate": 1.8995947990995575e-07, "loss": 0.03550307, "memory(GiB)": 13.7, "step": 97755, "train_speed(iter/s)": 1.530945 }, { "acc": 0.98125, "epoch": 45.82142020154676, "grad_norm": 4.784848690032959, "learning_rate": 1.897484527618317e-07, "loss": 0.04532889, "memory(GiB)": 13.7, "step": 97760, "train_speed(iter/s)": 1.530944 }, { "acc": 0.97625008, "epoch": 45.82376376845559, "grad_norm": 0.6288180351257324, "learning_rate": 1.8953754124836945e-07, "loss": 0.05859817, "memory(GiB)": 13.7, "step": 97765, "train_speed(iter/s)": 1.530943 }, { "acc": 0.97847633, "epoch": 45.826107335364426, "grad_norm": 4.0341339111328125, "learning_rate": 1.8932674537464065e-07, "loss": 0.05306969, "memory(GiB)": 13.7, "step": 97770, "train_speed(iter/s)": 1.530943 }, { "acc": 0.97666664, "epoch": 45.82845090227326, "grad_norm": 5.710822582244873, "learning_rate": 1.8911606514570958e-07, "loss": 0.07763625, "memory(GiB)": 13.7, "step": 97775, "train_speed(iter/s)": 1.530947 }, { "acc": 0.9895834, "epoch": 45.830794469182095, "grad_norm": 0.00389246572740376, "learning_rate": 1.8890550056664278e-07, "loss": 0.04087432, "memory(GiB)": 13.7, "step": 97780, "train_speed(iter/s)": 1.530948 }, { "acc": 0.98883934, "epoch": 45.83313803609093, "grad_norm": 2.685316324234009, "learning_rate": 1.886950516424991e-07, "loss": 0.02168233, "memory(GiB)": 13.7, "step": 97785, "train_speed(iter/s)": 1.530951 }, { "acc": 0.98041668, "epoch": 45.835481602999764, "grad_norm": 4.307829856872559, "learning_rate": 1.8848471837833943e-07, "loss": 0.02415137, "memory(GiB)": 13.7, "step": 97790, "train_speed(iter/s)": 1.530955 }, { "acc": 0.984375, "epoch": 45.8378251699086, "grad_norm": 2.480388879776001, "learning_rate": 1.8827450077921877e-07, "loss": 0.03621195, "memory(GiB)": 13.7, "step": 97795, "train_speed(iter/s)": 1.530959 }, { "acc": 0.97562504, "epoch": 45.84016873681744, "grad_norm": 2.318040132522583, "learning_rate": 1.8806439885018862e-07, "loss": 0.0466238, "memory(GiB)": 13.7, "step": 97800, "train_speed(iter/s)": 1.53096 }, { "acc": 0.99229164, "epoch": 45.842512303726274, "grad_norm": 0.0015955595299601555, "learning_rate": 1.8785441259630113e-07, "loss": 0.03176325, "memory(GiB)": 13.7, "step": 97805, "train_speed(iter/s)": 1.530959 }, { "acc": 0.98946428, "epoch": 45.84485587063511, "grad_norm": 4.685433864593506, "learning_rate": 1.8764454202260174e-07, "loss": 0.03172956, "memory(GiB)": 13.7, "step": 97810, "train_speed(iter/s)": 1.530963 }, { "acc": 0.98542614, "epoch": 45.84719943754394, "grad_norm": 2.1704511642456055, "learning_rate": 1.8743478713413704e-07, "loss": 0.05163288, "memory(GiB)": 13.7, "step": 97815, "train_speed(iter/s)": 1.530964 }, { "acc": 0.98999996, "epoch": 45.84954300445278, "grad_norm": 3.292628288269043, "learning_rate": 1.872251479359475e-07, "loss": 0.04049189, "memory(GiB)": 13.7, "step": 97820, "train_speed(iter/s)": 1.530963 }, { "acc": 0.97416668, "epoch": 45.85188657136161, "grad_norm": 6.574499130249023, "learning_rate": 1.8701562443307296e-07, "loss": 0.03416167, "memory(GiB)": 13.7, "step": 97825, "train_speed(iter/s)": 1.53096 }, { "acc": 0.99375, "epoch": 45.854230138270445, "grad_norm": 2.3239922523498535, "learning_rate": 1.8680621663054898e-07, "loss": 0.01712839, "memory(GiB)": 13.7, "step": 97830, "train_speed(iter/s)": 1.530965 }, { "acc": 0.97539139, "epoch": 45.85657370517928, "grad_norm": 4.289609432220459, "learning_rate": 1.8659692453340987e-07, "loss": 0.05219312, "memory(GiB)": 13.7, "step": 97835, "train_speed(iter/s)": 1.530964 }, { "acc": 0.98583336, "epoch": 45.85891727208812, "grad_norm": 0.0027950750663876534, "learning_rate": 1.8638774814668554e-07, "loss": 0.02136361, "memory(GiB)": 13.7, "step": 97840, "train_speed(iter/s)": 1.530966 }, { "acc": 0.990625, "epoch": 45.861260838996955, "grad_norm": 4.858453273773193, "learning_rate": 1.8617868747540316e-07, "loss": 0.03931847, "memory(GiB)": 13.7, "step": 97845, "train_speed(iter/s)": 1.530971 }, { "acc": 0.99278851, "epoch": 45.86360440590579, "grad_norm": 2.7936887741088867, "learning_rate": 1.859697425245904e-07, "loss": 0.03435571, "memory(GiB)": 13.7, "step": 97850, "train_speed(iter/s)": 1.530973 }, { "acc": 0.98812504, "epoch": 45.865947972814624, "grad_norm": 4.336405277252197, "learning_rate": 1.8576091329926722e-07, "loss": 0.0416224, "memory(GiB)": 13.7, "step": 97855, "train_speed(iter/s)": 1.530981 }, { "acc": 0.98968754, "epoch": 45.86829153972346, "grad_norm": 3.781769037246704, "learning_rate": 1.8555219980445408e-07, "loss": 0.0377579, "memory(GiB)": 13.7, "step": 97860, "train_speed(iter/s)": 1.530985 }, { "acc": 0.99300594, "epoch": 45.87063510663229, "grad_norm": 2.43778133392334, "learning_rate": 1.85343602045167e-07, "loss": 0.04434458, "memory(GiB)": 13.7, "step": 97865, "train_speed(iter/s)": 1.530985 }, { "acc": 0.98708334, "epoch": 45.87297867354113, "grad_norm": 1.8606842756271362, "learning_rate": 1.8513512002642206e-07, "loss": 0.04238497, "memory(GiB)": 13.7, "step": 97870, "train_speed(iter/s)": 1.530987 }, { "acc": 0.99002972, "epoch": 45.87532224044997, "grad_norm": 3.5821142196655273, "learning_rate": 1.849267537532286e-07, "loss": 0.02252638, "memory(GiB)": 13.7, "step": 97875, "train_speed(iter/s)": 1.530987 }, { "acc": 0.96873512, "epoch": 45.8776658073588, "grad_norm": 5.702154159545898, "learning_rate": 1.847185032305955e-07, "loss": 0.0734413, "memory(GiB)": 13.7, "step": 97880, "train_speed(iter/s)": 1.53099 }, { "acc": 0.9979166, "epoch": 45.88000937426764, "grad_norm": 2.103008270263672, "learning_rate": 1.8451036846352934e-07, "loss": 0.0116882, "memory(GiB)": 13.7, "step": 97885, "train_speed(iter/s)": 1.530992 }, { "acc": 0.99120922, "epoch": 45.88235294117647, "grad_norm": 1.1735212802886963, "learning_rate": 1.8430234945703116e-07, "loss": 0.01323917, "memory(GiB)": 13.7, "step": 97890, "train_speed(iter/s)": 1.530995 }, { "acc": 0.98291664, "epoch": 45.884696508085305, "grad_norm": 3.247410535812378, "learning_rate": 1.8409444621610257e-07, "loss": 0.04240088, "memory(GiB)": 13.7, "step": 97895, "train_speed(iter/s)": 1.530992 }, { "acc": 0.97999992, "epoch": 45.88704007499414, "grad_norm": 0.0031046955846250057, "learning_rate": 1.838866587457391e-07, "loss": 0.04214344, "memory(GiB)": 13.7, "step": 97900, "train_speed(iter/s)": 1.530993 }, { "acc": 0.98602066, "epoch": 45.889383641902974, "grad_norm": 2.974628210067749, "learning_rate": 1.8367898705093844e-07, "loss": 0.032969, "memory(GiB)": 13.7, "step": 97905, "train_speed(iter/s)": 1.530994 }, { "acc": 0.9822917, "epoch": 45.89172720881181, "grad_norm": 0.001459180493839085, "learning_rate": 1.8347143113668887e-07, "loss": 0.03490176, "memory(GiB)": 13.7, "step": 97910, "train_speed(iter/s)": 1.530997 }, { "acc": 0.98976192, "epoch": 45.89407077572065, "grad_norm": 2.11326003074646, "learning_rate": 1.8326399100798208e-07, "loss": 0.02719337, "memory(GiB)": 13.7, "step": 97915, "train_speed(iter/s)": 1.531 }, { "acc": 0.98812504, "epoch": 45.896414342629484, "grad_norm": 2.7202084064483643, "learning_rate": 1.8305666666980298e-07, "loss": 0.01799677, "memory(GiB)": 13.7, "step": 97920, "train_speed(iter/s)": 1.531 }, { "acc": 0.98282194, "epoch": 45.89875790953832, "grad_norm": 3.6370368003845215, "learning_rate": 1.828494581271343e-07, "loss": 0.04060662, "memory(GiB)": 13.7, "step": 97925, "train_speed(iter/s)": 1.531004 }, { "acc": 0.98452377, "epoch": 45.90110147644715, "grad_norm": 3.455981492996216, "learning_rate": 1.826423653849582e-07, "loss": 0.030735, "memory(GiB)": 13.7, "step": 97930, "train_speed(iter/s)": 1.531006 }, { "acc": 0.98778839, "epoch": 45.90344504335599, "grad_norm": 4.956454753875732, "learning_rate": 1.8243538844825138e-07, "loss": 0.06697361, "memory(GiB)": 13.7, "step": 97935, "train_speed(iter/s)": 1.531004 }, { "acc": 0.99821434, "epoch": 45.90578861026482, "grad_norm": 1.8449655771255493, "learning_rate": 1.8222852732198932e-07, "loss": 0.01687933, "memory(GiB)": 13.7, "step": 97940, "train_speed(iter/s)": 1.531004 }, { "acc": 0.98118057, "epoch": 45.908132177173655, "grad_norm": 4.591027736663818, "learning_rate": 1.8202178201114422e-07, "loss": 0.04042614, "memory(GiB)": 13.7, "step": 97945, "train_speed(iter/s)": 1.531006 }, { "acc": 0.98680553, "epoch": 45.9104757440825, "grad_norm": 3.7582035064697266, "learning_rate": 1.8181515252068607e-07, "loss": 0.07548456, "memory(GiB)": 13.7, "step": 97950, "train_speed(iter/s)": 1.531012 }, { "acc": 0.99196434, "epoch": 45.91281931099133, "grad_norm": 3.4829235076904297, "learning_rate": 1.8160863885557978e-07, "loss": 0.01318115, "memory(GiB)": 13.7, "step": 97955, "train_speed(iter/s)": 1.531011 }, { "acc": 0.97374992, "epoch": 45.915162877900165, "grad_norm": 0.8941428661346436, "learning_rate": 1.814022410207915e-07, "loss": 0.03384161, "memory(GiB)": 13.7, "step": 97960, "train_speed(iter/s)": 1.53101 }, { "acc": 1.0, "epoch": 45.917506444809, "grad_norm": 1.134750485420227, "learning_rate": 1.811959590212812e-07, "loss": 0.01016581, "memory(GiB)": 13.7, "step": 97965, "train_speed(iter/s)": 1.531009 }, { "acc": 0.98874998, "epoch": 45.919850011717834, "grad_norm": 0.44022783637046814, "learning_rate": 1.809897928620072e-07, "loss": 0.04153467, "memory(GiB)": 13.7, "step": 97970, "train_speed(iter/s)": 1.531014 }, { "acc": 0.9927083, "epoch": 45.92219357862667, "grad_norm": 0.8285505771636963, "learning_rate": 1.807837425479261e-07, "loss": 0.02819784, "memory(GiB)": 13.7, "step": 97975, "train_speed(iter/s)": 1.531018 }, { "acc": 0.98812504, "epoch": 45.9245371455355, "grad_norm": 2.6802151203155518, "learning_rate": 1.8057780808398851e-07, "loss": 0.01830789, "memory(GiB)": 13.7, "step": 97980, "train_speed(iter/s)": 1.531015 }, { "acc": 0.97440472, "epoch": 45.92688071244434, "grad_norm": 5.912885665893555, "learning_rate": 1.8037198947514714e-07, "loss": 0.03959658, "memory(GiB)": 13.7, "step": 97985, "train_speed(iter/s)": 1.531018 }, { "acc": 0.992342, "epoch": 45.92922427935318, "grad_norm": 3.1320066452026367, "learning_rate": 1.8016628672634595e-07, "loss": 0.01795907, "memory(GiB)": 13.7, "step": 97990, "train_speed(iter/s)": 1.531021 }, { "acc": 0.9927084, "epoch": 45.93156784626201, "grad_norm": 3.9155712127685547, "learning_rate": 1.7996069984253263e-07, "loss": 0.03093986, "memory(GiB)": 13.7, "step": 97995, "train_speed(iter/s)": 1.531023 }, { "acc": 0.98029766, "epoch": 45.93391141317085, "grad_norm": 1.1507596969604492, "learning_rate": 1.7975522882864721e-07, "loss": 0.05486305, "memory(GiB)": 13.7, "step": 98000, "train_speed(iter/s)": 1.531023 }, { "acc": 0.990625, "epoch": 45.93625498007968, "grad_norm": 2.290987730026245, "learning_rate": 1.7954987368962858e-07, "loss": 0.01644446, "memory(GiB)": 13.7, "step": 98005, "train_speed(iter/s)": 1.531024 }, { "acc": 0.9958334, "epoch": 45.938598546988516, "grad_norm": 0.9912378191947937, "learning_rate": 1.7934463443041338e-07, "loss": 0.02229018, "memory(GiB)": 13.7, "step": 98010, "train_speed(iter/s)": 1.531027 }, { "acc": 0.98402777, "epoch": 45.94094211389735, "grad_norm": 4.737226963043213, "learning_rate": 1.7913951105593332e-07, "loss": 0.04783614, "memory(GiB)": 13.7, "step": 98015, "train_speed(iter/s)": 1.53103 }, { "acc": 0.9927084, "epoch": 45.943285680806184, "grad_norm": 1.299239993095398, "learning_rate": 1.7893450357112062e-07, "loss": 0.01742193, "memory(GiB)": 13.7, "step": 98020, "train_speed(iter/s)": 1.531033 }, { "acc": 0.9895833, "epoch": 45.945629247715026, "grad_norm": 2.2070438861846924, "learning_rate": 1.787296119809025e-07, "loss": 0.04578882, "memory(GiB)": 13.7, "step": 98025, "train_speed(iter/s)": 1.531037 }, { "acc": 0.97999992, "epoch": 45.94797281462386, "grad_norm": 5.578534126281738, "learning_rate": 1.7852483629020398e-07, "loss": 0.03794348, "memory(GiB)": 13.7, "step": 98030, "train_speed(iter/s)": 1.531037 }, { "acc": 0.9777976, "epoch": 45.950316381532694, "grad_norm": 3.612703800201416, "learning_rate": 1.7832017650394675e-07, "loss": 0.04287332, "memory(GiB)": 13.7, "step": 98035, "train_speed(iter/s)": 1.531043 }, { "acc": 0.97416668, "epoch": 45.95265994844153, "grad_norm": 7.096550464630127, "learning_rate": 1.7811563262705028e-07, "loss": 0.04337287, "memory(GiB)": 13.7, "step": 98040, "train_speed(iter/s)": 1.531048 }, { "acc": 1.0, "epoch": 45.95500351535036, "grad_norm": 0.004753448069095612, "learning_rate": 1.7791120466443176e-07, "loss": 0.02275372, "memory(GiB)": 13.7, "step": 98045, "train_speed(iter/s)": 1.53105 }, { "acc": 0.98157196, "epoch": 45.9573470822592, "grad_norm": 3.886812448501587, "learning_rate": 1.777068926210035e-07, "loss": 0.03852014, "memory(GiB)": 13.7, "step": 98050, "train_speed(iter/s)": 1.531048 }, { "acc": 0.98425598, "epoch": 45.95969064916803, "grad_norm": 2.1725800037384033, "learning_rate": 1.7750269650167822e-07, "loss": 0.0198045, "memory(GiB)": 13.7, "step": 98055, "train_speed(iter/s)": 1.53105 }, { "acc": 0.98871536, "epoch": 45.962034216076866, "grad_norm": 3.3528008460998535, "learning_rate": 1.7729861631136268e-07, "loss": 0.01945862, "memory(GiB)": 13.7, "step": 98060, "train_speed(iter/s)": 1.531049 }, { "acc": 0.99562502, "epoch": 45.96437778298571, "grad_norm": 5.467058181762695, "learning_rate": 1.770946520549636e-07, "loss": 0.03495074, "memory(GiB)": 13.7, "step": 98065, "train_speed(iter/s)": 1.531049 }, { "acc": 0.98194447, "epoch": 45.96672134989454, "grad_norm": 3.693490743637085, "learning_rate": 1.7689080373738204e-07, "loss": 0.02087242, "memory(GiB)": 13.7, "step": 98070, "train_speed(iter/s)": 1.531053 }, { "acc": 0.97999992, "epoch": 45.969064916803376, "grad_norm": 2.916529655456543, "learning_rate": 1.7668707136351975e-07, "loss": 0.03650514, "memory(GiB)": 13.7, "step": 98075, "train_speed(iter/s)": 1.531051 }, { "acc": 0.98178024, "epoch": 45.97140848371221, "grad_norm": 3.224808931350708, "learning_rate": 1.7648345493827233e-07, "loss": 0.02641847, "memory(GiB)": 13.7, "step": 98080, "train_speed(iter/s)": 1.531053 }, { "acc": 0.98561964, "epoch": 45.973752050621044, "grad_norm": 3.1201698780059814, "learning_rate": 1.762799544665348e-07, "loss": 0.05338908, "memory(GiB)": 13.7, "step": 98085, "train_speed(iter/s)": 1.531055 }, { "acc": 0.99125004, "epoch": 45.97609561752988, "grad_norm": 2.2647864818573, "learning_rate": 1.7607656995319886e-07, "loss": 0.02612645, "memory(GiB)": 13.7, "step": 98090, "train_speed(iter/s)": 1.531055 }, { "acc": 0.990625, "epoch": 45.97843918443871, "grad_norm": 1.8494892120361328, "learning_rate": 1.7587330140315183e-07, "loss": 0.02068722, "memory(GiB)": 13.7, "step": 98095, "train_speed(iter/s)": 1.531057 }, { "acc": 0.99508934, "epoch": 45.980782751347554, "grad_norm": 2.4506850242614746, "learning_rate": 1.756701488212809e-07, "loss": 0.02032301, "memory(GiB)": 13.7, "step": 98100, "train_speed(iter/s)": 1.531058 }, { "acc": 0.98340778, "epoch": 45.98312631825639, "grad_norm": 2.4488871097564697, "learning_rate": 1.7546711221246781e-07, "loss": 0.05027605, "memory(GiB)": 13.7, "step": 98105, "train_speed(iter/s)": 1.531059 }, { "acc": 0.96841354, "epoch": 45.98546988516522, "grad_norm": 5.551681995391846, "learning_rate": 1.7526419158159548e-07, "loss": 0.05599498, "memory(GiB)": 13.7, "step": 98110, "train_speed(iter/s)": 1.531057 }, { "acc": 0.98979168, "epoch": 45.98781345207406, "grad_norm": 4.562844276428223, "learning_rate": 1.7506138693353884e-07, "loss": 0.04212858, "memory(GiB)": 13.7, "step": 98115, "train_speed(iter/s)": 1.531059 }, { "acc": 0.99008923, "epoch": 45.99015701898289, "grad_norm": 0.02177736908197403, "learning_rate": 1.7485869827317411e-07, "loss": 0.0285891, "memory(GiB)": 13.7, "step": 98120, "train_speed(iter/s)": 1.53106 }, { "acc": 0.9859375, "epoch": 45.992500585891726, "grad_norm": 2.23018217086792, "learning_rate": 1.746561256053725e-07, "loss": 0.02642317, "memory(GiB)": 13.7, "step": 98125, "train_speed(iter/s)": 1.531066 }, { "acc": 0.9888195, "epoch": 45.99484415280056, "grad_norm": 1.7987866401672363, "learning_rate": 1.74453668935004e-07, "loss": 0.03752984, "memory(GiB)": 13.7, "step": 98130, "train_speed(iter/s)": 1.531069 }, { "acc": 0.99003029, "epoch": 45.997187719709395, "grad_norm": 3.177781581878662, "learning_rate": 1.7425132826693371e-07, "loss": 0.03814708, "memory(GiB)": 13.7, "step": 98135, "train_speed(iter/s)": 1.531071 }, { "acc": 0.98726635, "epoch": 45.999531286618236, "grad_norm": 2.2886404991149902, "learning_rate": 1.740491036060267e-07, "loss": 0.02502178, "memory(GiB)": 13.7, "step": 98140, "train_speed(iter/s)": 1.531077 }, { "acc": 0.9916666, "epoch": 46.00187485352707, "grad_norm": 2.1375985145568848, "learning_rate": 1.73846994957143e-07, "loss": 0.01847906, "memory(GiB)": 13.7, "step": 98145, "train_speed(iter/s)": 1.531066 }, { "acc": 0.98943462, "epoch": 46.004218420435905, "grad_norm": 1.4748578071594238, "learning_rate": 1.7364500232514056e-07, "loss": 0.02240526, "memory(GiB)": 13.7, "step": 98150, "train_speed(iter/s)": 1.531069 }, { "acc": 0.98264885, "epoch": 46.00656198734474, "grad_norm": 4.986188888549805, "learning_rate": 1.7344312571487487e-07, "loss": 0.02862237, "memory(GiB)": 13.7, "step": 98155, "train_speed(iter/s)": 1.531068 }, { "acc": 0.98083324, "epoch": 46.00890555425357, "grad_norm": 6.3753581047058105, "learning_rate": 1.7324136513119782e-07, "loss": 0.02721173, "memory(GiB)": 13.7, "step": 98160, "train_speed(iter/s)": 1.531074 }, { "acc": 0.98562498, "epoch": 46.01124912116241, "grad_norm": 1.3366159200668335, "learning_rate": 1.7303972057896048e-07, "loss": 0.03127149, "memory(GiB)": 13.7, "step": 98165, "train_speed(iter/s)": 1.531073 }, { "acc": 0.9864583, "epoch": 46.01359268807124, "grad_norm": 2.057281017303467, "learning_rate": 1.7283819206300854e-07, "loss": 0.02724569, "memory(GiB)": 13.7, "step": 98170, "train_speed(iter/s)": 1.531076 }, { "acc": 0.978125, "epoch": 46.01593625498008, "grad_norm": 4.585652828216553, "learning_rate": 1.7263677958818657e-07, "loss": 0.0381259, "memory(GiB)": 13.7, "step": 98175, "train_speed(iter/s)": 1.53108 }, { "acc": 0.97946424, "epoch": 46.01827982188892, "grad_norm": 4.931663990020752, "learning_rate": 1.7243548315933569e-07, "loss": 0.04588468, "memory(GiB)": 13.7, "step": 98180, "train_speed(iter/s)": 1.531081 }, { "acc": 0.97791672, "epoch": 46.02062338879775, "grad_norm": 6.410982608795166, "learning_rate": 1.722343027812938e-07, "loss": 0.05699593, "memory(GiB)": 13.7, "step": 98185, "train_speed(iter/s)": 1.531085 }, { "acc": 0.97238102, "epoch": 46.022966955706586, "grad_norm": 7.749759197235107, "learning_rate": 1.7203323845889768e-07, "loss": 0.04022388, "memory(GiB)": 13.7, "step": 98190, "train_speed(iter/s)": 1.531088 }, { "acc": 0.99655704, "epoch": 46.02531052261542, "grad_norm": 1.0083798170089722, "learning_rate": 1.718322901969785e-07, "loss": 0.02331056, "memory(GiB)": 13.7, "step": 98195, "train_speed(iter/s)": 1.531088 }, { "acc": 0.98988094, "epoch": 46.027654089524255, "grad_norm": 0.9350400567054749, "learning_rate": 1.7163145800036918e-07, "loss": 0.04066769, "memory(GiB)": 13.7, "step": 98200, "train_speed(iter/s)": 1.531093 }, { "acc": 0.9957386, "epoch": 46.02999765643309, "grad_norm": 0.006743431091308594, "learning_rate": 1.7143074187389475e-07, "loss": 0.02932348, "memory(GiB)": 13.7, "step": 98205, "train_speed(iter/s)": 1.531096 }, { "acc": 0.98458338, "epoch": 46.03234122334192, "grad_norm": 5.32949161529541, "learning_rate": 1.712301418223815e-07, "loss": 0.03942688, "memory(GiB)": 13.7, "step": 98210, "train_speed(iter/s)": 1.531096 }, { "acc": 0.98529758, "epoch": 46.034684790250765, "grad_norm": 2.530895471572876, "learning_rate": 1.7102965785065e-07, "loss": 0.0356308, "memory(GiB)": 13.7, "step": 98215, "train_speed(iter/s)": 1.5311 }, { "acc": 0.97175598, "epoch": 46.0370283571596, "grad_norm": 5.302565574645996, "learning_rate": 1.7082928996351826e-07, "loss": 0.04727584, "memory(GiB)": 13.7, "step": 98220, "train_speed(iter/s)": 1.531099 }, { "acc": 0.9979167, "epoch": 46.03937192406843, "grad_norm": 0.014494937844574451, "learning_rate": 1.7062903816580463e-07, "loss": 0.01698048, "memory(GiB)": 13.7, "step": 98225, "train_speed(iter/s)": 1.531104 }, { "acc": 0.99437504, "epoch": 46.04171549097727, "grad_norm": 1.9943416118621826, "learning_rate": 1.7042890246232145e-07, "loss": 0.01804522, "memory(GiB)": 13.7, "step": 98230, "train_speed(iter/s)": 1.531109 }, { "acc": 0.98633928, "epoch": 46.0440590578861, "grad_norm": 0.9729149341583252, "learning_rate": 1.7022888285787998e-07, "loss": 0.04246127, "memory(GiB)": 13.7, "step": 98235, "train_speed(iter/s)": 1.531114 }, { "acc": 0.97344246, "epoch": 46.046402624794936, "grad_norm": 3.4885268211364746, "learning_rate": 1.700289793572864e-07, "loss": 0.0694598, "memory(GiB)": 13.7, "step": 98240, "train_speed(iter/s)": 1.531116 }, { "acc": 0.98045635, "epoch": 46.04874619170377, "grad_norm": 1.8233133554458618, "learning_rate": 1.6982919196534813e-07, "loss": 0.04928928, "memory(GiB)": 13.7, "step": 98245, "train_speed(iter/s)": 1.531122 }, { "acc": 0.98979168, "epoch": 46.051089758612605, "grad_norm": 1.593787431716919, "learning_rate": 1.6962952068686468e-07, "loss": 0.017061, "memory(GiB)": 13.7, "step": 98250, "train_speed(iter/s)": 1.531125 }, { "acc": 0.97544651, "epoch": 46.053433325521446, "grad_norm": 1.2511526346206665, "learning_rate": 1.6942996552663787e-07, "loss": 0.04650438, "memory(GiB)": 13.7, "step": 98255, "train_speed(iter/s)": 1.531127 }, { "acc": 0.99750004, "epoch": 46.05577689243028, "grad_norm": 3.7318191528320312, "learning_rate": 1.6923052648946333e-07, "loss": 0.01599739, "memory(GiB)": 13.7, "step": 98260, "train_speed(iter/s)": 1.531126 }, { "acc": 0.9875, "epoch": 46.058120459339115, "grad_norm": 3.0239486694335938, "learning_rate": 1.6903120358013458e-07, "loss": 0.02267363, "memory(GiB)": 13.7, "step": 98265, "train_speed(iter/s)": 1.531127 }, { "acc": 0.9795928, "epoch": 46.06046402624795, "grad_norm": 3.865666151046753, "learning_rate": 1.6883199680344394e-07, "loss": 0.04832652, "memory(GiB)": 13.7, "step": 98270, "train_speed(iter/s)": 1.531131 }, { "acc": 0.99375, "epoch": 46.06280759315678, "grad_norm": 2.59602689743042, "learning_rate": 1.6863290616417767e-07, "loss": 0.02550083, "memory(GiB)": 13.7, "step": 98275, "train_speed(iter/s)": 1.531136 }, { "acc": 0.97333336, "epoch": 46.06515116006562, "grad_norm": 3.619999885559082, "learning_rate": 1.6843393166712262e-07, "loss": 0.0424652, "memory(GiB)": 13.7, "step": 98280, "train_speed(iter/s)": 1.531138 }, { "acc": 0.97841339, "epoch": 46.06749472697445, "grad_norm": 5.758792400360107, "learning_rate": 1.682350733170616e-07, "loss": 0.03021804, "memory(GiB)": 13.7, "step": 98285, "train_speed(iter/s)": 1.531141 }, { "acc": 0.9916667, "epoch": 46.06983829388329, "grad_norm": 0.0016185323474928737, "learning_rate": 1.6803633111877431e-07, "loss": 0.05248434, "memory(GiB)": 13.7, "step": 98290, "train_speed(iter/s)": 1.531148 }, { "acc": 0.9885416, "epoch": 46.07218186079213, "grad_norm": 0.02898603305220604, "learning_rate": 1.6783770507703747e-07, "loss": 0.02487065, "memory(GiB)": 13.7, "step": 98295, "train_speed(iter/s)": 1.531149 }, { "acc": 0.99298611, "epoch": 46.07452542770096, "grad_norm": 3.756709098815918, "learning_rate": 1.676391951966263e-07, "loss": 0.03167402, "memory(GiB)": 13.7, "step": 98300, "train_speed(iter/s)": 1.531152 }, { "acc": 0.98383932, "epoch": 46.0768689946098, "grad_norm": 3.3359787464141846, "learning_rate": 1.6744080148231146e-07, "loss": 0.04155009, "memory(GiB)": 13.7, "step": 98305, "train_speed(iter/s)": 1.531158 }, { "acc": 0.99375, "epoch": 46.07921256151863, "grad_norm": 0.0014505581930279732, "learning_rate": 1.672425239388609e-07, "loss": 0.02244639, "memory(GiB)": 13.7, "step": 98310, "train_speed(iter/s)": 1.531162 }, { "acc": 0.98264885, "epoch": 46.081556128427465, "grad_norm": 4.236379146575928, "learning_rate": 1.6704436257104308e-07, "loss": 0.03388871, "memory(GiB)": 13.7, "step": 98315, "train_speed(iter/s)": 1.531162 }, { "acc": 1.0, "epoch": 46.0838996953363, "grad_norm": 1.7743057012557983, "learning_rate": 1.6684631738361876e-07, "loss": 0.02793055, "memory(GiB)": 13.7, "step": 98320, "train_speed(iter/s)": 1.531162 }, { "acc": 0.97875004, "epoch": 46.086243262245134, "grad_norm": 4.7014479637146, "learning_rate": 1.6664838838134975e-07, "loss": 0.05314856, "memory(GiB)": 13.7, "step": 98325, "train_speed(iter/s)": 1.531162 }, { "acc": 0.98500004, "epoch": 46.088586829153975, "grad_norm": 0.0012925750343129039, "learning_rate": 1.6645057556899227e-07, "loss": 0.03686284, "memory(GiB)": 13.7, "step": 98330, "train_speed(iter/s)": 1.531166 }, { "acc": 0.98145828, "epoch": 46.09093039606281, "grad_norm": 3.0056467056274414, "learning_rate": 1.6625287895130268e-07, "loss": 0.03265184, "memory(GiB)": 13.7, "step": 98335, "train_speed(iter/s)": 1.531167 }, { "acc": 0.9927083, "epoch": 46.093273962971644, "grad_norm": 3.3044896125793457, "learning_rate": 1.6605529853303223e-07, "loss": 0.01999003, "memory(GiB)": 13.7, "step": 98340, "train_speed(iter/s)": 1.531168 }, { "acc": 0.97270832, "epoch": 46.09561752988048, "grad_norm": 0.9079232811927795, "learning_rate": 1.6585783431892882e-07, "loss": 0.04466544, "memory(GiB)": 13.7, "step": 98345, "train_speed(iter/s)": 1.531169 }, { "acc": 0.99083338, "epoch": 46.09796109678931, "grad_norm": 3.866443157196045, "learning_rate": 1.6566048631374104e-07, "loss": 0.04696999, "memory(GiB)": 13.7, "step": 98350, "train_speed(iter/s)": 1.531174 }, { "acc": 0.98395834, "epoch": 46.10030466369815, "grad_norm": 0.004478150513023138, "learning_rate": 1.654632545222107e-07, "loss": 0.03677964, "memory(GiB)": 13.7, "step": 98355, "train_speed(iter/s)": 1.531175 }, { "acc": 0.99375, "epoch": 46.10264823060698, "grad_norm": 0.0009918182622641325, "learning_rate": 1.6526613894908013e-07, "loss": 0.02581184, "memory(GiB)": 13.7, "step": 98360, "train_speed(iter/s)": 1.531175 }, { "acc": 0.98500004, "epoch": 46.10499179751582, "grad_norm": 1.1207103729248047, "learning_rate": 1.6506913959908569e-07, "loss": 0.04265345, "memory(GiB)": 13.7, "step": 98365, "train_speed(iter/s)": 1.531177 }, { "acc": 0.98475418, "epoch": 46.10733536442466, "grad_norm": 2.1591663360595703, "learning_rate": 1.6487225647696478e-07, "loss": 0.02839024, "memory(GiB)": 13.7, "step": 98370, "train_speed(iter/s)": 1.531177 }, { "acc": 0.98217258, "epoch": 46.10967893133349, "grad_norm": 3.5027453899383545, "learning_rate": 1.6467548958744761e-07, "loss": 0.04814882, "memory(GiB)": 13.7, "step": 98375, "train_speed(iter/s)": 1.531177 }, { "acc": 0.9916667, "epoch": 46.112022498242325, "grad_norm": 0.01239327248185873, "learning_rate": 1.6447883893526543e-07, "loss": 0.0300444, "memory(GiB)": 13.7, "step": 98380, "train_speed(iter/s)": 1.531177 }, { "acc": 0.98416662, "epoch": 46.11436606515116, "grad_norm": 0.0023816560860723257, "learning_rate": 1.6428230452514452e-07, "loss": 0.04777941, "memory(GiB)": 13.7, "step": 98385, "train_speed(iter/s)": 1.531178 }, { "acc": 1.0, "epoch": 46.116709632059994, "grad_norm": 3.8134100437164307, "learning_rate": 1.6408588636180794e-07, "loss": 0.00795277, "memory(GiB)": 13.7, "step": 98390, "train_speed(iter/s)": 1.531178 }, { "acc": 0.97836809, "epoch": 46.11905319896883, "grad_norm": 2.9011826515197754, "learning_rate": 1.6388958444997857e-07, "loss": 0.05695321, "memory(GiB)": 13.7, "step": 98395, "train_speed(iter/s)": 1.531175 }, { "acc": 0.99385414, "epoch": 46.12139676587766, "grad_norm": 3.638279438018799, "learning_rate": 1.6369339879437338e-07, "loss": 0.04273369, "memory(GiB)": 13.7, "step": 98400, "train_speed(iter/s)": 1.531178 }, { "acc": 0.9875, "epoch": 46.123740332786504, "grad_norm": 0.9095317721366882, "learning_rate": 1.634973293997091e-07, "loss": 0.02718896, "memory(GiB)": 13.7, "step": 98405, "train_speed(iter/s)": 1.531182 }, { "acc": 0.9802084, "epoch": 46.12608389969534, "grad_norm": 4.923497676849365, "learning_rate": 1.6330137627069826e-07, "loss": 0.02977266, "memory(GiB)": 13.7, "step": 98410, "train_speed(iter/s)": 1.531187 }, { "acc": 0.99125004, "epoch": 46.12842746660417, "grad_norm": 0.0024436237290501595, "learning_rate": 1.6310553941205104e-07, "loss": 0.0191226, "memory(GiB)": 13.7, "step": 98415, "train_speed(iter/s)": 1.531188 }, { "acc": 0.99133148, "epoch": 46.13077103351301, "grad_norm": 0.003250661538913846, "learning_rate": 1.6290981882847432e-07, "loss": 0.01434719, "memory(GiB)": 13.7, "step": 98420, "train_speed(iter/s)": 1.53119 }, { "acc": 0.9895834, "epoch": 46.13311460042184, "grad_norm": 4.124456882476807, "learning_rate": 1.6271421452467271e-07, "loss": 0.03398767, "memory(GiB)": 13.7, "step": 98425, "train_speed(iter/s)": 1.531197 }, { "acc": 0.98746109, "epoch": 46.135458167330675, "grad_norm": 2.660229206085205, "learning_rate": 1.625187265053487e-07, "loss": 0.05354332, "memory(GiB)": 13.7, "step": 98430, "train_speed(iter/s)": 1.531199 }, { "acc": 0.9885417, "epoch": 46.13780173423951, "grad_norm": 2.838791847229004, "learning_rate": 1.623233547751997e-07, "loss": 0.02396756, "memory(GiB)": 13.7, "step": 98435, "train_speed(iter/s)": 1.531199 }, { "acc": 0.99312496, "epoch": 46.14014530114835, "grad_norm": 1.7721694707870483, "learning_rate": 1.6212809933892374e-07, "loss": 0.02125939, "memory(GiB)": 13.7, "step": 98440, "train_speed(iter/s)": 1.531203 }, { "acc": 0.98604164, "epoch": 46.142488868057185, "grad_norm": 2.3741610050201416, "learning_rate": 1.6193296020121152e-07, "loss": 0.04927319, "memory(GiB)": 13.7, "step": 98445, "train_speed(iter/s)": 1.531204 }, { "acc": 0.98536701, "epoch": 46.14483243496602, "grad_norm": 5.670558452606201, "learning_rate": 1.6173793736675613e-07, "loss": 0.04185238, "memory(GiB)": 13.7, "step": 98450, "train_speed(iter/s)": 1.531207 }, { "acc": 0.9856102, "epoch": 46.147176001874854, "grad_norm": 2.277284860610962, "learning_rate": 1.615430308402433e-07, "loss": 0.04389368, "memory(GiB)": 13.7, "step": 98455, "train_speed(iter/s)": 1.53121 }, { "acc": 0.98986111, "epoch": 46.14951956878369, "grad_norm": 0.9862379431724548, "learning_rate": 1.6134824062635934e-07, "loss": 0.02598182, "memory(GiB)": 13.7, "step": 98460, "train_speed(iter/s)": 1.531214 }, { "acc": 0.98199978, "epoch": 46.15186313569252, "grad_norm": 0.004448153544217348, "learning_rate": 1.6115356672978623e-07, "loss": 0.03449811, "memory(GiB)": 13.7, "step": 98465, "train_speed(iter/s)": 1.531211 }, { "acc": 0.98916664, "epoch": 46.15420670260136, "grad_norm": 1.6213736534118652, "learning_rate": 1.609590091552019e-07, "loss": 0.05932807, "memory(GiB)": 13.7, "step": 98470, "train_speed(iter/s)": 1.531218 }, { "acc": 0.9973732, "epoch": 46.15655026951019, "grad_norm": 2.1092722415924072, "learning_rate": 1.60764567907285e-07, "loss": 0.02929646, "memory(GiB)": 13.7, "step": 98475, "train_speed(iter/s)": 1.531223 }, { "acc": 0.98008928, "epoch": 46.15889383641903, "grad_norm": 4.3317437171936035, "learning_rate": 1.6057024299070738e-07, "loss": 0.04187703, "memory(GiB)": 13.7, "step": 98480, "train_speed(iter/s)": 1.531225 }, { "acc": 0.98395844, "epoch": 46.16123740332787, "grad_norm": 3.211949586868286, "learning_rate": 1.6037603441014095e-07, "loss": 0.02085028, "memory(GiB)": 13.7, "step": 98485, "train_speed(iter/s)": 1.53123 }, { "acc": 0.9885417, "epoch": 46.1635809702367, "grad_norm": 0.9747305512428284, "learning_rate": 1.601819421702526e-07, "loss": 0.03266812, "memory(GiB)": 13.7, "step": 98490, "train_speed(iter/s)": 1.531233 }, { "acc": 0.98962173, "epoch": 46.165924537145536, "grad_norm": 3.569484233856201, "learning_rate": 1.5998796627570986e-07, "loss": 0.03139058, "memory(GiB)": 13.7, "step": 98495, "train_speed(iter/s)": 1.531238 }, { "acc": 0.97311954, "epoch": 46.16826810405437, "grad_norm": 2.4857189655303955, "learning_rate": 1.597941067311735e-07, "loss": 0.04272242, "memory(GiB)": 13.7, "step": 98500, "train_speed(iter/s)": 1.531244 }, { "acc": 0.9766964, "epoch": 46.170611670963204, "grad_norm": 3.2666685581207275, "learning_rate": 1.5960036354130487e-07, "loss": 0.04964266, "memory(GiB)": 13.7, "step": 98505, "train_speed(iter/s)": 1.531245 }, { "acc": 0.9864584, "epoch": 46.17295523787204, "grad_norm": 4.136808395385742, "learning_rate": 1.594067367107592e-07, "loss": 0.02961177, "memory(GiB)": 13.7, "step": 98510, "train_speed(iter/s)": 1.53125 }, { "acc": 0.98499994, "epoch": 46.17529880478088, "grad_norm": 2.0817277431488037, "learning_rate": 1.5921322624419072e-07, "loss": 0.02980377, "memory(GiB)": 13.7, "step": 98515, "train_speed(iter/s)": 1.53125 }, { "acc": 0.98633928, "epoch": 46.177642371689714, "grad_norm": 0.9522084593772888, "learning_rate": 1.5901983214625242e-07, "loss": 0.01855539, "memory(GiB)": 13.7, "step": 98520, "train_speed(iter/s)": 1.531251 }, { "acc": 0.98258934, "epoch": 46.17998593859855, "grad_norm": 4.704745292663574, "learning_rate": 1.588265544215912e-07, "loss": 0.03346094, "memory(GiB)": 13.7, "step": 98525, "train_speed(iter/s)": 1.531251 }, { "acc": 0.98249998, "epoch": 46.18232950550738, "grad_norm": 3.6527798175811768, "learning_rate": 1.58633393074854e-07, "loss": 0.03327577, "memory(GiB)": 13.7, "step": 98530, "train_speed(iter/s)": 1.531251 }, { "acc": 0.98874998, "epoch": 46.18467307241622, "grad_norm": 3.7798683643341064, "learning_rate": 1.5844034811068333e-07, "loss": 0.02247468, "memory(GiB)": 13.7, "step": 98535, "train_speed(iter/s)": 1.531253 }, { "acc": 0.99821434, "epoch": 46.18701663932505, "grad_norm": 0.3222319483757019, "learning_rate": 1.5824741953371886e-07, "loss": 0.00707719, "memory(GiB)": 13.7, "step": 98540, "train_speed(iter/s)": 1.531252 }, { "acc": 0.98842258, "epoch": 46.189360206233886, "grad_norm": 4.605478286743164, "learning_rate": 1.5805460734859818e-07, "loss": 0.02726866, "memory(GiB)": 13.7, "step": 98545, "train_speed(iter/s)": 1.531252 }, { "acc": 0.99196434, "epoch": 46.19170377314272, "grad_norm": 3.006765604019165, "learning_rate": 1.5786191155995647e-07, "loss": 0.02026783, "memory(GiB)": 13.7, "step": 98550, "train_speed(iter/s)": 1.531255 }, { "acc": 0.97313442, "epoch": 46.19404734005156, "grad_norm": 3.7198641300201416, "learning_rate": 1.5766933217242565e-07, "loss": 0.04571607, "memory(GiB)": 13.7, "step": 98555, "train_speed(iter/s)": 1.531258 }, { "acc": 0.98604164, "epoch": 46.196390906960396, "grad_norm": 7.455879211425781, "learning_rate": 1.5747686919063334e-07, "loss": 0.04785559, "memory(GiB)": 13.7, "step": 98560, "train_speed(iter/s)": 1.531257 }, { "acc": 0.99541664, "epoch": 46.19873447386923, "grad_norm": 2.2202184200286865, "learning_rate": 1.5728452261920695e-07, "loss": 0.02046131, "memory(GiB)": 13.7, "step": 98565, "train_speed(iter/s)": 1.531259 }, { "acc": 0.97624998, "epoch": 46.201078040778064, "grad_norm": 2.5796613693237305, "learning_rate": 1.5709229246276845e-07, "loss": 0.05489126, "memory(GiB)": 13.7, "step": 98570, "train_speed(iter/s)": 1.531262 }, { "acc": 0.99250002, "epoch": 46.2034216076869, "grad_norm": 2.6035990715026855, "learning_rate": 1.5690017872594037e-07, "loss": 0.022164, "memory(GiB)": 13.7, "step": 98575, "train_speed(iter/s)": 1.531262 }, { "acc": 0.98381062, "epoch": 46.20576517459573, "grad_norm": 4.631632328033447, "learning_rate": 1.567081814133391e-07, "loss": 0.04843367, "memory(GiB)": 13.7, "step": 98580, "train_speed(iter/s)": 1.531265 }, { "acc": 0.97979164, "epoch": 46.20810874150457, "grad_norm": 2.4525532722473145, "learning_rate": 1.56516300529581e-07, "loss": 0.04374003, "memory(GiB)": 13.7, "step": 98585, "train_speed(iter/s)": 1.531267 }, { "acc": 0.98703375, "epoch": 46.21045230841341, "grad_norm": 5.355332374572754, "learning_rate": 1.5632453607927697e-07, "loss": 0.04013524, "memory(GiB)": 13.7, "step": 98590, "train_speed(iter/s)": 1.531268 }, { "acc": 0.98145828, "epoch": 46.21279587532224, "grad_norm": 0.0015631234273314476, "learning_rate": 1.5613288806703618e-07, "loss": 0.03439018, "memory(GiB)": 13.7, "step": 98595, "train_speed(iter/s)": 1.531266 }, { "acc": 0.97770824, "epoch": 46.21513944223108, "grad_norm": 2.0187652111053467, "learning_rate": 1.5594135649746613e-07, "loss": 0.07120019, "memory(GiB)": 13.7, "step": 98600, "train_speed(iter/s)": 1.531272 }, { "acc": 0.9894886, "epoch": 46.21748300913991, "grad_norm": 0.24555586278438568, "learning_rate": 1.557499413751694e-07, "loss": 0.02021267, "memory(GiB)": 13.7, "step": 98605, "train_speed(iter/s)": 1.531277 }, { "acc": 0.99229164, "epoch": 46.219826576048746, "grad_norm": 1.9732770919799805, "learning_rate": 1.55558642704749e-07, "loss": 0.01580689, "memory(GiB)": 13.7, "step": 98610, "train_speed(iter/s)": 1.53128 }, { "acc": 0.97881947, "epoch": 46.22217014295758, "grad_norm": 0.5868748426437378, "learning_rate": 1.553674604908014e-07, "loss": 0.05389158, "memory(GiB)": 13.7, "step": 98615, "train_speed(iter/s)": 1.531284 }, { "acc": 0.98594704, "epoch": 46.224513709866415, "grad_norm": 1.8016479015350342, "learning_rate": 1.55176394737923e-07, "loss": 0.03474553, "memory(GiB)": 13.7, "step": 98620, "train_speed(iter/s)": 1.531286 }, { "acc": 0.9905921, "epoch": 46.22685727677525, "grad_norm": 0.23013482987880707, "learning_rate": 1.549854454507052e-07, "loss": 0.04504434, "memory(GiB)": 13.7, "step": 98625, "train_speed(iter/s)": 1.53129 }, { "acc": 0.98467264, "epoch": 46.22920084368409, "grad_norm": 3.089484691619873, "learning_rate": 1.5479461263373946e-07, "loss": 0.04019986, "memory(GiB)": 13.7, "step": 98630, "train_speed(iter/s)": 1.531291 }, { "acc": 0.97354164, "epoch": 46.231544410592925, "grad_norm": 4.163518905639648, "learning_rate": 1.546038962916111e-07, "loss": 0.04638307, "memory(GiB)": 13.7, "step": 98635, "train_speed(iter/s)": 1.531293 }, { "acc": 0.98666668, "epoch": 46.23388797750176, "grad_norm": 4.243396282196045, "learning_rate": 1.544132964289054e-07, "loss": 0.01672482, "memory(GiB)": 13.7, "step": 98640, "train_speed(iter/s)": 1.531295 }, { "acc": 0.98270836, "epoch": 46.23623154441059, "grad_norm": 3.4378864765167236, "learning_rate": 1.5422281305020383e-07, "loss": 0.0597767, "memory(GiB)": 13.7, "step": 98645, "train_speed(iter/s)": 1.5313 }, { "acc": 0.98488102, "epoch": 46.23857511131943, "grad_norm": 2.491130828857422, "learning_rate": 1.5403244616008392e-07, "loss": 0.04425046, "memory(GiB)": 13.7, "step": 98650, "train_speed(iter/s)": 1.531301 }, { "acc": 0.9864584, "epoch": 46.24091867822826, "grad_norm": 3.357647180557251, "learning_rate": 1.5384219576312264e-07, "loss": 0.02741447, "memory(GiB)": 13.7, "step": 98655, "train_speed(iter/s)": 1.5313 }, { "acc": 0.98779764, "epoch": 46.243262245137096, "grad_norm": 4.051414489746094, "learning_rate": 1.5365206186389148e-07, "loss": 0.03341957, "memory(GiB)": 13.7, "step": 98660, "train_speed(iter/s)": 1.5313 }, { "acc": 0.99099903, "epoch": 46.24560581204594, "grad_norm": 2.2663440704345703, "learning_rate": 1.5346204446696238e-07, "loss": 0.04734986, "memory(GiB)": 13.7, "step": 98665, "train_speed(iter/s)": 1.531303 }, { "acc": 0.97278843, "epoch": 46.24794937895477, "grad_norm": 6.40712833404541, "learning_rate": 1.5327214357690185e-07, "loss": 0.04251805, "memory(GiB)": 13.7, "step": 98670, "train_speed(iter/s)": 1.531306 }, { "acc": 0.98923607, "epoch": 46.250292945863606, "grad_norm": 4.89232873916626, "learning_rate": 1.530823591982746e-07, "loss": 0.03364033, "memory(GiB)": 13.7, "step": 98675, "train_speed(iter/s)": 1.531307 }, { "acc": 0.96375008, "epoch": 46.25263651277244, "grad_norm": 6.712240695953369, "learning_rate": 1.5289269133564327e-07, "loss": 0.07551253, "memory(GiB)": 13.7, "step": 98680, "train_speed(iter/s)": 1.53131 }, { "acc": 0.98217258, "epoch": 46.254980079681275, "grad_norm": 2.407616376876831, "learning_rate": 1.527031399935648e-07, "loss": 0.0304067, "memory(GiB)": 13.7, "step": 98685, "train_speed(iter/s)": 1.53131 }, { "acc": 0.99250002, "epoch": 46.25732364659011, "grad_norm": 1.0468007326126099, "learning_rate": 1.5251370517659734e-07, "loss": 0.01247445, "memory(GiB)": 13.7, "step": 98690, "train_speed(iter/s)": 1.531312 }, { "acc": 0.99196434, "epoch": 46.25966721349894, "grad_norm": 0.002415256341919303, "learning_rate": 1.5232438688929235e-07, "loss": 0.01963129, "memory(GiB)": 13.7, "step": 98695, "train_speed(iter/s)": 1.531313 }, { "acc": 0.98562508, "epoch": 46.26201078040778, "grad_norm": 0.9424958229064941, "learning_rate": 1.5213518513620294e-07, "loss": 0.03860738, "memory(GiB)": 13.7, "step": 98700, "train_speed(iter/s)": 1.531313 }, { "acc": 0.98125, "epoch": 46.26435434731662, "grad_norm": 2.176046848297119, "learning_rate": 1.5194609992187502e-07, "loss": 0.04100653, "memory(GiB)": 13.7, "step": 98705, "train_speed(iter/s)": 1.531311 }, { "acc": 0.9979167, "epoch": 46.26669791422545, "grad_norm": 0.0008457516087219119, "learning_rate": 1.5175713125085455e-07, "loss": 0.02313581, "memory(GiB)": 13.7, "step": 98710, "train_speed(iter/s)": 1.53131 }, { "acc": 0.971632, "epoch": 46.26904148113429, "grad_norm": 4.778509616851807, "learning_rate": 1.5156827912768236e-07, "loss": 0.0546882, "memory(GiB)": 13.7, "step": 98715, "train_speed(iter/s)": 1.531312 }, { "acc": 0.99383926, "epoch": 46.27138504804312, "grad_norm": 4.642368316650391, "learning_rate": 1.5137954355689942e-07, "loss": 0.03376674, "memory(GiB)": 13.7, "step": 98720, "train_speed(iter/s)": 1.531315 }, { "acc": 0.99375, "epoch": 46.273728614951956, "grad_norm": 2.919858455657959, "learning_rate": 1.5119092454304218e-07, "loss": 0.03025878, "memory(GiB)": 13.7, "step": 98725, "train_speed(iter/s)": 1.531317 }, { "acc": 0.97979164, "epoch": 46.27607218186079, "grad_norm": 4.939823627471924, "learning_rate": 1.5100242209064268e-07, "loss": 0.03977304, "memory(GiB)": 13.7, "step": 98730, "train_speed(iter/s)": 1.531314 }, { "acc": 0.978125, "epoch": 46.278415748769625, "grad_norm": 3.471287965774536, "learning_rate": 1.5081403620423406e-07, "loss": 0.04430438, "memory(GiB)": 13.7, "step": 98735, "train_speed(iter/s)": 1.531319 }, { "acc": 0.98469696, "epoch": 46.28075931567846, "grad_norm": 2.377617359161377, "learning_rate": 1.5062576688834334e-07, "loss": 0.07440203, "memory(GiB)": 13.7, "step": 98740, "train_speed(iter/s)": 1.531323 }, { "acc": 0.990625, "epoch": 46.2831028825873, "grad_norm": 0.005917293485254049, "learning_rate": 1.5043761414749593e-07, "loss": 0.01644421, "memory(GiB)": 13.7, "step": 98745, "train_speed(iter/s)": 1.531323 }, { "acc": 0.98249998, "epoch": 46.285446449496135, "grad_norm": 3.750133514404297, "learning_rate": 1.5024957798621442e-07, "loss": 0.03932337, "memory(GiB)": 13.7, "step": 98750, "train_speed(iter/s)": 1.531324 }, { "acc": 0.98625002, "epoch": 46.28779001640497, "grad_norm": 3.387923002243042, "learning_rate": 1.5006165840901915e-07, "loss": 0.03231233, "memory(GiB)": 13.7, "step": 98755, "train_speed(iter/s)": 1.531326 }, { "acc": 0.98562508, "epoch": 46.2901335833138, "grad_norm": 0.0003390852943994105, "learning_rate": 1.4987385542042662e-07, "loss": 0.02058141, "memory(GiB)": 13.7, "step": 98760, "train_speed(iter/s)": 1.531328 }, { "acc": 0.98571434, "epoch": 46.29247715022264, "grad_norm": 3.2159993648529053, "learning_rate": 1.4968616902495114e-07, "loss": 0.02794706, "memory(GiB)": 13.7, "step": 98765, "train_speed(iter/s)": 1.531333 }, { "acc": 0.99032192, "epoch": 46.29482071713147, "grad_norm": 0.06946742534637451, "learning_rate": 1.4949859922710415e-07, "loss": 0.03228802, "memory(GiB)": 13.7, "step": 98770, "train_speed(iter/s)": 1.531336 }, { "acc": 0.98812504, "epoch": 46.297164284040306, "grad_norm": 2.2376132011413574, "learning_rate": 1.493111460313933e-07, "loss": 0.02657669, "memory(GiB)": 13.7, "step": 98775, "train_speed(iter/s)": 1.531335 }, { "acc": 0.98913689, "epoch": 46.29950785094915, "grad_norm": 2.4663631916046143, "learning_rate": 1.4912380944232564e-07, "loss": 0.03233697, "memory(GiB)": 13.7, "step": 98780, "train_speed(iter/s)": 1.531336 }, { "acc": 0.99154758, "epoch": 46.30185141785798, "grad_norm": 4.632203578948975, "learning_rate": 1.4893658946440323e-07, "loss": 0.04102063, "memory(GiB)": 13.7, "step": 98785, "train_speed(iter/s)": 1.531334 }, { "acc": 0.98723297, "epoch": 46.304194984766816, "grad_norm": 2.919468641281128, "learning_rate": 1.48749486102127e-07, "loss": 0.04086512, "memory(GiB)": 13.7, "step": 98790, "train_speed(iter/s)": 1.531335 }, { "acc": 0.97833328, "epoch": 46.30653855167565, "grad_norm": 1.8473917245864868, "learning_rate": 1.4856249935999346e-07, "loss": 0.0454942, "memory(GiB)": 13.7, "step": 98795, "train_speed(iter/s)": 1.531337 }, { "acc": 0.99187498, "epoch": 46.308882118584485, "grad_norm": 2.1853630542755127, "learning_rate": 1.4837562924249802e-07, "loss": 0.02722357, "memory(GiB)": 13.7, "step": 98800, "train_speed(iter/s)": 1.531338 }, { "acc": 0.98395824, "epoch": 46.31122568549332, "grad_norm": 1.948486089706421, "learning_rate": 1.481888757541322e-07, "loss": 0.05458339, "memory(GiB)": 13.7, "step": 98805, "train_speed(iter/s)": 1.53134 }, { "acc": 0.98936958, "epoch": 46.313569252402154, "grad_norm": 2.8680403232574463, "learning_rate": 1.4800223889938363e-07, "loss": 0.0264792, "memory(GiB)": 13.7, "step": 98810, "train_speed(iter/s)": 1.531341 }, { "acc": 0.98319445, "epoch": 46.31591281931099, "grad_norm": 2.2574994564056396, "learning_rate": 1.47815718682741e-07, "loss": 0.04733728, "memory(GiB)": 13.7, "step": 98815, "train_speed(iter/s)": 1.531342 }, { "acc": 0.9926754, "epoch": 46.31825638621983, "grad_norm": 2.581653594970703, "learning_rate": 1.4762931510868532e-07, "loss": 0.03319973, "memory(GiB)": 13.7, "step": 98820, "train_speed(iter/s)": 1.531345 }, { "acc": 0.99375, "epoch": 46.320599953128664, "grad_norm": 1.5030728578567505, "learning_rate": 1.4744302818169812e-07, "loss": 0.02930264, "memory(GiB)": 13.7, "step": 98825, "train_speed(iter/s)": 1.531349 }, { "acc": 0.99375, "epoch": 46.3229435200375, "grad_norm": 2.9558207988739014, "learning_rate": 1.4725685790625703e-07, "loss": 0.02907008, "memory(GiB)": 13.7, "step": 98830, "train_speed(iter/s)": 1.531353 }, { "acc": 0.98472672, "epoch": 46.32528708694633, "grad_norm": 3.466470956802368, "learning_rate": 1.4707080428683692e-07, "loss": 0.04508302, "memory(GiB)": 13.7, "step": 98835, "train_speed(iter/s)": 1.531356 }, { "acc": 0.98133383, "epoch": 46.32763065385517, "grad_norm": 2.049100399017334, "learning_rate": 1.4688486732790927e-07, "loss": 0.04429476, "memory(GiB)": 13.7, "step": 98840, "train_speed(iter/s)": 1.531358 }, { "acc": 0.99184532, "epoch": 46.329974220764, "grad_norm": 2.9737398624420166, "learning_rate": 1.466990470339451e-07, "loss": 0.01605976, "memory(GiB)": 13.7, "step": 98845, "train_speed(iter/s)": 1.53136 }, { "acc": 0.97145834, "epoch": 46.332317787672835, "grad_norm": 4.174506187438965, "learning_rate": 1.4651334340940984e-07, "loss": 0.06135302, "memory(GiB)": 13.7, "step": 98850, "train_speed(iter/s)": 1.531366 }, { "acc": 0.98500004, "epoch": 46.33466135458168, "grad_norm": 4.325431823730469, "learning_rate": 1.4632775645876668e-07, "loss": 0.03133119, "memory(GiB)": 13.7, "step": 98855, "train_speed(iter/s)": 1.531367 }, { "acc": 0.99019375, "epoch": 46.33700492149051, "grad_norm": 3.3571078777313232, "learning_rate": 1.4614228618647768e-07, "loss": 0.03989324, "memory(GiB)": 13.7, "step": 98860, "train_speed(iter/s)": 1.531371 }, { "acc": 0.99437504, "epoch": 46.339348488399345, "grad_norm": 2.6901907920837402, "learning_rate": 1.459569325969989e-07, "loss": 0.02176113, "memory(GiB)": 13.7, "step": 98865, "train_speed(iter/s)": 1.531371 }, { "acc": 0.99020834, "epoch": 46.34169205530818, "grad_norm": 1.7234331369400024, "learning_rate": 1.4577169569478854e-07, "loss": 0.03386998, "memory(GiB)": 13.7, "step": 98870, "train_speed(iter/s)": 1.531374 }, { "acc": 0.99821434, "epoch": 46.344035622217014, "grad_norm": 2.7244300842285156, "learning_rate": 1.4558657548429698e-07, "loss": 0.02088333, "memory(GiB)": 13.7, "step": 98875, "train_speed(iter/s)": 1.53138 }, { "acc": 0.97833328, "epoch": 46.34637918912585, "grad_norm": 1.8105171918869019, "learning_rate": 1.454015719699747e-07, "loss": 0.0356774, "memory(GiB)": 13.7, "step": 98880, "train_speed(iter/s)": 1.531384 }, { "acc": 0.984375, "epoch": 46.34872275603468, "grad_norm": 2.237089157104492, "learning_rate": 1.452166851562688e-07, "loss": 0.03455253, "memory(GiB)": 13.7, "step": 98885, "train_speed(iter/s)": 1.531389 }, { "acc": 0.99229164, "epoch": 46.35106632294352, "grad_norm": 3.4448652267456055, "learning_rate": 1.4503191504762197e-07, "loss": 0.03392878, "memory(GiB)": 13.7, "step": 98890, "train_speed(iter/s)": 1.53139 }, { "acc": 0.99113092, "epoch": 46.35340988985236, "grad_norm": 2.399522304534912, "learning_rate": 1.448472616484774e-07, "loss": 0.02109208, "memory(GiB)": 13.7, "step": 98895, "train_speed(iter/s)": 1.531388 }, { "acc": 0.9802084, "epoch": 46.35575345676119, "grad_norm": 4.330002784729004, "learning_rate": 1.446627249632711e-07, "loss": 0.05151381, "memory(GiB)": 13.7, "step": 98900, "train_speed(iter/s)": 1.531392 }, { "acc": 0.97374992, "epoch": 46.35809702367003, "grad_norm": 5.306870460510254, "learning_rate": 1.444783049964419e-07, "loss": 0.07027482, "memory(GiB)": 13.7, "step": 98905, "train_speed(iter/s)": 1.531401 }, { "acc": 0.98604164, "epoch": 46.36044059057886, "grad_norm": 2.3345205783843994, "learning_rate": 1.442940017524202e-07, "loss": 0.02031827, "memory(GiB)": 13.7, "step": 98910, "train_speed(iter/s)": 1.531403 }, { "acc": 0.97666664, "epoch": 46.362784157487695, "grad_norm": 4.076940059661865, "learning_rate": 1.4410981523563705e-07, "loss": 0.07483372, "memory(GiB)": 13.7, "step": 98915, "train_speed(iter/s)": 1.531405 }, { "acc": 0.99548607, "epoch": 46.36512772439653, "grad_norm": 3.1687676906585693, "learning_rate": 1.4392574545051956e-07, "loss": 0.00546578, "memory(GiB)": 13.7, "step": 98920, "train_speed(iter/s)": 1.531408 }, { "acc": 0.97510414, "epoch": 46.367471291305364, "grad_norm": 3.538404703140259, "learning_rate": 1.437417924014915e-07, "loss": 0.05309908, "memory(GiB)": 13.7, "step": 98925, "train_speed(iter/s)": 1.531413 }, { "acc": 0.98425598, "epoch": 46.369814858214205, "grad_norm": 2.4153635501861572, "learning_rate": 1.435579560929756e-07, "loss": 0.05273359, "memory(GiB)": 13.7, "step": 98930, "train_speed(iter/s)": 1.531416 }, { "acc": 0.9967803, "epoch": 46.37215842512304, "grad_norm": 0.00176823150832206, "learning_rate": 1.433742365293901e-07, "loss": 0.02297242, "memory(GiB)": 13.7, "step": 98935, "train_speed(iter/s)": 1.531416 }, { "acc": 0.97900248, "epoch": 46.374501992031874, "grad_norm": 3.3837788105010986, "learning_rate": 1.43190633715151e-07, "loss": 0.0494962, "memory(GiB)": 13.7, "step": 98940, "train_speed(iter/s)": 1.531421 }, { "acc": 0.99354172, "epoch": 46.37684555894071, "grad_norm": 0.9865280389785767, "learning_rate": 1.4300714765467156e-07, "loss": 0.0275451, "memory(GiB)": 13.7, "step": 98945, "train_speed(iter/s)": 1.531421 }, { "acc": 0.98625002, "epoch": 46.37918912584954, "grad_norm": 0.005017549265176058, "learning_rate": 1.428237783523623e-07, "loss": 0.0333853, "memory(GiB)": 13.7, "step": 98950, "train_speed(iter/s)": 1.531423 }, { "acc": 0.98594694, "epoch": 46.38153269275838, "grad_norm": 3.873159885406494, "learning_rate": 1.4264052581262974e-07, "loss": 0.03472532, "memory(GiB)": 13.7, "step": 98955, "train_speed(iter/s)": 1.531426 }, { "acc": 0.97559528, "epoch": 46.38387625966721, "grad_norm": 2.195342779159546, "learning_rate": 1.4245739003988104e-07, "loss": 0.04549292, "memory(GiB)": 13.7, "step": 98960, "train_speed(iter/s)": 1.53143 }, { "acc": 0.98424683, "epoch": 46.386219826576045, "grad_norm": 3.812563180923462, "learning_rate": 1.422743710385156e-07, "loss": 0.0456936, "memory(GiB)": 13.7, "step": 98965, "train_speed(iter/s)": 1.531434 }, { "acc": 0.97963333, "epoch": 46.38856339348489, "grad_norm": 3.181253433227539, "learning_rate": 1.4209146881293503e-07, "loss": 0.04823224, "memory(GiB)": 13.7, "step": 98970, "train_speed(iter/s)": 1.531438 }, { "acc": 0.98333702, "epoch": 46.39090696039372, "grad_norm": 2.0217957496643066, "learning_rate": 1.4190868336753362e-07, "loss": 0.04574721, "memory(GiB)": 13.7, "step": 98975, "train_speed(iter/s)": 1.531441 }, { "acc": 0.98447304, "epoch": 46.393250527302555, "grad_norm": 2.5597851276397705, "learning_rate": 1.4172601470670588e-07, "loss": 0.03291577, "memory(GiB)": 13.7, "step": 98980, "train_speed(iter/s)": 1.531447 }, { "acc": 0.97729168, "epoch": 46.39559409421139, "grad_norm": 5.091071128845215, "learning_rate": 1.4154346283484273e-07, "loss": 0.03958102, "memory(GiB)": 13.7, "step": 98985, "train_speed(iter/s)": 1.531447 }, { "acc": 0.98514881, "epoch": 46.397937661120224, "grad_norm": 6.499744892120361, "learning_rate": 1.4136102775633087e-07, "loss": 0.03299097, "memory(GiB)": 13.7, "step": 98990, "train_speed(iter/s)": 1.531446 }, { "acc": 0.97937498, "epoch": 46.40028122802906, "grad_norm": 2.205457925796509, "learning_rate": 1.411787094755574e-07, "loss": 0.04173175, "memory(GiB)": 13.7, "step": 98995, "train_speed(iter/s)": 1.531447 }, { "acc": 0.99115524, "epoch": 46.40262479493789, "grad_norm": 4.125540256500244, "learning_rate": 1.4099650799690287e-07, "loss": 0.02216343, "memory(GiB)": 13.7, "step": 99000, "train_speed(iter/s)": 1.531448 }, { "acc": 0.9864584, "epoch": 46.404968361846734, "grad_norm": 1.797236680984497, "learning_rate": 1.408144233247483e-07, "loss": 0.02312325, "memory(GiB)": 13.7, "step": 99005, "train_speed(iter/s)": 1.531448 }, { "acc": 0.98267365, "epoch": 46.40731192875557, "grad_norm": 2.579836368560791, "learning_rate": 1.406324554634698e-07, "loss": 0.03271402, "memory(GiB)": 13.7, "step": 99010, "train_speed(iter/s)": 1.531449 }, { "acc": 0.9875, "epoch": 46.4096554956644, "grad_norm": 3.7683777809143066, "learning_rate": 1.4045060441744006e-07, "loss": 0.02159545, "memory(GiB)": 13.7, "step": 99015, "train_speed(iter/s)": 1.531453 }, { "acc": 0.97875004, "epoch": 46.41199906257324, "grad_norm": 2.9342734813690186, "learning_rate": 1.4026887019103235e-07, "loss": 0.04630912, "memory(GiB)": 13.7, "step": 99020, "train_speed(iter/s)": 1.531455 }, { "acc": 0.98916664, "epoch": 46.41434262948207, "grad_norm": 1.7067618370056152, "learning_rate": 1.4008725278861334e-07, "loss": 0.02986054, "memory(GiB)": 13.7, "step": 99025, "train_speed(iter/s)": 1.531457 }, { "acc": 0.98988094, "epoch": 46.416686196390906, "grad_norm": 0.03822698816657066, "learning_rate": 1.3990575221454964e-07, "loss": 0.02187673, "memory(GiB)": 13.7, "step": 99030, "train_speed(iter/s)": 1.531461 }, { "acc": 0.97240524, "epoch": 46.41902976329974, "grad_norm": 5.002896308898926, "learning_rate": 1.3972436847320287e-07, "loss": 0.05160875, "memory(GiB)": 13.7, "step": 99035, "train_speed(iter/s)": 1.531462 }, { "acc": 0.98727684, "epoch": 46.421373330208574, "grad_norm": 2.610480785369873, "learning_rate": 1.3954310156893407e-07, "loss": 0.03793242, "memory(GiB)": 13.7, "step": 99040, "train_speed(iter/s)": 1.531467 }, { "acc": 0.98708334, "epoch": 46.423716897117416, "grad_norm": 0.03505063056945801, "learning_rate": 1.393619515060983e-07, "loss": 0.01440378, "memory(GiB)": 13.7, "step": 99045, "train_speed(iter/s)": 1.531472 }, { "acc": 0.98184528, "epoch": 46.42606046402625, "grad_norm": 2.093982219696045, "learning_rate": 1.3918091828905216e-07, "loss": 0.06131805, "memory(GiB)": 13.7, "step": 99050, "train_speed(iter/s)": 1.531475 }, { "acc": 0.98687496, "epoch": 46.428404030935084, "grad_norm": 1.6645779609680176, "learning_rate": 1.3900000192214613e-07, "loss": 0.03633334, "memory(GiB)": 13.7, "step": 99055, "train_speed(iter/s)": 1.531482 }, { "acc": 0.97654762, "epoch": 46.43074759784392, "grad_norm": 4.994232177734375, "learning_rate": 1.3881920240972856e-07, "loss": 0.06044585, "memory(GiB)": 13.7, "step": 99060, "train_speed(iter/s)": 1.531487 }, { "acc": 0.9927083, "epoch": 46.43309116475275, "grad_norm": 0.014898418448865414, "learning_rate": 1.3863851975614555e-07, "loss": 0.01350328, "memory(GiB)": 13.7, "step": 99065, "train_speed(iter/s)": 1.53149 }, { "acc": 0.9927084, "epoch": 46.43543473166159, "grad_norm": 2.035395383834839, "learning_rate": 1.3845795396573926e-07, "loss": 0.01537183, "memory(GiB)": 13.7, "step": 99070, "train_speed(iter/s)": 1.531494 }, { "acc": 0.98270836, "epoch": 46.43777829857042, "grad_norm": 4.207204341888428, "learning_rate": 1.3827750504285192e-07, "loss": 0.03782459, "memory(GiB)": 13.7, "step": 99075, "train_speed(iter/s)": 1.5315 }, { "acc": 0.98715782, "epoch": 46.44012186547926, "grad_norm": 4.468657493591309, "learning_rate": 1.3809717299181908e-07, "loss": 0.080637, "memory(GiB)": 13.7, "step": 99080, "train_speed(iter/s)": 1.531503 }, { "acc": 0.97666664, "epoch": 46.4424654323881, "grad_norm": 1.636042833328247, "learning_rate": 1.3791695781697625e-07, "loss": 0.05367828, "memory(GiB)": 13.7, "step": 99085, "train_speed(iter/s)": 1.531504 }, { "acc": 0.97520828, "epoch": 46.44480899929693, "grad_norm": 3.073065757751465, "learning_rate": 1.377368595226546e-07, "loss": 0.05305935, "memory(GiB)": 13.7, "step": 99090, "train_speed(iter/s)": 1.531509 }, { "acc": 0.98812504, "epoch": 46.447152566205766, "grad_norm": 3.8165552616119385, "learning_rate": 1.3755687811318404e-07, "loss": 0.02053202, "memory(GiB)": 13.7, "step": 99095, "train_speed(iter/s)": 1.531509 }, { "acc": 0.9875, "epoch": 46.4494961331146, "grad_norm": 3.146855354309082, "learning_rate": 1.373770135928896e-07, "loss": 0.02827134, "memory(GiB)": 13.7, "step": 99100, "train_speed(iter/s)": 1.531514 }, { "acc": 0.98770294, "epoch": 46.451839700023434, "grad_norm": 3.4607162475585938, "learning_rate": 1.3719726596609464e-07, "loss": 0.06859044, "memory(GiB)": 13.7, "step": 99105, "train_speed(iter/s)": 1.531514 }, { "acc": 0.98520298, "epoch": 46.45418326693227, "grad_norm": 2.512037992477417, "learning_rate": 1.3701763523712133e-07, "loss": 0.04280576, "memory(GiB)": 13.7, "step": 99110, "train_speed(iter/s)": 1.531517 }, { "acc": 0.98342257, "epoch": 46.4565268338411, "grad_norm": 3.2185654640197754, "learning_rate": 1.3683812141028583e-07, "loss": 0.04045118, "memory(GiB)": 13.7, "step": 99115, "train_speed(iter/s)": 1.531519 }, { "acc": 0.98005037, "epoch": 46.458870400749944, "grad_norm": 4.509433746337891, "learning_rate": 1.3665872448990367e-07, "loss": 0.04303772, "memory(GiB)": 13.7, "step": 99120, "train_speed(iter/s)": 1.531519 }, { "acc": 0.98770828, "epoch": 46.46121396765878, "grad_norm": 4.159526348114014, "learning_rate": 1.3647944448028654e-07, "loss": 0.02014297, "memory(GiB)": 13.7, "step": 99125, "train_speed(iter/s)": 1.531519 }, { "acc": 0.98312492, "epoch": 46.46355753456761, "grad_norm": 2.553664207458496, "learning_rate": 1.363002813857439e-07, "loss": 0.02204487, "memory(GiB)": 13.7, "step": 99130, "train_speed(iter/s)": 1.53152 }, { "acc": 0.99750004, "epoch": 46.46590110147645, "grad_norm": 0.01267558615654707, "learning_rate": 1.3612123521058296e-07, "loss": 0.00570049, "memory(GiB)": 13.7, "step": 99135, "train_speed(iter/s)": 1.531522 }, { "acc": 0.98604164, "epoch": 46.46824466838528, "grad_norm": 0.9399078488349915, "learning_rate": 1.3594230595910598e-07, "loss": 0.02188696, "memory(GiB)": 13.7, "step": 99140, "train_speed(iter/s)": 1.531526 }, { "acc": 0.98698864, "epoch": 46.470588235294116, "grad_norm": 4.982914447784424, "learning_rate": 1.357634936356152e-07, "loss": 0.02462483, "memory(GiB)": 13.7, "step": 99145, "train_speed(iter/s)": 1.531531 }, { "acc": 0.98425598, "epoch": 46.47293180220295, "grad_norm": 1.8973300457000732, "learning_rate": 1.3558479824440785e-07, "loss": 0.0239098, "memory(GiB)": 13.7, "step": 99150, "train_speed(iter/s)": 1.531535 }, { "acc": 0.98510418, "epoch": 46.475275369111785, "grad_norm": 3.517822742462158, "learning_rate": 1.3540621978978007e-07, "loss": 0.03712279, "memory(GiB)": 13.7, "step": 99155, "train_speed(iter/s)": 1.531536 }, { "acc": 0.99333334, "epoch": 46.477618936020626, "grad_norm": 3.342590808868408, "learning_rate": 1.3522775827602244e-07, "loss": 0.01800222, "memory(GiB)": 13.7, "step": 99160, "train_speed(iter/s)": 1.531539 }, { "acc": 0.98812504, "epoch": 46.47996250292946, "grad_norm": 1.3227602243423462, "learning_rate": 1.3504941370742722e-07, "loss": 0.03298795, "memory(GiB)": 13.7, "step": 99165, "train_speed(iter/s)": 1.531539 }, { "acc": 0.98819447, "epoch": 46.482306069838295, "grad_norm": 2.787853956222534, "learning_rate": 1.348711860882789e-07, "loss": 0.02335103, "memory(GiB)": 13.7, "step": 99170, "train_speed(iter/s)": 1.531541 }, { "acc": 0.9967804, "epoch": 46.48464963674713, "grad_norm": 1.4115129709243774, "learning_rate": 1.34693075422863e-07, "loss": 0.01905275, "memory(GiB)": 13.7, "step": 99175, "train_speed(iter/s)": 1.531544 }, { "acc": 0.9871727, "epoch": 46.48699320365596, "grad_norm": 3.7108705043792725, "learning_rate": 1.3451508171546074e-07, "loss": 0.05756165, "memory(GiB)": 13.7, "step": 99180, "train_speed(iter/s)": 1.531546 }, { "acc": 0.98675594, "epoch": 46.4893367705648, "grad_norm": 5.084631443023682, "learning_rate": 1.343372049703488e-07, "loss": 0.03633834, "memory(GiB)": 13.7, "step": 99185, "train_speed(iter/s)": 1.531548 }, { "acc": 0.98675594, "epoch": 46.49168033747363, "grad_norm": 3.6232359409332275, "learning_rate": 1.3415944519180442e-07, "loss": 0.03191564, "memory(GiB)": 13.7, "step": 99190, "train_speed(iter/s)": 1.531553 }, { "acc": 0.98458338, "epoch": 46.49402390438247, "grad_norm": 5.410519599914551, "learning_rate": 1.339818023840988e-07, "loss": 0.04446053, "memory(GiB)": 13.7, "step": 99195, "train_speed(iter/s)": 1.531557 }, { "acc": 0.97811012, "epoch": 46.49636747129131, "grad_norm": 4.835512638092041, "learning_rate": 1.3380427655150415e-07, "loss": 0.04185079, "memory(GiB)": 13.7, "step": 99200, "train_speed(iter/s)": 1.531561 }, { "acc": 0.98402777, "epoch": 46.49871103820014, "grad_norm": 2.3461313247680664, "learning_rate": 1.3362686769828557e-07, "loss": 0.02945093, "memory(GiB)": 13.7, "step": 99205, "train_speed(iter/s)": 1.531562 }, { "acc": 0.99092264, "epoch": 46.501054605108976, "grad_norm": 4.313025951385498, "learning_rate": 1.3344957582870923e-07, "loss": 0.03248008, "memory(GiB)": 13.7, "step": 99210, "train_speed(iter/s)": 1.531566 }, { "acc": 0.9927084, "epoch": 46.50339817201781, "grad_norm": 1.331408977508545, "learning_rate": 1.3327240094703403e-07, "loss": 0.05163441, "memory(GiB)": 13.7, "step": 99215, "train_speed(iter/s)": 1.531566 }, { "acc": 0.9770834, "epoch": 46.505741738926645, "grad_norm": 2.8483216762542725, "learning_rate": 1.3309534305752115e-07, "loss": 0.05480816, "memory(GiB)": 13.7, "step": 99220, "train_speed(iter/s)": 1.531568 }, { "acc": 0.98008928, "epoch": 46.50808530583548, "grad_norm": 3.7043919563293457, "learning_rate": 1.3291840216442567e-07, "loss": 0.04385351, "memory(GiB)": 13.7, "step": 99225, "train_speed(iter/s)": 1.531568 }, { "acc": 0.98125, "epoch": 46.51042887274431, "grad_norm": 6.335344314575195, "learning_rate": 1.3274157827200037e-07, "loss": 0.06951616, "memory(GiB)": 13.7, "step": 99230, "train_speed(iter/s)": 1.53157 }, { "acc": 0.99012499, "epoch": 46.512772439653155, "grad_norm": 2.5041468143463135, "learning_rate": 1.3256487138449595e-07, "loss": 0.0230972, "memory(GiB)": 13.7, "step": 99235, "train_speed(iter/s)": 1.531571 }, { "acc": 0.98145828, "epoch": 46.51511600656199, "grad_norm": 3.8925962448120117, "learning_rate": 1.323882815061591e-07, "loss": 0.03758778, "memory(GiB)": 13.7, "step": 99240, "train_speed(iter/s)": 1.531575 }, { "acc": 0.99562502, "epoch": 46.51745957347082, "grad_norm": 3.7939186096191406, "learning_rate": 1.3221180864123542e-07, "loss": 0.02134492, "memory(GiB)": 13.7, "step": 99245, "train_speed(iter/s)": 1.531578 }, { "acc": 0.97762718, "epoch": 46.51980314037966, "grad_norm": 3.2743875980377197, "learning_rate": 1.320354527939656e-07, "loss": 0.05573677, "memory(GiB)": 13.7, "step": 99250, "train_speed(iter/s)": 1.531575 }, { "acc": 0.99437504, "epoch": 46.52214670728849, "grad_norm": 0.010109607130289078, "learning_rate": 1.3185921396859022e-07, "loss": 0.01471907, "memory(GiB)": 13.7, "step": 99255, "train_speed(iter/s)": 1.531575 }, { "acc": 0.98300591, "epoch": 46.524490274197326, "grad_norm": 1.971420407295227, "learning_rate": 1.3168309216934435e-07, "loss": 0.0319674, "memory(GiB)": 13.7, "step": 99260, "train_speed(iter/s)": 1.531575 }, { "acc": 0.98865528, "epoch": 46.52683384110616, "grad_norm": 4.376962661743164, "learning_rate": 1.3150708740046144e-07, "loss": 0.04965633, "memory(GiB)": 13.7, "step": 99265, "train_speed(iter/s)": 1.531576 }, { "acc": 0.98904762, "epoch": 46.529177408015, "grad_norm": 4.042923450469971, "learning_rate": 1.3133119966617268e-07, "loss": 0.02703472, "memory(GiB)": 13.7, "step": 99270, "train_speed(iter/s)": 1.531583 }, { "acc": 0.9739583, "epoch": 46.531520974923836, "grad_norm": 4.059396743774414, "learning_rate": 1.311554289707048e-07, "loss": 0.02904034, "memory(GiB)": 13.7, "step": 99275, "train_speed(iter/s)": 1.531588 }, { "acc": 0.98029795, "epoch": 46.53386454183267, "grad_norm": 1.409282922744751, "learning_rate": 1.3097977531828396e-07, "loss": 0.03294848, "memory(GiB)": 13.7, "step": 99280, "train_speed(iter/s)": 1.531591 }, { "acc": 0.98395824, "epoch": 46.536208108741505, "grad_norm": 2.5762314796447754, "learning_rate": 1.3080423871313088e-07, "loss": 0.03620152, "memory(GiB)": 13.7, "step": 99285, "train_speed(iter/s)": 1.531592 }, { "acc": 0.98416672, "epoch": 46.53855167565034, "grad_norm": 3.7689855098724365, "learning_rate": 1.3062881915946674e-07, "loss": 0.0271246, "memory(GiB)": 13.7, "step": 99290, "train_speed(iter/s)": 1.531594 }, { "acc": 0.98687496, "epoch": 46.54089524255917, "grad_norm": 0.8523843884468079, "learning_rate": 1.3045351666150605e-07, "loss": 0.02700263, "memory(GiB)": 13.7, "step": 99295, "train_speed(iter/s)": 1.531597 }, { "acc": 0.97302094, "epoch": 46.54323880946801, "grad_norm": 5.229886531829834, "learning_rate": 1.302783312234645e-07, "loss": 0.05771896, "memory(GiB)": 13.7, "step": 99300, "train_speed(iter/s)": 1.531596 }, { "acc": 0.97729168, "epoch": 46.54558237637684, "grad_norm": 3.803999900817871, "learning_rate": 1.301032628495516e-07, "loss": 0.06490638, "memory(GiB)": 13.7, "step": 99305, "train_speed(iter/s)": 1.531598 }, { "acc": 0.98624458, "epoch": 46.547925943285684, "grad_norm": 3.6529741287231445, "learning_rate": 1.2992831154397468e-07, "loss": 0.04819365, "memory(GiB)": 13.7, "step": 99310, "train_speed(iter/s)": 1.531603 }, { "acc": 0.9883604, "epoch": 46.55026951019452, "grad_norm": 2.2466444969177246, "learning_rate": 1.2975347731094166e-07, "loss": 0.03696781, "memory(GiB)": 13.7, "step": 99315, "train_speed(iter/s)": 1.531609 }, { "acc": 0.98083334, "epoch": 46.55261307710335, "grad_norm": 2.675455331802368, "learning_rate": 1.295787601546526e-07, "loss": 0.02901504, "memory(GiB)": 13.7, "step": 99320, "train_speed(iter/s)": 1.531611 }, { "acc": 0.99392357, "epoch": 46.55495664401219, "grad_norm": 0.6734123826026917, "learning_rate": 1.2940416007930818e-07, "loss": 0.02205217, "memory(GiB)": 13.7, "step": 99325, "train_speed(iter/s)": 1.531612 }, { "acc": 0.97607136, "epoch": 46.55730021092102, "grad_norm": 7.4075446128845215, "learning_rate": 1.292296770891046e-07, "loss": 0.04413041, "memory(GiB)": 13.7, "step": 99330, "train_speed(iter/s)": 1.531614 }, { "acc": 0.98529758, "epoch": 46.559643777829855, "grad_norm": 3.5345094203948975, "learning_rate": 1.2905531118823648e-07, "loss": 0.02400855, "memory(GiB)": 13.7, "step": 99335, "train_speed(iter/s)": 1.531613 }, { "acc": 0.98812504, "epoch": 46.56198734473869, "grad_norm": 4.031008243560791, "learning_rate": 1.2888106238089445e-07, "loss": 0.05353066, "memory(GiB)": 13.7, "step": 99340, "train_speed(iter/s)": 1.531616 }, { "acc": 0.99437504, "epoch": 46.56433091164753, "grad_norm": 2.1662275791168213, "learning_rate": 1.287069306712675e-07, "loss": 0.0131538, "memory(GiB)": 13.7, "step": 99345, "train_speed(iter/s)": 1.531617 }, { "acc": 0.99375, "epoch": 46.566674478556365, "grad_norm": 0.9785538911819458, "learning_rate": 1.2853291606354133e-07, "loss": 0.02632628, "memory(GiB)": 13.7, "step": 99350, "train_speed(iter/s)": 1.531617 }, { "acc": 0.98467264, "epoch": 46.5690180454652, "grad_norm": 4.1245436668396, "learning_rate": 1.283590185618972e-07, "loss": 0.02344273, "memory(GiB)": 13.7, "step": 99355, "train_speed(iter/s)": 1.53162 }, { "acc": 0.99562502, "epoch": 46.571361612374034, "grad_norm": 1.3540610074996948, "learning_rate": 1.2818523817051689e-07, "loss": 0.0305226, "memory(GiB)": 13.7, "step": 99360, "train_speed(iter/s)": 1.53162 }, { "acc": 0.98465281, "epoch": 46.57370517928287, "grad_norm": 3.979482412338257, "learning_rate": 1.280115748935755e-07, "loss": 0.02358027, "memory(GiB)": 13.7, "step": 99365, "train_speed(iter/s)": 1.531622 }, { "acc": 0.98633928, "epoch": 46.5760487461917, "grad_norm": 0.8906341195106506, "learning_rate": 1.2783802873524983e-07, "loss": 0.02976031, "memory(GiB)": 13.7, "step": 99370, "train_speed(iter/s)": 1.531624 }, { "acc": 0.98455353, "epoch": 46.57839231310054, "grad_norm": 1.4485775232315063, "learning_rate": 1.2766459969970893e-07, "loss": 0.04811807, "memory(GiB)": 13.7, "step": 99375, "train_speed(iter/s)": 1.531624 }, { "acc": 0.984375, "epoch": 46.58073588000937, "grad_norm": 4.087033271789551, "learning_rate": 1.274912877911235e-07, "loss": 0.04675338, "memory(GiB)": 13.7, "step": 99380, "train_speed(iter/s)": 1.531626 }, { "acc": 0.98715286, "epoch": 46.58307944691821, "grad_norm": 3.2127175331115723, "learning_rate": 1.2731809301365755e-07, "loss": 0.05390844, "memory(GiB)": 13.7, "step": 99385, "train_speed(iter/s)": 1.531626 }, { "acc": 0.98291664, "epoch": 46.58542301382705, "grad_norm": 3.3692564964294434, "learning_rate": 1.2714501537147566e-07, "loss": 0.0386971, "memory(GiB)": 13.7, "step": 99390, "train_speed(iter/s)": 1.531625 }, { "acc": 0.97868061, "epoch": 46.58776658073588, "grad_norm": 2.713114023208618, "learning_rate": 1.2697205486873744e-07, "loss": 0.04797119, "memory(GiB)": 13.7, "step": 99395, "train_speed(iter/s)": 1.531621 }, { "acc": 0.98883934, "epoch": 46.590110147644715, "grad_norm": 1.9660367965698242, "learning_rate": 1.2679921150959966e-07, "loss": 0.03026128, "memory(GiB)": 13.7, "step": 99400, "train_speed(iter/s)": 1.531623 }, { "acc": 0.98083334, "epoch": 46.59245371455355, "grad_norm": 1.4057618379592896, "learning_rate": 1.266264852982181e-07, "loss": 0.0351263, "memory(GiB)": 13.7, "step": 99405, "train_speed(iter/s)": 1.531626 }, { "acc": 0.9874239, "epoch": 46.594797281462384, "grad_norm": 4.736716270446777, "learning_rate": 1.2645387623874339e-07, "loss": 0.05928974, "memory(GiB)": 13.7, "step": 99410, "train_speed(iter/s)": 1.531623 }, { "acc": 0.98249998, "epoch": 46.59714084837122, "grad_norm": 2.4514999389648438, "learning_rate": 1.2628138433532518e-07, "loss": 0.03298535, "memory(GiB)": 13.7, "step": 99415, "train_speed(iter/s)": 1.53162 }, { "acc": 0.97937498, "epoch": 46.59948441528006, "grad_norm": 4.853557586669922, "learning_rate": 1.2610900959210916e-07, "loss": 0.05686119, "memory(GiB)": 13.7, "step": 99420, "train_speed(iter/s)": 1.531622 }, { "acc": 0.98145828, "epoch": 46.601827982188894, "grad_norm": 5.579531192779541, "learning_rate": 1.259367520132394e-07, "loss": 0.04164576, "memory(GiB)": 13.7, "step": 99425, "train_speed(iter/s)": 1.531624 }, { "acc": 0.9947917, "epoch": 46.60417154909773, "grad_norm": 1.989413857460022, "learning_rate": 1.2576461160285604e-07, "loss": 0.01372658, "memory(GiB)": 13.7, "step": 99430, "train_speed(iter/s)": 1.531629 }, { "acc": 0.9958334, "epoch": 46.60651511600656, "grad_norm": 0.009245253168046474, "learning_rate": 1.2559258836509647e-07, "loss": 0.02694443, "memory(GiB)": 13.7, "step": 99435, "train_speed(iter/s)": 1.531632 }, { "acc": 0.9979167, "epoch": 46.6088586829154, "grad_norm": 2.028761148452759, "learning_rate": 1.2542068230409587e-07, "loss": 0.06089686, "memory(GiB)": 13.7, "step": 99440, "train_speed(iter/s)": 1.531633 }, { "acc": 0.99750004, "epoch": 46.61120224982423, "grad_norm": 4.203832626342773, "learning_rate": 1.2524889342398607e-07, "loss": 0.02841621, "memory(GiB)": 13.7, "step": 99445, "train_speed(iter/s)": 1.531633 }, { "acc": 0.97770834, "epoch": 46.613545816733065, "grad_norm": 5.2732930183410645, "learning_rate": 1.250772217288967e-07, "loss": 0.05864097, "memory(GiB)": 13.7, "step": 99450, "train_speed(iter/s)": 1.531639 }, { "acc": 0.97787704, "epoch": 46.6158893836419, "grad_norm": 3.9292900562286377, "learning_rate": 1.249056672229529e-07, "loss": 0.03907348, "memory(GiB)": 13.7, "step": 99455, "train_speed(iter/s)": 1.531639 }, { "acc": 0.99258928, "epoch": 46.61823295055074, "grad_norm": 0.0017481950344517827, "learning_rate": 1.2473422991027989e-07, "loss": 0.01257815, "memory(GiB)": 13.7, "step": 99460, "train_speed(iter/s)": 1.531638 }, { "acc": 0.996875, "epoch": 46.620576517459575, "grad_norm": 3.2323086261749268, "learning_rate": 1.245629097949978e-07, "loss": 0.02005252, "memory(GiB)": 13.7, "step": 99465, "train_speed(iter/s)": 1.531637 }, { "acc": 0.98812504, "epoch": 46.62292008436841, "grad_norm": 2.7259812355041504, "learning_rate": 1.243917068812252e-07, "loss": 0.02586718, "memory(GiB)": 13.7, "step": 99470, "train_speed(iter/s)": 1.531638 }, { "acc": 0.98916664, "epoch": 46.625263651277244, "grad_norm": 0.002964685671031475, "learning_rate": 1.2422062117307666e-07, "loss": 0.01821948, "memory(GiB)": 13.7, "step": 99475, "train_speed(iter/s)": 1.531636 }, { "acc": 0.98354168, "epoch": 46.62760721818608, "grad_norm": 1.160902738571167, "learning_rate": 1.240496526746641e-07, "loss": 0.03220481, "memory(GiB)": 13.7, "step": 99480, "train_speed(iter/s)": 1.531633 }, { "acc": 0.98145294, "epoch": 46.62995078509491, "grad_norm": 3.6151957511901855, "learning_rate": 1.2387880139009768e-07, "loss": 0.04479907, "memory(GiB)": 13.7, "step": 99485, "train_speed(iter/s)": 1.531637 }, { "acc": 0.97987175, "epoch": 46.63229435200375, "grad_norm": 3.175462007522583, "learning_rate": 1.237080673234837e-07, "loss": 0.05848443, "memory(GiB)": 13.7, "step": 99490, "train_speed(iter/s)": 1.531643 }, { "acc": 0.99131947, "epoch": 46.63463791891259, "grad_norm": 2.575352430343628, "learning_rate": 1.235374504789268e-07, "loss": 0.02239508, "memory(GiB)": 13.7, "step": 99495, "train_speed(iter/s)": 1.531646 }, { "acc": 0.98806095, "epoch": 46.63698148582142, "grad_norm": 1.8153561353683472, "learning_rate": 1.2336695086052663e-07, "loss": 0.01998827, "memory(GiB)": 13.7, "step": 99500, "train_speed(iter/s)": 1.531648 }, { "acc": 0.996875, "epoch": 46.63932505273026, "grad_norm": 0.13960349559783936, "learning_rate": 1.231965684723834e-07, "loss": 0.01310511, "memory(GiB)": 13.7, "step": 99505, "train_speed(iter/s)": 1.531648 }, { "acc": 0.99083328, "epoch": 46.64166861963909, "grad_norm": 1.7131825685501099, "learning_rate": 1.230263033185906e-07, "loss": 0.03366907, "memory(GiB)": 13.7, "step": 99510, "train_speed(iter/s)": 1.531651 }, { "acc": 0.98500004, "epoch": 46.644012186547926, "grad_norm": 2.1573941707611084, "learning_rate": 1.2285615540324237e-07, "loss": 0.05716541, "memory(GiB)": 13.7, "step": 99515, "train_speed(iter/s)": 1.531656 }, { "acc": 0.996875, "epoch": 46.64635575345676, "grad_norm": 0.021314140409231186, "learning_rate": 1.2268612473042781e-07, "loss": 0.02172424, "memory(GiB)": 13.7, "step": 99520, "train_speed(iter/s)": 1.531655 }, { "acc": 1.0, "epoch": 46.648699320365594, "grad_norm": 2.128197193145752, "learning_rate": 1.2251621130423323e-07, "loss": 0.01480991, "memory(GiB)": 13.7, "step": 99525, "train_speed(iter/s)": 1.53166 }, { "acc": 0.97770834, "epoch": 46.65104288727443, "grad_norm": 1.9969217777252197, "learning_rate": 1.2234641512874435e-07, "loss": 0.05914161, "memory(GiB)": 13.7, "step": 99530, "train_speed(iter/s)": 1.531663 }, { "acc": 0.98729172, "epoch": 46.65338645418327, "grad_norm": 3.2884318828582764, "learning_rate": 1.2217673620804147e-07, "loss": 0.02041697, "memory(GiB)": 13.7, "step": 99535, "train_speed(iter/s)": 1.531666 }, { "acc": 0.98819447, "epoch": 46.655730021092104, "grad_norm": 2.4468612670898438, "learning_rate": 1.2200717454620365e-07, "loss": 0.02897067, "memory(GiB)": 13.7, "step": 99540, "train_speed(iter/s)": 1.531668 }, { "acc": 0.97666664, "epoch": 46.65807358800094, "grad_norm": 0.23842297494411469, "learning_rate": 1.21837730147305e-07, "loss": 0.0270121, "memory(GiB)": 13.7, "step": 99545, "train_speed(iter/s)": 1.531669 }, { "acc": 0.98602676, "epoch": 46.66041715490977, "grad_norm": 3.2158148288726807, "learning_rate": 1.2166840301542075e-07, "loss": 0.04766609, "memory(GiB)": 13.7, "step": 99550, "train_speed(iter/s)": 1.53167 }, { "acc": 0.98008928, "epoch": 46.66276072181861, "grad_norm": 2.5953614711761475, "learning_rate": 1.2149919315462002e-07, "loss": 0.05545571, "memory(GiB)": 13.7, "step": 99555, "train_speed(iter/s)": 1.531672 }, { "acc": 0.9927083, "epoch": 46.66510428872744, "grad_norm": 4.780657768249512, "learning_rate": 1.2133010056896914e-07, "loss": 0.03967404, "memory(GiB)": 13.7, "step": 99560, "train_speed(iter/s)": 1.531674 }, { "acc": 0.98455362, "epoch": 46.667447855636276, "grad_norm": 4.074582576751709, "learning_rate": 1.211611252625339e-07, "loss": 0.03288135, "memory(GiB)": 13.7, "step": 99565, "train_speed(iter/s)": 1.531674 }, { "acc": 0.98395834, "epoch": 46.66979142254512, "grad_norm": 2.6782658100128174, "learning_rate": 1.2099226723937455e-07, "loss": 0.01993712, "memory(GiB)": 13.7, "step": 99570, "train_speed(iter/s)": 1.531675 }, { "acc": 0.97958336, "epoch": 46.67213498945395, "grad_norm": 3.5698628425598145, "learning_rate": 1.2082352650355182e-07, "loss": 0.08088031, "memory(GiB)": 13.7, "step": 99575, "train_speed(iter/s)": 1.531677 }, { "acc": 0.97657204, "epoch": 46.674478556362786, "grad_norm": 5.2740583419799805, "learning_rate": 1.2065490305911936e-07, "loss": 0.07706735, "memory(GiB)": 13.7, "step": 99580, "train_speed(iter/s)": 1.531677 }, { "acc": 0.97696438, "epoch": 46.67682212327162, "grad_norm": 2.475541353225708, "learning_rate": 1.204863969101324e-07, "loss": 0.06475501, "memory(GiB)": 13.7, "step": 99585, "train_speed(iter/s)": 1.531678 }, { "acc": 0.98041668, "epoch": 46.679165690180454, "grad_norm": 3.0162527561187744, "learning_rate": 1.2031800806064002e-07, "loss": 0.03503827, "memory(GiB)": 13.7, "step": 99590, "train_speed(iter/s)": 1.531682 }, { "acc": 0.99250002, "epoch": 46.68150925708929, "grad_norm": 1.1396658420562744, "learning_rate": 1.2014973651469028e-07, "loss": 0.03464384, "memory(GiB)": 13.7, "step": 99595, "train_speed(iter/s)": 1.531686 }, { "acc": 0.96375008, "epoch": 46.68385282399812, "grad_norm": 6.174942970275879, "learning_rate": 1.1998158227632786e-07, "loss": 0.08005072, "memory(GiB)": 13.7, "step": 99600, "train_speed(iter/s)": 1.531684 }, { "acc": 0.99229164, "epoch": 46.68619639090696, "grad_norm": 0.21823762357234955, "learning_rate": 1.1981354534959359e-07, "loss": 0.02078337, "memory(GiB)": 13.7, "step": 99605, "train_speed(iter/s)": 1.531688 }, { "acc": 0.97250004, "epoch": 46.6885399578158, "grad_norm": 5.680695056915283, "learning_rate": 1.1964562573852827e-07, "loss": 0.06588215, "memory(GiB)": 13.7, "step": 99610, "train_speed(iter/s)": 1.531693 }, { "acc": 0.98633928, "epoch": 46.69088352472463, "grad_norm": 3.047431230545044, "learning_rate": 1.1947782344716656e-07, "loss": 0.02850184, "memory(GiB)": 13.7, "step": 99615, "train_speed(iter/s)": 1.531698 }, { "acc": 0.98670635, "epoch": 46.69322709163347, "grad_norm": 4.401706695556641, "learning_rate": 1.1931013847954376e-07, "loss": 0.02600968, "memory(GiB)": 13.7, "step": 99620, "train_speed(iter/s)": 1.531701 }, { "acc": 0.99750004, "epoch": 46.6955706585423, "grad_norm": 0.004534969571977854, "learning_rate": 1.1914257083968794e-07, "loss": 0.01100827, "memory(GiB)": 13.7, "step": 99625, "train_speed(iter/s)": 1.531702 }, { "acc": 0.99291668, "epoch": 46.697914225451136, "grad_norm": 3.348450183868408, "learning_rate": 1.1897512053162873e-07, "loss": 0.01880337, "memory(GiB)": 13.7, "step": 99630, "train_speed(iter/s)": 1.531702 }, { "acc": 0.98145828, "epoch": 46.70025779235997, "grad_norm": 1.4478425979614258, "learning_rate": 1.1880778755939036e-07, "loss": 0.05167575, "memory(GiB)": 13.7, "step": 99635, "train_speed(iter/s)": 1.531703 }, { "acc": 0.98477678, "epoch": 46.702601359268805, "grad_norm": 4.580196380615234, "learning_rate": 1.1864057192699582e-07, "loss": 0.02606539, "memory(GiB)": 13.7, "step": 99640, "train_speed(iter/s)": 1.531702 }, { "acc": 0.97423611, "epoch": 46.704944926177646, "grad_norm": 0.019021982327103615, "learning_rate": 1.1847347363846318e-07, "loss": 0.04498183, "memory(GiB)": 13.7, "step": 99645, "train_speed(iter/s)": 1.531701 }, { "acc": 0.97229176, "epoch": 46.70728849308648, "grad_norm": 3.4368622303009033, "learning_rate": 1.1830649269780996e-07, "loss": 0.04852827, "memory(GiB)": 13.7, "step": 99650, "train_speed(iter/s)": 1.531702 }, { "acc": 0.984375, "epoch": 46.709632059995315, "grad_norm": 1.5043224096298218, "learning_rate": 1.1813962910904918e-07, "loss": 0.03594838, "memory(GiB)": 13.7, "step": 99655, "train_speed(iter/s)": 1.531702 }, { "acc": 0.98673611, "epoch": 46.71197562690415, "grad_norm": 3.6712634563446045, "learning_rate": 1.1797288287619112e-07, "loss": 0.04292467, "memory(GiB)": 13.7, "step": 99660, "train_speed(iter/s)": 1.531703 }, { "acc": 0.98175602, "epoch": 46.71431919381298, "grad_norm": 2.329491376876831, "learning_rate": 1.1780625400324607e-07, "loss": 0.02920823, "memory(GiB)": 13.7, "step": 99665, "train_speed(iter/s)": 1.531705 }, { "acc": 0.98812504, "epoch": 46.71666276072182, "grad_norm": 0.0032197877299040556, "learning_rate": 1.1763974249421656e-07, "loss": 0.0278869, "memory(GiB)": 13.7, "step": 99670, "train_speed(iter/s)": 1.531709 }, { "acc": 0.97822914, "epoch": 46.71900632763065, "grad_norm": 3.32844877243042, "learning_rate": 1.174733483531073e-07, "loss": 0.03610499, "memory(GiB)": 13.7, "step": 99675, "train_speed(iter/s)": 1.531709 }, { "acc": 0.99236107, "epoch": 46.721349894539486, "grad_norm": 2.6097676753997803, "learning_rate": 1.1730707158391634e-07, "loss": 0.02151229, "memory(GiB)": 13.7, "step": 99680, "train_speed(iter/s)": 1.531712 }, { "acc": 0.99375, "epoch": 46.72369346144833, "grad_norm": 4.165674686431885, "learning_rate": 1.1714091219064066e-07, "loss": 0.02589676, "memory(GiB)": 13.7, "step": 99685, "train_speed(iter/s)": 1.531716 }, { "acc": 0.98793564, "epoch": 46.72603702835716, "grad_norm": 3.3266637325286865, "learning_rate": 1.1697487017727445e-07, "loss": 0.05960682, "memory(GiB)": 13.7, "step": 99690, "train_speed(iter/s)": 1.53172 }, { "acc": 0.98703375, "epoch": 46.728380595265996, "grad_norm": 2.727692127227783, "learning_rate": 1.16808945547808e-07, "loss": 0.02007294, "memory(GiB)": 13.7, "step": 99695, "train_speed(iter/s)": 1.531724 }, { "acc": 0.98604164, "epoch": 46.73072416217483, "grad_norm": 3.598339319229126, "learning_rate": 1.1664313830623159e-07, "loss": 0.03123254, "memory(GiB)": 13.7, "step": 99700, "train_speed(iter/s)": 1.531729 }, { "acc": 0.98770218, "epoch": 46.733067729083665, "grad_norm": 3.52644419670105, "learning_rate": 1.1647744845652831e-07, "loss": 0.05828959, "memory(GiB)": 13.7, "step": 99705, "train_speed(iter/s)": 1.531732 }, { "acc": 0.984375, "epoch": 46.7354112959925, "grad_norm": 4.430748462677002, "learning_rate": 1.1631187600268294e-07, "loss": 0.02879011, "memory(GiB)": 13.7, "step": 99710, "train_speed(iter/s)": 1.531734 }, { "acc": 0.996875, "epoch": 46.73775486290133, "grad_norm": 4.3486175537109375, "learning_rate": 1.1614642094867297e-07, "loss": 0.02291162, "memory(GiB)": 13.7, "step": 99715, "train_speed(iter/s)": 1.531735 }, { "acc": 0.99666672, "epoch": 46.74009842981017, "grad_norm": 2.4765396118164062, "learning_rate": 1.1598108329847817e-07, "loss": 0.03610514, "memory(GiB)": 13.7, "step": 99720, "train_speed(iter/s)": 1.531739 }, { "acc": 0.97986107, "epoch": 46.74244199671901, "grad_norm": 2.3197262287139893, "learning_rate": 1.1581586305607051e-07, "loss": 0.05499387, "memory(GiB)": 13.7, "step": 99725, "train_speed(iter/s)": 1.531745 }, { "acc": 0.9859375, "epoch": 46.74478556362784, "grad_norm": 1.7627856731414795, "learning_rate": 1.1565076022542199e-07, "loss": 0.0380088, "memory(GiB)": 13.7, "step": 99730, "train_speed(iter/s)": 1.531748 }, { "acc": 0.98524799, "epoch": 46.74712913053668, "grad_norm": 4.990926265716553, "learning_rate": 1.1548577481050179e-07, "loss": 0.05728779, "memory(GiB)": 13.7, "step": 99735, "train_speed(iter/s)": 1.531754 }, { "acc": 0.99541664, "epoch": 46.74947269744551, "grad_norm": 0.003890304360538721, "learning_rate": 1.1532090681527414e-07, "loss": 0.00962693, "memory(GiB)": 13.7, "step": 99740, "train_speed(iter/s)": 1.531754 }, { "acc": 0.9833334, "epoch": 46.751816264354346, "grad_norm": 0.0006806002929806709, "learning_rate": 1.1515615624370379e-07, "loss": 0.04625654, "memory(GiB)": 13.7, "step": 99745, "train_speed(iter/s)": 1.531755 }, { "acc": 0.984375, "epoch": 46.75415983126318, "grad_norm": 4.003617286682129, "learning_rate": 1.1499152309974885e-07, "loss": 0.03150957, "memory(GiB)": 13.7, "step": 99750, "train_speed(iter/s)": 1.531757 }, { "acc": 0.97788696, "epoch": 46.756503398172015, "grad_norm": 4.147185325622559, "learning_rate": 1.14827007387368e-07, "loss": 0.0532052, "memory(GiB)": 13.7, "step": 99755, "train_speed(iter/s)": 1.531757 }, { "acc": 0.97937508, "epoch": 46.758846965080856, "grad_norm": 3.3302001953125, "learning_rate": 1.1466260911051543e-07, "loss": 0.03441882, "memory(GiB)": 13.7, "step": 99760, "train_speed(iter/s)": 1.531762 }, { "acc": 0.99231148, "epoch": 46.76119053198969, "grad_norm": 3.191901683807373, "learning_rate": 1.1449832827314259e-07, "loss": 0.0557511, "memory(GiB)": 13.7, "step": 99765, "train_speed(iter/s)": 1.531766 }, { "acc": 0.9885416, "epoch": 46.763534098898525, "grad_norm": 2.38442325592041, "learning_rate": 1.1433416487919817e-07, "loss": 0.02818626, "memory(GiB)": 13.7, "step": 99770, "train_speed(iter/s)": 1.531764 }, { "acc": 0.98395834, "epoch": 46.76587766580736, "grad_norm": 0.0019404696067795157, "learning_rate": 1.1417011893262747e-07, "loss": 0.0243949, "memory(GiB)": 13.7, "step": 99775, "train_speed(iter/s)": 1.53177 }, { "acc": 0.98306551, "epoch": 46.76822123271619, "grad_norm": 2.4708189964294434, "learning_rate": 1.1400619043737475e-07, "loss": 0.02696218, "memory(GiB)": 13.7, "step": 99780, "train_speed(iter/s)": 1.531779 }, { "acc": 0.97738094, "epoch": 46.77056479962503, "grad_norm": 6.574269771575928, "learning_rate": 1.1384237939737978e-07, "loss": 0.05887318, "memory(GiB)": 13.7, "step": 99785, "train_speed(iter/s)": 1.531784 }, { "acc": 0.98628922, "epoch": 46.77290836653386, "grad_norm": 1.5120551586151123, "learning_rate": 1.1367868581658014e-07, "loss": 0.02533574, "memory(GiB)": 13.7, "step": 99790, "train_speed(iter/s)": 1.531789 }, { "acc": 0.9988636, "epoch": 46.775251933442696, "grad_norm": 2.2005629539489746, "learning_rate": 1.1351510969891061e-07, "loss": 0.0133584, "memory(GiB)": 13.7, "step": 99795, "train_speed(iter/s)": 1.531787 }, { "acc": 0.99333334, "epoch": 46.77759550035154, "grad_norm": 3.4254889488220215, "learning_rate": 1.1335165104830266e-07, "loss": 0.02283181, "memory(GiB)": 13.7, "step": 99800, "train_speed(iter/s)": 1.531794 }, { "acc": 0.98988094, "epoch": 46.77993906726037, "grad_norm": 4.772153377532959, "learning_rate": 1.1318830986868498e-07, "loss": 0.02795716, "memory(GiB)": 13.7, "step": 99805, "train_speed(iter/s)": 1.531797 }, { "acc": 0.98322306, "epoch": 46.782282634169206, "grad_norm": 3.7539093494415283, "learning_rate": 1.1302508616398514e-07, "loss": 0.03211175, "memory(GiB)": 13.7, "step": 99810, "train_speed(iter/s)": 1.531798 }, { "acc": 0.98395834, "epoch": 46.78462620107804, "grad_norm": 4.742383003234863, "learning_rate": 1.1286197993812516e-07, "loss": 0.03757198, "memory(GiB)": 13.7, "step": 99815, "train_speed(iter/s)": 1.531798 }, { "acc": 0.9947916, "epoch": 46.786969767986875, "grad_norm": 4.076571464538574, "learning_rate": 1.1269899119502596e-07, "loss": 0.02401386, "memory(GiB)": 13.7, "step": 99820, "train_speed(iter/s)": 1.531798 }, { "acc": 0.98719692, "epoch": 46.78931333489571, "grad_norm": 0.003275136696174741, "learning_rate": 1.1253611993860624e-07, "loss": 0.05241228, "memory(GiB)": 13.7, "step": 99825, "train_speed(iter/s)": 1.531803 }, { "acc": 0.98187504, "epoch": 46.791656901804544, "grad_norm": 0.9220041036605835, "learning_rate": 1.1237336617277913e-07, "loss": 0.03558306, "memory(GiB)": 13.7, "step": 99830, "train_speed(iter/s)": 1.531805 }, { "acc": 0.98666668, "epoch": 46.794000468713385, "grad_norm": 2.557471513748169, "learning_rate": 1.1221072990145779e-07, "loss": 0.02925234, "memory(GiB)": 13.7, "step": 99835, "train_speed(iter/s)": 1.531806 }, { "acc": 0.98334284, "epoch": 46.79634403562222, "grad_norm": 1.8466359376907349, "learning_rate": 1.1204821112855038e-07, "loss": 0.03364534, "memory(GiB)": 13.7, "step": 99840, "train_speed(iter/s)": 1.53181 }, { "acc": 0.98467264, "epoch": 46.798687602531054, "grad_norm": 3.2257344722747803, "learning_rate": 1.1188580985796503e-07, "loss": 0.03625066, "memory(GiB)": 13.7, "step": 99845, "train_speed(iter/s)": 1.531815 }, { "acc": 0.99020834, "epoch": 46.80103116943989, "grad_norm": 3.122239828109741, "learning_rate": 1.1172352609360489e-07, "loss": 0.03065267, "memory(GiB)": 13.7, "step": 99850, "train_speed(iter/s)": 1.531813 }, { "acc": 0.97416668, "epoch": 46.80337473634872, "grad_norm": 3.391308069229126, "learning_rate": 1.115613598393698e-07, "loss": 0.05247096, "memory(GiB)": 13.7, "step": 99855, "train_speed(iter/s)": 1.531805 }, { "acc": 0.97986107, "epoch": 46.80571830325756, "grad_norm": 0.0010730482172220945, "learning_rate": 1.1139931109915845e-07, "loss": 0.04275989, "memory(GiB)": 13.7, "step": 99860, "train_speed(iter/s)": 1.531805 }, { "acc": 0.996875, "epoch": 46.80806187016639, "grad_norm": 3.9227161407470703, "learning_rate": 1.1123737987686459e-07, "loss": 0.04336458, "memory(GiB)": 13.7, "step": 99865, "train_speed(iter/s)": 1.531804 }, { "acc": 0.98104162, "epoch": 46.810405437075225, "grad_norm": 3.4013888835906982, "learning_rate": 1.1107556617638304e-07, "loss": 0.03129204, "memory(GiB)": 13.7, "step": 99870, "train_speed(iter/s)": 1.531807 }, { "acc": 0.97989578, "epoch": 46.81274900398407, "grad_norm": 0.03844723105430603, "learning_rate": 1.1091387000160083e-07, "loss": 0.03176372, "memory(GiB)": 13.7, "step": 99875, "train_speed(iter/s)": 1.53181 }, { "acc": 0.98812504, "epoch": 46.8150925708929, "grad_norm": 0.0026533298660069704, "learning_rate": 1.1075229135640671e-07, "loss": 0.02288014, "memory(GiB)": 13.7, "step": 99880, "train_speed(iter/s)": 1.531811 }, { "acc": 0.984375, "epoch": 46.817436137801735, "grad_norm": 2.518843173980713, "learning_rate": 1.105908302446827e-07, "loss": 0.03238198, "memory(GiB)": 13.7, "step": 99885, "train_speed(iter/s)": 1.531817 }, { "acc": 0.9854167, "epoch": 46.81977970471057, "grad_norm": 5.066587448120117, "learning_rate": 1.1042948667031095e-07, "loss": 0.02925825, "memory(GiB)": 13.7, "step": 99890, "train_speed(iter/s)": 1.531818 }, { "acc": 0.99177084, "epoch": 46.822123271619404, "grad_norm": 2.1360087394714355, "learning_rate": 1.1026826063716899e-07, "loss": 0.03420511, "memory(GiB)": 13.7, "step": 99895, "train_speed(iter/s)": 1.531819 }, { "acc": 0.99080353, "epoch": 46.82446683852824, "grad_norm": 5.135109901428223, "learning_rate": 1.1010715214913171e-07, "loss": 0.01709339, "memory(GiB)": 13.7, "step": 99900, "train_speed(iter/s)": 1.531823 }, { "acc": 0.9817482, "epoch": 46.82681040543707, "grad_norm": 0.024138612672686577, "learning_rate": 1.099461612100734e-07, "loss": 0.03345557, "memory(GiB)": 13.7, "step": 99905, "train_speed(iter/s)": 1.531826 }, { "acc": 0.98395834, "epoch": 46.829153972345914, "grad_norm": 1.9829906225204468, "learning_rate": 1.0978528782386222e-07, "loss": 0.03376555, "memory(GiB)": 13.7, "step": 99910, "train_speed(iter/s)": 1.531824 }, { "acc": 0.9895834, "epoch": 46.83149753925475, "grad_norm": 0.0005756838363595307, "learning_rate": 1.0962453199436582e-07, "loss": 0.02386537, "memory(GiB)": 13.7, "step": 99915, "train_speed(iter/s)": 1.531826 }, { "acc": 0.99145832, "epoch": 46.83384110616358, "grad_norm": 0.0774218961596489, "learning_rate": 1.0946389372544685e-07, "loss": 0.01405498, "memory(GiB)": 13.7, "step": 99920, "train_speed(iter/s)": 1.531821 }, { "acc": 0.99903851, "epoch": 46.83618467307242, "grad_norm": 0.0009637237526476383, "learning_rate": 1.0930337302096847e-07, "loss": 0.01207438, "memory(GiB)": 13.7, "step": 99925, "train_speed(iter/s)": 1.531821 }, { "acc": 0.9864583, "epoch": 46.83852823998125, "grad_norm": 3.4277799129486084, "learning_rate": 1.0914296988478778e-07, "loss": 0.04963192, "memory(GiB)": 13.7, "step": 99930, "train_speed(iter/s)": 1.531823 }, { "acc": 0.98874998, "epoch": 46.840871806890085, "grad_norm": 2.499825954437256, "learning_rate": 1.0898268432076129e-07, "loss": 0.04186825, "memory(GiB)": 13.7, "step": 99935, "train_speed(iter/s)": 1.531831 }, { "acc": 0.9885416, "epoch": 46.84321537379892, "grad_norm": 2.2780818939208984, "learning_rate": 1.0882251633274112e-07, "loss": 0.03580567, "memory(GiB)": 13.7, "step": 99940, "train_speed(iter/s)": 1.531833 }, { "acc": 0.9923214, "epoch": 46.845558940707754, "grad_norm": 7.478900909423828, "learning_rate": 1.0866246592457712e-07, "loss": 0.04979172, "memory(GiB)": 13.7, "step": 99945, "train_speed(iter/s)": 1.531837 }, { "acc": 0.97999458, "epoch": 46.847902507616595, "grad_norm": 1.584439992904663, "learning_rate": 1.0850253310011637e-07, "loss": 0.0425469, "memory(GiB)": 13.7, "step": 99950, "train_speed(iter/s)": 1.531839 }, { "acc": 0.9760417, "epoch": 46.85024607452543, "grad_norm": 0.01058946643024683, "learning_rate": 1.0834271786320269e-07, "loss": 0.06185204, "memory(GiB)": 13.7, "step": 99955, "train_speed(iter/s)": 1.531838 }, { "acc": 0.99125004, "epoch": 46.852589641434264, "grad_norm": 2.5924553871154785, "learning_rate": 1.081830202176792e-07, "loss": 0.02811365, "memory(GiB)": 13.7, "step": 99960, "train_speed(iter/s)": 1.531842 }, { "acc": 0.98937492, "epoch": 46.8549332083431, "grad_norm": 0.0024128067307174206, "learning_rate": 1.0802344016738251e-07, "loss": 0.02222112, "memory(GiB)": 13.7, "step": 99965, "train_speed(iter/s)": 1.531846 }, { "acc": 0.98184528, "epoch": 46.85727677525193, "grad_norm": 5.680809497833252, "learning_rate": 1.0786397771614971e-07, "loss": 0.08439317, "memory(GiB)": 13.7, "step": 99970, "train_speed(iter/s)": 1.531847 }, { "acc": 0.99291134, "epoch": 46.85962034216077, "grad_norm": 0.10166525840759277, "learning_rate": 1.0770463286781401e-07, "loss": 0.03569066, "memory(GiB)": 13.7, "step": 99975, "train_speed(iter/s)": 1.531848 }, { "acc": 0.98562498, "epoch": 46.8619639090696, "grad_norm": 2.480494260787964, "learning_rate": 1.0754540562620362e-07, "loss": 0.02111241, "memory(GiB)": 13.7, "step": 99980, "train_speed(iter/s)": 1.53185 }, { "acc": 0.9875, "epoch": 46.86430747597844, "grad_norm": 6.411360263824463, "learning_rate": 1.0738629599514736e-07, "loss": 0.05095302, "memory(GiB)": 13.7, "step": 99985, "train_speed(iter/s)": 1.531852 }, { "acc": 0.98468752, "epoch": 46.86665104288728, "grad_norm": 2.6355745792388916, "learning_rate": 1.0722730397846951e-07, "loss": 0.02355626, "memory(GiB)": 13.7, "step": 99990, "train_speed(iter/s)": 1.531855 }, { "acc": 0.97520828, "epoch": 46.86899460979611, "grad_norm": 1.9792519807815552, "learning_rate": 1.0706842957999222e-07, "loss": 0.04362506, "memory(GiB)": 13.7, "step": 99995, "train_speed(iter/s)": 1.531859 }, { "acc": 0.98875008, "epoch": 46.871338176704946, "grad_norm": 5.189490795135498, "learning_rate": 1.069096728035326e-07, "loss": 0.02962295, "memory(GiB)": 13.7, "step": 100000, "train_speed(iter/s)": 1.531859 }, { "epoch": 46.871338176704946, "eval_acc": 0.7786807850542928, "eval_loss": 1.2677737474441528, "eval_runtime": 144.506, "eval_samples_per_second": 55.832, "eval_steps_per_second": 6.982, "step": 100000 }, { "acc": 0.9885417, "epoch": 46.87368174361378, "grad_norm": 2.466409206390381, "learning_rate": 1.0675103365290889e-07, "loss": 0.0200431, "memory(GiB)": 13.7, "step": 100005, "train_speed(iter/s)": 1.527732 }, { "acc": 0.99571428, "epoch": 46.876025310522614, "grad_norm": 2.2744193077087402, "learning_rate": 1.0659251213193209e-07, "loss": 0.03112545, "memory(GiB)": 13.7, "step": 100010, "train_speed(iter/s)": 1.527734 }, { "acc": 0.98145838, "epoch": 46.87836887743145, "grad_norm": 5.1365814208984375, "learning_rate": 1.0643410824441431e-07, "loss": 0.03124821, "memory(GiB)": 13.7, "step": 100015, "train_speed(iter/s)": 1.527737 }, { "acc": 0.9900815, "epoch": 46.88071244434028, "grad_norm": 2.657820463180542, "learning_rate": 1.0627582199416216e-07, "loss": 0.03634194, "memory(GiB)": 13.7, "step": 100020, "train_speed(iter/s)": 1.527742 }, { "acc": 0.97477684, "epoch": 46.883056011249124, "grad_norm": 4.636331081390381, "learning_rate": 1.0611765338498056e-07, "loss": 0.04919206, "memory(GiB)": 13.7, "step": 100025, "train_speed(iter/s)": 1.527745 }, { "acc": 0.97770834, "epoch": 46.88539957815796, "grad_norm": 0.023099899291992188, "learning_rate": 1.0595960242067158e-07, "loss": 0.06100646, "memory(GiB)": 13.7, "step": 100030, "train_speed(iter/s)": 1.527747 }, { "acc": 0.98291664, "epoch": 46.88774314506679, "grad_norm": 2.628023624420166, "learning_rate": 1.0580166910503297e-07, "loss": 0.04615715, "memory(GiB)": 13.7, "step": 100035, "train_speed(iter/s)": 1.527748 }, { "acc": 0.98133926, "epoch": 46.89008671197563, "grad_norm": 3.4954888820648193, "learning_rate": 1.0564385344186294e-07, "loss": 0.03388103, "memory(GiB)": 13.7, "step": 100040, "train_speed(iter/s)": 1.52775 }, { "acc": 0.9863637, "epoch": 46.89243027888446, "grad_norm": 4.645227909088135, "learning_rate": 1.0548615543495256e-07, "loss": 0.03513356, "memory(GiB)": 13.7, "step": 100045, "train_speed(iter/s)": 1.527753 }, { "acc": 0.98698864, "epoch": 46.894773845793296, "grad_norm": 0.013216380029916763, "learning_rate": 1.053285750880945e-07, "loss": 0.03445184, "memory(GiB)": 13.7, "step": 100050, "train_speed(iter/s)": 1.527755 }, { "acc": 0.98760414, "epoch": 46.89711741270213, "grad_norm": 4.757720470428467, "learning_rate": 1.0517111240507535e-07, "loss": 0.04197477, "memory(GiB)": 13.7, "step": 100055, "train_speed(iter/s)": 1.527757 }, { "acc": 0.9977273, "epoch": 46.89946097961097, "grad_norm": 1.8614603281021118, "learning_rate": 1.0501376738968062e-07, "loss": 0.01766531, "memory(GiB)": 13.7, "step": 100060, "train_speed(iter/s)": 1.527757 }, { "acc": 0.98395834, "epoch": 46.901804546519806, "grad_norm": 0.00978073850274086, "learning_rate": 1.0485654004569246e-07, "loss": 0.03029544, "memory(GiB)": 13.7, "step": 100065, "train_speed(iter/s)": 1.527759 }, { "acc": 0.97263393, "epoch": 46.90414811342864, "grad_norm": 7.1537089347839355, "learning_rate": 1.0469943037688799e-07, "loss": 0.06445433, "memory(GiB)": 13.7, "step": 100070, "train_speed(iter/s)": 1.527762 }, { "acc": 0.98321428, "epoch": 46.906491680337474, "grad_norm": 4.517212390899658, "learning_rate": 1.045424383870466e-07, "loss": 0.0270484, "memory(GiB)": 13.7, "step": 100075, "train_speed(iter/s)": 1.527764 }, { "acc": 0.98440475, "epoch": 46.90883524724631, "grad_norm": 4.791025638580322, "learning_rate": 1.043855640799399e-07, "loss": 0.03177153, "memory(GiB)": 13.7, "step": 100080, "train_speed(iter/s)": 1.527764 }, { "acc": 0.984375, "epoch": 46.91117881415514, "grad_norm": 2.642834186553955, "learning_rate": 1.0422880745934007e-07, "loss": 0.0301451, "memory(GiB)": 13.7, "step": 100085, "train_speed(iter/s)": 1.52777 }, { "acc": 0.97342262, "epoch": 46.91352238106398, "grad_norm": 0.5109535455703735, "learning_rate": 1.0407216852901314e-07, "loss": 0.05531877, "memory(GiB)": 13.7, "step": 100090, "train_speed(iter/s)": 1.52777 }, { "acc": 0.99666672, "epoch": 46.91586594797281, "grad_norm": 0.00020782036881428212, "learning_rate": 1.0391564729272628e-07, "loss": 0.01086153, "memory(GiB)": 13.7, "step": 100095, "train_speed(iter/s)": 1.527775 }, { "acc": 0.98666668, "epoch": 46.91820951488165, "grad_norm": 0.9387823939323425, "learning_rate": 1.0375924375424056e-07, "loss": 0.02524348, "memory(GiB)": 13.7, "step": 100100, "train_speed(iter/s)": 1.527773 }, { "acc": 0.98986111, "epoch": 46.92055308179049, "grad_norm": 5.191957473754883, "learning_rate": 1.036029579173148e-07, "loss": 0.03230722, "memory(GiB)": 13.7, "step": 100105, "train_speed(iter/s)": 1.527772 }, { "acc": 0.9895833, "epoch": 46.92289664869932, "grad_norm": 0.8916510343551636, "learning_rate": 1.0344678978570674e-07, "loss": 0.01463237, "memory(GiB)": 13.7, "step": 100110, "train_speed(iter/s)": 1.527776 }, { "acc": 0.98666668, "epoch": 46.925240215608156, "grad_norm": 4.211344242095947, "learning_rate": 1.0329073936316969e-07, "loss": 0.0471059, "memory(GiB)": 13.7, "step": 100115, "train_speed(iter/s)": 1.527776 }, { "acc": 0.98881941, "epoch": 46.92758378251699, "grad_norm": 2.93894362449646, "learning_rate": 1.0313480665345523e-07, "loss": 0.08329887, "memory(GiB)": 13.7, "step": 100120, "train_speed(iter/s)": 1.527777 }, { "acc": 0.996875, "epoch": 46.929927349425824, "grad_norm": 4.299793243408203, "learning_rate": 1.0297899166031057e-07, "loss": 0.02777314, "memory(GiB)": 13.7, "step": 100125, "train_speed(iter/s)": 1.527776 }, { "acc": 0.98322926, "epoch": 46.93227091633466, "grad_norm": 1.7551195621490479, "learning_rate": 1.0282329438748123e-07, "loss": 0.04348241, "memory(GiB)": 13.7, "step": 100130, "train_speed(iter/s)": 1.527779 }, { "acc": 0.97903843, "epoch": 46.93461448324349, "grad_norm": 3.3283066749572754, "learning_rate": 1.0266771483870938e-07, "loss": 0.06116058, "memory(GiB)": 13.7, "step": 100135, "train_speed(iter/s)": 1.527779 }, { "acc": 0.9757143, "epoch": 46.936958050152334, "grad_norm": 3.665700674057007, "learning_rate": 1.0251225301773613e-07, "loss": 0.05487406, "memory(GiB)": 13.7, "step": 100140, "train_speed(iter/s)": 1.527778 }, { "acc": 0.99035721, "epoch": 46.93930161706117, "grad_norm": 0.00335475686006248, "learning_rate": 1.0235690892829696e-07, "loss": 0.01585992, "memory(GiB)": 13.7, "step": 100145, "train_speed(iter/s)": 1.527775 }, { "acc": 0.99278278, "epoch": 46.94164518397, "grad_norm": 1.8997622728347778, "learning_rate": 1.0220168257412522e-07, "loss": 0.02583759, "memory(GiB)": 13.7, "step": 100150, "train_speed(iter/s)": 1.527778 }, { "acc": 0.98363094, "epoch": 46.94398875087884, "grad_norm": 1.2077969312667847, "learning_rate": 1.0204657395895365e-07, "loss": 0.0328188, "memory(GiB)": 13.7, "step": 100155, "train_speed(iter/s)": 1.527783 }, { "acc": 0.9916667, "epoch": 46.94633231778767, "grad_norm": 2.616091728210449, "learning_rate": 1.0189158308650887e-07, "loss": 0.01605999, "memory(GiB)": 13.7, "step": 100160, "train_speed(iter/s)": 1.527787 }, { "acc": 0.9760417, "epoch": 46.948675884696506, "grad_norm": 2.435303211212158, "learning_rate": 1.0173670996051813e-07, "loss": 0.04593947, "memory(GiB)": 13.7, "step": 100165, "train_speed(iter/s)": 1.527789 }, { "acc": 0.98258934, "epoch": 46.95101945160534, "grad_norm": 4.6482157707214355, "learning_rate": 1.0158195458470303e-07, "loss": 0.03832546, "memory(GiB)": 13.7, "step": 100170, "train_speed(iter/s)": 1.527795 }, { "acc": 0.98883934, "epoch": 46.95336301851418, "grad_norm": 0.0017240972956642509, "learning_rate": 1.0142731696278357e-07, "loss": 0.01737766, "memory(GiB)": 13.7, "step": 100175, "train_speed(iter/s)": 1.527792 }, { "acc": 0.98354168, "epoch": 46.955706585423016, "grad_norm": 1.80141282081604, "learning_rate": 1.0127279709847642e-07, "loss": 0.01893049, "memory(GiB)": 13.7, "step": 100180, "train_speed(iter/s)": 1.527797 }, { "acc": 0.99508934, "epoch": 46.95805015233185, "grad_norm": 4.200133800506592, "learning_rate": 1.0111839499549602e-07, "loss": 0.02822301, "memory(GiB)": 13.7, "step": 100185, "train_speed(iter/s)": 1.5278 }, { "acc": 0.97915182, "epoch": 46.960393719240685, "grad_norm": 4.328453540802002, "learning_rate": 1.00964110657554e-07, "loss": 0.04155327, "memory(GiB)": 13.7, "step": 100190, "train_speed(iter/s)": 1.527806 }, { "acc": 0.97437496, "epoch": 46.96273728614952, "grad_norm": 4.297114849090576, "learning_rate": 1.0080994408835815e-07, "loss": 0.06357327, "memory(GiB)": 13.7, "step": 100195, "train_speed(iter/s)": 1.527809 }, { "acc": 0.98812504, "epoch": 46.96508085305835, "grad_norm": 1.564001441001892, "learning_rate": 1.0065589529161514e-07, "loss": 0.0282617, "memory(GiB)": 13.7, "step": 100200, "train_speed(iter/s)": 1.527811 }, { "acc": 0.98988094, "epoch": 46.96742441996719, "grad_norm": 1.9645050764083862, "learning_rate": 1.0050196427102663e-07, "loss": 0.02434779, "memory(GiB)": 13.7, "step": 100205, "train_speed(iter/s)": 1.527811 }, { "acc": 0.98604164, "epoch": 46.96976798687602, "grad_norm": 4.054196834564209, "learning_rate": 1.0034815103029318e-07, "loss": 0.03131782, "memory(GiB)": 13.7, "step": 100210, "train_speed(iter/s)": 1.527814 }, { "acc": 0.97770834, "epoch": 46.97211155378486, "grad_norm": 9.616192817687988, "learning_rate": 1.0019445557311147e-07, "loss": 0.04078917, "memory(GiB)": 13.7, "step": 100215, "train_speed(iter/s)": 1.527815 }, { "acc": 0.98500004, "epoch": 46.9744551206937, "grad_norm": 3.1391000747680664, "learning_rate": 1.000408779031776e-07, "loss": 0.02325572, "memory(GiB)": 13.7, "step": 100220, "train_speed(iter/s)": 1.52782 }, { "acc": 0.99321423, "epoch": 46.97679868760253, "grad_norm": 2.081576108932495, "learning_rate": 9.988741802418104e-08, "loss": 0.05056057, "memory(GiB)": 13.7, "step": 100225, "train_speed(iter/s)": 1.527821 }, { "acc": 0.98916664, "epoch": 46.979142254511366, "grad_norm": 2.1941964626312256, "learning_rate": 9.973407593981125e-08, "loss": 0.01945112, "memory(GiB)": 13.7, "step": 100230, "train_speed(iter/s)": 1.527823 }, { "acc": 0.98184528, "epoch": 46.9814858214202, "grad_norm": 3.790184736251831, "learning_rate": 9.958085165375379e-08, "loss": 0.03075911, "memory(GiB)": 13.7, "step": 100235, "train_speed(iter/s)": 1.527828 }, { "acc": 0.97856064, "epoch": 46.983829388329035, "grad_norm": 3.3169608116149902, "learning_rate": 9.942774516969204e-08, "loss": 0.03608713, "memory(GiB)": 13.7, "step": 100240, "train_speed(iter/s)": 1.527833 }, { "acc": 0.9864583, "epoch": 46.98617295523787, "grad_norm": 1.24471116065979, "learning_rate": 9.927475649130654e-08, "loss": 0.02587072, "memory(GiB)": 13.7, "step": 100245, "train_speed(iter/s)": 1.527837 }, { "acc": 0.99571428, "epoch": 46.98851652214671, "grad_norm": 0.0011816424084827304, "learning_rate": 9.91218856222729e-08, "loss": 0.05331647, "memory(GiB)": 13.7, "step": 100250, "train_speed(iter/s)": 1.52784 }, { "acc": 0.9927083, "epoch": 46.990860089055545, "grad_norm": 2.5460572242736816, "learning_rate": 9.896913256626778e-08, "loss": 0.06658182, "memory(GiB)": 13.7, "step": 100255, "train_speed(iter/s)": 1.527839 }, { "acc": 0.99375, "epoch": 46.99320365596438, "grad_norm": 4.312953472137451, "learning_rate": 9.881649732696125e-08, "loss": 0.0164381, "memory(GiB)": 13.7, "step": 100260, "train_speed(iter/s)": 1.527842 }, { "acc": 0.97186012, "epoch": 46.99554722287321, "grad_norm": 7.483669281005859, "learning_rate": 9.866397990802386e-08, "loss": 0.05683545, "memory(GiB)": 13.7, "step": 100265, "train_speed(iter/s)": 1.527842 }, { "acc": 0.97372017, "epoch": 46.99789078978205, "grad_norm": 3.179656505584717, "learning_rate": 9.85115803131201e-08, "loss": 0.05233287, "memory(GiB)": 13.7, "step": 100270, "train_speed(iter/s)": 1.527846 }, { "acc": 0.9885417, "epoch": 47.00023435669088, "grad_norm": 2.0131895542144775, "learning_rate": 9.835929854591337e-08, "loss": 0.02657356, "memory(GiB)": 13.7, "step": 100275, "train_speed(iter/s)": 1.527825 }, { "acc": 0.9979166, "epoch": 47.002577923599716, "grad_norm": 1.3370827436447144, "learning_rate": 9.820713461006476e-08, "loss": 0.00753882, "memory(GiB)": 13.7, "step": 100280, "train_speed(iter/s)": 1.527826 }, { "acc": 0.98291664, "epoch": 47.00492149050855, "grad_norm": 2.9680488109588623, "learning_rate": 9.805508850923101e-08, "loss": 0.038605, "memory(GiB)": 13.7, "step": 100285, "train_speed(iter/s)": 1.527828 }, { "acc": 0.98758926, "epoch": 47.00726505741739, "grad_norm": 2.925031900405884, "learning_rate": 9.790316024706774e-08, "loss": 0.04951593, "memory(GiB)": 13.7, "step": 100290, "train_speed(iter/s)": 1.527826 }, { "acc": 0.9958333, "epoch": 47.009608624326226, "grad_norm": 0.010239793919026852, "learning_rate": 9.775134982722553e-08, "loss": 0.02802131, "memory(GiB)": 13.7, "step": 100295, "train_speed(iter/s)": 1.527828 }, { "acc": 0.98937492, "epoch": 47.01195219123506, "grad_norm": 2.5925419330596924, "learning_rate": 9.759965725335442e-08, "loss": 0.03499623, "memory(GiB)": 13.7, "step": 100300, "train_speed(iter/s)": 1.527832 }, { "acc": 0.98500004, "epoch": 47.014295758143895, "grad_norm": 5.89555549621582, "learning_rate": 9.744808252910004e-08, "loss": 0.04720872, "memory(GiB)": 13.7, "step": 100305, "train_speed(iter/s)": 1.527836 }, { "acc": 0.98874998, "epoch": 47.01663932505273, "grad_norm": 2.6007466316223145, "learning_rate": 9.729662565810632e-08, "loss": 0.03113286, "memory(GiB)": 13.7, "step": 100310, "train_speed(iter/s)": 1.527838 }, { "acc": 0.98839283, "epoch": 47.018982891961564, "grad_norm": 3.034440279006958, "learning_rate": 9.71452866440133e-08, "loss": 0.03238151, "memory(GiB)": 13.7, "step": 100315, "train_speed(iter/s)": 1.52784 }, { "acc": 0.98032198, "epoch": 47.0213264588704, "grad_norm": 0.48408642411231995, "learning_rate": 9.699406549045886e-08, "loss": 0.04395429, "memory(GiB)": 13.7, "step": 100320, "train_speed(iter/s)": 1.527844 }, { "acc": 0.99124994, "epoch": 47.02367002577924, "grad_norm": 0.000794091378338635, "learning_rate": 9.684296220107802e-08, "loss": 0.02122563, "memory(GiB)": 13.7, "step": 100325, "train_speed(iter/s)": 1.527849 }, { "acc": 0.98723211, "epoch": 47.026013592688074, "grad_norm": 0.007297934498637915, "learning_rate": 9.6691976779502e-08, "loss": 0.05287191, "memory(GiB)": 13.7, "step": 100330, "train_speed(iter/s)": 1.527849 }, { "acc": 0.98633928, "epoch": 47.02835715959691, "grad_norm": 0.04575534164905548, "learning_rate": 9.654110922936083e-08, "loss": 0.01975665, "memory(GiB)": 13.7, "step": 100335, "train_speed(iter/s)": 1.527851 }, { "acc": 0.9864583, "epoch": 47.03070072650574, "grad_norm": 2.351047992706299, "learning_rate": 9.63903595542807e-08, "loss": 0.02014043, "memory(GiB)": 13.7, "step": 100340, "train_speed(iter/s)": 1.527851 }, { "acc": 0.996875, "epoch": 47.03304429341458, "grad_norm": 3.804363965988159, "learning_rate": 9.623972775788556e-08, "loss": 0.01369776, "memory(GiB)": 13.7, "step": 100345, "train_speed(iter/s)": 1.527851 }, { "acc": 0.99591351, "epoch": 47.03538786032341, "grad_norm": 0.0010386480716988444, "learning_rate": 9.608921384379495e-08, "loss": 0.01878113, "memory(GiB)": 13.7, "step": 100350, "train_speed(iter/s)": 1.527853 }, { "acc": 0.98777781, "epoch": 47.037731427232245, "grad_norm": 3.1012096405029297, "learning_rate": 9.593881781562783e-08, "loss": 0.0333054, "memory(GiB)": 13.7, "step": 100355, "train_speed(iter/s)": 1.527859 }, { "acc": 0.98800592, "epoch": 47.04007499414108, "grad_norm": 0.9099358916282654, "learning_rate": 9.578853967699873e-08, "loss": 0.02109878, "memory(GiB)": 13.7, "step": 100360, "train_speed(iter/s)": 1.527861 }, { "acc": 0.99375, "epoch": 47.04241856104992, "grad_norm": 2.4003422260284424, "learning_rate": 9.563837943151938e-08, "loss": 0.01994864, "memory(GiB)": 13.7, "step": 100365, "train_speed(iter/s)": 1.52787 }, { "acc": 0.97562294, "epoch": 47.044762127958755, "grad_norm": 0.02033950760960579, "learning_rate": 9.548833708280043e-08, "loss": 0.04890175, "memory(GiB)": 13.7, "step": 100370, "train_speed(iter/s)": 1.52787 }, { "acc": 0.98145828, "epoch": 47.04710569486759, "grad_norm": 3.2685189247131348, "learning_rate": 9.533841263444696e-08, "loss": 0.02479632, "memory(GiB)": 13.7, "step": 100375, "train_speed(iter/s)": 1.527872 }, { "acc": 0.9822916, "epoch": 47.049449261776424, "grad_norm": 2.2075469493865967, "learning_rate": 9.518860609006409e-08, "loss": 0.02954438, "memory(GiB)": 13.7, "step": 100380, "train_speed(iter/s)": 1.527876 }, { "acc": 0.99155636, "epoch": 47.05179282868526, "grad_norm": 2.581456184387207, "learning_rate": 9.503891745325133e-08, "loss": 0.02972727, "memory(GiB)": 13.7, "step": 100385, "train_speed(iter/s)": 1.527873 }, { "acc": 0.98052082, "epoch": 47.05413639559409, "grad_norm": 0.5873698592185974, "learning_rate": 9.488934672760768e-08, "loss": 0.04017805, "memory(GiB)": 13.7, "step": 100390, "train_speed(iter/s)": 1.527874 }, { "acc": 0.98812504, "epoch": 47.05647996250293, "grad_norm": 2.6831135749816895, "learning_rate": 9.473989391672765e-08, "loss": 0.01581966, "memory(GiB)": 13.7, "step": 100395, "train_speed(iter/s)": 1.527873 }, { "acc": 0.98986111, "epoch": 47.05882352941177, "grad_norm": 2.046612024307251, "learning_rate": 9.459055902420359e-08, "loss": 0.02667514, "memory(GiB)": 13.7, "step": 100400, "train_speed(iter/s)": 1.527878 }, { "acc": 0.98288689, "epoch": 47.0611670963206, "grad_norm": 0.2015080600976944, "learning_rate": 9.444134205362616e-08, "loss": 0.04602945, "memory(GiB)": 13.7, "step": 100405, "train_speed(iter/s)": 1.52788 }, { "acc": 0.98752975, "epoch": 47.06351066322944, "grad_norm": 0.8631418943405151, "learning_rate": 9.429224300858046e-08, "loss": 0.03104824, "memory(GiB)": 13.7, "step": 100410, "train_speed(iter/s)": 1.527884 }, { "acc": 0.990625, "epoch": 47.06585423013827, "grad_norm": 3.6679983139038086, "learning_rate": 9.414326189265159e-08, "loss": 0.01921524, "memory(GiB)": 13.7, "step": 100415, "train_speed(iter/s)": 1.52789 }, { "acc": 0.98898811, "epoch": 47.068197797047105, "grad_norm": 7.630040645599365, "learning_rate": 9.399439870941966e-08, "loss": 0.04805501, "memory(GiB)": 13.7, "step": 100420, "train_speed(iter/s)": 1.527889 }, { "acc": 0.98562498, "epoch": 47.07054136395594, "grad_norm": 2.215545654296875, "learning_rate": 9.384565346246369e-08, "loss": 0.02147249, "memory(GiB)": 13.7, "step": 100425, "train_speed(iter/s)": 1.527893 }, { "acc": 0.97333336, "epoch": 47.072884930864774, "grad_norm": 3.681708812713623, "learning_rate": 9.369702615535821e-08, "loss": 0.03243431, "memory(GiB)": 13.7, "step": 100430, "train_speed(iter/s)": 1.527892 }, { "acc": 0.99665184, "epoch": 47.07522849777361, "grad_norm": 4.333859443664551, "learning_rate": 9.354851679167671e-08, "loss": 0.04512298, "memory(GiB)": 13.7, "step": 100435, "train_speed(iter/s)": 1.527894 }, { "acc": 0.98250008, "epoch": 47.07757206468245, "grad_norm": 3.350980758666992, "learning_rate": 9.340012537498818e-08, "loss": 0.02767375, "memory(GiB)": 13.7, "step": 100440, "train_speed(iter/s)": 1.527895 }, { "acc": 0.9936553, "epoch": 47.079915631591284, "grad_norm": 1.1980665922164917, "learning_rate": 9.325185190885884e-08, "loss": 0.01605792, "memory(GiB)": 13.7, "step": 100445, "train_speed(iter/s)": 1.527896 }, { "acc": 0.97770834, "epoch": 47.08225919850012, "grad_norm": 5.907498836517334, "learning_rate": 9.310369639685438e-08, "loss": 0.0477487, "memory(GiB)": 13.7, "step": 100450, "train_speed(iter/s)": 1.527898 }, { "acc": 0.97250004, "epoch": 47.08460276540895, "grad_norm": 4.105685710906982, "learning_rate": 9.295565884253383e-08, "loss": 0.03813454, "memory(GiB)": 13.7, "step": 100455, "train_speed(iter/s)": 1.527904 }, { "acc": 0.99020834, "epoch": 47.08694633231779, "grad_norm": 0.9040483236312866, "learning_rate": 9.280773924945789e-08, "loss": 0.03245405, "memory(GiB)": 13.7, "step": 100460, "train_speed(iter/s)": 1.527907 }, { "acc": 0.98488102, "epoch": 47.08928989922662, "grad_norm": 3.042046070098877, "learning_rate": 9.265993762117997e-08, "loss": 0.03448101, "memory(GiB)": 13.7, "step": 100465, "train_speed(iter/s)": 1.52791 }, { "acc": 0.99008923, "epoch": 47.091633466135455, "grad_norm": 2.131364345550537, "learning_rate": 9.25122539612547e-08, "loss": 0.02506276, "memory(GiB)": 13.7, "step": 100470, "train_speed(iter/s)": 1.527911 }, { "acc": 0.97142859, "epoch": 47.0939770330443, "grad_norm": 2.39284348487854, "learning_rate": 9.236468827322997e-08, "loss": 0.05537491, "memory(GiB)": 13.7, "step": 100475, "train_speed(iter/s)": 1.527916 }, { "acc": 0.98571434, "epoch": 47.09632059995313, "grad_norm": 1.7871825695037842, "learning_rate": 9.221724056065372e-08, "loss": 0.03588302, "memory(GiB)": 13.7, "step": 100480, "train_speed(iter/s)": 1.527917 }, { "acc": 0.984375, "epoch": 47.098664166861965, "grad_norm": 6.89253044128418, "learning_rate": 9.206991082707053e-08, "loss": 0.04713024, "memory(GiB)": 13.7, "step": 100485, "train_speed(iter/s)": 1.527923 }, { "acc": 0.98029766, "epoch": 47.1010077337708, "grad_norm": 3.418647289276123, "learning_rate": 9.192269907602052e-08, "loss": 0.04678916, "memory(GiB)": 13.7, "step": 100490, "train_speed(iter/s)": 1.527925 }, { "acc": 0.98458328, "epoch": 47.103351300679634, "grad_norm": 3.355309247970581, "learning_rate": 9.17756053110439e-08, "loss": 0.01927213, "memory(GiB)": 13.7, "step": 100495, "train_speed(iter/s)": 1.527929 }, { "acc": 0.97865534, "epoch": 47.10569486758847, "grad_norm": 0.002957275602966547, "learning_rate": 9.16286295356741e-08, "loss": 0.06598012, "memory(GiB)": 13.7, "step": 100500, "train_speed(iter/s)": 1.52793 }, { "acc": 0.98041668, "epoch": 47.1080384344973, "grad_norm": 0.14341768622398376, "learning_rate": 9.148177175344629e-08, "loss": 0.04212506, "memory(GiB)": 13.7, "step": 100505, "train_speed(iter/s)": 1.527935 }, { "acc": 0.97979164, "epoch": 47.11038200140614, "grad_norm": 0.0013943809317424893, "learning_rate": 9.133503196788841e-08, "loss": 0.03069819, "memory(GiB)": 13.7, "step": 100510, "train_speed(iter/s)": 1.52794 }, { "acc": 0.98633928, "epoch": 47.11272556831498, "grad_norm": 2.407386302947998, "learning_rate": 9.118841018252894e-08, "loss": 0.04338613, "memory(GiB)": 13.7, "step": 100515, "train_speed(iter/s)": 1.527944 }, { "acc": 0.98708334, "epoch": 47.11506913522381, "grad_norm": 0.6767948865890503, "learning_rate": 9.104190640089194e-08, "loss": 0.0255998, "memory(GiB)": 13.7, "step": 100520, "train_speed(iter/s)": 1.527948 }, { "acc": 0.97974205, "epoch": 47.11741270213265, "grad_norm": 3.158094644546509, "learning_rate": 9.089552062649813e-08, "loss": 0.04126215, "memory(GiB)": 13.7, "step": 100525, "train_speed(iter/s)": 1.527951 }, { "acc": 0.99285717, "epoch": 47.11975626904148, "grad_norm": 4.74691915512085, "learning_rate": 9.074925286286767e-08, "loss": 0.02071092, "memory(GiB)": 13.7, "step": 100530, "train_speed(iter/s)": 1.527956 }, { "acc": 0.99083338, "epoch": 47.122099835950316, "grad_norm": 0.8827394247055054, "learning_rate": 9.060310311351463e-08, "loss": 0.03428184, "memory(GiB)": 13.7, "step": 100535, "train_speed(iter/s)": 1.527957 }, { "acc": 0.98729172, "epoch": 47.12444340285915, "grad_norm": 5.124565124511719, "learning_rate": 9.045707138195305e-08, "loss": 0.07924026, "memory(GiB)": 13.7, "step": 100540, "train_speed(iter/s)": 1.527958 }, { "acc": 0.9848958, "epoch": 47.126786969767984, "grad_norm": 2.5734105110168457, "learning_rate": 9.031115767169201e-08, "loss": 0.03213685, "memory(GiB)": 13.7, "step": 100545, "train_speed(iter/s)": 1.527961 }, { "acc": 0.99125004, "epoch": 47.129130536676826, "grad_norm": 1.368957281112671, "learning_rate": 9.016536198624002e-08, "loss": 0.0283094, "memory(GiB)": 13.7, "step": 100550, "train_speed(iter/s)": 1.527965 }, { "acc": 0.99509802, "epoch": 47.13147410358566, "grad_norm": 2.8792192935943604, "learning_rate": 9.001968432910114e-08, "loss": 0.03898164, "memory(GiB)": 13.7, "step": 100555, "train_speed(iter/s)": 1.527967 }, { "acc": 0.990098, "epoch": 47.133817670494494, "grad_norm": 0.5886391401290894, "learning_rate": 8.987412470377666e-08, "loss": 0.03058309, "memory(GiB)": 13.7, "step": 100560, "train_speed(iter/s)": 1.527965 }, { "acc": 0.99125004, "epoch": 47.13616123740333, "grad_norm": 2.9127190113067627, "learning_rate": 8.972868311376624e-08, "loss": 0.03281546, "memory(GiB)": 13.7, "step": 100565, "train_speed(iter/s)": 1.527967 }, { "acc": 0.98347759, "epoch": 47.13850480431216, "grad_norm": 1.0550203323364258, "learning_rate": 8.95833595625639e-08, "loss": 0.03105711, "memory(GiB)": 13.7, "step": 100570, "train_speed(iter/s)": 1.527969 }, { "acc": 0.98217258, "epoch": 47.140848371221, "grad_norm": 5.040257453918457, "learning_rate": 8.943815405366488e-08, "loss": 0.04225504, "memory(GiB)": 13.7, "step": 100575, "train_speed(iter/s)": 1.527973 }, { "acc": 0.98562508, "epoch": 47.14319193812983, "grad_norm": 3.9708616733551025, "learning_rate": 8.929306659055825e-08, "loss": 0.02129995, "memory(GiB)": 13.7, "step": 100580, "train_speed(iter/s)": 1.527976 }, { "acc": 0.9921875, "epoch": 47.145535505038666, "grad_norm": 2.2903385162353516, "learning_rate": 8.914809717673195e-08, "loss": 0.01699023, "memory(GiB)": 13.7, "step": 100585, "train_speed(iter/s)": 1.527977 }, { "acc": 0.99611111, "epoch": 47.14787907194751, "grad_norm": 1.4518823623657227, "learning_rate": 8.90032458156701e-08, "loss": 0.034217, "memory(GiB)": 13.7, "step": 100590, "train_speed(iter/s)": 1.527979 }, { "acc": 0.98383923, "epoch": 47.15022263885634, "grad_norm": 1.3791664838790894, "learning_rate": 8.88585125108551e-08, "loss": 0.02542165, "memory(GiB)": 13.7, "step": 100595, "train_speed(iter/s)": 1.527981 }, { "acc": 0.96851196, "epoch": 47.152566205765176, "grad_norm": 5.22344446182251, "learning_rate": 8.87138972657644e-08, "loss": 0.05930997, "memory(GiB)": 13.7, "step": 100600, "train_speed(iter/s)": 1.527984 }, { "acc": 0.9916667, "epoch": 47.15490977267401, "grad_norm": 4.177043914794922, "learning_rate": 8.856940008387654e-08, "loss": 0.03002791, "memory(GiB)": 13.7, "step": 100605, "train_speed(iter/s)": 1.527984 }, { "acc": 0.97800598, "epoch": 47.157253339582844, "grad_norm": 5.167843341827393, "learning_rate": 8.84250209686628e-08, "loss": 0.03882086, "memory(GiB)": 13.7, "step": 100610, "train_speed(iter/s)": 1.527987 }, { "acc": 0.98883934, "epoch": 47.15959690649168, "grad_norm": 2.029130697250366, "learning_rate": 8.828075992359397e-08, "loss": 0.043409, "memory(GiB)": 13.7, "step": 100615, "train_speed(iter/s)": 1.527988 }, { "acc": 0.98423615, "epoch": 47.16194047340051, "grad_norm": 4.485719680786133, "learning_rate": 8.813661695213806e-08, "loss": 0.03462291, "memory(GiB)": 13.7, "step": 100620, "train_speed(iter/s)": 1.527993 }, { "acc": 0.97895832, "epoch": 47.164284040309354, "grad_norm": 2.210028886795044, "learning_rate": 8.799259205775857e-08, "loss": 0.05928042, "memory(GiB)": 13.7, "step": 100625, "train_speed(iter/s)": 1.527995 }, { "acc": 0.97979164, "epoch": 47.16662760721819, "grad_norm": 2.209604263305664, "learning_rate": 8.784868524391966e-08, "loss": 0.05004914, "memory(GiB)": 13.7, "step": 100630, "train_speed(iter/s)": 1.527997 }, { "acc": 0.98145828, "epoch": 47.16897117412702, "grad_norm": 2.447007656097412, "learning_rate": 8.770489651407819e-08, "loss": 0.04646115, "memory(GiB)": 13.7, "step": 100635, "train_speed(iter/s)": 1.527999 }, { "acc": 0.99333324, "epoch": 47.17131474103586, "grad_norm": 1.3732855319976807, "learning_rate": 8.756122587169158e-08, "loss": 0.01866272, "memory(GiB)": 13.7, "step": 100640, "train_speed(iter/s)": 1.527999 }, { "acc": 0.98812504, "epoch": 47.17365830794469, "grad_norm": 3.572545289993286, "learning_rate": 8.741767332021342e-08, "loss": 0.03065836, "memory(GiB)": 13.7, "step": 100645, "train_speed(iter/s)": 1.527999 }, { "acc": 0.96948862, "epoch": 47.176001874853526, "grad_norm": 7.156230449676514, "learning_rate": 8.727423886309283e-08, "loss": 0.06011226, "memory(GiB)": 13.7, "step": 100650, "train_speed(iter/s)": 1.528005 }, { "acc": 0.98500004, "epoch": 47.17834544176236, "grad_norm": 0.018628496676683426, "learning_rate": 8.713092250377893e-08, "loss": 0.06478332, "memory(GiB)": 13.7, "step": 100655, "train_speed(iter/s)": 1.528003 }, { "acc": 0.996875, "epoch": 47.180689008671195, "grad_norm": 1.0859633684158325, "learning_rate": 8.698772424571472e-08, "loss": 0.01932935, "memory(GiB)": 13.7, "step": 100660, "train_speed(iter/s)": 1.528002 }, { "acc": 0.9839962, "epoch": 47.183032575580036, "grad_norm": 3.567391872406006, "learning_rate": 8.684464409234487e-08, "loss": 0.05036198, "memory(GiB)": 13.7, "step": 100665, "train_speed(iter/s)": 1.528003 }, { "acc": 0.9833334, "epoch": 47.18537614248887, "grad_norm": 3.52996563911438, "learning_rate": 8.67016820471063e-08, "loss": 0.04558591, "memory(GiB)": 13.7, "step": 100670, "train_speed(iter/s)": 1.528001 }, { "acc": 0.99229164, "epoch": 47.187719709397705, "grad_norm": 3.9736621379852295, "learning_rate": 8.6558838113437e-08, "loss": 0.05474976, "memory(GiB)": 13.7, "step": 100675, "train_speed(iter/s)": 1.528001 }, { "acc": 0.97361107, "epoch": 47.19006327630654, "grad_norm": 2.31054425239563, "learning_rate": 8.641611229476895e-08, "loss": 0.04338672, "memory(GiB)": 13.7, "step": 100680, "train_speed(iter/s)": 1.528002 }, { "acc": 0.9822916, "epoch": 47.19240684321537, "grad_norm": 3.1085586547851562, "learning_rate": 8.627350459453398e-08, "loss": 0.03872577, "memory(GiB)": 13.7, "step": 100685, "train_speed(iter/s)": 1.528004 }, { "acc": 0.98781252, "epoch": 47.19475041012421, "grad_norm": 2.973444938659668, "learning_rate": 8.613101501615905e-08, "loss": 0.04667457, "memory(GiB)": 13.7, "step": 100690, "train_speed(iter/s)": 1.528006 }, { "acc": 0.98976192, "epoch": 47.19709397703304, "grad_norm": 2.9197452068328857, "learning_rate": 8.598864356306997e-08, "loss": 0.02687135, "memory(GiB)": 13.7, "step": 100695, "train_speed(iter/s)": 1.528009 }, { "acc": 0.98154764, "epoch": 47.199437543941876, "grad_norm": 4.330433368682861, "learning_rate": 8.584639023868864e-08, "loss": 0.03097686, "memory(GiB)": 13.7, "step": 100700, "train_speed(iter/s)": 1.528008 }, { "acc": 0.9854167, "epoch": 47.20178111085072, "grad_norm": 2.028167486190796, "learning_rate": 8.570425504643365e-08, "loss": 0.0453037, "memory(GiB)": 13.7, "step": 100705, "train_speed(iter/s)": 1.528005 }, { "acc": 0.99236107, "epoch": 47.20412467775955, "grad_norm": 3.9496922492980957, "learning_rate": 8.55622379897225e-08, "loss": 0.02103665, "memory(GiB)": 13.7, "step": 100710, "train_speed(iter/s)": 1.528012 }, { "acc": 0.990625, "epoch": 47.206468244668386, "grad_norm": 0.001553674228489399, "learning_rate": 8.542033907196767e-08, "loss": 0.02693425, "memory(GiB)": 13.7, "step": 100715, "train_speed(iter/s)": 1.528015 }, { "acc": 0.99065208, "epoch": 47.20881181157722, "grad_norm": 2.6775074005126953, "learning_rate": 8.527855829658106e-08, "loss": 0.04975418, "memory(GiB)": 13.7, "step": 100720, "train_speed(iter/s)": 1.528015 }, { "acc": 0.9744792, "epoch": 47.211155378486055, "grad_norm": 6.604622840881348, "learning_rate": 8.51368956669702e-08, "loss": 0.07112767, "memory(GiB)": 13.7, "step": 100725, "train_speed(iter/s)": 1.528016 }, { "acc": 0.98291664, "epoch": 47.21349894539489, "grad_norm": 2.4380223751068115, "learning_rate": 8.499535118654031e-08, "loss": 0.05049299, "memory(GiB)": 13.7, "step": 100730, "train_speed(iter/s)": 1.528016 }, { "acc": 0.99082794, "epoch": 47.21584251230372, "grad_norm": 2.172903060913086, "learning_rate": 8.485392485869338e-08, "loss": 0.02085564, "memory(GiB)": 13.7, "step": 100735, "train_speed(iter/s)": 1.528016 }, { "acc": 0.98208332, "epoch": 47.218186079212565, "grad_norm": 3.285897970199585, "learning_rate": 8.471261668682853e-08, "loss": 0.05036232, "memory(GiB)": 13.7, "step": 100740, "train_speed(iter/s)": 1.52802 }, { "acc": 0.97770834, "epoch": 47.2205296461214, "grad_norm": 3.9633798599243164, "learning_rate": 8.45714266743433e-08, "loss": 0.03381193, "memory(GiB)": 13.7, "step": 100745, "train_speed(iter/s)": 1.528025 }, { "acc": 0.98154764, "epoch": 47.22287321303023, "grad_norm": 4.4991984367370605, "learning_rate": 8.443035482463017e-08, "loss": 0.02511595, "memory(GiB)": 13.7, "step": 100750, "train_speed(iter/s)": 1.528028 }, { "acc": 0.98180809, "epoch": 47.22521677993907, "grad_norm": 4.208771228790283, "learning_rate": 8.428940114108162e-08, "loss": 0.0282752, "memory(GiB)": 13.7, "step": 100755, "train_speed(iter/s)": 1.52803 }, { "acc": 0.99020834, "epoch": 47.2275603468479, "grad_norm": 2.423090934753418, "learning_rate": 8.414856562708407e-08, "loss": 0.01953804, "memory(GiB)": 13.7, "step": 100760, "train_speed(iter/s)": 1.528032 }, { "acc": 0.9802084, "epoch": 47.229903913756736, "grad_norm": 1.9006741046905518, "learning_rate": 8.400784828602447e-08, "loss": 0.0641427, "memory(GiB)": 13.7, "step": 100765, "train_speed(iter/s)": 1.528034 }, { "acc": 0.97437496, "epoch": 47.23224748066557, "grad_norm": 6.3906660079956055, "learning_rate": 8.386724912128368e-08, "loss": 0.05091062, "memory(GiB)": 13.7, "step": 100770, "train_speed(iter/s)": 1.528035 }, { "acc": 0.9854167, "epoch": 47.234591047574405, "grad_norm": 3.0308027267456055, "learning_rate": 8.372676813624142e-08, "loss": 0.02915674, "memory(GiB)": 13.7, "step": 100775, "train_speed(iter/s)": 1.52804 }, { "acc": 0.98772898, "epoch": 47.236934614483246, "grad_norm": 0.7734966278076172, "learning_rate": 8.358640533427576e-08, "loss": 0.02983249, "memory(GiB)": 13.7, "step": 100780, "train_speed(iter/s)": 1.528045 }, { "acc": 0.97559528, "epoch": 47.23927818139208, "grad_norm": 3.2160513401031494, "learning_rate": 8.344616071875866e-08, "loss": 0.04918305, "memory(GiB)": 13.7, "step": 100785, "train_speed(iter/s)": 1.528049 }, { "acc": 0.98495922, "epoch": 47.241621748300915, "grad_norm": 4.903773784637451, "learning_rate": 8.330603429306323e-08, "loss": 0.04698046, "memory(GiB)": 13.7, "step": 100790, "train_speed(iter/s)": 1.528057 }, { "acc": 0.98458328, "epoch": 47.24396531520975, "grad_norm": 5.243099689483643, "learning_rate": 8.316602606055527e-08, "loss": 0.0482391, "memory(GiB)": 13.7, "step": 100795, "train_speed(iter/s)": 1.528061 }, { "acc": 0.98656254, "epoch": 47.24630888211858, "grad_norm": 4.580481052398682, "learning_rate": 8.302613602460236e-08, "loss": 0.02501791, "memory(GiB)": 13.7, "step": 100800, "train_speed(iter/s)": 1.528062 }, { "acc": 0.98527775, "epoch": 47.24865244902742, "grad_norm": 3.4698843955993652, "learning_rate": 8.288636418856478e-08, "loss": 0.04808694, "memory(GiB)": 13.7, "step": 100805, "train_speed(iter/s)": 1.528067 }, { "acc": 0.97979164, "epoch": 47.25099601593625, "grad_norm": 6.2413859367370605, "learning_rate": 8.274671055580452e-08, "loss": 0.02960703, "memory(GiB)": 13.7, "step": 100810, "train_speed(iter/s)": 1.528074 }, { "acc": 0.9927084, "epoch": 47.25333958284509, "grad_norm": 0.013919576071202755, "learning_rate": 8.260717512967687e-08, "loss": 0.02402041, "memory(GiB)": 13.7, "step": 100815, "train_speed(iter/s)": 1.528076 }, { "acc": 0.98571434, "epoch": 47.25568314975393, "grad_norm": 4.195683479309082, "learning_rate": 8.246775791353604e-08, "loss": 0.01967523, "memory(GiB)": 13.7, "step": 100820, "train_speed(iter/s)": 1.528078 }, { "acc": 0.97562504, "epoch": 47.25802671666276, "grad_norm": 5.34245491027832, "learning_rate": 8.232845891073404e-08, "loss": 0.06549142, "memory(GiB)": 13.7, "step": 100825, "train_speed(iter/s)": 1.528078 }, { "acc": 0.98500004, "epoch": 47.260370283571596, "grad_norm": 2.1505775451660156, "learning_rate": 8.218927812461783e-08, "loss": 0.0357356, "memory(GiB)": 13.7, "step": 100830, "train_speed(iter/s)": 1.528076 }, { "acc": 0.98770828, "epoch": 47.26271385048043, "grad_norm": 5.200855255126953, "learning_rate": 8.205021555853331e-08, "loss": 0.02356125, "memory(GiB)": 13.7, "step": 100835, "train_speed(iter/s)": 1.528081 }, { "acc": 0.9782692, "epoch": 47.265057417389265, "grad_norm": 5.780564785003662, "learning_rate": 8.1911271215823e-08, "loss": 0.04710511, "memory(GiB)": 13.7, "step": 100840, "train_speed(iter/s)": 1.528082 }, { "acc": 0.9979166, "epoch": 47.2674009842981, "grad_norm": 2.582176923751831, "learning_rate": 8.177244509982781e-08, "loss": 0.01146711, "memory(GiB)": 13.7, "step": 100845, "train_speed(iter/s)": 1.528087 }, { "acc": 0.97592258, "epoch": 47.269744551206934, "grad_norm": 2.003624439239502, "learning_rate": 8.163373721388303e-08, "loss": 0.03921154, "memory(GiB)": 13.7, "step": 100850, "train_speed(iter/s)": 1.528093 }, { "acc": 0.9838542, "epoch": 47.272088118115775, "grad_norm": 5.39855432510376, "learning_rate": 8.149514756132404e-08, "loss": 0.03475237, "memory(GiB)": 13.7, "step": 100855, "train_speed(iter/s)": 1.528094 }, { "acc": 0.99613094, "epoch": 47.27443168502461, "grad_norm": 1.715074896812439, "learning_rate": 8.135667614548171e-08, "loss": 0.02501776, "memory(GiB)": 13.7, "step": 100860, "train_speed(iter/s)": 1.5281 }, { "acc": 0.96520834, "epoch": 47.276775251933444, "grad_norm": 2.6988282203674316, "learning_rate": 8.121832296968304e-08, "loss": 0.05054671, "memory(GiB)": 13.7, "step": 100865, "train_speed(iter/s)": 1.528107 }, { "acc": 0.98447914, "epoch": 47.27911881884228, "grad_norm": 5.7308220863342285, "learning_rate": 8.108008803725616e-08, "loss": 0.02758541, "memory(GiB)": 13.7, "step": 100870, "train_speed(iter/s)": 1.528113 }, { "acc": 0.98404131, "epoch": 47.28146238575111, "grad_norm": 0.016952261328697205, "learning_rate": 8.09419713515214e-08, "loss": 0.04735944, "memory(GiB)": 13.7, "step": 100875, "train_speed(iter/s)": 1.528115 }, { "acc": 0.97667618, "epoch": 47.28380595265995, "grad_norm": 2.909257173538208, "learning_rate": 8.080397291580021e-08, "loss": 0.11913071, "memory(GiB)": 13.7, "step": 100880, "train_speed(iter/s)": 1.528115 }, { "acc": 0.9837595, "epoch": 47.28614951956878, "grad_norm": 3.611405372619629, "learning_rate": 8.066609273340908e-08, "loss": 0.0406462, "memory(GiB)": 13.7, "step": 100885, "train_speed(iter/s)": 1.52812 }, { "acc": 0.9770834, "epoch": 47.28849308647762, "grad_norm": 5.66334342956543, "learning_rate": 8.052833080766222e-08, "loss": 0.0688955, "memory(GiB)": 13.7, "step": 100890, "train_speed(iter/s)": 1.52812 }, { "acc": 0.98619041, "epoch": 47.29083665338646, "grad_norm": 4.342560291290283, "learning_rate": 8.039068714187e-08, "loss": 0.03487758, "memory(GiB)": 13.7, "step": 100895, "train_speed(iter/s)": 1.528122 }, { "acc": 0.98520832, "epoch": 47.29318022029529, "grad_norm": 1.6782435178756714, "learning_rate": 8.025316173934277e-08, "loss": 0.02773737, "memory(GiB)": 13.7, "step": 100900, "train_speed(iter/s)": 1.528128 }, { "acc": 0.99125004, "epoch": 47.295523787204125, "grad_norm": 1.1451098918914795, "learning_rate": 8.011575460338532e-08, "loss": 0.0128567, "memory(GiB)": 13.7, "step": 100905, "train_speed(iter/s)": 1.528126 }, { "acc": 0.99152775, "epoch": 47.29786735411296, "grad_norm": 3.0373952388763428, "learning_rate": 7.997846573729969e-08, "loss": 0.02639391, "memory(GiB)": 13.7, "step": 100910, "train_speed(iter/s)": 1.528126 }, { "acc": 0.97198868, "epoch": 47.300210921021794, "grad_norm": 0.7432284951210022, "learning_rate": 7.984129514438737e-08, "loss": 0.05127377, "memory(GiB)": 13.7, "step": 100915, "train_speed(iter/s)": 1.528127 }, { "acc": 0.97583332, "epoch": 47.30255448793063, "grad_norm": 2.5479648113250732, "learning_rate": 7.97042428279437e-08, "loss": 0.03260365, "memory(GiB)": 13.7, "step": 100920, "train_speed(iter/s)": 1.528131 }, { "acc": 0.98938932, "epoch": 47.30489805483946, "grad_norm": 1.3076789379119873, "learning_rate": 7.956730879126518e-08, "loss": 0.03067262, "memory(GiB)": 13.7, "step": 100925, "train_speed(iter/s)": 1.528134 }, { "acc": 0.98113098, "epoch": 47.307241621748304, "grad_norm": 3.0315866470336914, "learning_rate": 7.943049303764104e-08, "loss": 0.03749091, "memory(GiB)": 13.7, "step": 100930, "train_speed(iter/s)": 1.528138 }, { "acc": 0.99494057, "epoch": 47.30958518865714, "grad_norm": 0.9524571299552917, "learning_rate": 7.929379557036168e-08, "loss": 0.01761992, "memory(GiB)": 13.7, "step": 100935, "train_speed(iter/s)": 1.528137 }, { "acc": 0.99375, "epoch": 47.31192875556597, "grad_norm": 0.8664020895957947, "learning_rate": 7.915721639271192e-08, "loss": 0.02802107, "memory(GiB)": 13.7, "step": 100940, "train_speed(iter/s)": 1.528137 }, { "acc": 0.9864584, "epoch": 47.31427232247481, "grad_norm": 2.2736656665802, "learning_rate": 7.902075550797436e-08, "loss": 0.02175183, "memory(GiB)": 13.7, "step": 100945, "train_speed(iter/s)": 1.52814 }, { "acc": 0.99236107, "epoch": 47.31661588938364, "grad_norm": 0.008145970292389393, "learning_rate": 7.88844129194299e-08, "loss": 0.01116962, "memory(GiB)": 13.7, "step": 100950, "train_speed(iter/s)": 1.528147 }, { "acc": 0.98812504, "epoch": 47.318959456292475, "grad_norm": 3.720500946044922, "learning_rate": 7.874818863035455e-08, "loss": 0.0227857, "memory(GiB)": 13.7, "step": 100955, "train_speed(iter/s)": 1.528149 }, { "acc": 0.99291668, "epoch": 47.32130302320131, "grad_norm": 2.9269988536834717, "learning_rate": 7.861208264402474e-08, "loss": 0.02812766, "memory(GiB)": 13.7, "step": 100960, "train_speed(iter/s)": 1.528148 }, { "acc": 0.98779764, "epoch": 47.32364659011015, "grad_norm": 2.521033763885498, "learning_rate": 7.847609496371035e-08, "loss": 0.04529092, "memory(GiB)": 13.7, "step": 100965, "train_speed(iter/s)": 1.528151 }, { "acc": 0.98883934, "epoch": 47.325990157018985, "grad_norm": 3.470386266708374, "learning_rate": 7.834022559268062e-08, "loss": 0.01994759, "memory(GiB)": 13.7, "step": 100970, "train_speed(iter/s)": 1.528151 }, { "acc": 0.98113098, "epoch": 47.32833372392782, "grad_norm": 3.008777141571045, "learning_rate": 7.820447453420098e-08, "loss": 0.05392253, "memory(GiB)": 13.7, "step": 100975, "train_speed(iter/s)": 1.52815 }, { "acc": 0.99348221, "epoch": 47.330677290836654, "grad_norm": 0.8949723243713379, "learning_rate": 7.80688417915346e-08, "loss": 0.01193747, "memory(GiB)": 13.7, "step": 100980, "train_speed(iter/s)": 1.528154 }, { "acc": 0.99125004, "epoch": 47.33302085774549, "grad_norm": 2.1835336685180664, "learning_rate": 7.793332736794241e-08, "loss": 0.02633197, "memory(GiB)": 13.7, "step": 100985, "train_speed(iter/s)": 1.528151 }, { "acc": 0.98416662, "epoch": 47.33536442465432, "grad_norm": 2.2011332511901855, "learning_rate": 7.779793126668095e-08, "loss": 0.01927565, "memory(GiB)": 13.7, "step": 100990, "train_speed(iter/s)": 1.528152 }, { "acc": 0.996875, "epoch": 47.33770799156316, "grad_norm": 0.0007934382883831859, "learning_rate": 7.766265349100504e-08, "loss": 0.01852677, "memory(GiB)": 13.7, "step": 100995, "train_speed(iter/s)": 1.528152 }, { "acc": 0.96678028, "epoch": 47.34005155847199, "grad_norm": 3.200735092163086, "learning_rate": 7.752749404416567e-08, "loss": 0.09102564, "memory(GiB)": 13.7, "step": 101000, "train_speed(iter/s)": 1.528153 }, { "acc": 0.99333334, "epoch": 47.34239512538083, "grad_norm": 4.000643253326416, "learning_rate": 7.739245292941267e-08, "loss": 0.02539939, "memory(GiB)": 13.7, "step": 101005, "train_speed(iter/s)": 1.528154 }, { "acc": 0.98703375, "epoch": 47.34473869228967, "grad_norm": 3.8301825523376465, "learning_rate": 7.725753014999092e-08, "loss": 0.04160163, "memory(GiB)": 13.7, "step": 101010, "train_speed(iter/s)": 1.528157 }, { "acc": 0.99375, "epoch": 47.3470822591985, "grad_norm": 5.807521820068359, "learning_rate": 7.712272570914471e-08, "loss": 0.01339528, "memory(GiB)": 13.7, "step": 101015, "train_speed(iter/s)": 1.52816 }, { "acc": 0.99416666, "epoch": 47.349425826107336, "grad_norm": 2.652404546737671, "learning_rate": 7.698803961011279e-08, "loss": 0.04546497, "memory(GiB)": 13.7, "step": 101020, "train_speed(iter/s)": 1.528162 }, { "acc": 0.98800592, "epoch": 47.35176939301617, "grad_norm": 3.8087098598480225, "learning_rate": 7.685347185613447e-08, "loss": 0.02584499, "memory(GiB)": 13.7, "step": 101025, "train_speed(iter/s)": 1.528164 }, { "acc": 0.96816473, "epoch": 47.354112959925004, "grad_norm": 3.3758926391601562, "learning_rate": 7.671902245044241e-08, "loss": 0.06933328, "memory(GiB)": 13.7, "step": 101030, "train_speed(iter/s)": 1.528167 }, { "acc": 0.9932292, "epoch": 47.35645652683384, "grad_norm": 3.6451828479766846, "learning_rate": 7.658469139626978e-08, "loss": 0.02842752, "memory(GiB)": 13.7, "step": 101035, "train_speed(iter/s)": 1.528168 }, { "acc": 0.9864584, "epoch": 47.35880009374268, "grad_norm": 2.7281734943389893, "learning_rate": 7.645047869684425e-08, "loss": 0.0278277, "memory(GiB)": 13.7, "step": 101040, "train_speed(iter/s)": 1.528171 }, { "acc": 0.9895834, "epoch": 47.361143660651514, "grad_norm": 0.0009153445716947317, "learning_rate": 7.631638435539236e-08, "loss": 0.0315173, "memory(GiB)": 13.7, "step": 101045, "train_speed(iter/s)": 1.528172 }, { "acc": 0.99690475, "epoch": 47.36348722756035, "grad_norm": 1.5465580224990845, "learning_rate": 7.61824083751379e-08, "loss": 0.01668472, "memory(GiB)": 13.7, "step": 101050, "train_speed(iter/s)": 1.528175 }, { "acc": 0.98309517, "epoch": 47.36583079446918, "grad_norm": 4.9173903465271, "learning_rate": 7.604855075930069e-08, "loss": 0.06955878, "memory(GiB)": 13.7, "step": 101055, "train_speed(iter/s)": 1.528181 }, { "acc": 0.98113098, "epoch": 47.36817436137802, "grad_norm": 4.378838539123535, "learning_rate": 7.591481151109788e-08, "loss": 0.04157612, "memory(GiB)": 13.7, "step": 101060, "train_speed(iter/s)": 1.528183 }, { "acc": 0.98458338, "epoch": 47.37051792828685, "grad_norm": 0.0011597353732213378, "learning_rate": 7.57811906337449e-08, "loss": 0.05457304, "memory(GiB)": 13.7, "step": 101065, "train_speed(iter/s)": 1.528185 }, { "acc": 0.98987179, "epoch": 47.372861495195686, "grad_norm": 0.0004812999104615301, "learning_rate": 7.564768813045275e-08, "loss": 0.02875802, "memory(GiB)": 13.7, "step": 101070, "train_speed(iter/s)": 1.528185 }, { "acc": 0.99125004, "epoch": 47.37520506210452, "grad_norm": 4.681595802307129, "learning_rate": 7.551430400443131e-08, "loss": 0.03735099, "memory(GiB)": 13.7, "step": 101075, "train_speed(iter/s)": 1.528188 }, { "acc": 0.98406248, "epoch": 47.37754862901336, "grad_norm": 2.941389322280884, "learning_rate": 7.538103825888548e-08, "loss": 0.02869146, "memory(GiB)": 13.7, "step": 101080, "train_speed(iter/s)": 1.528186 }, { "acc": 0.9838541, "epoch": 47.379892195922196, "grad_norm": 1.3212602138519287, "learning_rate": 7.52478908970196e-08, "loss": 0.03125766, "memory(GiB)": 13.7, "step": 101085, "train_speed(iter/s)": 1.528185 }, { "acc": 0.98270836, "epoch": 47.38223576283103, "grad_norm": 3.8929154872894287, "learning_rate": 7.511486192203355e-08, "loss": 0.05082709, "memory(GiB)": 13.7, "step": 101090, "train_speed(iter/s)": 1.528185 }, { "acc": 0.98812504, "epoch": 47.384579329739864, "grad_norm": 2.569617748260498, "learning_rate": 7.498195133712561e-08, "loss": 0.0198794, "memory(GiB)": 13.7, "step": 101095, "train_speed(iter/s)": 1.528189 }, { "acc": 0.99229164, "epoch": 47.3869228966487, "grad_norm": 1.03916597366333, "learning_rate": 7.484915914548896e-08, "loss": 0.01321208, "memory(GiB)": 13.7, "step": 101100, "train_speed(iter/s)": 1.528191 }, { "acc": 0.98258934, "epoch": 47.38926646355753, "grad_norm": 8.007658004760742, "learning_rate": 7.471648535031743e-08, "loss": 0.04676895, "memory(GiB)": 13.7, "step": 101105, "train_speed(iter/s)": 1.528194 }, { "acc": 0.99298611, "epoch": 47.39161003046637, "grad_norm": 3.282167434692383, "learning_rate": 7.458392995479868e-08, "loss": 0.01900245, "memory(GiB)": 13.7, "step": 101110, "train_speed(iter/s)": 1.528192 }, { "acc": 0.98291664, "epoch": 47.3939535973752, "grad_norm": 3.311530113220215, "learning_rate": 7.445149296211986e-08, "loss": 0.03294914, "memory(GiB)": 13.7, "step": 101115, "train_speed(iter/s)": 1.528194 }, { "acc": 0.9895833, "epoch": 47.39629716428404, "grad_norm": 4.362863063812256, "learning_rate": 7.431917437546366e-08, "loss": 0.02840121, "memory(GiB)": 13.7, "step": 101120, "train_speed(iter/s)": 1.528199 }, { "acc": 0.97937498, "epoch": 47.39864073119288, "grad_norm": 3.5854344367980957, "learning_rate": 7.418697419801e-08, "loss": 0.0279053, "memory(GiB)": 13.7, "step": 101125, "train_speed(iter/s)": 1.528203 }, { "acc": 0.9770833, "epoch": 47.40098429810171, "grad_norm": 2.028972864151001, "learning_rate": 7.405489243293767e-08, "loss": 0.03484015, "memory(GiB)": 13.7, "step": 101130, "train_speed(iter/s)": 1.528208 }, { "acc": 0.9864584, "epoch": 47.403327865010546, "grad_norm": 2.6640448570251465, "learning_rate": 7.39229290834216e-08, "loss": 0.05285828, "memory(GiB)": 13.7, "step": 101135, "train_speed(iter/s)": 1.52821 }, { "acc": 0.98651514, "epoch": 47.40567143191938, "grad_norm": 3.9937586784362793, "learning_rate": 7.379108415263284e-08, "loss": 0.04525996, "memory(GiB)": 13.7, "step": 101140, "train_speed(iter/s)": 1.528212 }, { "acc": 0.99375, "epoch": 47.408014998828214, "grad_norm": 3.1032509803771973, "learning_rate": 7.365935764374071e-08, "loss": 0.02510177, "memory(GiB)": 13.7, "step": 101145, "train_speed(iter/s)": 1.528212 }, { "acc": 0.98916664, "epoch": 47.41035856573705, "grad_norm": 4.6701765060424805, "learning_rate": 7.352774955991241e-08, "loss": 0.02620365, "memory(GiB)": 13.7, "step": 101150, "train_speed(iter/s)": 1.528214 }, { "acc": 0.9911458, "epoch": 47.41270213264589, "grad_norm": 0.001587226870469749, "learning_rate": 7.339625990431064e-08, "loss": 0.02294388, "memory(GiB)": 13.7, "step": 101155, "train_speed(iter/s)": 1.528218 }, { "acc": 0.99245186, "epoch": 47.415045699554724, "grad_norm": 1.6645126342773438, "learning_rate": 7.326488868009475e-08, "loss": 0.0369749, "memory(GiB)": 13.7, "step": 101160, "train_speed(iter/s)": 1.528218 }, { "acc": 0.98071432, "epoch": 47.41738926646356, "grad_norm": 3.1007065773010254, "learning_rate": 7.313363589042471e-08, "loss": 0.06497841, "memory(GiB)": 13.7, "step": 101165, "train_speed(iter/s)": 1.528221 }, { "acc": 0.98611107, "epoch": 47.41973283337239, "grad_norm": 2.57314133644104, "learning_rate": 7.300250153845434e-08, "loss": 0.02033542, "memory(GiB)": 13.7, "step": 101170, "train_speed(iter/s)": 1.528228 }, { "acc": 0.98217258, "epoch": 47.42207640028123, "grad_norm": 2.4635446071624756, "learning_rate": 7.287148562733578e-08, "loss": 0.05208878, "memory(GiB)": 13.7, "step": 101175, "train_speed(iter/s)": 1.52823 }, { "acc": 0.99175596, "epoch": 47.42441996719006, "grad_norm": 5.010458469390869, "learning_rate": 7.274058816021789e-08, "loss": 0.03898249, "memory(GiB)": 13.7, "step": 101180, "train_speed(iter/s)": 1.528231 }, { "acc": 0.98624992, "epoch": 47.426763534098896, "grad_norm": 0.07344643771648407, "learning_rate": 7.26098091402478e-08, "loss": 0.02924773, "memory(GiB)": 13.7, "step": 101185, "train_speed(iter/s)": 1.528234 }, { "acc": 0.97693138, "epoch": 47.42910710100773, "grad_norm": 7.155553340911865, "learning_rate": 7.24791485705683e-08, "loss": 0.07358267, "memory(GiB)": 13.7, "step": 101190, "train_speed(iter/s)": 1.528234 }, { "acc": 0.98083344, "epoch": 47.43145066791657, "grad_norm": 1.3120478391647339, "learning_rate": 7.234860645431984e-08, "loss": 0.03134953, "memory(GiB)": 13.7, "step": 101195, "train_speed(iter/s)": 1.52824 }, { "acc": 0.98604164, "epoch": 47.433794234825406, "grad_norm": 2.8560404777526855, "learning_rate": 7.221818279464128e-08, "loss": 0.03130893, "memory(GiB)": 13.7, "step": 101200, "train_speed(iter/s)": 1.528247 }, { "acc": 0.98069439, "epoch": 47.43613780173424, "grad_norm": 3.2502315044403076, "learning_rate": 7.208787759466648e-08, "loss": 0.03822512, "memory(GiB)": 13.7, "step": 101205, "train_speed(iter/s)": 1.528252 }, { "acc": 0.9895833, "epoch": 47.438481368643075, "grad_norm": 2.1519346237182617, "learning_rate": 7.195769085752815e-08, "loss": 0.03259389, "memory(GiB)": 13.7, "step": 101210, "train_speed(iter/s)": 1.528258 }, { "acc": 0.9927084, "epoch": 47.44082493555191, "grad_norm": 1.3854774236679077, "learning_rate": 7.182762258635516e-08, "loss": 0.0331168, "memory(GiB)": 13.7, "step": 101215, "train_speed(iter/s)": 1.52826 }, { "acc": 0.98239584, "epoch": 47.44316850246074, "grad_norm": 2.3824710845947266, "learning_rate": 7.16976727842747e-08, "loss": 0.03636247, "memory(GiB)": 13.7, "step": 101220, "train_speed(iter/s)": 1.528266 }, { "acc": 0.98124905, "epoch": 47.44551206936958, "grad_norm": 0.8699626922607422, "learning_rate": 7.156784145440949e-08, "loss": 0.079633, "memory(GiB)": 13.7, "step": 101225, "train_speed(iter/s)": 1.528266 }, { "acc": 0.98708334, "epoch": 47.44785563627842, "grad_norm": 5.227573871612549, "learning_rate": 7.143812859988063e-08, "loss": 0.04120621, "memory(GiB)": 13.7, "step": 101230, "train_speed(iter/s)": 1.528272 }, { "acc": 0.98458328, "epoch": 47.45019920318725, "grad_norm": 4.895135402679443, "learning_rate": 7.130853422380642e-08, "loss": 0.03527628, "memory(GiB)": 13.7, "step": 101235, "train_speed(iter/s)": 1.528279 }, { "acc": 0.98950758, "epoch": 47.45254277009609, "grad_norm": 3.094526767730713, "learning_rate": 7.117905832930072e-08, "loss": 0.05304368, "memory(GiB)": 13.7, "step": 101240, "train_speed(iter/s)": 1.52828 }, { "acc": 0.98279762, "epoch": 47.45488633700492, "grad_norm": 3.6811177730560303, "learning_rate": 7.104970091947684e-08, "loss": 0.02694457, "memory(GiB)": 13.7, "step": 101245, "train_speed(iter/s)": 1.52828 }, { "acc": 0.97990532, "epoch": 47.457229903913756, "grad_norm": 3.006096839904785, "learning_rate": 7.092046199744311e-08, "loss": 0.0547012, "memory(GiB)": 13.7, "step": 101250, "train_speed(iter/s)": 1.52828 }, { "acc": 0.98395834, "epoch": 47.45957347082259, "grad_norm": 0.9362607002258301, "learning_rate": 7.079134156630671e-08, "loss": 0.02752404, "memory(GiB)": 13.7, "step": 101255, "train_speed(iter/s)": 1.528279 }, { "acc": 0.97875004, "epoch": 47.461917037731425, "grad_norm": 3.2272396087646484, "learning_rate": 7.066233962917098e-08, "loss": 0.03626898, "memory(GiB)": 13.7, "step": 101260, "train_speed(iter/s)": 1.528278 }, { "acc": 0.99468136, "epoch": 47.46426060464026, "grad_norm": 1.436919927597046, "learning_rate": 7.053345618913755e-08, "loss": 0.02279977, "memory(GiB)": 13.7, "step": 101265, "train_speed(iter/s)": 1.528282 }, { "acc": 0.99208336, "epoch": 47.4666041715491, "grad_norm": 0.11071544140577316, "learning_rate": 7.040469124930253e-08, "loss": 0.02858915, "memory(GiB)": 13.7, "step": 101270, "train_speed(iter/s)": 1.528281 }, { "acc": 0.98942909, "epoch": 47.468947738457935, "grad_norm": 1.3413445949554443, "learning_rate": 7.027604481276315e-08, "loss": 0.02820803, "memory(GiB)": 13.7, "step": 101275, "train_speed(iter/s)": 1.528284 }, { "acc": 0.996875, "epoch": 47.47129130536677, "grad_norm": 4.397286891937256, "learning_rate": 7.01475168826105e-08, "loss": 0.02279688, "memory(GiB)": 13.7, "step": 101280, "train_speed(iter/s)": 1.528286 }, { "acc": 0.98361111, "epoch": 47.4736348722756, "grad_norm": 3.6689260005950928, "learning_rate": 7.001910746193345e-08, "loss": 0.03237279, "memory(GiB)": 13.7, "step": 101285, "train_speed(iter/s)": 1.528288 }, { "acc": 0.98770838, "epoch": 47.47597843918444, "grad_norm": 6.523188591003418, "learning_rate": 6.989081655381981e-08, "loss": 0.04614002, "memory(GiB)": 13.7, "step": 101290, "train_speed(iter/s)": 1.52829 }, { "acc": 0.98529758, "epoch": 47.47832200609327, "grad_norm": 3.0139200687408447, "learning_rate": 6.976264416135234e-08, "loss": 0.03408436, "memory(GiB)": 13.7, "step": 101295, "train_speed(iter/s)": 1.528293 }, { "acc": 0.98613091, "epoch": 47.480665573002106, "grad_norm": 3.547788619995117, "learning_rate": 6.963459028761272e-08, "loss": 0.04602532, "memory(GiB)": 13.7, "step": 101300, "train_speed(iter/s)": 1.52829 }, { "acc": 0.98291664, "epoch": 47.48300913991095, "grad_norm": 2.485835075378418, "learning_rate": 6.950665493567708e-08, "loss": 0.04464409, "memory(GiB)": 13.7, "step": 101305, "train_speed(iter/s)": 1.528291 }, { "acc": 0.98708334, "epoch": 47.48535270681978, "grad_norm": 2.5639467239379883, "learning_rate": 6.937883810862318e-08, "loss": 0.02971297, "memory(GiB)": 13.7, "step": 101310, "train_speed(iter/s)": 1.528292 }, { "acc": 0.97895832, "epoch": 47.487696273728616, "grad_norm": 6.084109306335449, "learning_rate": 6.925113980952161e-08, "loss": 0.04214848, "memory(GiB)": 13.7, "step": 101315, "train_speed(iter/s)": 1.528294 }, { "acc": 0.98402786, "epoch": 47.49003984063745, "grad_norm": 1.7823735475540161, "learning_rate": 6.912356004144182e-08, "loss": 0.02133498, "memory(GiB)": 13.7, "step": 101320, "train_speed(iter/s)": 1.528297 }, { "acc": 0.99582796, "epoch": 47.492383407546285, "grad_norm": 2.4714183807373047, "learning_rate": 6.899609880745163e-08, "loss": 0.03228863, "memory(GiB)": 13.7, "step": 101325, "train_speed(iter/s)": 1.5283 }, { "acc": 0.98125, "epoch": 47.49472697445512, "grad_norm": 4.944411754608154, "learning_rate": 6.886875611061268e-08, "loss": 0.05302631, "memory(GiB)": 13.7, "step": 101330, "train_speed(iter/s)": 1.528305 }, { "acc": 0.98708334, "epoch": 47.497070541363954, "grad_norm": 3.4147515296936035, "learning_rate": 6.874153195398835e-08, "loss": 0.0336934, "memory(GiB)": 13.7, "step": 101335, "train_speed(iter/s)": 1.528308 }, { "acc": 0.98489046, "epoch": 47.49941410827279, "grad_norm": 1.2612738609313965, "learning_rate": 6.86144263406348e-08, "loss": 0.04556334, "memory(GiB)": 13.7, "step": 101340, "train_speed(iter/s)": 1.528307 }, { "acc": 0.96958332, "epoch": 47.50175767518163, "grad_norm": 1.9980170726776123, "learning_rate": 6.848743927360869e-08, "loss": 0.04784567, "memory(GiB)": 13.7, "step": 101345, "train_speed(iter/s)": 1.528311 }, { "acc": 0.97321434, "epoch": 47.504101242090464, "grad_norm": 4.918731212615967, "learning_rate": 6.836057075596116e-08, "loss": 0.04405924, "memory(GiB)": 13.7, "step": 101350, "train_speed(iter/s)": 1.528313 }, { "acc": 0.9875, "epoch": 47.5064448089993, "grad_norm": 2.603738307952881, "learning_rate": 6.823382079074228e-08, "loss": 0.03238777, "memory(GiB)": 13.7, "step": 101355, "train_speed(iter/s)": 1.528319 }, { "acc": 0.98312492, "epoch": 47.50878837590813, "grad_norm": 3.4493637084960938, "learning_rate": 6.810718938099871e-08, "loss": 0.0332898, "memory(GiB)": 13.7, "step": 101360, "train_speed(iter/s)": 1.528317 }, { "acc": 0.9916667, "epoch": 47.51113194281697, "grad_norm": 2.1699554920196533, "learning_rate": 6.798067652977331e-08, "loss": 0.02343149, "memory(GiB)": 13.7, "step": 101365, "train_speed(iter/s)": 1.52832 }, { "acc": 0.98729172, "epoch": 47.5134755097258, "grad_norm": 3.4550325870513916, "learning_rate": 6.785428224010886e-08, "loss": 0.02478907, "memory(GiB)": 13.7, "step": 101370, "train_speed(iter/s)": 1.528322 }, { "acc": 0.98663692, "epoch": 47.515819076634635, "grad_norm": 1.7902220487594604, "learning_rate": 6.77280065150421e-08, "loss": 0.02438801, "memory(GiB)": 13.7, "step": 101375, "train_speed(iter/s)": 1.528322 }, { "acc": 0.98524618, "epoch": 47.51816264354348, "grad_norm": 2.874295949935913, "learning_rate": 6.760184935760916e-08, "loss": 0.05707581, "memory(GiB)": 13.7, "step": 101380, "train_speed(iter/s)": 1.528327 }, { "acc": 0.98779764, "epoch": 47.52050621045231, "grad_norm": 0.9813888669013977, "learning_rate": 6.747581077084122e-08, "loss": 0.03891307, "memory(GiB)": 13.7, "step": 101385, "train_speed(iter/s)": 1.528328 }, { "acc": 0.9875, "epoch": 47.522849777361145, "grad_norm": 3.275923490524292, "learning_rate": 6.734989075776886e-08, "loss": 0.02665869, "memory(GiB)": 13.7, "step": 101390, "train_speed(iter/s)": 1.528331 }, { "acc": 0.98500004, "epoch": 47.52519334426998, "grad_norm": 1.8614987134933472, "learning_rate": 6.722408932141773e-08, "loss": 0.02343997, "memory(GiB)": 13.7, "step": 101395, "train_speed(iter/s)": 1.528335 }, { "acc": 0.99020834, "epoch": 47.527536911178814, "grad_norm": 0.0012764394050464034, "learning_rate": 6.70984064648134e-08, "loss": 0.02801083, "memory(GiB)": 13.7, "step": 101400, "train_speed(iter/s)": 1.528337 }, { "acc": 0.9822917, "epoch": 47.52988047808765, "grad_norm": 2.5506856441497803, "learning_rate": 6.697284219097598e-08, "loss": 0.02935039, "memory(GiB)": 13.7, "step": 101405, "train_speed(iter/s)": 1.52834 }, { "acc": 0.98258934, "epoch": 47.53222404499648, "grad_norm": 3.2324440479278564, "learning_rate": 6.68473965029227e-08, "loss": 0.05756108, "memory(GiB)": 13.7, "step": 101410, "train_speed(iter/s)": 1.528341 }, { "acc": 0.98351641, "epoch": 47.53456761190532, "grad_norm": 2.851531505584717, "learning_rate": 6.672206940367031e-08, "loss": 0.04035, "memory(GiB)": 13.7, "step": 101415, "train_speed(iter/s)": 1.528342 }, { "acc": 0.98014956, "epoch": 47.53691117881416, "grad_norm": 2.3612401485443115, "learning_rate": 6.659686089623e-08, "loss": 0.03919228, "memory(GiB)": 13.7, "step": 101420, "train_speed(iter/s)": 1.528346 }, { "acc": 0.98032188, "epoch": 47.53925474572299, "grad_norm": 3.3532583713531494, "learning_rate": 6.647177098361295e-08, "loss": 0.04468185, "memory(GiB)": 13.7, "step": 101425, "train_speed(iter/s)": 1.528344 }, { "acc": 0.97925587, "epoch": 47.54159831263183, "grad_norm": 2.2102386951446533, "learning_rate": 6.634679966882421e-08, "loss": 0.02760903, "memory(GiB)": 13.7, "step": 101430, "train_speed(iter/s)": 1.528347 }, { "acc": 0.99327383, "epoch": 47.54394187954066, "grad_norm": 4.6789164543151855, "learning_rate": 6.622194695486942e-08, "loss": 0.05416732, "memory(GiB)": 13.7, "step": 101435, "train_speed(iter/s)": 1.528347 }, { "acc": 0.97958336, "epoch": 47.546285446449495, "grad_norm": 4.37464714050293, "learning_rate": 6.609721284474756e-08, "loss": 0.04020252, "memory(GiB)": 13.7, "step": 101440, "train_speed(iter/s)": 1.52835 }, { "acc": 0.98479176, "epoch": 47.54862901335833, "grad_norm": 0.9146959781646729, "learning_rate": 6.597259734145869e-08, "loss": 0.02714843, "memory(GiB)": 13.7, "step": 101445, "train_speed(iter/s)": 1.528352 }, { "acc": 0.99125004, "epoch": 47.550972580267164, "grad_norm": 1.542553186416626, "learning_rate": 6.584810044799734e-08, "loss": 0.03149746, "memory(GiB)": 13.7, "step": 101450, "train_speed(iter/s)": 1.528355 }, { "acc": 0.99326925, "epoch": 47.553316147176005, "grad_norm": 3.158202886581421, "learning_rate": 6.572372216735527e-08, "loss": 0.03066292, "memory(GiB)": 13.7, "step": 101455, "train_speed(iter/s)": 1.528354 }, { "acc": 0.9864583, "epoch": 47.55565971408484, "grad_norm": 0.9353452920913696, "learning_rate": 6.559946250252368e-08, "loss": 0.03801872, "memory(GiB)": 13.7, "step": 101460, "train_speed(iter/s)": 1.528355 }, { "acc": 0.99149799, "epoch": 47.558003280993674, "grad_norm": 1.5111429691314697, "learning_rate": 6.54753214564882e-08, "loss": 0.03218039, "memory(GiB)": 13.7, "step": 101465, "train_speed(iter/s)": 1.528356 }, { "acc": 0.9890625, "epoch": 47.56034684790251, "grad_norm": 1.4749196767807007, "learning_rate": 6.535129903223393e-08, "loss": 0.03890615, "memory(GiB)": 13.7, "step": 101470, "train_speed(iter/s)": 1.52836 }, { "acc": 0.98249998, "epoch": 47.56269041481134, "grad_norm": 2.2601821422576904, "learning_rate": 6.522739523274042e-08, "loss": 0.02817872, "memory(GiB)": 13.7, "step": 101475, "train_speed(iter/s)": 1.52836 }, { "acc": 0.98779755, "epoch": 47.56503398172018, "grad_norm": 2.184366464614868, "learning_rate": 6.510361006098664e-08, "loss": 0.04712841, "memory(GiB)": 13.7, "step": 101480, "train_speed(iter/s)": 1.528364 }, { "acc": 0.99416676, "epoch": 47.56737754862901, "grad_norm": 0.2098492830991745, "learning_rate": 6.49799435199488e-08, "loss": 0.01975228, "memory(GiB)": 13.7, "step": 101485, "train_speed(iter/s)": 1.52837 }, { "acc": 0.98999996, "epoch": 47.569721115537845, "grad_norm": 3.293405294418335, "learning_rate": 6.485639561259813e-08, "loss": 0.01357294, "memory(GiB)": 13.7, "step": 101490, "train_speed(iter/s)": 1.528369 }, { "acc": 0.97321434, "epoch": 47.57206468244669, "grad_norm": 6.90636682510376, "learning_rate": 6.473296634190528e-08, "loss": 0.04225824, "memory(GiB)": 13.7, "step": 101495, "train_speed(iter/s)": 1.528371 }, { "acc": 0.9760417, "epoch": 47.57440824935552, "grad_norm": 4.123680114746094, "learning_rate": 6.460965571083591e-08, "loss": 0.06897066, "memory(GiB)": 13.7, "step": 101500, "train_speed(iter/s)": 1.528373 }, { "acc": 0.98612175, "epoch": 47.576751816264355, "grad_norm": 3.198030710220337, "learning_rate": 6.44864637223557e-08, "loss": 0.03587513, "memory(GiB)": 13.7, "step": 101505, "train_speed(iter/s)": 1.528377 }, { "acc": 0.98708334, "epoch": 47.57909538317319, "grad_norm": 5.821502685546875, "learning_rate": 6.436339037942363e-08, "loss": 0.0620481, "memory(GiB)": 13.7, "step": 101510, "train_speed(iter/s)": 1.52838 }, { "acc": 1.0, "epoch": 47.581438950082024, "grad_norm": 3.738288164138794, "learning_rate": 6.42404356850004e-08, "loss": 0.01197328, "memory(GiB)": 13.7, "step": 101515, "train_speed(iter/s)": 1.528385 }, { "acc": 0.9916667, "epoch": 47.58378251699086, "grad_norm": 5.2878031730651855, "learning_rate": 6.411759964203943e-08, "loss": 0.02343607, "memory(GiB)": 13.7, "step": 101520, "train_speed(iter/s)": 1.528385 }, { "acc": 0.97829437, "epoch": 47.58612608389969, "grad_norm": 2.9136884212493896, "learning_rate": 6.399488225349475e-08, "loss": 0.0457275, "memory(GiB)": 13.7, "step": 101525, "train_speed(iter/s)": 1.528382 }, { "acc": 0.98916664, "epoch": 47.588469650808534, "grad_norm": 0.00708550913259387, "learning_rate": 6.387228352231536e-08, "loss": 0.04612408, "memory(GiB)": 13.7, "step": 101530, "train_speed(iter/s)": 1.528386 }, { "acc": 0.99187498, "epoch": 47.59081321771737, "grad_norm": 2.8632147312164307, "learning_rate": 6.37498034514475e-08, "loss": 0.02910773, "memory(GiB)": 13.7, "step": 101535, "train_speed(iter/s)": 1.528386 }, { "acc": 0.98328371, "epoch": 47.5931567846262, "grad_norm": 2.668825149536133, "learning_rate": 6.362744204383632e-08, "loss": 0.04306028, "memory(GiB)": 13.7, "step": 101540, "train_speed(iter/s)": 1.528386 }, { "acc": 0.99008923, "epoch": 47.59550035153504, "grad_norm": 2.0277745723724365, "learning_rate": 6.350519930242247e-08, "loss": 0.02941459, "memory(GiB)": 13.7, "step": 101545, "train_speed(iter/s)": 1.52839 }, { "acc": 0.9848958, "epoch": 47.59784391844387, "grad_norm": 2.2624731063842773, "learning_rate": 6.338307523014442e-08, "loss": 0.03429515, "memory(GiB)": 13.7, "step": 101550, "train_speed(iter/s)": 1.528392 }, { "acc": 0.98344707, "epoch": 47.600187485352706, "grad_norm": 3.7160868644714355, "learning_rate": 6.326106982993733e-08, "loss": 0.03393769, "memory(GiB)": 13.7, "step": 101555, "train_speed(iter/s)": 1.528392 }, { "acc": 0.98586311, "epoch": 47.60253105226154, "grad_norm": 4.031991958618164, "learning_rate": 6.313918310473467e-08, "loss": 0.04363685, "memory(GiB)": 13.7, "step": 101560, "train_speed(iter/s)": 1.528395 }, { "acc": 0.990625, "epoch": 47.604874619170374, "grad_norm": 3.8623857498168945, "learning_rate": 6.301741505746434e-08, "loss": 0.02765502, "memory(GiB)": 13.7, "step": 101565, "train_speed(iter/s)": 1.528398 }, { "acc": 0.99375, "epoch": 47.607218186079216, "grad_norm": 3.404339551925659, "learning_rate": 6.28957656910554e-08, "loss": 0.01977729, "memory(GiB)": 13.7, "step": 101570, "train_speed(iter/s)": 1.528403 }, { "acc": 0.98113098, "epoch": 47.60956175298805, "grad_norm": 4.027556896209717, "learning_rate": 6.277423500843128e-08, "loss": 0.06912836, "memory(GiB)": 13.7, "step": 101575, "train_speed(iter/s)": 1.528406 }, { "acc": 0.97758923, "epoch": 47.611905319896884, "grad_norm": 3.5867018699645996, "learning_rate": 6.265282301251218e-08, "loss": 0.03462425, "memory(GiB)": 13.7, "step": 101580, "train_speed(iter/s)": 1.528413 }, { "acc": 0.97213783, "epoch": 47.61424888680572, "grad_norm": 2.553884744644165, "learning_rate": 6.253152970621767e-08, "loss": 0.04851072, "memory(GiB)": 13.7, "step": 101585, "train_speed(iter/s)": 1.528416 }, { "acc": 0.97904758, "epoch": 47.61659245371455, "grad_norm": 5.8264594078063965, "learning_rate": 6.241035509246236e-08, "loss": 0.06138395, "memory(GiB)": 13.7, "step": 101590, "train_speed(iter/s)": 1.528417 }, { "acc": 0.98500004, "epoch": 47.61893602062339, "grad_norm": 4.234589099884033, "learning_rate": 6.228929917415917e-08, "loss": 0.03390368, "memory(GiB)": 13.7, "step": 101595, "train_speed(iter/s)": 1.528418 }, { "acc": 0.990625, "epoch": 47.62127958753222, "grad_norm": 3.2426645755767822, "learning_rate": 6.216836195421772e-08, "loss": 0.04264496, "memory(GiB)": 13.7, "step": 101600, "train_speed(iter/s)": 1.52842 }, { "acc": 0.97822914, "epoch": 47.62362315444106, "grad_norm": 4.516982078552246, "learning_rate": 6.204754343554537e-08, "loss": 0.04158424, "memory(GiB)": 13.7, "step": 101605, "train_speed(iter/s)": 1.528424 }, { "acc": 0.98500004, "epoch": 47.6259667213499, "grad_norm": 2.5787668228149414, "learning_rate": 6.192684362104621e-08, "loss": 0.03398833, "memory(GiB)": 13.7, "step": 101610, "train_speed(iter/s)": 1.528424 }, { "acc": 0.98321428, "epoch": 47.62831028825873, "grad_norm": 2.5363364219665527, "learning_rate": 6.180626251362147e-08, "loss": 0.04872522, "memory(GiB)": 13.7, "step": 101615, "train_speed(iter/s)": 1.528427 }, { "acc": 0.98506947, "epoch": 47.630653855167566, "grad_norm": 2.9167444705963135, "learning_rate": 6.168580011616914e-08, "loss": 0.04105423, "memory(GiB)": 13.7, "step": 101620, "train_speed(iter/s)": 1.52843 }, { "acc": 0.98703365, "epoch": 47.6329974220764, "grad_norm": 2.631101369857788, "learning_rate": 6.156545643158381e-08, "loss": 0.04648953, "memory(GiB)": 13.7, "step": 101625, "train_speed(iter/s)": 1.528428 }, { "acc": 0.987257, "epoch": 47.635340988985234, "grad_norm": 3.153245687484741, "learning_rate": 6.144523146276068e-08, "loss": 0.02904088, "memory(GiB)": 13.7, "step": 101630, "train_speed(iter/s)": 1.528434 }, { "acc": 0.9926136, "epoch": 47.63768455589407, "grad_norm": 0.04198180511593819, "learning_rate": 6.132512521258767e-08, "loss": 0.0490617, "memory(GiB)": 13.7, "step": 101635, "train_speed(iter/s)": 1.528434 }, { "acc": 0.98937502, "epoch": 47.6400281228029, "grad_norm": 0.47665342688560486, "learning_rate": 6.120513768395222e-08, "loss": 0.01670009, "memory(GiB)": 13.7, "step": 101640, "train_speed(iter/s)": 1.528439 }, { "acc": 0.98145828, "epoch": 47.642371689711744, "grad_norm": 2.329843759536743, "learning_rate": 6.108526887973836e-08, "loss": 0.02822562, "memory(GiB)": 13.7, "step": 101645, "train_speed(iter/s)": 1.528446 }, { "acc": 0.99750004, "epoch": 47.64471525662058, "grad_norm": 0.9144311547279358, "learning_rate": 6.096551880282741e-08, "loss": 0.03326782, "memory(GiB)": 13.7, "step": 101650, "train_speed(iter/s)": 1.528448 }, { "acc": 0.9859375, "epoch": 47.64705882352941, "grad_norm": 5.492822170257568, "learning_rate": 6.08458874560979e-08, "loss": 0.07448893, "memory(GiB)": 13.7, "step": 101655, "train_speed(iter/s)": 1.528451 }, { "acc": 0.98163376, "epoch": 47.64940239043825, "grad_norm": 5.07399845123291, "learning_rate": 6.0726374842425e-08, "loss": 0.04515443, "memory(GiB)": 13.7, "step": 101660, "train_speed(iter/s)": 1.528457 }, { "acc": 0.99273806, "epoch": 47.65174595734708, "grad_norm": 0.022777564823627472, "learning_rate": 6.060698096468168e-08, "loss": 0.0226222, "memory(GiB)": 13.7, "step": 101665, "train_speed(iter/s)": 1.52846 }, { "acc": 0.98006945, "epoch": 47.654089524255916, "grad_norm": 3.455361843109131, "learning_rate": 6.048770582573759e-08, "loss": 0.05065964, "memory(GiB)": 13.7, "step": 101670, "train_speed(iter/s)": 1.528461 }, { "acc": 0.9885416, "epoch": 47.65643309116475, "grad_norm": 2.0951178073883057, "learning_rate": 6.036854942846015e-08, "loss": 0.03942493, "memory(GiB)": 13.7, "step": 101675, "train_speed(iter/s)": 1.528464 }, { "acc": 0.9854166, "epoch": 47.658776658073585, "grad_norm": 0.0013835818972438574, "learning_rate": 6.024951177571345e-08, "loss": 0.02494752, "memory(GiB)": 13.7, "step": 101680, "train_speed(iter/s)": 1.528464 }, { "acc": 0.97446432, "epoch": 47.661120224982426, "grad_norm": 4.947573184967041, "learning_rate": 6.013059287035768e-08, "loss": 0.04434894, "memory(GiB)": 13.7, "step": 101685, "train_speed(iter/s)": 1.528463 }, { "acc": 0.9822916, "epoch": 47.66346379189126, "grad_norm": 0.0013095930917188525, "learning_rate": 6.00117927152525e-08, "loss": 0.03627613, "memory(GiB)": 13.7, "step": 101690, "train_speed(iter/s)": 1.528464 }, { "acc": 0.9895834, "epoch": 47.665807358800095, "grad_norm": 5.867063522338867, "learning_rate": 5.98931113132531e-08, "loss": 0.02880298, "memory(GiB)": 13.7, "step": 101695, "train_speed(iter/s)": 1.52847 }, { "acc": 0.97738972, "epoch": 47.66815092570893, "grad_norm": 3.9781689643859863, "learning_rate": 5.97745486672125e-08, "loss": 0.03812096, "memory(GiB)": 13.7, "step": 101700, "train_speed(iter/s)": 1.528471 }, { "acc": 0.98559971, "epoch": 47.67049449261776, "grad_norm": 4.44793701171875, "learning_rate": 5.965610477997978e-08, "loss": 0.07562125, "memory(GiB)": 13.7, "step": 101705, "train_speed(iter/s)": 1.528472 }, { "acc": 0.9871726, "epoch": 47.6728380595266, "grad_norm": 2.0416998863220215, "learning_rate": 5.953777965440238e-08, "loss": 0.04563597, "memory(GiB)": 13.7, "step": 101710, "train_speed(iter/s)": 1.528477 }, { "acc": 0.99401512, "epoch": 47.67518162643543, "grad_norm": 0.0015609055990353227, "learning_rate": 5.941957329332442e-08, "loss": 0.05436488, "memory(GiB)": 13.7, "step": 101715, "train_speed(iter/s)": 1.528478 }, { "acc": 0.98354168, "epoch": 47.67752519334427, "grad_norm": 3.458756446838379, "learning_rate": 5.930148569958721e-08, "loss": 0.03829044, "memory(GiB)": 13.7, "step": 101720, "train_speed(iter/s)": 1.528482 }, { "acc": 0.99028845, "epoch": 47.67986876025311, "grad_norm": 0.002013962483033538, "learning_rate": 5.918351687602932e-08, "loss": 0.02075927, "memory(GiB)": 13.7, "step": 101725, "train_speed(iter/s)": 1.528486 }, { "acc": 0.98343754, "epoch": 47.68221232716194, "grad_norm": 0.0249453354626894, "learning_rate": 5.906566682548654e-08, "loss": 0.04208455, "memory(GiB)": 13.7, "step": 101730, "train_speed(iter/s)": 1.528487 }, { "acc": 0.9822916, "epoch": 47.684555894070776, "grad_norm": 3.38874888420105, "learning_rate": 5.894793555079074e-08, "loss": 0.06367742, "memory(GiB)": 13.7, "step": 101735, "train_speed(iter/s)": 1.528493 }, { "acc": 0.99020834, "epoch": 47.68689946097961, "grad_norm": 2.9609169960021973, "learning_rate": 5.883032305477272e-08, "loss": 0.02259497, "memory(GiB)": 13.7, "step": 101740, "train_speed(iter/s)": 1.528498 }, { "acc": 0.97488098, "epoch": 47.689243027888445, "grad_norm": 5.02906608581543, "learning_rate": 5.871282934025883e-08, "loss": 0.0813542, "memory(GiB)": 13.7, "step": 101745, "train_speed(iter/s)": 1.528503 }, { "acc": 0.9947916, "epoch": 47.69158659479728, "grad_norm": 2.987374782562256, "learning_rate": 5.859545441007319e-08, "loss": 0.02569591, "memory(GiB)": 13.7, "step": 101750, "train_speed(iter/s)": 1.528509 }, { "acc": 0.98038197, "epoch": 47.69393016170611, "grad_norm": 5.861048698425293, "learning_rate": 5.847819826703771e-08, "loss": 0.04274437, "memory(GiB)": 13.7, "step": 101755, "train_speed(iter/s)": 1.528511 }, { "acc": 0.98425598, "epoch": 47.696273728614955, "grad_norm": 5.157515525817871, "learning_rate": 5.8361060913970396e-08, "loss": 0.03234482, "memory(GiB)": 13.7, "step": 101760, "train_speed(iter/s)": 1.528512 }, { "acc": 0.98368511, "epoch": 47.69861729552379, "grad_norm": 3.1315407752990723, "learning_rate": 5.824404235368761e-08, "loss": 0.03811048, "memory(GiB)": 13.7, "step": 101765, "train_speed(iter/s)": 1.528514 }, { "acc": 0.97203369, "epoch": 47.70096086243262, "grad_norm": 4.100841045379639, "learning_rate": 5.8127142589000156e-08, "loss": 0.05722183, "memory(GiB)": 13.7, "step": 101770, "train_speed(iter/s)": 1.528514 }, { "acc": 0.99333334, "epoch": 47.70330442934146, "grad_norm": 0.6833584308624268, "learning_rate": 5.8010361622720505e-08, "loss": 0.01821244, "memory(GiB)": 13.7, "step": 101775, "train_speed(iter/s)": 1.528515 }, { "acc": 0.98145828, "epoch": 47.70564799625029, "grad_norm": 4.873579025268555, "learning_rate": 5.789369945765391e-08, "loss": 0.0365713, "memory(GiB)": 13.7, "step": 101780, "train_speed(iter/s)": 1.528518 }, { "acc": 0.98777771, "epoch": 47.707991563159126, "grad_norm": 2.9431354999542236, "learning_rate": 5.777715609660506e-08, "loss": 0.0420356, "memory(GiB)": 13.7, "step": 101785, "train_speed(iter/s)": 1.52852 }, { "acc": 0.98031254, "epoch": 47.71033513006796, "grad_norm": 2.7637507915496826, "learning_rate": 5.766073154237588e-08, "loss": 0.06665604, "memory(GiB)": 13.7, "step": 101790, "train_speed(iter/s)": 1.52852 }, { "acc": 0.9796875, "epoch": 47.7126786969768, "grad_norm": 4.197820663452148, "learning_rate": 5.754442579776332e-08, "loss": 0.04845711, "memory(GiB)": 13.7, "step": 101795, "train_speed(iter/s)": 1.528524 }, { "acc": 0.99020834, "epoch": 47.715022263885636, "grad_norm": 4.565546035766602, "learning_rate": 5.7428238865564837e-08, "loss": 0.04495535, "memory(GiB)": 13.7, "step": 101800, "train_speed(iter/s)": 1.528528 }, { "acc": 0.97288694, "epoch": 47.71736583079447, "grad_norm": 3.8515570163726807, "learning_rate": 5.731217074857069e-08, "loss": 0.03559531, "memory(GiB)": 13.7, "step": 101805, "train_speed(iter/s)": 1.528531 }, { "acc": 0.97842264, "epoch": 47.719709397703305, "grad_norm": 3.954425573348999, "learning_rate": 5.719622144957395e-08, "loss": 0.05684272, "memory(GiB)": 13.7, "step": 101810, "train_speed(iter/s)": 1.528534 }, { "acc": 0.98562508, "epoch": 47.72205296461214, "grad_norm": 3.9751343727111816, "learning_rate": 5.7080390971359316e-08, "loss": 0.05743682, "memory(GiB)": 13.7, "step": 101815, "train_speed(iter/s)": 1.528535 }, { "acc": 0.99196434, "epoch": 47.72439653152097, "grad_norm": 2.903916358947754, "learning_rate": 5.696467931671205e-08, "loss": 0.08383734, "memory(GiB)": 13.7, "step": 101820, "train_speed(iter/s)": 1.528534 }, { "acc": 0.97937508, "epoch": 47.72674009842981, "grad_norm": 5.479311466217041, "learning_rate": 5.6849086488413004e-08, "loss": 0.04989823, "memory(GiB)": 13.7, "step": 101825, "train_speed(iter/s)": 1.528534 }, { "acc": 0.98008938, "epoch": 47.72908366533864, "grad_norm": 6.923473358154297, "learning_rate": 5.673361248924023e-08, "loss": 0.04235938, "memory(GiB)": 13.7, "step": 101830, "train_speed(iter/s)": 1.528534 }, { "acc": 0.98833332, "epoch": 47.73142723224748, "grad_norm": 4.596465587615967, "learning_rate": 5.66182573219701e-08, "loss": 0.04121003, "memory(GiB)": 13.7, "step": 101835, "train_speed(iter/s)": 1.528537 }, { "acc": 0.98488092, "epoch": 47.73377079915632, "grad_norm": 2.2856664657592773, "learning_rate": 5.6503020989375126e-08, "loss": 0.02735177, "memory(GiB)": 13.7, "step": 101840, "train_speed(iter/s)": 1.528542 }, { "acc": 0.98812494, "epoch": 47.73611436606515, "grad_norm": 0.909630298614502, "learning_rate": 5.63879034942256e-08, "loss": 0.02719423, "memory(GiB)": 13.7, "step": 101845, "train_speed(iter/s)": 1.528548 }, { "acc": 0.98187504, "epoch": 47.738457932973986, "grad_norm": 3.6033480167388916, "learning_rate": 5.6272904839287916e-08, "loss": 0.04263531, "memory(GiB)": 13.7, "step": 101850, "train_speed(iter/s)": 1.528549 }, { "acc": 0.98698864, "epoch": 47.74080149988282, "grad_norm": 2.7977676391601562, "learning_rate": 5.615802502732681e-08, "loss": 0.02754497, "memory(GiB)": 13.7, "step": 101855, "train_speed(iter/s)": 1.528552 }, { "acc": 0.98582792, "epoch": 47.743145066791655, "grad_norm": 2.7580535411834717, "learning_rate": 5.604326406110369e-08, "loss": 0.04262729, "memory(GiB)": 13.7, "step": 101860, "train_speed(iter/s)": 1.528554 }, { "acc": 0.98354168, "epoch": 47.74548863370049, "grad_norm": 2.110383987426758, "learning_rate": 5.592862194337551e-08, "loss": 0.0237062, "memory(GiB)": 13.7, "step": 101865, "train_speed(iter/s)": 1.528555 }, { "acc": 0.99499998, "epoch": 47.74783220060933, "grad_norm": 0.949938178062439, "learning_rate": 5.581409867690035e-08, "loss": 0.02585464, "memory(GiB)": 13.7, "step": 101870, "train_speed(iter/s)": 1.528557 }, { "acc": 0.97371531, "epoch": 47.750175767518165, "grad_norm": 3.860121965408325, "learning_rate": 5.569969426442906e-08, "loss": 0.08229293, "memory(GiB)": 13.7, "step": 101875, "train_speed(iter/s)": 1.528558 }, { "acc": 0.98083334, "epoch": 47.752519334427, "grad_norm": 2.364107370376587, "learning_rate": 5.55854087087125e-08, "loss": 0.0358487, "memory(GiB)": 13.7, "step": 101880, "train_speed(iter/s)": 1.528559 }, { "acc": 0.99363098, "epoch": 47.754862901335834, "grad_norm": 0.0009168562828563154, "learning_rate": 5.54712420124971e-08, "loss": 0.01833746, "memory(GiB)": 13.7, "step": 101885, "train_speed(iter/s)": 1.528566 }, { "acc": 0.99125004, "epoch": 47.75720646824467, "grad_norm": 3.743929147720337, "learning_rate": 5.535719417852759e-08, "loss": 0.03461243, "memory(GiB)": 13.7, "step": 101890, "train_speed(iter/s)": 1.528567 }, { "acc": 0.99125004, "epoch": 47.7595500351535, "grad_norm": 1.204017162322998, "learning_rate": 5.5243265209544307e-08, "loss": 0.02432788, "memory(GiB)": 13.7, "step": 101895, "train_speed(iter/s)": 1.528569 }, { "acc": 0.9875, "epoch": 47.76189360206234, "grad_norm": 3.0496673583984375, "learning_rate": 5.512945510828754e-08, "loss": 0.03218148, "memory(GiB)": 13.7, "step": 101900, "train_speed(iter/s)": 1.528567 }, { "acc": 0.99291134, "epoch": 47.76423716897117, "grad_norm": 5.515007972717285, "learning_rate": 5.5015763877491506e-08, "loss": 0.02115578, "memory(GiB)": 13.7, "step": 101905, "train_speed(iter/s)": 1.528571 }, { "acc": 0.996875, "epoch": 47.76658073588001, "grad_norm": 0.7530646920204163, "learning_rate": 5.490219151988929e-08, "loss": 0.02343383, "memory(GiB)": 13.7, "step": 101910, "train_speed(iter/s)": 1.528573 }, { "acc": 0.9875, "epoch": 47.76892430278885, "grad_norm": 4.573800086975098, "learning_rate": 5.478873803821122e-08, "loss": 0.07243357, "memory(GiB)": 13.7, "step": 101915, "train_speed(iter/s)": 1.528576 }, { "acc": 0.9958334, "epoch": 47.77126786969768, "grad_norm": 3.563178777694702, "learning_rate": 5.4675403435182636e-08, "loss": 0.02126517, "memory(GiB)": 13.7, "step": 101920, "train_speed(iter/s)": 1.528574 }, { "acc": 0.98666668, "epoch": 47.773611436606515, "grad_norm": 0.0007031372515484691, "learning_rate": 5.45621877135305e-08, "loss": 0.03324881, "memory(GiB)": 13.7, "step": 101925, "train_speed(iter/s)": 1.528574 }, { "acc": 0.98145828, "epoch": 47.77595500351535, "grad_norm": 5.033089637756348, "learning_rate": 5.444909087597405e-08, "loss": 0.03010798, "memory(GiB)": 13.7, "step": 101930, "train_speed(iter/s)": 1.528578 }, { "acc": 0.98071432, "epoch": 47.778298570424184, "grad_norm": 4.036905288696289, "learning_rate": 5.4336112925233054e-08, "loss": 0.05507988, "memory(GiB)": 13.7, "step": 101935, "train_speed(iter/s)": 1.52858 }, { "acc": 0.9760416, "epoch": 47.78064213733302, "grad_norm": 4.253323554992676, "learning_rate": 5.422325386402174e-08, "loss": 0.06077601, "memory(GiB)": 13.7, "step": 101940, "train_speed(iter/s)": 1.528583 }, { "acc": 0.99375, "epoch": 47.78298570424186, "grad_norm": 3.425821542739868, "learning_rate": 5.4110513695053764e-08, "loss": 0.00975016, "memory(GiB)": 13.7, "step": 101945, "train_speed(iter/s)": 1.528587 }, { "acc": 0.9770834, "epoch": 47.785329271150694, "grad_norm": 3.979668617248535, "learning_rate": 5.399789242103949e-08, "loss": 0.08181558, "memory(GiB)": 13.7, "step": 101950, "train_speed(iter/s)": 1.528589 }, { "acc": 0.99231148, "epoch": 47.78767283805953, "grad_norm": 1.5233947038650513, "learning_rate": 5.388539004468423e-08, "loss": 0.0238337, "memory(GiB)": 13.7, "step": 101955, "train_speed(iter/s)": 1.52859 }, { "acc": 0.98232145, "epoch": 47.79001640496836, "grad_norm": 4.52078914642334, "learning_rate": 5.377300656869446e-08, "loss": 0.04090252, "memory(GiB)": 13.7, "step": 101960, "train_speed(iter/s)": 1.528591 }, { "acc": 0.98666668, "epoch": 47.7923599718772, "grad_norm": 2.4658827781677246, "learning_rate": 5.366074199576996e-08, "loss": 0.03216011, "memory(GiB)": 13.7, "step": 101965, "train_speed(iter/s)": 1.528592 }, { "acc": 0.99092264, "epoch": 47.79470353878603, "grad_norm": 2.0303847789764404, "learning_rate": 5.354859632860943e-08, "loss": 0.02984461, "memory(GiB)": 13.7, "step": 101970, "train_speed(iter/s)": 1.528593 }, { "acc": 0.98708334, "epoch": 47.797047105694865, "grad_norm": 2.1595540046691895, "learning_rate": 5.34365695699082e-08, "loss": 0.02834352, "memory(GiB)": 13.7, "step": 101975, "train_speed(iter/s)": 1.528594 }, { "acc": 0.996875, "epoch": 47.7993906726037, "grad_norm": 1.8623026609420776, "learning_rate": 5.3324661722360514e-08, "loss": 0.01510582, "memory(GiB)": 13.7, "step": 101980, "train_speed(iter/s)": 1.528599 }, { "acc": 0.9864584, "epoch": 47.80173423951254, "grad_norm": 2.741976499557495, "learning_rate": 5.3212872788654524e-08, "loss": 0.07496403, "memory(GiB)": 13.7, "step": 101985, "train_speed(iter/s)": 1.528602 }, { "acc": 0.98125, "epoch": 47.804077806421375, "grad_norm": 3.7510879039764404, "learning_rate": 5.3101202771478355e-08, "loss": 0.03244952, "memory(GiB)": 13.7, "step": 101990, "train_speed(iter/s)": 1.528601 }, { "acc": 0.98738098, "epoch": 47.80642137333021, "grad_norm": 1.6068764925003052, "learning_rate": 5.2989651673515705e-08, "loss": 0.02036773, "memory(GiB)": 13.7, "step": 101995, "train_speed(iter/s)": 1.528606 }, { "acc": 0.9869792, "epoch": 47.808764940239044, "grad_norm": 3.970964193344116, "learning_rate": 5.287821949744805e-08, "loss": 0.05182862, "memory(GiB)": 13.7, "step": 102000, "train_speed(iter/s)": 1.528608 }, { "acc": 0.98592262, "epoch": 47.81110850714788, "grad_norm": 0.014827634207904339, "learning_rate": 5.276690624595353e-08, "loss": 0.02379114, "memory(GiB)": 13.7, "step": 102005, "train_speed(iter/s)": 1.528609 }, { "acc": 0.98708344, "epoch": 47.81345207405671, "grad_norm": 0.4868432581424713, "learning_rate": 5.265571192170753e-08, "loss": 0.01812964, "memory(GiB)": 13.7, "step": 102010, "train_speed(iter/s)": 1.528612 }, { "acc": 0.984375, "epoch": 47.81579564096555, "grad_norm": 1.4437583684921265, "learning_rate": 5.254463652738429e-08, "loss": 0.04938861, "memory(GiB)": 13.7, "step": 102015, "train_speed(iter/s)": 1.528614 }, { "acc": 0.98249998, "epoch": 47.81813920787438, "grad_norm": 3.855881929397583, "learning_rate": 5.243368006565198e-08, "loss": 0.05610075, "memory(GiB)": 13.7, "step": 102020, "train_speed(iter/s)": 1.528612 }, { "acc": 0.97729168, "epoch": 47.82048277478322, "grad_norm": 4.318638324737549, "learning_rate": 5.23228425391793e-08, "loss": 0.02847674, "memory(GiB)": 13.7, "step": 102025, "train_speed(iter/s)": 1.528616 }, { "acc": 0.99070511, "epoch": 47.82282634169206, "grad_norm": 2.5364737510681152, "learning_rate": 5.221212395062885e-08, "loss": 0.01405918, "memory(GiB)": 13.7, "step": 102030, "train_speed(iter/s)": 1.52862 }, { "acc": 0.971875, "epoch": 47.82516990860089, "grad_norm": 4.0293755531311035, "learning_rate": 5.210152430266269e-08, "loss": 0.03744999, "memory(GiB)": 13.7, "step": 102035, "train_speed(iter/s)": 1.52862 }, { "acc": 0.98478088, "epoch": 47.827513475509726, "grad_norm": 2.6981918811798096, "learning_rate": 5.1991043597938434e-08, "loss": 0.04772798, "memory(GiB)": 13.7, "step": 102040, "train_speed(iter/s)": 1.528621 }, { "acc": 0.98278236, "epoch": 47.82985704241856, "grad_norm": 4.011343955993652, "learning_rate": 5.188068183911311e-08, "loss": 0.04029112, "memory(GiB)": 13.7, "step": 102045, "train_speed(iter/s)": 1.528626 }, { "acc": 0.9916667, "epoch": 47.832200609327394, "grad_norm": 2.5867555141448975, "learning_rate": 5.1770439028838796e-08, "loss": 0.0295578, "memory(GiB)": 13.7, "step": 102050, "train_speed(iter/s)": 1.528628 }, { "acc": 0.9729167, "epoch": 47.83454417623623, "grad_norm": 5.6965651512146, "learning_rate": 5.1660315169764764e-08, "loss": 0.05281255, "memory(GiB)": 13.7, "step": 102055, "train_speed(iter/s)": 1.528629 }, { "acc": 0.99125004, "epoch": 47.83688774314507, "grad_norm": 0.010549219325184822, "learning_rate": 5.155031026453919e-08, "loss": 0.03608634, "memory(GiB)": 13.7, "step": 102060, "train_speed(iter/s)": 1.528631 }, { "acc": 0.98416128, "epoch": 47.839231310053904, "grad_norm": 3.5934972763061523, "learning_rate": 5.1440424315804706e-08, "loss": 0.03008463, "memory(GiB)": 13.7, "step": 102065, "train_speed(iter/s)": 1.528633 }, { "acc": 0.98217258, "epoch": 47.84157487696274, "grad_norm": 2.5577261447906494, "learning_rate": 5.133065732620392e-08, "loss": 0.05686187, "memory(GiB)": 13.7, "step": 102070, "train_speed(iter/s)": 1.528634 }, { "acc": 0.98277779, "epoch": 47.84391844387157, "grad_norm": 0.005488064140081406, "learning_rate": 5.1221009298375567e-08, "loss": 0.02640013, "memory(GiB)": 13.7, "step": 102075, "train_speed(iter/s)": 1.528635 }, { "acc": 0.96538696, "epoch": 47.84626201078041, "grad_norm": 3.4161148071289062, "learning_rate": 5.1111480234953395e-08, "loss": 0.07794271, "memory(GiB)": 13.7, "step": 102080, "train_speed(iter/s)": 1.528634 }, { "acc": 0.98634806, "epoch": 47.84860557768924, "grad_norm": 1.7493679523468018, "learning_rate": 5.100207013857225e-08, "loss": 0.02778829, "memory(GiB)": 13.7, "step": 102085, "train_speed(iter/s)": 1.528639 }, { "acc": 0.98840275, "epoch": 47.850949144598076, "grad_norm": 0.9683958292007446, "learning_rate": 5.089277901186033e-08, "loss": 0.02018448, "memory(GiB)": 13.7, "step": 102090, "train_speed(iter/s)": 1.52864 }, { "acc": 0.98591347, "epoch": 47.85329271150691, "grad_norm": 2.011063814163208, "learning_rate": 5.078360685744525e-08, "loss": 0.03532083, "memory(GiB)": 13.7, "step": 102095, "train_speed(iter/s)": 1.528636 }, { "acc": 0.98738966, "epoch": 47.85563627841575, "grad_norm": 0.004735489375889301, "learning_rate": 5.06745536779508e-08, "loss": 0.01764078, "memory(GiB)": 13.7, "step": 102100, "train_speed(iter/s)": 1.528639 }, { "acc": 0.9927084, "epoch": 47.857979845324586, "grad_norm": 1.129019021987915, "learning_rate": 5.0565619475999573e-08, "loss": 0.02894393, "memory(GiB)": 13.7, "step": 102105, "train_speed(iter/s)": 1.528642 }, { "acc": 0.98727074, "epoch": 47.86032341223342, "grad_norm": 2.9184305667877197, "learning_rate": 5.0456804254208686e-08, "loss": 0.02287929, "memory(GiB)": 13.7, "step": 102110, "train_speed(iter/s)": 1.528647 }, { "acc": 0.97883015, "epoch": 47.862666979142254, "grad_norm": 1.415518045425415, "learning_rate": 5.034810801519411e-08, "loss": 0.03004896, "memory(GiB)": 13.7, "step": 102115, "train_speed(iter/s)": 1.528646 }, { "acc": 0.98252983, "epoch": 47.86501054605109, "grad_norm": 4.60809850692749, "learning_rate": 5.0239530761568495e-08, "loss": 0.0780046, "memory(GiB)": 13.7, "step": 102120, "train_speed(iter/s)": 1.528647 }, { "acc": 0.98604164, "epoch": 47.86735411295992, "grad_norm": 2.212981939315796, "learning_rate": 5.0131072495941156e-08, "loss": 0.04141521, "memory(GiB)": 13.7, "step": 102125, "train_speed(iter/s)": 1.528652 }, { "acc": 0.977841, "epoch": 47.86969767986876, "grad_norm": 3.784302234649658, "learning_rate": 5.002273322092031e-08, "loss": 0.05687975, "memory(GiB)": 13.7, "step": 102130, "train_speed(iter/s)": 1.528653 }, { "acc": 0.99041672, "epoch": 47.8720412467776, "grad_norm": 3.476759195327759, "learning_rate": 4.991451293910916e-08, "loss": 0.01810798, "memory(GiB)": 13.7, "step": 102135, "train_speed(iter/s)": 1.528655 }, { "acc": 0.9776042, "epoch": 47.87438481368643, "grad_norm": 3.3199551105499268, "learning_rate": 4.9806411653109814e-08, "loss": 0.04119762, "memory(GiB)": 13.7, "step": 102140, "train_speed(iter/s)": 1.528658 }, { "acc": 0.97783833, "epoch": 47.87672838059527, "grad_norm": 2.1458826065063477, "learning_rate": 4.969842936551883e-08, "loss": 0.05299825, "memory(GiB)": 13.7, "step": 102145, "train_speed(iter/s)": 1.528659 }, { "acc": 0.978125, "epoch": 47.8790719475041, "grad_norm": 2.9716386795043945, "learning_rate": 4.9590566078933853e-08, "loss": 0.02913382, "memory(GiB)": 13.7, "step": 102150, "train_speed(iter/s)": 1.528661 }, { "acc": 0.96598215, "epoch": 47.881415514412936, "grad_norm": 2.336238384246826, "learning_rate": 4.9482821795946444e-08, "loss": 0.05660827, "memory(GiB)": 13.7, "step": 102155, "train_speed(iter/s)": 1.528663 }, { "acc": 0.98395834, "epoch": 47.88375908132177, "grad_norm": 3.9626946449279785, "learning_rate": 4.9375196519146507e-08, "loss": 0.02556829, "memory(GiB)": 13.7, "step": 102160, "train_speed(iter/s)": 1.528663 }, { "acc": 0.97517853, "epoch": 47.886102648230604, "grad_norm": 3.05578351020813, "learning_rate": 4.9267690251121144e-08, "loss": 0.03517262, "memory(GiB)": 13.7, "step": 102165, "train_speed(iter/s)": 1.528664 }, { "acc": 0.97770834, "epoch": 47.88844621513944, "grad_norm": 3.599268674850464, "learning_rate": 4.9160302994454706e-08, "loss": 0.03118508, "memory(GiB)": 13.7, "step": 102170, "train_speed(iter/s)": 1.528667 }, { "acc": 0.98780632, "epoch": 47.89078978204828, "grad_norm": 3.600597858428955, "learning_rate": 4.9053034751728195e-08, "loss": 0.03038065, "memory(GiB)": 13.7, "step": 102175, "train_speed(iter/s)": 1.52867 }, { "acc": 0.99499998, "epoch": 47.893133348957114, "grad_norm": 2.2338356971740723, "learning_rate": 4.89458855255193e-08, "loss": 0.01832463, "memory(GiB)": 13.7, "step": 102180, "train_speed(iter/s)": 1.528671 }, { "acc": 0.98698864, "epoch": 47.89547691586595, "grad_norm": 4.371074199676514, "learning_rate": 4.8838855318404583e-08, "loss": 0.04459779, "memory(GiB)": 13.7, "step": 102185, "train_speed(iter/s)": 1.528674 }, { "acc": 0.98447914, "epoch": 47.89782048277478, "grad_norm": 1.0122147798538208, "learning_rate": 4.8731944132956185e-08, "loss": 0.04233516, "memory(GiB)": 13.7, "step": 102190, "train_speed(iter/s)": 1.528677 }, { "acc": 0.98187504, "epoch": 47.90016404968362, "grad_norm": 7.232668876647949, "learning_rate": 4.862515197174456e-08, "loss": 0.04417061, "memory(GiB)": 13.7, "step": 102195, "train_speed(iter/s)": 1.52868 }, { "acc": 0.97194443, "epoch": 47.90250761659245, "grad_norm": 4.575746059417725, "learning_rate": 4.851847883733572e-08, "loss": 0.05974172, "memory(GiB)": 13.7, "step": 102200, "train_speed(iter/s)": 1.52868 }, { "acc": 0.98916216, "epoch": 47.904851183501286, "grad_norm": 3.483309507369995, "learning_rate": 4.841192473229405e-08, "loss": 0.04656836, "memory(GiB)": 13.7, "step": 102205, "train_speed(iter/s)": 1.52868 }, { "acc": 0.98187494, "epoch": 47.90719475041013, "grad_norm": 0.0002489422040525824, "learning_rate": 4.830548965918112e-08, "loss": 0.02173544, "memory(GiB)": 13.7, "step": 102210, "train_speed(iter/s)": 1.528677 }, { "acc": 0.98083334, "epoch": 47.90953831731896, "grad_norm": 5.934712886810303, "learning_rate": 4.819917362055462e-08, "loss": 0.03085478, "memory(GiB)": 13.7, "step": 102215, "train_speed(iter/s)": 1.528684 }, { "acc": 0.98883934, "epoch": 47.911881884227796, "grad_norm": 2.8280341625213623, "learning_rate": 4.809297661897061e-08, "loss": 0.023735, "memory(GiB)": 13.7, "step": 102220, "train_speed(iter/s)": 1.528688 }, { "acc": 0.97736111, "epoch": 47.91422545113663, "grad_norm": 6.903519630432129, "learning_rate": 4.798689865698176e-08, "loss": 0.04909086, "memory(GiB)": 13.7, "step": 102225, "train_speed(iter/s)": 1.528691 }, { "acc": 0.98197918, "epoch": 47.916569018045465, "grad_norm": 5.42085599899292, "learning_rate": 4.7880939737137464e-08, "loss": 0.04259745, "memory(GiB)": 13.7, "step": 102230, "train_speed(iter/s)": 1.528693 }, { "acc": 0.98354168, "epoch": 47.9189125849543, "grad_norm": 2.2523720264434814, "learning_rate": 4.777509986198486e-08, "loss": 0.05787395, "memory(GiB)": 13.7, "step": 102235, "train_speed(iter/s)": 1.528694 }, { "acc": 0.99321423, "epoch": 47.92125615186313, "grad_norm": 4.078861236572266, "learning_rate": 4.7669379034067784e-08, "loss": 0.02928388, "memory(GiB)": 13.7, "step": 102240, "train_speed(iter/s)": 1.528701 }, { "acc": 0.95925598, "epoch": 47.92359971877197, "grad_norm": 5.273839950561523, "learning_rate": 4.7563777255927266e-08, "loss": 0.06280164, "memory(GiB)": 13.7, "step": 102245, "train_speed(iter/s)": 1.528703 }, { "acc": 0.996875, "epoch": 47.92594328568081, "grad_norm": 2.390218734741211, "learning_rate": 4.745829453010215e-08, "loss": 0.02299069, "memory(GiB)": 13.7, "step": 102250, "train_speed(iter/s)": 1.528707 }, { "acc": 0.98738098, "epoch": 47.92828685258964, "grad_norm": 4.313496112823486, "learning_rate": 4.7352930859127365e-08, "loss": 0.02967505, "memory(GiB)": 13.7, "step": 102255, "train_speed(iter/s)": 1.52871 }, { "acc": 0.98205357, "epoch": 47.93063041949848, "grad_norm": 2.307739496231079, "learning_rate": 4.724768624553619e-08, "loss": 0.02310005, "memory(GiB)": 13.7, "step": 102260, "train_speed(iter/s)": 1.528712 }, { "acc": 0.99404764, "epoch": 47.93297398640731, "grad_norm": 2.402966022491455, "learning_rate": 4.714256069185745e-08, "loss": 0.02179191, "memory(GiB)": 13.7, "step": 102265, "train_speed(iter/s)": 1.528716 }, { "acc": 0.97217264, "epoch": 47.935317553316146, "grad_norm": 3.35868501663208, "learning_rate": 4.70375542006189e-08, "loss": 0.04647913, "memory(GiB)": 13.7, "step": 102270, "train_speed(iter/s)": 1.528718 }, { "acc": 0.98979168, "epoch": 47.93766112022498, "grad_norm": 4.74595308303833, "learning_rate": 4.693266677434379e-08, "loss": 0.05111348, "memory(GiB)": 13.7, "step": 102275, "train_speed(iter/s)": 1.528717 }, { "acc": 0.98239584, "epoch": 47.940004687133815, "grad_norm": 2.844294786453247, "learning_rate": 4.6827898415554306e-08, "loss": 0.03706265, "memory(GiB)": 13.7, "step": 102280, "train_speed(iter/s)": 1.528718 }, { "acc": 0.98738098, "epoch": 47.942348254042656, "grad_norm": 0.0017118463292717934, "learning_rate": 4.672324912676763e-08, "loss": 0.03402407, "memory(GiB)": 13.7, "step": 102285, "train_speed(iter/s)": 1.528724 }, { "acc": 0.99352684, "epoch": 47.94469182095149, "grad_norm": 1.4416385889053345, "learning_rate": 4.661871891049984e-08, "loss": 0.02176209, "memory(GiB)": 13.7, "step": 102290, "train_speed(iter/s)": 1.528731 }, { "acc": 0.99229164, "epoch": 47.947035387860325, "grad_norm": 3.3552913665771484, "learning_rate": 4.651430776926254e-08, "loss": 0.0263886, "memory(GiB)": 13.7, "step": 102295, "train_speed(iter/s)": 1.52873 }, { "acc": 0.99092264, "epoch": 47.94937895476916, "grad_norm": 3.8684351444244385, "learning_rate": 4.641001570556739e-08, "loss": 0.02385192, "memory(GiB)": 13.7, "step": 102300, "train_speed(iter/s)": 1.528727 }, { "acc": 0.97530251, "epoch": 47.95172252167799, "grad_norm": 6.346558570861816, "learning_rate": 4.630584272191877e-08, "loss": 0.08536612, "memory(GiB)": 13.7, "step": 102305, "train_speed(iter/s)": 1.528734 }, { "acc": 0.9666667, "epoch": 47.95406608858683, "grad_norm": 5.159246921539307, "learning_rate": 4.620178882082279e-08, "loss": 0.05068569, "memory(GiB)": 13.7, "step": 102310, "train_speed(iter/s)": 1.528737 }, { "acc": 0.98943462, "epoch": 47.95640965549566, "grad_norm": 4.494024753570557, "learning_rate": 4.609785400477884e-08, "loss": 0.03913708, "memory(GiB)": 13.7, "step": 102315, "train_speed(iter/s)": 1.528742 }, { "acc": 0.97729168, "epoch": 47.958753222404496, "grad_norm": 0.046752866357564926, "learning_rate": 4.59940382762869e-08, "loss": 0.04109595, "memory(GiB)": 13.7, "step": 102320, "train_speed(iter/s)": 1.528743 }, { "acc": 0.9916667, "epoch": 47.96109678931334, "grad_norm": 0.010945132002234459, "learning_rate": 4.589034163784141e-08, "loss": 0.02704666, "memory(GiB)": 13.7, "step": 102325, "train_speed(iter/s)": 1.528744 }, { "acc": 0.99750004, "epoch": 47.96344035622217, "grad_norm": 2.5997672080993652, "learning_rate": 4.578676409193399e-08, "loss": 0.02647943, "memory(GiB)": 13.7, "step": 102330, "train_speed(iter/s)": 1.528747 }, { "acc": 0.99844704, "epoch": 47.965783923131006, "grad_norm": 2.738347053527832, "learning_rate": 4.5683305641056293e-08, "loss": 0.02003433, "memory(GiB)": 13.7, "step": 102335, "train_speed(iter/s)": 1.528749 }, { "acc": 0.97875004, "epoch": 47.96812749003984, "grad_norm": 4.007115364074707, "learning_rate": 4.557996628769388e-08, "loss": 0.06472949, "memory(GiB)": 13.7, "step": 102340, "train_speed(iter/s)": 1.52875 }, { "acc": 0.98640871, "epoch": 47.970471056948675, "grad_norm": 0.0011561571154743433, "learning_rate": 4.54767460343306e-08, "loss": 0.02318446, "memory(GiB)": 13.7, "step": 102345, "train_speed(iter/s)": 1.528748 }, { "acc": 0.97928028, "epoch": 47.97281462385751, "grad_norm": 1.686673879623413, "learning_rate": 4.537364488344812e-08, "loss": 0.02947288, "memory(GiB)": 13.7, "step": 102350, "train_speed(iter/s)": 1.528755 }, { "acc": 0.98968754, "epoch": 47.975158190766344, "grad_norm": 5.606757640838623, "learning_rate": 4.52706628375242e-08, "loss": 0.03932511, "memory(GiB)": 13.7, "step": 102355, "train_speed(iter/s)": 1.528755 }, { "acc": 0.98440475, "epoch": 47.977501757675185, "grad_norm": 2.716655969619751, "learning_rate": 4.516779989903386e-08, "loss": 0.02951351, "memory(GiB)": 13.7, "step": 102360, "train_speed(iter/s)": 1.528757 }, { "acc": 0.98738098, "epoch": 47.97984532458402, "grad_norm": 2.832643985748291, "learning_rate": 4.506505607045096e-08, "loss": 0.03740539, "memory(GiB)": 13.7, "step": 102365, "train_speed(iter/s)": 1.52876 }, { "acc": 0.97937508, "epoch": 47.982188891492854, "grad_norm": 1.5145866870880127, "learning_rate": 4.496243135424384e-08, "loss": 0.08240248, "memory(GiB)": 13.7, "step": 102370, "train_speed(iter/s)": 1.528759 }, { "acc": 0.98312492, "epoch": 47.98453245840169, "grad_norm": 2.2435436248779297, "learning_rate": 4.485992575287974e-08, "loss": 0.03148508, "memory(GiB)": 13.7, "step": 102375, "train_speed(iter/s)": 1.52876 }, { "acc": 0.97654762, "epoch": 47.98687602531052, "grad_norm": 10.978781700134277, "learning_rate": 4.4757539268822534e-08, "loss": 0.07391687, "memory(GiB)": 13.7, "step": 102380, "train_speed(iter/s)": 1.528763 }, { "acc": 0.99070511, "epoch": 47.98921959221936, "grad_norm": 0.9511113166809082, "learning_rate": 4.4655271904533344e-08, "loss": 0.03101709, "memory(GiB)": 13.7, "step": 102385, "train_speed(iter/s)": 1.528758 }, { "acc": 0.98330364, "epoch": 47.99156315912819, "grad_norm": 4.4643073081970215, "learning_rate": 4.455312366246996e-08, "loss": 0.03770156, "memory(GiB)": 13.7, "step": 102390, "train_speed(iter/s)": 1.528758 }, { "acc": 0.98946428, "epoch": 47.993906726037025, "grad_norm": 1.777204155921936, "learning_rate": 4.445109454508739e-08, "loss": 0.03172618, "memory(GiB)": 13.7, "step": 102395, "train_speed(iter/s)": 1.528759 }, { "acc": 0.97250004, "epoch": 47.99625029294587, "grad_norm": 4.879583358764648, "learning_rate": 4.434918455483955e-08, "loss": 0.0675615, "memory(GiB)": 13.7, "step": 102400, "train_speed(iter/s)": 1.528759 }, { "acc": 0.97767859, "epoch": 47.9985938598547, "grad_norm": 4.629526138305664, "learning_rate": 4.424739369417478e-08, "loss": 0.05443502, "memory(GiB)": 13.7, "step": 102405, "train_speed(iter/s)": 1.52876 }, { "acc": 0.98118057, "epoch": 48.000937426763535, "grad_norm": 3.32393217086792, "learning_rate": 4.4145721965539776e-08, "loss": 0.06549466, "memory(GiB)": 13.7, "step": 102410, "train_speed(iter/s)": 1.528753 }, { "acc": 0.98163691, "epoch": 48.00328099367237, "grad_norm": 0.8701305985450745, "learning_rate": 4.404416937137955e-08, "loss": 0.02996795, "memory(GiB)": 13.7, "step": 102415, "train_speed(iter/s)": 1.528751 }, { "acc": 0.97994041, "epoch": 48.005624560581204, "grad_norm": 1.4226534366607666, "learning_rate": 4.394273591413302e-08, "loss": 0.03333736, "memory(GiB)": 13.7, "step": 102420, "train_speed(iter/s)": 1.528753 }, { "acc": 0.9882143, "epoch": 48.00796812749004, "grad_norm": 0.9213275909423828, "learning_rate": 4.384142159624078e-08, "loss": 0.06062163, "memory(GiB)": 13.7, "step": 102425, "train_speed(iter/s)": 1.528753 }, { "acc": 0.9885416, "epoch": 48.01031169439887, "grad_norm": 2.775562286376953, "learning_rate": 4.374022642013617e-08, "loss": 0.02111512, "memory(GiB)": 13.7, "step": 102430, "train_speed(iter/s)": 1.528757 }, { "acc": 0.99375, "epoch": 48.012655261307714, "grad_norm": 3.2227399349212646, "learning_rate": 4.363915038825258e-08, "loss": 0.02081656, "memory(GiB)": 13.7, "step": 102435, "train_speed(iter/s)": 1.528762 }, { "acc": 0.99750004, "epoch": 48.01499882821655, "grad_norm": 0.05676941201090813, "learning_rate": 4.353819350301893e-08, "loss": 0.0071206, "memory(GiB)": 13.7, "step": 102440, "train_speed(iter/s)": 1.528765 }, { "acc": 0.98812504, "epoch": 48.01734239512538, "grad_norm": 4.809922218322754, "learning_rate": 4.343735576686302e-08, "loss": 0.06122153, "memory(GiB)": 13.7, "step": 102445, "train_speed(iter/s)": 1.528768 }, { "acc": 0.99125004, "epoch": 48.01968596203422, "grad_norm": 0.9219644665718079, "learning_rate": 4.3336637182207686e-08, "loss": 0.01679477, "memory(GiB)": 13.7, "step": 102450, "train_speed(iter/s)": 1.528771 }, { "acc": 0.99229164, "epoch": 48.02202952894305, "grad_norm": 0.00014923956769052893, "learning_rate": 4.323603775147296e-08, "loss": 0.02193636, "memory(GiB)": 13.7, "step": 102455, "train_speed(iter/s)": 1.528776 }, { "acc": 0.982197, "epoch": 48.024373095851885, "grad_norm": 1.813064455986023, "learning_rate": 4.31355574770789e-08, "loss": 0.03513454, "memory(GiB)": 13.7, "step": 102460, "train_speed(iter/s)": 1.528779 }, { "acc": 0.99541664, "epoch": 48.02671666276072, "grad_norm": 3.4572954177856445, "learning_rate": 4.303519636143944e-08, "loss": 0.02928743, "memory(GiB)": 13.7, "step": 102465, "train_speed(iter/s)": 1.528781 }, { "acc": 0.99376354, "epoch": 48.029060229669554, "grad_norm": 1.1624501943588257, "learning_rate": 4.293495440696795e-08, "loss": 0.03495665, "memory(GiB)": 13.7, "step": 102470, "train_speed(iter/s)": 1.528782 }, { "acc": 0.99361115, "epoch": 48.031403796578395, "grad_norm": 2.1515491008758545, "learning_rate": 4.283483161607284e-08, "loss": 0.02706908, "memory(GiB)": 13.7, "step": 102475, "train_speed(iter/s)": 1.528782 }, { "acc": 0.97749996, "epoch": 48.03374736348723, "grad_norm": 3.9616646766662598, "learning_rate": 4.273482799116136e-08, "loss": 0.05386698, "memory(GiB)": 13.7, "step": 102480, "train_speed(iter/s)": 1.528784 }, { "acc": 0.98864584, "epoch": 48.036090930396064, "grad_norm": 3.0254123210906982, "learning_rate": 4.263494353463693e-08, "loss": 0.02517992, "memory(GiB)": 13.7, "step": 102485, "train_speed(iter/s)": 1.528782 }, { "acc": 0.98467264, "epoch": 48.0384344973049, "grad_norm": 5.4502081871032715, "learning_rate": 4.2535178248900705e-08, "loss": 0.03416995, "memory(GiB)": 13.7, "step": 102490, "train_speed(iter/s)": 1.528786 }, { "acc": 0.9958334, "epoch": 48.04077806421373, "grad_norm": 3.194857597351074, "learning_rate": 4.243553213635054e-08, "loss": 0.00835375, "memory(GiB)": 13.7, "step": 102495, "train_speed(iter/s)": 1.528785 }, { "acc": 0.99375, "epoch": 48.04312163112257, "grad_norm": 2.803696393966675, "learning_rate": 4.233600519938149e-08, "loss": 0.02122197, "memory(GiB)": 13.7, "step": 102500, "train_speed(iter/s)": 1.528783 }, { "acc": 0.98227177, "epoch": 48.0454651980314, "grad_norm": 2.4635488986968994, "learning_rate": 4.223659744038641e-08, "loss": 0.03825585, "memory(GiB)": 13.7, "step": 102505, "train_speed(iter/s)": 1.528784 }, { "acc": 0.97113094, "epoch": 48.04780876494024, "grad_norm": 3.284386157989502, "learning_rate": 4.2137308861754255e-08, "loss": 0.04714087, "memory(GiB)": 13.7, "step": 102510, "train_speed(iter/s)": 1.528787 }, { "acc": 0.97315025, "epoch": 48.05015233184908, "grad_norm": 3.1124625205993652, "learning_rate": 4.203813946587177e-08, "loss": 0.05376754, "memory(GiB)": 13.7, "step": 102515, "train_speed(iter/s)": 1.528788 }, { "acc": 0.96791668, "epoch": 48.05249589875791, "grad_norm": 4.009891510009766, "learning_rate": 4.193908925512236e-08, "loss": 0.052005, "memory(GiB)": 13.7, "step": 102520, "train_speed(iter/s)": 1.528792 }, { "acc": 0.97333336, "epoch": 48.054839465666745, "grad_norm": 0.002782797208055854, "learning_rate": 4.1840158231887214e-08, "loss": 0.04151593, "memory(GiB)": 13.7, "step": 102525, "train_speed(iter/s)": 1.528797 }, { "acc": 0.9802084, "epoch": 48.05718303257558, "grad_norm": 3.5965969562530518, "learning_rate": 4.174134639854421e-08, "loss": 0.02852981, "memory(GiB)": 13.7, "step": 102530, "train_speed(iter/s)": 1.528799 }, { "acc": 0.99008923, "epoch": 48.059526599484414, "grad_norm": 2.256207227706909, "learning_rate": 4.1642653757468414e-08, "loss": 0.0206948, "memory(GiB)": 13.7, "step": 102535, "train_speed(iter/s)": 1.528802 }, { "acc": 0.99219704, "epoch": 48.06187016639325, "grad_norm": 0.8793520927429199, "learning_rate": 4.1544080311032686e-08, "loss": 0.01710564, "memory(GiB)": 13.7, "step": 102540, "train_speed(iter/s)": 1.528802 }, { "acc": 0.9791667, "epoch": 48.06421373330208, "grad_norm": 3.4760355949401855, "learning_rate": 4.144562606160491e-08, "loss": 0.05148362, "memory(GiB)": 13.7, "step": 102545, "train_speed(iter/s)": 1.528804 }, { "acc": 0.98833332, "epoch": 48.066557300210924, "grad_norm": 3.0470733642578125, "learning_rate": 4.134729101155296e-08, "loss": 0.04442504, "memory(GiB)": 13.7, "step": 102550, "train_speed(iter/s)": 1.528806 }, { "acc": 0.97515869, "epoch": 48.06890086711976, "grad_norm": 5.080074310302734, "learning_rate": 4.1249075163240244e-08, "loss": 0.04701393, "memory(GiB)": 13.7, "step": 102555, "train_speed(iter/s)": 1.528808 }, { "acc": 0.97763252, "epoch": 48.07124443402859, "grad_norm": 1.4527323246002197, "learning_rate": 4.115097851902742e-08, "loss": 0.075955, "memory(GiB)": 13.7, "step": 102560, "train_speed(iter/s)": 1.528803 }, { "acc": 0.9791666, "epoch": 48.07358800093743, "grad_norm": 3.1830506324768066, "learning_rate": 4.105300108127182e-08, "loss": 0.03394272, "memory(GiB)": 13.7, "step": 102565, "train_speed(iter/s)": 1.528806 }, { "acc": 0.9895834, "epoch": 48.07593156784626, "grad_norm": 3.0095863342285156, "learning_rate": 4.0955142852329104e-08, "loss": 0.0450304, "memory(GiB)": 13.7, "step": 102570, "train_speed(iter/s)": 1.528808 }, { "acc": 0.9774107, "epoch": 48.078275134755096, "grad_norm": 2.3145766258239746, "learning_rate": 4.0857403834551585e-08, "loss": 0.05664752, "memory(GiB)": 13.7, "step": 102575, "train_speed(iter/s)": 1.528811 }, { "acc": 0.99508934, "epoch": 48.08061870166393, "grad_norm": 2.162416458129883, "learning_rate": 4.075978403028827e-08, "loss": 0.01616876, "memory(GiB)": 13.7, "step": 102580, "train_speed(iter/s)": 1.528813 }, { "acc": 0.98937502, "epoch": 48.082962268572764, "grad_norm": 1.348264217376709, "learning_rate": 4.066228344188649e-08, "loss": 0.02404856, "memory(GiB)": 13.7, "step": 102585, "train_speed(iter/s)": 1.528813 }, { "acc": 0.98529758, "epoch": 48.085305835481606, "grad_norm": 2.298797607421875, "learning_rate": 4.0564902071688026e-08, "loss": 0.02679132, "memory(GiB)": 13.7, "step": 102590, "train_speed(iter/s)": 1.528815 }, { "acc": 0.98377352, "epoch": 48.08764940239044, "grad_norm": 2.8657939434051514, "learning_rate": 4.046763992203521e-08, "loss": 0.05291117, "memory(GiB)": 13.7, "step": 102595, "train_speed(iter/s)": 1.528816 }, { "acc": 0.98604164, "epoch": 48.089992969299274, "grad_norm": 1.6318466663360596, "learning_rate": 4.0370496995264834e-08, "loss": 0.04247103, "memory(GiB)": 13.7, "step": 102600, "train_speed(iter/s)": 1.528819 }, { "acc": 0.99359379, "epoch": 48.09233653620811, "grad_norm": 0.05820586904883385, "learning_rate": 4.027347329371312e-08, "loss": 0.02501443, "memory(GiB)": 13.7, "step": 102605, "train_speed(iter/s)": 1.528821 }, { "acc": 0.97752972, "epoch": 48.09468010311694, "grad_norm": 2.4005792140960693, "learning_rate": 4.0176568819711315e-08, "loss": 0.04436722, "memory(GiB)": 13.7, "step": 102610, "train_speed(iter/s)": 1.528822 }, { "acc": 0.9890398, "epoch": 48.09702367002578, "grad_norm": 4.085329055786133, "learning_rate": 4.0079783575588976e-08, "loss": 0.02542374, "memory(GiB)": 13.7, "step": 102615, "train_speed(iter/s)": 1.528825 }, { "acc": 0.98485126, "epoch": 48.09936723693461, "grad_norm": 1.5710973739624023, "learning_rate": 3.998311756367236e-08, "loss": 0.05498997, "memory(GiB)": 13.7, "step": 102620, "train_speed(iter/s)": 1.528829 }, { "acc": 0.97960224, "epoch": 48.10171080384345, "grad_norm": 0.06391745805740356, "learning_rate": 3.9886570786284353e-08, "loss": 0.06333494, "memory(GiB)": 13.7, "step": 102625, "train_speed(iter/s)": 1.528832 }, { "acc": 0.9947916, "epoch": 48.10405437075229, "grad_norm": 3.019252300262451, "learning_rate": 3.9790143245746774e-08, "loss": 0.02798783, "memory(GiB)": 13.7, "step": 102630, "train_speed(iter/s)": 1.528835 }, { "acc": 0.98263893, "epoch": 48.10639793766112, "grad_norm": 2.465765953063965, "learning_rate": 3.9693834944376976e-08, "loss": 0.02925707, "memory(GiB)": 13.7, "step": 102635, "train_speed(iter/s)": 1.528839 }, { "acc": 0.99375, "epoch": 48.108741504569956, "grad_norm": 0.02111799083650112, "learning_rate": 3.95976458844901e-08, "loss": 0.01528654, "memory(GiB)": 13.7, "step": 102640, "train_speed(iter/s)": 1.528846 }, { "acc": 0.98988094, "epoch": 48.11108507147879, "grad_norm": 2.9703869819641113, "learning_rate": 3.9501576068397384e-08, "loss": 0.03433903, "memory(GiB)": 13.7, "step": 102645, "train_speed(iter/s)": 1.528849 }, { "acc": 0.97592258, "epoch": 48.113428638387624, "grad_norm": 3.2502496242523193, "learning_rate": 3.940562549840955e-08, "loss": 0.04316401, "memory(GiB)": 13.7, "step": 102650, "train_speed(iter/s)": 1.528855 }, { "acc": 0.99375, "epoch": 48.11577220529646, "grad_norm": 4.695168972015381, "learning_rate": 3.930979417683118e-08, "loss": 0.0166952, "memory(GiB)": 13.7, "step": 102655, "train_speed(iter/s)": 1.528858 }, { "acc": 0.98966351, "epoch": 48.11811577220529, "grad_norm": 2.4452996253967285, "learning_rate": 3.9214082105966855e-08, "loss": 0.03378268, "memory(GiB)": 13.7, "step": 102660, "train_speed(iter/s)": 1.528859 }, { "acc": 0.98562498, "epoch": 48.120459339114134, "grad_norm": 5.058470726013184, "learning_rate": 3.91184892881173e-08, "loss": 0.01779252, "memory(GiB)": 13.7, "step": 102665, "train_speed(iter/s)": 1.528864 }, { "acc": 0.98604164, "epoch": 48.12280290602297, "grad_norm": 3.364278554916382, "learning_rate": 3.902301572557932e-08, "loss": 0.02719527, "memory(GiB)": 13.7, "step": 102670, "train_speed(iter/s)": 1.528861 }, { "acc": 0.9947917, "epoch": 48.1251464729318, "grad_norm": 0.9623650312423706, "learning_rate": 3.8927661420648636e-08, "loss": 0.02776651, "memory(GiB)": 13.7, "step": 102675, "train_speed(iter/s)": 1.528862 }, { "acc": 0.99229164, "epoch": 48.12749003984064, "grad_norm": 1.0657697916030884, "learning_rate": 3.883242637561653e-08, "loss": 0.0251838, "memory(GiB)": 13.7, "step": 102680, "train_speed(iter/s)": 1.528868 }, { "acc": 0.99707794, "epoch": 48.12983360674947, "grad_norm": 0.0031772477086633444, "learning_rate": 3.8737310592772587e-08, "loss": 0.01209705, "memory(GiB)": 13.7, "step": 102685, "train_speed(iter/s)": 1.528869 }, { "acc": 0.98912582, "epoch": 48.132177173658306, "grad_norm": 2.468529224395752, "learning_rate": 3.8642314074403656e-08, "loss": 0.06647331, "memory(GiB)": 13.7, "step": 102690, "train_speed(iter/s)": 1.528872 }, { "acc": 0.98319445, "epoch": 48.13452074056714, "grad_norm": 3.0043869018554688, "learning_rate": 3.854743682279211e-08, "loss": 0.02467261, "memory(GiB)": 13.7, "step": 102695, "train_speed(iter/s)": 1.528874 }, { "acc": 0.9864584, "epoch": 48.13686430747598, "grad_norm": 4.260830402374268, "learning_rate": 3.845267884021924e-08, "loss": 0.02692167, "memory(GiB)": 13.7, "step": 102700, "train_speed(iter/s)": 1.528875 }, { "acc": 0.99083328, "epoch": 48.139207874384816, "grad_norm": 4.108996391296387, "learning_rate": 3.835804012896188e-08, "loss": 0.03136809, "memory(GiB)": 13.7, "step": 102705, "train_speed(iter/s)": 1.528876 }, { "acc": 0.98363094, "epoch": 48.14155144129365, "grad_norm": 4.768072605133057, "learning_rate": 3.826352069129577e-08, "loss": 0.02264319, "memory(GiB)": 13.7, "step": 102710, "train_speed(iter/s)": 1.52888 }, { "acc": 0.98883934, "epoch": 48.143895008202485, "grad_norm": 2.8327064514160156, "learning_rate": 3.816912052949164e-08, "loss": 0.04565141, "memory(GiB)": 13.7, "step": 102715, "train_speed(iter/s)": 1.528883 }, { "acc": 0.98708324, "epoch": 48.14623857511132, "grad_norm": 0.002649775706231594, "learning_rate": 3.807483964582021e-08, "loss": 0.02205246, "memory(GiB)": 13.7, "step": 102720, "train_speed(iter/s)": 1.528882 }, { "acc": 0.99279766, "epoch": 48.14858214202015, "grad_norm": 0.001137943472713232, "learning_rate": 3.798067804254613e-08, "loss": 0.04474345, "memory(GiB)": 13.7, "step": 102725, "train_speed(iter/s)": 1.528885 }, { "acc": 0.9984375, "epoch": 48.15092570892899, "grad_norm": 0.07224909961223602, "learning_rate": 3.7886635721934016e-08, "loss": 0.01290728, "memory(GiB)": 13.7, "step": 102730, "train_speed(iter/s)": 1.528884 }, { "acc": 0.99333334, "epoch": 48.15326927583782, "grad_norm": 0.9598071575164795, "learning_rate": 3.77927126862435e-08, "loss": 0.02144106, "memory(GiB)": 13.7, "step": 102735, "train_speed(iter/s)": 1.528884 }, { "acc": 0.9822917, "epoch": 48.15561284274666, "grad_norm": 3.8768692016601562, "learning_rate": 3.7698908937732556e-08, "loss": 0.04760073, "memory(GiB)": 13.7, "step": 102740, "train_speed(iter/s)": 1.528883 }, { "acc": 0.99508934, "epoch": 48.1579564096555, "grad_norm": 1.6035479307174683, "learning_rate": 3.7605224478655265e-08, "loss": 0.01320975, "memory(GiB)": 13.7, "step": 102745, "train_speed(iter/s)": 1.528883 }, { "acc": 0.98395834, "epoch": 48.16029997656433, "grad_norm": 4.0837321281433105, "learning_rate": 3.751165931126459e-08, "loss": 0.02194692, "memory(GiB)": 13.7, "step": 102750, "train_speed(iter/s)": 1.528887 }, { "acc": 0.9875, "epoch": 48.162643543473166, "grad_norm": 3.0891590118408203, "learning_rate": 3.741821343780852e-08, "loss": 0.04368323, "memory(GiB)": 13.7, "step": 102755, "train_speed(iter/s)": 1.528888 }, { "acc": 0.97599697, "epoch": 48.164987110382, "grad_norm": 1.4582289457321167, "learning_rate": 3.732488686053392e-08, "loss": 0.06116812, "memory(GiB)": 13.7, "step": 102760, "train_speed(iter/s)": 1.528891 }, { "acc": 0.98258934, "epoch": 48.167330677290835, "grad_norm": 4.8129191398620605, "learning_rate": 3.723167958168377e-08, "loss": 0.0463846, "memory(GiB)": 13.7, "step": 102765, "train_speed(iter/s)": 1.528893 }, { "acc": 0.98812504, "epoch": 48.16967424419967, "grad_norm": 0.6052087545394897, "learning_rate": 3.713859160349771e-08, "loss": 0.02608568, "memory(GiB)": 13.7, "step": 102770, "train_speed(iter/s)": 1.528893 }, { "acc": 0.98974209, "epoch": 48.17201781110851, "grad_norm": 2.645627975463867, "learning_rate": 3.704562292821485e-08, "loss": 0.04854605, "memory(GiB)": 13.7, "step": 102775, "train_speed(iter/s)": 1.528894 }, { "acc": 0.99187498, "epoch": 48.174361378017345, "grad_norm": 0.13411924242973328, "learning_rate": 3.695277355806874e-08, "loss": 0.01921097, "memory(GiB)": 13.7, "step": 102780, "train_speed(iter/s)": 1.528898 }, { "acc": 1.0, "epoch": 48.17670494492618, "grad_norm": 0.007212798111140728, "learning_rate": 3.68600434952918e-08, "loss": 0.00862357, "memory(GiB)": 13.7, "step": 102785, "train_speed(iter/s)": 1.528903 }, { "acc": 0.98083334, "epoch": 48.17904851183501, "grad_norm": 5.177236080169678, "learning_rate": 3.676743274211203e-08, "loss": 0.04046114, "memory(GiB)": 13.7, "step": 102790, "train_speed(iter/s)": 1.528905 }, { "acc": 0.9854166, "epoch": 48.18139207874385, "grad_norm": 1.4909031391143799, "learning_rate": 3.667494130075631e-08, "loss": 0.02078084, "memory(GiB)": 13.7, "step": 102795, "train_speed(iter/s)": 1.52891 }, { "acc": 0.97937498, "epoch": 48.18373564565268, "grad_norm": 3.962883710861206, "learning_rate": 3.6582569173447654e-08, "loss": 0.04711368, "memory(GiB)": 13.7, "step": 102800, "train_speed(iter/s)": 1.528915 }, { "acc": 0.97986107, "epoch": 48.186079212561516, "grad_norm": 3.575127124786377, "learning_rate": 3.649031636240572e-08, "loss": 0.06417556, "memory(GiB)": 13.7, "step": 102805, "train_speed(iter/s)": 1.528917 }, { "acc": 0.9708333, "epoch": 48.18842277947035, "grad_norm": 1.8493497371673584, "learning_rate": 3.639818286984906e-08, "loss": 0.05036986, "memory(GiB)": 13.7, "step": 102810, "train_speed(iter/s)": 1.528914 }, { "acc": 0.98673611, "epoch": 48.19076634637919, "grad_norm": 4.161405086517334, "learning_rate": 3.630616869799181e-08, "loss": 0.0280498, "memory(GiB)": 13.7, "step": 102815, "train_speed(iter/s)": 1.52892 }, { "acc": 0.9864583, "epoch": 48.193109913288026, "grad_norm": 3.339531421661377, "learning_rate": 3.6214273849045306e-08, "loss": 0.02994326, "memory(GiB)": 13.7, "step": 102820, "train_speed(iter/s)": 1.528922 }, { "acc": 0.97270832, "epoch": 48.19545348019686, "grad_norm": 0.005192342679947615, "learning_rate": 3.612249832521922e-08, "loss": 0.04158965, "memory(GiB)": 13.7, "step": 102825, "train_speed(iter/s)": 1.528921 }, { "acc": 0.97541676, "epoch": 48.197797047105695, "grad_norm": 0.0013149844016879797, "learning_rate": 3.603084212871824e-08, "loss": 0.03751184, "memory(GiB)": 13.7, "step": 102830, "train_speed(iter/s)": 1.528924 }, { "acc": 0.99409723, "epoch": 48.20014061401453, "grad_norm": 0.0019819505978375673, "learning_rate": 3.593930526174704e-08, "loss": 0.02238837, "memory(GiB)": 13.7, "step": 102835, "train_speed(iter/s)": 1.528926 }, { "acc": 0.990625, "epoch": 48.20248418092336, "grad_norm": 2.199552297592163, "learning_rate": 3.584788772650476e-08, "loss": 0.01804732, "memory(GiB)": 13.7, "step": 102840, "train_speed(iter/s)": 1.528926 }, { "acc": 0.98062496, "epoch": 48.2048277478322, "grad_norm": 3.140188217163086, "learning_rate": 3.5756589525188874e-08, "loss": 0.03587837, "memory(GiB)": 13.7, "step": 102845, "train_speed(iter/s)": 1.528928 }, { "acc": 0.98291664, "epoch": 48.20717131474104, "grad_norm": 1.5651476383209229, "learning_rate": 3.566541065999461e-08, "loss": 0.02841157, "memory(GiB)": 13.7, "step": 102850, "train_speed(iter/s)": 1.528931 }, { "acc": 0.9838541, "epoch": 48.20951488164987, "grad_norm": 3.8962085247039795, "learning_rate": 3.557435113311279e-08, "loss": 0.03350457, "memory(GiB)": 13.7, "step": 102855, "train_speed(iter/s)": 1.528932 }, { "acc": 0.97062492, "epoch": 48.21185844855871, "grad_norm": 2.7321536540985107, "learning_rate": 3.548341094673255e-08, "loss": 0.04499318, "memory(GiB)": 13.7, "step": 102860, "train_speed(iter/s)": 1.528932 }, { "acc": 0.9927084, "epoch": 48.21420201546754, "grad_norm": 3.2745344638824463, "learning_rate": 3.53925901030397e-08, "loss": 0.02148574, "memory(GiB)": 13.7, "step": 102865, "train_speed(iter/s)": 1.528941 }, { "acc": 0.97738094, "epoch": 48.216545582376376, "grad_norm": 3.2478463649749756, "learning_rate": 3.530188860421783e-08, "loss": 0.05957131, "memory(GiB)": 13.7, "step": 102870, "train_speed(iter/s)": 1.528945 }, { "acc": 0.99333334, "epoch": 48.21888914928521, "grad_norm": 0.5750428438186646, "learning_rate": 3.5211306452446085e-08, "loss": 0.01883762, "memory(GiB)": 13.7, "step": 102875, "train_speed(iter/s)": 1.528948 }, { "acc": 0.98346052, "epoch": 48.221232716194045, "grad_norm": 1.8486542701721191, "learning_rate": 3.512084364990251e-08, "loss": 0.05244338, "memory(GiB)": 13.7, "step": 102880, "train_speed(iter/s)": 1.528952 }, { "acc": 0.98604164, "epoch": 48.22357628310288, "grad_norm": 2.910781145095825, "learning_rate": 3.503050019876126e-08, "loss": 0.0339523, "memory(GiB)": 13.7, "step": 102885, "train_speed(iter/s)": 1.528955 }, { "acc": 0.99548607, "epoch": 48.22591985001172, "grad_norm": 1.3058322668075562, "learning_rate": 3.494027610119372e-08, "loss": 0.02011243, "memory(GiB)": 13.7, "step": 102890, "train_speed(iter/s)": 1.528962 }, { "acc": 0.98715286, "epoch": 48.228263416920555, "grad_norm": 4.538361549377441, "learning_rate": 3.485017135936903e-08, "loss": 0.0288375, "memory(GiB)": 13.7, "step": 102895, "train_speed(iter/s)": 1.528965 }, { "acc": 0.99338741, "epoch": 48.23060698382939, "grad_norm": 1.0462801456451416, "learning_rate": 3.476018597545303e-08, "loss": 0.02522373, "memory(GiB)": 13.7, "step": 102900, "train_speed(iter/s)": 1.528971 }, { "acc": 0.98467264, "epoch": 48.232950550738224, "grad_norm": 2.9209084510803223, "learning_rate": 3.467031995160823e-08, "loss": 0.02937462, "memory(GiB)": 13.7, "step": 102905, "train_speed(iter/s)": 1.528976 }, { "acc": 0.98467264, "epoch": 48.23529411764706, "grad_norm": 0.4808577001094818, "learning_rate": 3.4580573289995444e-08, "loss": 0.03073236, "memory(GiB)": 13.7, "step": 102910, "train_speed(iter/s)": 1.528978 }, { "acc": 0.98249998, "epoch": 48.23763768455589, "grad_norm": 3.884042978286743, "learning_rate": 3.449094599277052e-08, "loss": 0.03388344, "memory(GiB)": 13.7, "step": 102915, "train_speed(iter/s)": 1.52898 }, { "acc": 0.98675594, "epoch": 48.23998125146473, "grad_norm": 3.6153314113616943, "learning_rate": 3.440143806208873e-08, "loss": 0.0328845, "memory(GiB)": 13.7, "step": 102920, "train_speed(iter/s)": 1.528985 }, { "acc": 0.99750004, "epoch": 48.24232481837357, "grad_norm": 0.004141828510910273, "learning_rate": 3.4312049500102045e-08, "loss": 0.0218005, "memory(GiB)": 13.7, "step": 102925, "train_speed(iter/s)": 1.528986 }, { "acc": 0.9958333, "epoch": 48.2446683852824, "grad_norm": 2.157137632369995, "learning_rate": 3.422278030895795e-08, "loss": 0.01348637, "memory(GiB)": 13.7, "step": 102930, "train_speed(iter/s)": 1.528987 }, { "acc": 0.9916667, "epoch": 48.24701195219124, "grad_norm": 4.846813678741455, "learning_rate": 3.413363049080343e-08, "loss": 0.04305637, "memory(GiB)": 13.7, "step": 102935, "train_speed(iter/s)": 1.52899 }, { "acc": 0.98562498, "epoch": 48.24935551910007, "grad_norm": 3.739044189453125, "learning_rate": 3.404460004777987e-08, "loss": 0.04444757, "memory(GiB)": 13.7, "step": 102940, "train_speed(iter/s)": 1.528996 }, { "acc": 0.97479172, "epoch": 48.251699086008905, "grad_norm": 5.045288562774658, "learning_rate": 3.395568898202869e-08, "loss": 0.03533671, "memory(GiB)": 13.7, "step": 102945, "train_speed(iter/s)": 1.528999 }, { "acc": 0.98562498, "epoch": 48.25404265291774, "grad_norm": 2.4029664993286133, "learning_rate": 3.386689729568629e-08, "loss": 0.02472626, "memory(GiB)": 13.7, "step": 102950, "train_speed(iter/s)": 1.529002 }, { "acc": 0.98041668, "epoch": 48.256386219826574, "grad_norm": 5.033239841461182, "learning_rate": 3.377822499088631e-08, "loss": 0.05226041, "memory(GiB)": 13.7, "step": 102955, "train_speed(iter/s)": 1.529001 }, { "acc": 0.99437504, "epoch": 48.25872978673541, "grad_norm": 1.696352481842041, "learning_rate": 3.368967206976128e-08, "loss": 0.04030533, "memory(GiB)": 13.7, "step": 102960, "train_speed(iter/s)": 1.529002 }, { "acc": 0.98883934, "epoch": 48.26107335364425, "grad_norm": 4.296490669250488, "learning_rate": 3.360123853443928e-08, "loss": 0.04109459, "memory(GiB)": 13.7, "step": 102965, "train_speed(iter/s)": 1.529007 }, { "acc": 0.99065475, "epoch": 48.263416920553084, "grad_norm": 2.304419755935669, "learning_rate": 3.351292438704616e-08, "loss": 0.02882335, "memory(GiB)": 13.7, "step": 102970, "train_speed(iter/s)": 1.529005 }, { "acc": 0.98354168, "epoch": 48.26576048746192, "grad_norm": 2.424633026123047, "learning_rate": 3.342472962970448e-08, "loss": 0.02746941, "memory(GiB)": 13.7, "step": 102975, "train_speed(iter/s)": 1.529002 }, { "acc": 0.98217258, "epoch": 48.26810405437075, "grad_norm": 2.1608152389526367, "learning_rate": 3.333665426453398e-08, "loss": 0.03636579, "memory(GiB)": 13.7, "step": 102980, "train_speed(iter/s)": 1.529004 }, { "acc": 0.97383938, "epoch": 48.27044762127959, "grad_norm": 0.9887261986732483, "learning_rate": 3.324869829365221e-08, "loss": 0.04879047, "memory(GiB)": 13.7, "step": 102985, "train_speed(iter/s)": 1.529005 }, { "acc": 0.99125004, "epoch": 48.27279118818842, "grad_norm": 2.4590744972229004, "learning_rate": 3.316086171917282e-08, "loss": 0.01942906, "memory(GiB)": 13.7, "step": 102990, "train_speed(iter/s)": 1.529005 }, { "acc": 0.9875, "epoch": 48.275134755097255, "grad_norm": 1.6177403926849365, "learning_rate": 3.3073144543207795e-08, "loss": 0.02844919, "memory(GiB)": 13.7, "step": 102995, "train_speed(iter/s)": 1.529004 }, { "acc": 0.98291664, "epoch": 48.2774783220061, "grad_norm": 1.6109920740127563, "learning_rate": 3.2985546767864685e-08, "loss": 0.0307757, "memory(GiB)": 13.7, "step": 103000, "train_speed(iter/s)": 1.52901 }, { "acc": 0.98083334, "epoch": 48.27982188891493, "grad_norm": 2.894491672515869, "learning_rate": 3.289806839524937e-08, "loss": 0.04832637, "memory(GiB)": 13.7, "step": 103005, "train_speed(iter/s)": 1.529013 }, { "acc": 0.97562494, "epoch": 48.282165455823765, "grad_norm": 4.049280643463135, "learning_rate": 3.281070942746441e-08, "loss": 0.05264004, "memory(GiB)": 13.7, "step": 103010, "train_speed(iter/s)": 1.529015 }, { "acc": 0.99125004, "epoch": 48.2845090227326, "grad_norm": 3.7520554065704346, "learning_rate": 3.2723469866610686e-08, "loss": 0.02160753, "memory(GiB)": 13.7, "step": 103015, "train_speed(iter/s)": 1.529017 }, { "acc": 0.98906822, "epoch": 48.286852589641434, "grad_norm": 3.3672122955322266, "learning_rate": 3.2636349714783527e-08, "loss": 0.04945197, "memory(GiB)": 13.7, "step": 103020, "train_speed(iter/s)": 1.529014 }, { "acc": 0.99187498, "epoch": 48.28919615655027, "grad_norm": 0.9865319728851318, "learning_rate": 3.254934897407829e-08, "loss": 0.0403266, "memory(GiB)": 13.7, "step": 103025, "train_speed(iter/s)": 1.529018 }, { "acc": 0.98488102, "epoch": 48.2915397234591, "grad_norm": 1.4638786315917969, "learning_rate": 3.2462467646585846e-08, "loss": 0.02332772, "memory(GiB)": 13.7, "step": 103030, "train_speed(iter/s)": 1.529021 }, { "acc": 0.99298611, "epoch": 48.29388329036794, "grad_norm": 1.3771460056304932, "learning_rate": 3.237570573439433e-08, "loss": 0.0160732, "memory(GiB)": 13.7, "step": 103035, "train_speed(iter/s)": 1.529022 }, { "acc": 0.97925596, "epoch": 48.29622685727678, "grad_norm": 4.276405334472656, "learning_rate": 3.228906323958908e-08, "loss": 0.03180875, "memory(GiB)": 13.7, "step": 103040, "train_speed(iter/s)": 1.529025 }, { "acc": 0.9979167, "epoch": 48.29857042418561, "grad_norm": 0.0010827910155057907, "learning_rate": 3.2202540164252685e-08, "loss": 0.01593792, "memory(GiB)": 13.7, "step": 103045, "train_speed(iter/s)": 1.529027 }, { "acc": 0.98166676, "epoch": 48.30091399109445, "grad_norm": 1.4720449447631836, "learning_rate": 3.2116136510466025e-08, "loss": 0.07511588, "memory(GiB)": 13.7, "step": 103050, "train_speed(iter/s)": 1.529031 }, { "acc": 0.996875, "epoch": 48.30325755800328, "grad_norm": 0.0029687457717955112, "learning_rate": 3.202985228030392e-08, "loss": 0.01924716, "memory(GiB)": 13.7, "step": 103055, "train_speed(iter/s)": 1.529033 }, { "acc": 0.9729167, "epoch": 48.305601124912116, "grad_norm": 3.921910285949707, "learning_rate": 3.194368747584226e-08, "loss": 0.05620942, "memory(GiB)": 13.7, "step": 103060, "train_speed(iter/s)": 1.529032 }, { "acc": 0.97890873, "epoch": 48.30794469182095, "grad_norm": 2.2842166423797607, "learning_rate": 3.1857642099150856e-08, "loss": 0.04686849, "memory(GiB)": 13.7, "step": 103065, "train_speed(iter/s)": 1.529034 }, { "acc": 0.9895834, "epoch": 48.310288258729784, "grad_norm": 4.3383564949035645, "learning_rate": 3.177171615229897e-08, "loss": 0.01735362, "memory(GiB)": 13.7, "step": 103070, "train_speed(iter/s)": 1.529038 }, { "acc": 0.98842258, "epoch": 48.31263182563862, "grad_norm": 4.526365756988525, "learning_rate": 3.168590963735084e-08, "loss": 0.02657189, "memory(GiB)": 13.7, "step": 103075, "train_speed(iter/s)": 1.529037 }, { "acc": 0.98562508, "epoch": 48.31497539254746, "grad_norm": 2.149118185043335, "learning_rate": 3.1600222556370167e-08, "loss": 0.03534074, "memory(GiB)": 13.7, "step": 103080, "train_speed(iter/s)": 1.529037 }, { "acc": 0.9953125, "epoch": 48.317318959456294, "grad_norm": 3.5226783752441406, "learning_rate": 3.1514654911416216e-08, "loss": 0.02801578, "memory(GiB)": 13.7, "step": 103085, "train_speed(iter/s)": 1.529035 }, { "acc": 0.99035721, "epoch": 48.31966252636513, "grad_norm": 0.9643293023109436, "learning_rate": 3.142920670454546e-08, "loss": 0.0155645, "memory(GiB)": 13.7, "step": 103090, "train_speed(iter/s)": 1.529036 }, { "acc": 0.98000002, "epoch": 48.32200609327396, "grad_norm": 0.0028606930281966925, "learning_rate": 3.134387793781161e-08, "loss": 0.03030619, "memory(GiB)": 13.7, "step": 103095, "train_speed(iter/s)": 1.529036 }, { "acc": 0.98916664, "epoch": 48.3243496601828, "grad_norm": 5.93680477142334, "learning_rate": 3.125866861326561e-08, "loss": 0.02502775, "memory(GiB)": 13.7, "step": 103100, "train_speed(iter/s)": 1.529036 }, { "acc": 0.98321438, "epoch": 48.32669322709163, "grad_norm": 3.3204848766326904, "learning_rate": 3.11735787329567e-08, "loss": 0.03168325, "memory(GiB)": 13.7, "step": 103105, "train_speed(iter/s)": 1.529043 }, { "acc": 0.9858902, "epoch": 48.329036794000466, "grad_norm": 6.861393928527832, "learning_rate": 3.108860829892917e-08, "loss": 0.08672132, "memory(GiB)": 13.7, "step": 103110, "train_speed(iter/s)": 1.529041 }, { "acc": 0.97790184, "epoch": 48.33138036090931, "grad_norm": 3.726383686065674, "learning_rate": 3.1003757313226176e-08, "loss": 0.03380913, "memory(GiB)": 13.7, "step": 103115, "train_speed(iter/s)": 1.529042 }, { "acc": 0.9958334, "epoch": 48.33372392781814, "grad_norm": 0.006119084544479847, "learning_rate": 3.0919025777886443e-08, "loss": 0.01889884, "memory(GiB)": 13.7, "step": 103120, "train_speed(iter/s)": 1.529041 }, { "acc": 0.99035721, "epoch": 48.336067494726976, "grad_norm": 3.9488611221313477, "learning_rate": 3.0834413694946474e-08, "loss": 0.03407252, "memory(GiB)": 13.7, "step": 103125, "train_speed(iter/s)": 1.529043 }, { "acc": 0.98594704, "epoch": 48.33841106163581, "grad_norm": 2.7406270503997803, "learning_rate": 3.074992106644109e-08, "loss": 0.03686007, "memory(GiB)": 13.7, "step": 103130, "train_speed(iter/s)": 1.529048 }, { "acc": 0.96583338, "epoch": 48.340754628544644, "grad_norm": 4.12388801574707, "learning_rate": 3.06655478944007e-08, "loss": 0.05672468, "memory(GiB)": 13.7, "step": 103135, "train_speed(iter/s)": 1.529051 }, { "acc": 0.99020834, "epoch": 48.34309819545348, "grad_norm": 1.84102463722229, "learning_rate": 3.0581294180853474e-08, "loss": 0.0406668, "memory(GiB)": 13.7, "step": 103140, "train_speed(iter/s)": 1.529054 }, { "acc": 0.9888195, "epoch": 48.34544176236231, "grad_norm": 3.306608200073242, "learning_rate": 3.0497159927824246e-08, "loss": 0.03300821, "memory(GiB)": 13.7, "step": 103145, "train_speed(iter/s)": 1.529056 }, { "acc": 0.98656254, "epoch": 48.34778532927115, "grad_norm": 0.0014257418224588037, "learning_rate": 3.0413145137335665e-08, "loss": 0.01645843, "memory(GiB)": 13.7, "step": 103150, "train_speed(iter/s)": 1.529057 }, { "acc": 0.98874998, "epoch": 48.35012889617999, "grad_norm": 4.1160688400268555, "learning_rate": 3.0329249811407e-08, "loss": 0.02856981, "memory(GiB)": 13.7, "step": 103155, "train_speed(iter/s)": 1.529062 }, { "acc": 0.99187498, "epoch": 48.35247246308882, "grad_norm": 0.0037163118831813335, "learning_rate": 3.024547395205533e-08, "loss": 0.03347192, "memory(GiB)": 13.7, "step": 103160, "train_speed(iter/s)": 1.529066 }, { "acc": 0.9965909, "epoch": 48.35481602999766, "grad_norm": 2.212918519973755, "learning_rate": 3.01618175612933e-08, "loss": 0.07541585, "memory(GiB)": 13.7, "step": 103165, "train_speed(iter/s)": 1.529068 }, { "acc": 0.9864584, "epoch": 48.35715959690649, "grad_norm": 2.439589500427246, "learning_rate": 3.007828064113242e-08, "loss": 0.04008539, "memory(GiB)": 13.7, "step": 103170, "train_speed(iter/s)": 1.529067 }, { "acc": 0.98663692, "epoch": 48.359503163815326, "grad_norm": 0.6826860308647156, "learning_rate": 2.9994863193580894e-08, "loss": 0.02119346, "memory(GiB)": 13.7, "step": 103175, "train_speed(iter/s)": 1.529067 }, { "acc": 0.98113098, "epoch": 48.36184673072416, "grad_norm": 3.7334609031677246, "learning_rate": 2.9911565220643024e-08, "loss": 0.04093089, "memory(GiB)": 13.7, "step": 103180, "train_speed(iter/s)": 1.529069 }, { "acc": 0.98529758, "epoch": 48.364190297632994, "grad_norm": 3.034879207611084, "learning_rate": 2.9828386724322e-08, "loss": 0.02555045, "memory(GiB)": 13.7, "step": 103185, "train_speed(iter/s)": 1.529072 }, { "acc": 0.99273815, "epoch": 48.366533864541836, "grad_norm": 2.707810401916504, "learning_rate": 2.9745327706616588e-08, "loss": 0.03605576, "memory(GiB)": 13.7, "step": 103190, "train_speed(iter/s)": 1.529075 }, { "acc": 0.99295502, "epoch": 48.36887743145067, "grad_norm": 4.058414459228516, "learning_rate": 2.9662388169523314e-08, "loss": 0.04214078, "memory(GiB)": 13.7, "step": 103195, "train_speed(iter/s)": 1.529077 }, { "acc": 0.985322, "epoch": 48.371220998359505, "grad_norm": 0.8850492835044861, "learning_rate": 2.9579568115035398e-08, "loss": 0.03133345, "memory(GiB)": 13.7, "step": 103200, "train_speed(iter/s)": 1.52908 }, { "acc": 0.97666664, "epoch": 48.37356456526834, "grad_norm": 4.235438346862793, "learning_rate": 2.9496867545144373e-08, "loss": 0.04456219, "memory(GiB)": 13.7, "step": 103205, "train_speed(iter/s)": 1.529079 }, { "acc": 0.98552084, "epoch": 48.37590813217717, "grad_norm": 2.6731178760528564, "learning_rate": 2.941428646183734e-08, "loss": 0.03335912, "memory(GiB)": 13.7, "step": 103210, "train_speed(iter/s)": 1.529079 }, { "acc": 0.98817959, "epoch": 48.37825169908601, "grad_norm": 2.380387306213379, "learning_rate": 2.9331824867099182e-08, "loss": 0.0470984, "memory(GiB)": 13.7, "step": 103215, "train_speed(iter/s)": 1.529083 }, { "acc": 0.97288694, "epoch": 48.38059526599484, "grad_norm": 2.1939096450805664, "learning_rate": 2.924948276291311e-08, "loss": 0.05039183, "memory(GiB)": 13.7, "step": 103220, "train_speed(iter/s)": 1.529085 }, { "acc": 0.97562504, "epoch": 48.382938832903676, "grad_norm": 7.1775946617126465, "learning_rate": 2.9167260151257343e-08, "loss": 0.04778108, "memory(GiB)": 13.7, "step": 103225, "train_speed(iter/s)": 1.529088 }, { "acc": 0.98458328, "epoch": 48.38528239981252, "grad_norm": 3.3600730895996094, "learning_rate": 2.9085157034108997e-08, "loss": 0.03281089, "memory(GiB)": 13.7, "step": 103230, "train_speed(iter/s)": 1.529093 }, { "acc": 0.98467264, "epoch": 48.38762596672135, "grad_norm": 4.290786266326904, "learning_rate": 2.900317341344018e-08, "loss": 0.03829147, "memory(GiB)": 13.7, "step": 103235, "train_speed(iter/s)": 1.529095 }, { "acc": 0.97956238, "epoch": 48.389969533630186, "grad_norm": 3.722691535949707, "learning_rate": 2.8921309291223563e-08, "loss": 0.0319236, "memory(GiB)": 13.7, "step": 103240, "train_speed(iter/s)": 1.529096 }, { "acc": 0.98857145, "epoch": 48.39231310053902, "grad_norm": 0.000756270659621805, "learning_rate": 2.8839564669424596e-08, "loss": 0.07994347, "memory(GiB)": 13.7, "step": 103245, "train_speed(iter/s)": 1.529097 }, { "acc": 0.9802084, "epoch": 48.394656667447855, "grad_norm": 2.7114791870117188, "learning_rate": 2.8757939550009848e-08, "loss": 0.03323237, "memory(GiB)": 13.7, "step": 103250, "train_speed(iter/s)": 1.529098 }, { "acc": 0.98083334, "epoch": 48.39700023435669, "grad_norm": 0.7385931611061096, "learning_rate": 2.8676433934940886e-08, "loss": 0.03740475, "memory(GiB)": 13.7, "step": 103255, "train_speed(iter/s)": 1.529103 }, { "acc": 0.98500004, "epoch": 48.39934380126552, "grad_norm": 2.4153852462768555, "learning_rate": 2.8595047826177058e-08, "loss": 0.02532658, "memory(GiB)": 13.7, "step": 103260, "train_speed(iter/s)": 1.529108 }, { "acc": 0.98137321, "epoch": 48.401687368174365, "grad_norm": 3.32786226272583, "learning_rate": 2.8513781225673826e-08, "loss": 0.04992135, "memory(GiB)": 13.7, "step": 103265, "train_speed(iter/s)": 1.529109 }, { "acc": 0.99229164, "epoch": 48.4040309350832, "grad_norm": 3.2119405269622803, "learning_rate": 2.8432634135384978e-08, "loss": 0.02493876, "memory(GiB)": 13.7, "step": 103270, "train_speed(iter/s)": 1.529111 }, { "acc": 0.97711048, "epoch": 48.40637450199203, "grad_norm": 4.439340114593506, "learning_rate": 2.835160655726155e-08, "loss": 0.06049494, "memory(GiB)": 13.7, "step": 103275, "train_speed(iter/s)": 1.529112 }, { "acc": 0.98431644, "epoch": 48.40871806890087, "grad_norm": 3.229099750518799, "learning_rate": 2.8270698493250114e-08, "loss": 0.03920805, "memory(GiB)": 13.7, "step": 103280, "train_speed(iter/s)": 1.529114 }, { "acc": 0.98000002, "epoch": 48.4110616358097, "grad_norm": 5.768733024597168, "learning_rate": 2.81899099452967e-08, "loss": 0.02971555, "memory(GiB)": 13.7, "step": 103285, "train_speed(iter/s)": 1.52912 }, { "acc": 0.98676472, "epoch": 48.413405202718536, "grad_norm": 3.6302313804626465, "learning_rate": 2.8109240915342344e-08, "loss": 0.04235384, "memory(GiB)": 13.7, "step": 103290, "train_speed(iter/s)": 1.529121 }, { "acc": 0.99249992, "epoch": 48.41574876962737, "grad_norm": 0.0020105005241930485, "learning_rate": 2.802869140532584e-08, "loss": 0.02738368, "memory(GiB)": 13.7, "step": 103295, "train_speed(iter/s)": 1.529123 }, { "acc": 0.9854167, "epoch": 48.418092336536205, "grad_norm": 2.925384283065796, "learning_rate": 2.7948261417184353e-08, "loss": 0.02974208, "memory(GiB)": 13.7, "step": 103300, "train_speed(iter/s)": 1.529125 }, { "acc": 0.99073868, "epoch": 48.420435903445046, "grad_norm": 2.6280083656311035, "learning_rate": 2.786795095285002e-08, "loss": 0.03256542, "memory(GiB)": 13.7, "step": 103305, "train_speed(iter/s)": 1.529124 }, { "acc": 0.98354168, "epoch": 48.42277947035388, "grad_norm": 4.110773086547852, "learning_rate": 2.7787760014253884e-08, "loss": 0.05143311, "memory(GiB)": 13.7, "step": 103310, "train_speed(iter/s)": 1.529123 }, { "acc": 0.98090782, "epoch": 48.425123037262715, "grad_norm": 2.028041362762451, "learning_rate": 2.7707688603323657e-08, "loss": 0.03889651, "memory(GiB)": 13.7, "step": 103315, "train_speed(iter/s)": 1.529123 }, { "acc": 0.98624992, "epoch": 48.42746660417155, "grad_norm": 3.0154731273651123, "learning_rate": 2.7627736721983716e-08, "loss": 0.06132117, "memory(GiB)": 13.7, "step": 103320, "train_speed(iter/s)": 1.529126 }, { "acc": 0.9946023, "epoch": 48.42981017108038, "grad_norm": 3.3785784244537354, "learning_rate": 2.7547904372155663e-08, "loss": 0.03993258, "memory(GiB)": 13.7, "step": 103325, "train_speed(iter/s)": 1.529126 }, { "acc": 0.98812504, "epoch": 48.43215373798922, "grad_norm": 0.9806753396987915, "learning_rate": 2.7468191555758334e-08, "loss": 0.03441989, "memory(GiB)": 13.7, "step": 103330, "train_speed(iter/s)": 1.529124 }, { "acc": 0.99008932, "epoch": 48.43449730489805, "grad_norm": 0.4821672737598419, "learning_rate": 2.7388598274708887e-08, "loss": 0.02903052, "memory(GiB)": 13.7, "step": 103335, "train_speed(iter/s)": 1.529127 }, { "acc": 0.98322296, "epoch": 48.43684087180689, "grad_norm": 2.6228740215301514, "learning_rate": 2.7309124530918934e-08, "loss": 0.042985, "memory(GiB)": 13.7, "step": 103340, "train_speed(iter/s)": 1.529129 }, { "acc": 0.9854167, "epoch": 48.43918443871573, "grad_norm": 2.924649715423584, "learning_rate": 2.7229770326299533e-08, "loss": 0.02978613, "memory(GiB)": 13.7, "step": 103345, "train_speed(iter/s)": 1.52913 }, { "acc": 0.98447914, "epoch": 48.44152800562456, "grad_norm": 2.939622402191162, "learning_rate": 2.715053566275786e-08, "loss": 0.03912515, "memory(GiB)": 13.7, "step": 103350, "train_speed(iter/s)": 1.529132 }, { "acc": 0.98154764, "epoch": 48.443871572533396, "grad_norm": 4.303891658782959, "learning_rate": 2.7071420542198862e-08, "loss": 0.06824188, "memory(GiB)": 13.7, "step": 103355, "train_speed(iter/s)": 1.529133 }, { "acc": 0.98868589, "epoch": 48.44621513944223, "grad_norm": 7.117031097412109, "learning_rate": 2.6992424966523055e-08, "loss": 0.02666972, "memory(GiB)": 13.7, "step": 103360, "train_speed(iter/s)": 1.529134 }, { "acc": 0.99285717, "epoch": 48.448558706351065, "grad_norm": 2.279615640640259, "learning_rate": 2.691354893763095e-08, "loss": 0.02201151, "memory(GiB)": 13.7, "step": 103365, "train_speed(iter/s)": 1.52914 }, { "acc": 0.98780642, "epoch": 48.4509022732599, "grad_norm": 3.470580816268921, "learning_rate": 2.683479245741751e-08, "loss": 0.0325026, "memory(GiB)": 13.7, "step": 103370, "train_speed(iter/s)": 1.529145 }, { "acc": 0.9984375, "epoch": 48.453245840168734, "grad_norm": 0.0024331307504326105, "learning_rate": 2.6756155527776023e-08, "loss": 0.01320073, "memory(GiB)": 13.7, "step": 103375, "train_speed(iter/s)": 1.52915 }, { "acc": 0.975, "epoch": 48.455589407077575, "grad_norm": 4.233953952789307, "learning_rate": 2.667763815059646e-08, "loss": 0.04750801, "memory(GiB)": 13.7, "step": 103380, "train_speed(iter/s)": 1.529152 }, { "acc": 0.97892857, "epoch": 48.45793297398641, "grad_norm": 4.682165145874023, "learning_rate": 2.6599240327766016e-08, "loss": 0.03375785, "memory(GiB)": 13.7, "step": 103385, "train_speed(iter/s)": 1.529161 }, { "acc": 0.98270302, "epoch": 48.460276540895244, "grad_norm": 3.5358848571777344, "learning_rate": 2.6520962061169653e-08, "loss": 0.04407821, "memory(GiB)": 13.7, "step": 103390, "train_speed(iter/s)": 1.529166 }, { "acc": 0.99541664, "epoch": 48.46262010780408, "grad_norm": 0.24141383171081543, "learning_rate": 2.644280335268791e-08, "loss": 0.01666344, "memory(GiB)": 13.7, "step": 103395, "train_speed(iter/s)": 1.52917 }, { "acc": 0.98291664, "epoch": 48.46496367471291, "grad_norm": 1.3541460037231445, "learning_rate": 2.636476420420075e-08, "loss": 0.02990403, "memory(GiB)": 13.7, "step": 103400, "train_speed(iter/s)": 1.529175 }, { "acc": 0.98187504, "epoch": 48.46730724162175, "grad_norm": 4.077388286590576, "learning_rate": 2.6286844617583162e-08, "loss": 0.03638426, "memory(GiB)": 13.7, "step": 103405, "train_speed(iter/s)": 1.529175 }, { "acc": 0.99249992, "epoch": 48.46965080853058, "grad_norm": 2.6490023136138916, "learning_rate": 2.6209044594708457e-08, "loss": 0.03906518, "memory(GiB)": 13.7, "step": 103410, "train_speed(iter/s)": 1.529179 }, { "acc": 0.98779764, "epoch": 48.47199437543942, "grad_norm": 1.9013415575027466, "learning_rate": 2.6131364137446615e-08, "loss": 0.03679535, "memory(GiB)": 13.7, "step": 103415, "train_speed(iter/s)": 1.529182 }, { "acc": 0.98638887, "epoch": 48.47433794234826, "grad_norm": 2.813647508621216, "learning_rate": 2.6053803247663738e-08, "loss": 0.0271355, "memory(GiB)": 13.7, "step": 103420, "train_speed(iter/s)": 1.529182 }, { "acc": 0.98177719, "epoch": 48.47668150925709, "grad_norm": 5.668262004852295, "learning_rate": 2.5976361927225924e-08, "loss": 0.05769784, "memory(GiB)": 13.7, "step": 103425, "train_speed(iter/s)": 1.529181 }, { "acc": 0.98916664, "epoch": 48.479025076165925, "grad_norm": 3.93194580078125, "learning_rate": 2.5899040177993722e-08, "loss": 0.02242054, "memory(GiB)": 13.7, "step": 103430, "train_speed(iter/s)": 1.52918 }, { "acc": 0.98291664, "epoch": 48.48136864307476, "grad_norm": 0.06652804464101791, "learning_rate": 2.582183800182601e-08, "loss": 0.06513757, "memory(GiB)": 13.7, "step": 103435, "train_speed(iter/s)": 1.529184 }, { "acc": 0.9880147, "epoch": 48.483712209983594, "grad_norm": 5.359342098236084, "learning_rate": 2.5744755400577787e-08, "loss": 0.05324328, "memory(GiB)": 13.7, "step": 103440, "train_speed(iter/s)": 1.529185 }, { "acc": 0.99321423, "epoch": 48.48605577689243, "grad_norm": 4.1570048332214355, "learning_rate": 2.566779237610239e-08, "loss": 0.01749715, "memory(GiB)": 13.7, "step": 103445, "train_speed(iter/s)": 1.529185 }, { "acc": 0.9833334, "epoch": 48.48839934380126, "grad_norm": 0.09423661231994629, "learning_rate": 2.5590948930249816e-08, "loss": 0.0262882, "memory(GiB)": 13.7, "step": 103450, "train_speed(iter/s)": 1.529192 }, { "acc": 0.99291668, "epoch": 48.490742910710104, "grad_norm": 0.07793556153774261, "learning_rate": 2.5514225064866734e-08, "loss": 0.03960876, "memory(GiB)": 13.7, "step": 103455, "train_speed(iter/s)": 1.529194 }, { "acc": 0.97506943, "epoch": 48.49308647761894, "grad_norm": 2.568434953689575, "learning_rate": 2.5437620781798156e-08, "loss": 0.06555258, "memory(GiB)": 13.7, "step": 103460, "train_speed(iter/s)": 1.529199 }, { "acc": 0.97875004, "epoch": 48.49543004452777, "grad_norm": 2.7679848670959473, "learning_rate": 2.536113608288409e-08, "loss": 0.04426674, "memory(GiB)": 13.7, "step": 103465, "train_speed(iter/s)": 1.529201 }, { "acc": 0.99083328, "epoch": 48.49777361143661, "grad_norm": 1.715099811553955, "learning_rate": 2.5284770969963988e-08, "loss": 0.06836581, "memory(GiB)": 13.7, "step": 103470, "train_speed(iter/s)": 1.529204 }, { "acc": 0.98556089, "epoch": 48.50011717834544, "grad_norm": 2.0048396587371826, "learning_rate": 2.5208525444872873e-08, "loss": 0.03915531, "memory(GiB)": 13.7, "step": 103475, "train_speed(iter/s)": 1.529201 }, { "acc": 0.98166666, "epoch": 48.502460745254275, "grad_norm": 4.21024751663208, "learning_rate": 2.513239950944464e-08, "loss": 0.04622507, "memory(GiB)": 13.7, "step": 103480, "train_speed(iter/s)": 1.529201 }, { "acc": 0.98354168, "epoch": 48.50480431216311, "grad_norm": 1.7812299728393555, "learning_rate": 2.50563931655071e-08, "loss": 0.03293074, "memory(GiB)": 13.7, "step": 103485, "train_speed(iter/s)": 1.529199 }, { "acc": 0.98499994, "epoch": 48.50714787907195, "grad_norm": 4.318841934204102, "learning_rate": 2.498050641488915e-08, "loss": 0.03692616, "memory(GiB)": 13.7, "step": 103490, "train_speed(iter/s)": 1.529202 }, { "acc": 0.98534222, "epoch": 48.509491445980785, "grad_norm": 2.9417030811309814, "learning_rate": 2.4904739259413607e-08, "loss": 0.05498554, "memory(GiB)": 13.7, "step": 103495, "train_speed(iter/s)": 1.529208 }, { "acc": 0.98291664, "epoch": 48.51183501288962, "grad_norm": 5.49910306930542, "learning_rate": 2.48290917009016e-08, "loss": 0.03187267, "memory(GiB)": 13.7, "step": 103500, "train_speed(iter/s)": 1.529209 }, { "acc": 0.97666664, "epoch": 48.514178579798454, "grad_norm": 4.25764799118042, "learning_rate": 2.47535637411726e-08, "loss": 0.0504491, "memory(GiB)": 13.7, "step": 103505, "train_speed(iter/s)": 1.529205 }, { "acc": 0.98423615, "epoch": 48.51652214670729, "grad_norm": 3.7798843383789062, "learning_rate": 2.4678155382040543e-08, "loss": 0.0209214, "memory(GiB)": 13.7, "step": 103510, "train_speed(iter/s)": 1.529209 }, { "acc": 0.97820511, "epoch": 48.51886571361612, "grad_norm": 5.771718502044678, "learning_rate": 2.4602866625319343e-08, "loss": 0.04352966, "memory(GiB)": 13.7, "step": 103515, "train_speed(iter/s)": 1.529213 }, { "acc": 0.98847218, "epoch": 48.52120928052496, "grad_norm": 0.8996119499206543, "learning_rate": 2.4527697472817933e-08, "loss": 0.04355043, "memory(GiB)": 13.7, "step": 103520, "train_speed(iter/s)": 1.529213 }, { "acc": 0.98562498, "epoch": 48.52355284743379, "grad_norm": 0.03892078250646591, "learning_rate": 2.4452647926343564e-08, "loss": 0.03055329, "memory(GiB)": 13.7, "step": 103525, "train_speed(iter/s)": 1.529214 }, { "acc": 0.98430557, "epoch": 48.52589641434263, "grad_norm": 0.0008527175523340702, "learning_rate": 2.437771798770018e-08, "loss": 0.02186607, "memory(GiB)": 13.7, "step": 103530, "train_speed(iter/s)": 1.529216 }, { "acc": 0.98467255, "epoch": 48.52823998125147, "grad_norm": 4.418041706085205, "learning_rate": 2.4302907658687822e-08, "loss": 0.02703735, "memory(GiB)": 13.7, "step": 103535, "train_speed(iter/s)": 1.529218 }, { "acc": 0.98863096, "epoch": 48.5305835481603, "grad_norm": 3.031405210494995, "learning_rate": 2.422821694110598e-08, "loss": 0.04255561, "memory(GiB)": 13.7, "step": 103540, "train_speed(iter/s)": 1.529219 }, { "acc": 0.98446426, "epoch": 48.532927115069135, "grad_norm": 2.165869951248169, "learning_rate": 2.415364583674971e-08, "loss": 0.02329958, "memory(GiB)": 13.7, "step": 103545, "train_speed(iter/s)": 1.52922 }, { "acc": 0.98496037, "epoch": 48.53527068197797, "grad_norm": 3.7865281105041504, "learning_rate": 2.4079194347410734e-08, "loss": 0.05456314, "memory(GiB)": 13.7, "step": 103550, "train_speed(iter/s)": 1.52922 }, { "acc": 0.98377981, "epoch": 48.537614248886804, "grad_norm": 3.246061325073242, "learning_rate": 2.4004862474879656e-08, "loss": 0.02996722, "memory(GiB)": 13.7, "step": 103555, "train_speed(iter/s)": 1.529218 }, { "acc": 0.98604164, "epoch": 48.53995781579564, "grad_norm": 2.5834922790527344, "learning_rate": 2.3930650220942098e-08, "loss": 0.01499371, "memory(GiB)": 13.7, "step": 103560, "train_speed(iter/s)": 1.52922 }, { "acc": 0.99229164, "epoch": 48.54230138270448, "grad_norm": 2.9900145530700684, "learning_rate": 2.3856557587382555e-08, "loss": 0.01377838, "memory(GiB)": 13.7, "step": 103565, "train_speed(iter/s)": 1.529221 }, { "acc": 0.9979167, "epoch": 48.544644949613314, "grad_norm": 0.6871256232261658, "learning_rate": 2.378258457598221e-08, "loss": 0.01723137, "memory(GiB)": 13.7, "step": 103570, "train_speed(iter/s)": 1.529225 }, { "acc": 0.98708334, "epoch": 48.54698851652215, "grad_norm": 0.014808917418122292, "learning_rate": 2.3708731188518357e-08, "loss": 0.01413866, "memory(GiB)": 13.7, "step": 103575, "train_speed(iter/s)": 1.529229 }, { "acc": 0.9927084, "epoch": 48.54933208343098, "grad_norm": 4.097772121429443, "learning_rate": 2.3634997426766607e-08, "loss": 0.01694103, "memory(GiB)": 13.7, "step": 103580, "train_speed(iter/s)": 1.529233 }, { "acc": 0.97666664, "epoch": 48.55167565033982, "grad_norm": 4.77777624130249, "learning_rate": 2.3561383292499258e-08, "loss": 0.03719246, "memory(GiB)": 13.7, "step": 103585, "train_speed(iter/s)": 1.529236 }, { "acc": 0.97949295, "epoch": 48.55401921724865, "grad_norm": 2.705543279647827, "learning_rate": 2.348788878748584e-08, "loss": 0.06904368, "memory(GiB)": 13.7, "step": 103590, "train_speed(iter/s)": 1.529238 }, { "acc": 0.9856945, "epoch": 48.556362784157486, "grad_norm": 3.945326328277588, "learning_rate": 2.3414513913493076e-08, "loss": 0.03147787, "memory(GiB)": 13.7, "step": 103595, "train_speed(iter/s)": 1.529242 }, { "acc": 0.98428574, "epoch": 48.55870635106632, "grad_norm": 2.62823748588562, "learning_rate": 2.3341258672283846e-08, "loss": 0.03695033, "memory(GiB)": 13.7, "step": 103600, "train_speed(iter/s)": 1.529243 }, { "acc": 0.99875002, "epoch": 48.56104991797516, "grad_norm": 2.4653689861297607, "learning_rate": 2.326812306561988e-08, "loss": 0.02253411, "memory(GiB)": 13.7, "step": 103605, "train_speed(iter/s)": 1.529245 }, { "acc": 0.97458334, "epoch": 48.563393484883996, "grad_norm": 0.0006918457802385092, "learning_rate": 2.3195107095258494e-08, "loss": 0.05997899, "memory(GiB)": 13.7, "step": 103610, "train_speed(iter/s)": 1.529245 }, { "acc": 0.9890625, "epoch": 48.56573705179283, "grad_norm": 3.617905378341675, "learning_rate": 2.3122210762955326e-08, "loss": 0.03419555, "memory(GiB)": 13.7, "step": 103615, "train_speed(iter/s)": 1.529249 }, { "acc": 0.990625, "epoch": 48.568080618701664, "grad_norm": 5.050476551055908, "learning_rate": 2.3049434070461587e-08, "loss": 0.02360537, "memory(GiB)": 13.7, "step": 103620, "train_speed(iter/s)": 1.529251 }, { "acc": 0.98125, "epoch": 48.5704241856105, "grad_norm": 2.5989553928375244, "learning_rate": 2.297677701952791e-08, "loss": 0.03827741, "memory(GiB)": 13.7, "step": 103625, "train_speed(iter/s)": 1.529251 }, { "acc": 0.99072914, "epoch": 48.57276775251933, "grad_norm": 3.0475268363952637, "learning_rate": 2.290423961190051e-08, "loss": 0.0160007, "memory(GiB)": 13.7, "step": 103630, "train_speed(iter/s)": 1.529256 }, { "acc": 0.98708334, "epoch": 48.57511131942817, "grad_norm": 3.336577892303467, "learning_rate": 2.2831821849321704e-08, "loss": 0.04320434, "memory(GiB)": 13.7, "step": 103635, "train_speed(iter/s)": 1.52926 }, { "acc": 0.9854167, "epoch": 48.577454886337, "grad_norm": 1.592076063156128, "learning_rate": 2.2759523733533262e-08, "loss": 0.03986178, "memory(GiB)": 13.7, "step": 103640, "train_speed(iter/s)": 1.529263 }, { "acc": 0.99027777, "epoch": 48.57979845324584, "grad_norm": 1.16871976852417, "learning_rate": 2.2687345266272504e-08, "loss": 0.04721521, "memory(GiB)": 13.7, "step": 103645, "train_speed(iter/s)": 1.529267 }, { "acc": 0.98666668, "epoch": 48.58214202015468, "grad_norm": 4.032280921936035, "learning_rate": 2.2615286449275092e-08, "loss": 0.0585399, "memory(GiB)": 13.7, "step": 103650, "train_speed(iter/s)": 1.529272 }, { "acc": 0.98500004, "epoch": 48.58448558706351, "grad_norm": 4.010463714599609, "learning_rate": 2.2543347284272248e-08, "loss": 0.0283929, "memory(GiB)": 13.7, "step": 103655, "train_speed(iter/s)": 1.529274 }, { "acc": 0.99305553, "epoch": 48.586829153972346, "grad_norm": 0.0047085159458220005, "learning_rate": 2.2471527772993522e-08, "loss": 0.01998366, "memory(GiB)": 13.7, "step": 103660, "train_speed(iter/s)": 1.529279 }, { "acc": 0.9895833, "epoch": 48.58917272088118, "grad_norm": 3.3919100761413574, "learning_rate": 2.2399827917165148e-08, "loss": 0.02217487, "memory(GiB)": 13.7, "step": 103665, "train_speed(iter/s)": 1.529278 }, { "acc": 0.99174681, "epoch": 48.591516287790014, "grad_norm": 2.4709792137145996, "learning_rate": 2.2328247718510563e-08, "loss": 0.02502676, "memory(GiB)": 13.7, "step": 103670, "train_speed(iter/s)": 1.529283 }, { "acc": 0.97979164, "epoch": 48.59385985469885, "grad_norm": 0.03153756260871887, "learning_rate": 2.2256787178750442e-08, "loss": 0.06030415, "memory(GiB)": 13.7, "step": 103675, "train_speed(iter/s)": 1.529281 }, { "acc": 0.99333334, "epoch": 48.59620342160769, "grad_norm": 4.336026668548584, "learning_rate": 2.2185446299602137e-08, "loss": 0.03033858, "memory(GiB)": 13.7, "step": 103680, "train_speed(iter/s)": 1.529284 }, { "acc": 0.98145828, "epoch": 48.598546988516524, "grad_norm": 4.2113261222839355, "learning_rate": 2.2114225082780766e-08, "loss": 0.02960156, "memory(GiB)": 13.7, "step": 103685, "train_speed(iter/s)": 1.529284 }, { "acc": 0.99363976, "epoch": 48.60089055542536, "grad_norm": 2.4026737213134766, "learning_rate": 2.2043123529998117e-08, "loss": 0.02986259, "memory(GiB)": 13.7, "step": 103690, "train_speed(iter/s)": 1.529285 }, { "acc": 0.99437504, "epoch": 48.60323412233419, "grad_norm": 2.7012667655944824, "learning_rate": 2.1972141642963773e-08, "loss": 0.02549927, "memory(GiB)": 13.7, "step": 103695, "train_speed(iter/s)": 1.52929 }, { "acc": 0.97152777, "epoch": 48.60557768924303, "grad_norm": 3.455268383026123, "learning_rate": 2.190127942338286e-08, "loss": 0.0756417, "memory(GiB)": 13.7, "step": 103700, "train_speed(iter/s)": 1.529291 }, { "acc": 0.98666668, "epoch": 48.60792125615186, "grad_norm": 2.455812692642212, "learning_rate": 2.1830536872959395e-08, "loss": 0.03185336, "memory(GiB)": 13.7, "step": 103705, "train_speed(iter/s)": 1.529296 }, { "acc": 0.9947916, "epoch": 48.610264823060696, "grad_norm": 2.3677401542663574, "learning_rate": 2.175991399339352e-08, "loss": 0.02221093, "memory(GiB)": 13.7, "step": 103710, "train_speed(iter/s)": 1.529299 }, { "acc": 0.984375, "epoch": 48.61260838996953, "grad_norm": 2.095290422439575, "learning_rate": 2.168941078638315e-08, "loss": 0.02719548, "memory(GiB)": 13.7, "step": 103715, "train_speed(iter/s)": 1.529303 }, { "acc": 0.98344688, "epoch": 48.61495195687837, "grad_norm": 2.183872699737549, "learning_rate": 2.161902725362232e-08, "loss": 0.03348781, "memory(GiB)": 13.7, "step": 103720, "train_speed(iter/s)": 1.529306 }, { "acc": 0.98312492, "epoch": 48.617295523787206, "grad_norm": 3.141099214553833, "learning_rate": 2.154876339680339e-08, "loss": 0.0230371, "memory(GiB)": 13.7, "step": 103725, "train_speed(iter/s)": 1.52931 }, { "acc": 0.98790178, "epoch": 48.61963909069604, "grad_norm": 3.8432416915893555, "learning_rate": 2.14786192176154e-08, "loss": 0.02845593, "memory(GiB)": 13.7, "step": 103730, "train_speed(iter/s)": 1.52931 }, { "acc": 0.97645836, "epoch": 48.621982657604875, "grad_norm": 2.4724507331848145, "learning_rate": 2.1408594717743495e-08, "loss": 0.04918816, "memory(GiB)": 13.7, "step": 103735, "train_speed(iter/s)": 1.529315 }, { "acc": 0.98683748, "epoch": 48.62432622451371, "grad_norm": 0.18892677128314972, "learning_rate": 2.1338689898871165e-08, "loss": 0.0423436, "memory(GiB)": 13.7, "step": 103740, "train_speed(iter/s)": 1.529317 }, { "acc": 0.98770294, "epoch": 48.62666979142254, "grad_norm": 0.0016354586696252227, "learning_rate": 2.126890476267967e-08, "loss": 0.02830609, "memory(GiB)": 13.7, "step": 103745, "train_speed(iter/s)": 1.529317 }, { "acc": 0.97375002, "epoch": 48.62901335833138, "grad_norm": 5.993849277496338, "learning_rate": 2.119923931084529e-08, "loss": 0.06744319, "memory(GiB)": 13.7, "step": 103750, "train_speed(iter/s)": 1.529319 }, { "acc": 1.0, "epoch": 48.63135692524022, "grad_norm": 0.026355354115366936, "learning_rate": 2.1129693545043165e-08, "loss": 0.01305118, "memory(GiB)": 13.7, "step": 103755, "train_speed(iter/s)": 1.529321 }, { "acc": 0.98708334, "epoch": 48.63370049214905, "grad_norm": 0.8354263305664062, "learning_rate": 2.1060267466944025e-08, "loss": 0.02645001, "memory(GiB)": 13.7, "step": 103760, "train_speed(iter/s)": 1.529323 }, { "acc": 0.98500004, "epoch": 48.63604405905789, "grad_norm": 3.07118558883667, "learning_rate": 2.0990961078218037e-08, "loss": 0.0757123, "memory(GiB)": 13.7, "step": 103765, "train_speed(iter/s)": 1.529326 }, { "acc": 0.984375, "epoch": 48.63838762596672, "grad_norm": 4.329788684844971, "learning_rate": 2.092177438052981e-08, "loss": 0.03921686, "memory(GiB)": 13.7, "step": 103770, "train_speed(iter/s)": 1.529327 }, { "acc": 0.9864583, "epoch": 48.640731192875556, "grad_norm": 4.1404900550842285, "learning_rate": 2.08527073755434e-08, "loss": 0.03443882, "memory(GiB)": 13.7, "step": 103775, "train_speed(iter/s)": 1.529332 }, { "acc": 0.97687502, "epoch": 48.64307475978439, "grad_norm": 2.967320680618286, "learning_rate": 2.0783760064918424e-08, "loss": 0.03566773, "memory(GiB)": 13.7, "step": 103780, "train_speed(iter/s)": 1.529333 }, { "acc": 0.98395824, "epoch": 48.645418326693225, "grad_norm": 3.073068141937256, "learning_rate": 2.0714932450312284e-08, "loss": 0.03328178, "memory(GiB)": 13.7, "step": 103785, "train_speed(iter/s)": 1.529334 }, { "acc": 0.99131947, "epoch": 48.64776189360206, "grad_norm": 2.581611156463623, "learning_rate": 2.0646224533379595e-08, "loss": 0.03620526, "memory(GiB)": 13.7, "step": 103790, "train_speed(iter/s)": 1.529337 }, { "acc": 0.98842258, "epoch": 48.6501054605109, "grad_norm": 2.354387044906616, "learning_rate": 2.05776363157711e-08, "loss": 0.02025768, "memory(GiB)": 13.7, "step": 103795, "train_speed(iter/s)": 1.529337 }, { "acc": 0.9822917, "epoch": 48.652449027419735, "grad_norm": 4.0164475440979, "learning_rate": 2.050916779913587e-08, "loss": 0.03473851, "memory(GiB)": 13.7, "step": 103800, "train_speed(iter/s)": 1.529339 }, { "acc": 0.99375, "epoch": 48.65479259432857, "grad_norm": 2.1385486125946045, "learning_rate": 2.044081898511964e-08, "loss": 0.022101, "memory(GiB)": 13.7, "step": 103805, "train_speed(iter/s)": 1.529339 }, { "acc": 0.98145828, "epoch": 48.6571361612374, "grad_norm": 4.187171459197998, "learning_rate": 2.0372589875365936e-08, "loss": 0.04083521, "memory(GiB)": 13.7, "step": 103810, "train_speed(iter/s)": 1.52934 }, { "acc": 0.98458328, "epoch": 48.65947972814624, "grad_norm": 4.365574359893799, "learning_rate": 2.030448047151384e-08, "loss": 0.02671038, "memory(GiB)": 13.7, "step": 103815, "train_speed(iter/s)": 1.529341 }, { "acc": 0.99718132, "epoch": 48.66182329505507, "grad_norm": 2.547147750854492, "learning_rate": 2.023649077520076e-08, "loss": 0.01951635, "memory(GiB)": 13.7, "step": 103820, "train_speed(iter/s)": 1.529343 }, { "acc": 0.98468742, "epoch": 48.664166861963906, "grad_norm": 3.7529451847076416, "learning_rate": 2.0168620788060784e-08, "loss": 0.03019514, "memory(GiB)": 13.7, "step": 103825, "train_speed(iter/s)": 1.529347 }, { "acc": 0.9833333, "epoch": 48.66651042887275, "grad_norm": 2.21116042137146, "learning_rate": 2.0100870511725784e-08, "loss": 0.0313294, "memory(GiB)": 13.7, "step": 103830, "train_speed(iter/s)": 1.529347 }, { "acc": 0.9895833, "epoch": 48.66885399578158, "grad_norm": 0.02647264115512371, "learning_rate": 2.0033239947824284e-08, "loss": 0.05437599, "memory(GiB)": 13.7, "step": 103835, "train_speed(iter/s)": 1.529348 }, { "acc": 0.97000008, "epoch": 48.671197562690416, "grad_norm": 4.39730978012085, "learning_rate": 1.996572909798094e-08, "loss": 0.05727533, "memory(GiB)": 13.7, "step": 103840, "train_speed(iter/s)": 1.529347 }, { "acc": 0.98708334, "epoch": 48.67354112959925, "grad_norm": 3.8519763946533203, "learning_rate": 1.9898337963819837e-08, "loss": 0.01928625, "memory(GiB)": 13.7, "step": 103845, "train_speed(iter/s)": 1.529346 }, { "acc": 0.98708334, "epoch": 48.675884696508085, "grad_norm": 2.7779922485351562, "learning_rate": 1.983106654695953e-08, "loss": 0.04307089, "memory(GiB)": 13.7, "step": 103850, "train_speed(iter/s)": 1.529347 }, { "acc": 0.9916666, "epoch": 48.67822826341692, "grad_norm": 3.443120002746582, "learning_rate": 1.9763914849018e-08, "loss": 0.02975448, "memory(GiB)": 13.7, "step": 103855, "train_speed(iter/s)": 1.529348 }, { "acc": 0.98374996, "epoch": 48.68057183032575, "grad_norm": 2.4614744186401367, "learning_rate": 1.969688287160824e-08, "loss": 0.04129597, "memory(GiB)": 13.7, "step": 103860, "train_speed(iter/s)": 1.529347 }, { "acc": 0.97342262, "epoch": 48.68291539723459, "grad_norm": 5.8681960105896, "learning_rate": 1.9629970616343248e-08, "loss": 0.03989794, "memory(GiB)": 13.7, "step": 103865, "train_speed(iter/s)": 1.52935 }, { "acc": 0.99008923, "epoch": 48.68525896414343, "grad_norm": 3.443436861038208, "learning_rate": 1.95631780848299e-08, "loss": 0.05609269, "memory(GiB)": 13.7, "step": 103870, "train_speed(iter/s)": 1.529353 }, { "acc": 0.98770828, "epoch": 48.68760253105226, "grad_norm": 2.0591061115264893, "learning_rate": 1.949650527867399e-08, "loss": 0.0198448, "memory(GiB)": 13.7, "step": 103875, "train_speed(iter/s)": 1.529352 }, { "acc": 0.97729168, "epoch": 48.6899460979611, "grad_norm": 1.6435275077819824, "learning_rate": 1.9429952199478503e-08, "loss": 0.04628833, "memory(GiB)": 13.7, "step": 103880, "train_speed(iter/s)": 1.529353 }, { "acc": 0.97601185, "epoch": 48.69228966486993, "grad_norm": 2.9538309574127197, "learning_rate": 1.9363518848842563e-08, "loss": 0.07290684, "memory(GiB)": 13.7, "step": 103885, "train_speed(iter/s)": 1.529351 }, { "acc": 0.99243469, "epoch": 48.69463323177877, "grad_norm": 3.0771560668945312, "learning_rate": 1.929720522836362e-08, "loss": 0.05859027, "memory(GiB)": 13.7, "step": 103890, "train_speed(iter/s)": 1.529351 }, { "acc": 0.98592262, "epoch": 48.6969767986876, "grad_norm": 4.382026672363281, "learning_rate": 1.9231011339634687e-08, "loss": 0.03409927, "memory(GiB)": 13.7, "step": 103895, "train_speed(iter/s)": 1.529353 }, { "acc": 0.9916667, "epoch": 48.699320365596435, "grad_norm": 1.5172953605651855, "learning_rate": 1.9164937184248214e-08, "loss": 0.019868, "memory(GiB)": 13.7, "step": 103900, "train_speed(iter/s)": 1.529355 }, { "acc": 0.97892857, "epoch": 48.70166393250528, "grad_norm": 5.1121063232421875, "learning_rate": 1.909898276379167e-08, "loss": 0.04204175, "memory(GiB)": 13.7, "step": 103905, "train_speed(iter/s)": 1.52936 }, { "acc": 0.98093748, "epoch": 48.70400749941411, "grad_norm": 4.2859086990356445, "learning_rate": 1.9033148079850286e-08, "loss": 0.04893449, "memory(GiB)": 13.7, "step": 103910, "train_speed(iter/s)": 1.529362 }, { "acc": 0.996875, "epoch": 48.706351066322945, "grad_norm": 3.8954918384552, "learning_rate": 1.896743313400653e-08, "loss": 0.01049005, "memory(GiB)": 13.7, "step": 103915, "train_speed(iter/s)": 1.529363 }, { "acc": 0.99125004, "epoch": 48.70869463323178, "grad_norm": 2.4677040576934814, "learning_rate": 1.8901837927840096e-08, "loss": 0.01082097, "memory(GiB)": 13.7, "step": 103920, "train_speed(iter/s)": 1.529367 }, { "acc": 0.990625, "epoch": 48.711038200140614, "grad_norm": 3.4028913974761963, "learning_rate": 1.883636246292789e-08, "loss": 0.06092559, "memory(GiB)": 13.7, "step": 103925, "train_speed(iter/s)": 1.529366 }, { "acc": 0.9875, "epoch": 48.71338176704945, "grad_norm": 2.186519145965576, "learning_rate": 1.8771006740842946e-08, "loss": 0.02088497, "memory(GiB)": 13.7, "step": 103930, "train_speed(iter/s)": 1.529367 }, { "acc": 0.990625, "epoch": 48.71572533395828, "grad_norm": 3.7630560398101807, "learning_rate": 1.8705770763157182e-08, "loss": 0.02239642, "memory(GiB)": 13.7, "step": 103935, "train_speed(iter/s)": 1.529366 }, { "acc": 0.9875, "epoch": 48.71806890086712, "grad_norm": 3.9258244037628174, "learning_rate": 1.8640654531438077e-08, "loss": 0.03625556, "memory(GiB)": 13.7, "step": 103940, "train_speed(iter/s)": 1.529368 }, { "acc": 0.9802084, "epoch": 48.72041246777596, "grad_norm": 4.234703540802002, "learning_rate": 1.857565804725144e-08, "loss": 0.0333299, "memory(GiB)": 13.7, "step": 103945, "train_speed(iter/s)": 1.529366 }, { "acc": 0.99437504, "epoch": 48.72275603468479, "grad_norm": 0.9398713111877441, "learning_rate": 1.851078131215809e-08, "loss": 0.02119297, "memory(GiB)": 13.7, "step": 103950, "train_speed(iter/s)": 1.529365 }, { "acc": 0.98354168, "epoch": 48.72509960159363, "grad_norm": 4.506056308746338, "learning_rate": 1.84460243277194e-08, "loss": 0.04031628, "memory(GiB)": 13.7, "step": 103955, "train_speed(iter/s)": 1.529364 }, { "acc": 0.97270832, "epoch": 48.72744316850246, "grad_norm": 4.58366584777832, "learning_rate": 1.838138709549064e-08, "loss": 0.05031512, "memory(GiB)": 13.7, "step": 103960, "train_speed(iter/s)": 1.529365 }, { "acc": 0.99375, "epoch": 48.729786735411295, "grad_norm": 1.999431848526001, "learning_rate": 1.8316869617025408e-08, "loss": 0.03442454, "memory(GiB)": 13.7, "step": 103965, "train_speed(iter/s)": 1.529364 }, { "acc": 0.9875, "epoch": 48.73213030232013, "grad_norm": 3.596987009048462, "learning_rate": 1.8252471893875644e-08, "loss": 0.02192336, "memory(GiB)": 13.7, "step": 103970, "train_speed(iter/s)": 1.529366 }, { "acc": 0.98604164, "epoch": 48.734473869228964, "grad_norm": 2.0026824474334717, "learning_rate": 1.8188193927587725e-08, "loss": 0.03906165, "memory(GiB)": 13.7, "step": 103975, "train_speed(iter/s)": 1.529366 }, { "acc": 0.9958334, "epoch": 48.7368174361378, "grad_norm": 5.324703216552734, "learning_rate": 1.8124035719708052e-08, "loss": 0.01652732, "memory(GiB)": 13.7, "step": 103980, "train_speed(iter/s)": 1.52937 }, { "acc": 0.9958333, "epoch": 48.73916100304664, "grad_norm": 3.0424206256866455, "learning_rate": 1.8059997271778002e-08, "loss": 0.03271355, "memory(GiB)": 13.7, "step": 103985, "train_speed(iter/s)": 1.529374 }, { "acc": 0.97684021, "epoch": 48.741504569955474, "grad_norm": 7.6008429527282715, "learning_rate": 1.799607858533675e-08, "loss": 0.04532453, "memory(GiB)": 13.7, "step": 103990, "train_speed(iter/s)": 1.529377 }, { "acc": 0.97909718, "epoch": 48.74384813686431, "grad_norm": 1.332646369934082, "learning_rate": 1.793227966192125e-08, "loss": 0.04415091, "memory(GiB)": 13.7, "step": 103995, "train_speed(iter/s)": 1.529383 }, { "acc": 0.98627834, "epoch": 48.74619170377314, "grad_norm": 0.015135560184717178, "learning_rate": 1.7868600503065117e-08, "loss": 0.04852198, "memory(GiB)": 13.7, "step": 104000, "train_speed(iter/s)": 1.529386 }, { "acc": 0.98812504, "epoch": 48.74853527068198, "grad_norm": 3.2024316787719727, "learning_rate": 1.7805041110298085e-08, "loss": 0.01467609, "memory(GiB)": 13.7, "step": 104005, "train_speed(iter/s)": 1.529388 }, { "acc": 0.98395834, "epoch": 48.75087883759081, "grad_norm": 0.006526371464133263, "learning_rate": 1.7741601485148225e-08, "loss": 0.04803247, "memory(GiB)": 13.7, "step": 104010, "train_speed(iter/s)": 1.529392 }, { "acc": 0.97946434, "epoch": 48.753222404499645, "grad_norm": 0.94925856590271, "learning_rate": 1.767828162914139e-08, "loss": 0.04658163, "memory(GiB)": 13.7, "step": 104015, "train_speed(iter/s)": 1.529392 }, { "acc": 0.97250004, "epoch": 48.75556597140849, "grad_norm": 4.444779872894287, "learning_rate": 1.7615081543798423e-08, "loss": 0.05190558, "memory(GiB)": 13.7, "step": 104020, "train_speed(iter/s)": 1.52939 }, { "acc": 0.9895834, "epoch": 48.75790953831732, "grad_norm": 0.9883919358253479, "learning_rate": 1.7552001230639075e-08, "loss": 0.02655443, "memory(GiB)": 13.7, "step": 104025, "train_speed(iter/s)": 1.529392 }, { "acc": 0.98487186, "epoch": 48.760253105226155, "grad_norm": 0.004389289300888777, "learning_rate": 1.748904069117865e-08, "loss": 0.04048711, "memory(GiB)": 13.7, "step": 104030, "train_speed(iter/s)": 1.529399 }, { "acc": 1.0, "epoch": 48.76259667213499, "grad_norm": 0.00041519475053064525, "learning_rate": 1.742619992693189e-08, "loss": 0.00868728, "memory(GiB)": 13.7, "step": 104035, "train_speed(iter/s)": 1.529401 }, { "acc": 0.98833332, "epoch": 48.764940239043824, "grad_norm": 1.6715760231018066, "learning_rate": 1.7363478939408555e-08, "loss": 0.01960332, "memory(GiB)": 13.7, "step": 104040, "train_speed(iter/s)": 1.529408 }, { "acc": 0.98477678, "epoch": 48.76728380595266, "grad_norm": 2.4388957023620605, "learning_rate": 1.730087773011617e-08, "loss": 0.04137701, "memory(GiB)": 13.7, "step": 104045, "train_speed(iter/s)": 1.529413 }, { "acc": 0.98946428, "epoch": 48.76962737286149, "grad_norm": 3.127657175064087, "learning_rate": 1.7238396300559496e-08, "loss": 0.02299941, "memory(GiB)": 13.7, "step": 104050, "train_speed(iter/s)": 1.529415 }, { "acc": 0.95988102, "epoch": 48.77197093977033, "grad_norm": 5.121621608734131, "learning_rate": 1.717603465224051e-08, "loss": 0.05765266, "memory(GiB)": 13.7, "step": 104055, "train_speed(iter/s)": 1.529421 }, { "acc": 0.98071423, "epoch": 48.77431450667917, "grad_norm": 3.294825315475464, "learning_rate": 1.7113792786658424e-08, "loss": 0.03360063, "memory(GiB)": 13.7, "step": 104060, "train_speed(iter/s)": 1.529423 }, { "acc": 0.9856945, "epoch": 48.776658073588, "grad_norm": 3.9969544410705566, "learning_rate": 1.7051670705309112e-08, "loss": 0.0435748, "memory(GiB)": 13.7, "step": 104065, "train_speed(iter/s)": 1.529429 }, { "acc": 0.98476639, "epoch": 48.77900164049684, "grad_norm": 2.9905481338500977, "learning_rate": 1.6989668409685116e-08, "loss": 0.04068611, "memory(GiB)": 13.7, "step": 104070, "train_speed(iter/s)": 1.529428 }, { "acc": 0.99125004, "epoch": 48.78134520740567, "grad_norm": 5.163313388824463, "learning_rate": 1.6927785901277876e-08, "loss": 0.039749, "memory(GiB)": 13.7, "step": 104075, "train_speed(iter/s)": 1.52943 }, { "acc": 0.99571428, "epoch": 48.783688774314506, "grad_norm": 4.002503871917725, "learning_rate": 1.6866023181574384e-08, "loss": 0.01624011, "memory(GiB)": 13.7, "step": 104080, "train_speed(iter/s)": 1.529431 }, { "acc": 0.98562832, "epoch": 48.78603234122334, "grad_norm": 0.03166165575385094, "learning_rate": 1.6804380252059417e-08, "loss": 0.04731869, "memory(GiB)": 13.7, "step": 104085, "train_speed(iter/s)": 1.529432 }, { "acc": 0.9854167, "epoch": 48.788375908132174, "grad_norm": 1.718445062637329, "learning_rate": 1.6742857114213864e-08, "loss": 0.02440858, "memory(GiB)": 13.7, "step": 104090, "train_speed(iter/s)": 1.529437 }, { "acc": 0.98425598, "epoch": 48.790719475041016, "grad_norm": 0.9022979140281677, "learning_rate": 1.6681453769517502e-08, "loss": 0.03666719, "memory(GiB)": 13.7, "step": 104095, "train_speed(iter/s)": 1.529435 }, { "acc": 0.98788691, "epoch": 48.79306304194985, "grad_norm": 1.9508450031280518, "learning_rate": 1.6620170219445673e-08, "loss": 0.06096081, "memory(GiB)": 13.7, "step": 104100, "train_speed(iter/s)": 1.529434 }, { "acc": 0.98562498, "epoch": 48.795406608858684, "grad_norm": 3.8450393676757812, "learning_rate": 1.655900646547205e-08, "loss": 0.03913942, "memory(GiB)": 13.7, "step": 104105, "train_speed(iter/s)": 1.529435 }, { "acc": 0.99258928, "epoch": 48.79775017576752, "grad_norm": 0.9941239953041077, "learning_rate": 1.6497962509065863e-08, "loss": 0.02170091, "memory(GiB)": 13.7, "step": 104110, "train_speed(iter/s)": 1.529438 }, { "acc": 0.98458328, "epoch": 48.80009374267635, "grad_norm": 2.6514840126037598, "learning_rate": 1.643703835169524e-08, "loss": 0.03091365, "memory(GiB)": 13.7, "step": 104115, "train_speed(iter/s)": 1.529441 }, { "acc": 0.98176537, "epoch": 48.80243730958519, "grad_norm": 3.3496649265289307, "learning_rate": 1.637623399482386e-08, "loss": 0.05286917, "memory(GiB)": 13.7, "step": 104120, "train_speed(iter/s)": 1.529446 }, { "acc": 0.9833334, "epoch": 48.80478087649402, "grad_norm": 4.691189765930176, "learning_rate": 1.6315549439914297e-08, "loss": 0.03332731, "memory(GiB)": 13.7, "step": 104125, "train_speed(iter/s)": 1.529446 }, { "acc": 0.97758923, "epoch": 48.807124443402856, "grad_norm": 4.443906784057617, "learning_rate": 1.625498468842413e-08, "loss": 0.04486999, "memory(GiB)": 13.7, "step": 104130, "train_speed(iter/s)": 1.529445 }, { "acc": 0.97520828, "epoch": 48.8094680103117, "grad_norm": 3.9672458171844482, "learning_rate": 1.6194539741809822e-08, "loss": 0.03996078, "memory(GiB)": 13.7, "step": 104135, "train_speed(iter/s)": 1.529447 }, { "acc": 0.97666664, "epoch": 48.81181157722053, "grad_norm": 2.9192521572113037, "learning_rate": 1.6134214601524513e-08, "loss": 0.05527893, "memory(GiB)": 13.7, "step": 104140, "train_speed(iter/s)": 1.529452 }, { "acc": 0.985322, "epoch": 48.814155144129366, "grad_norm": 2.7293832302093506, "learning_rate": 1.6074009269016895e-08, "loss": 0.04216675, "memory(GiB)": 13.7, "step": 104145, "train_speed(iter/s)": 1.529454 }, { "acc": 0.99466343, "epoch": 48.8164987110382, "grad_norm": 1.1756283044815063, "learning_rate": 1.601392374573567e-08, "loss": 0.04196838, "memory(GiB)": 13.7, "step": 104150, "train_speed(iter/s)": 1.529457 }, { "acc": 0.97580357, "epoch": 48.818842277947034, "grad_norm": 3.6698811054229736, "learning_rate": 1.595395803312398e-08, "loss": 0.05927607, "memory(GiB)": 13.7, "step": 104155, "train_speed(iter/s)": 1.529459 }, { "acc": 0.99020834, "epoch": 48.82118584485587, "grad_norm": 0.020019561052322388, "learning_rate": 1.589411213262386e-08, "loss": 0.03128163, "memory(GiB)": 13.7, "step": 104160, "train_speed(iter/s)": 1.529463 }, { "acc": 0.98812504, "epoch": 48.8235294117647, "grad_norm": 1.0601872205734253, "learning_rate": 1.583438604567346e-08, "loss": 0.06475157, "memory(GiB)": 13.7, "step": 104165, "train_speed(iter/s)": 1.529468 }, { "acc": 0.98291664, "epoch": 48.825872978673544, "grad_norm": 2.7545809745788574, "learning_rate": 1.5774779773708713e-08, "loss": 0.04083237, "memory(GiB)": 13.7, "step": 104170, "train_speed(iter/s)": 1.529466 }, { "acc": 0.98028851, "epoch": 48.82821654558238, "grad_norm": 5.691187381744385, "learning_rate": 1.5715293318162214e-08, "loss": 0.04065021, "memory(GiB)": 13.7, "step": 104175, "train_speed(iter/s)": 1.529472 }, { "acc": 0.98656254, "epoch": 48.83056011249121, "grad_norm": 2.851958990097046, "learning_rate": 1.5655926680463793e-08, "loss": 0.04854821, "memory(GiB)": 13.7, "step": 104180, "train_speed(iter/s)": 1.529473 }, { "acc": 0.98895826, "epoch": 48.83290367940005, "grad_norm": 2.550863265991211, "learning_rate": 1.559667986203994e-08, "loss": 0.02328834, "memory(GiB)": 13.7, "step": 104185, "train_speed(iter/s)": 1.529472 }, { "acc": 0.97562504, "epoch": 48.83524724630888, "grad_norm": 2.885554552078247, "learning_rate": 1.5537552864315483e-08, "loss": 0.0627993, "memory(GiB)": 13.7, "step": 104190, "train_speed(iter/s)": 1.529473 }, { "acc": 0.98050594, "epoch": 48.837590813217716, "grad_norm": 4.336403846740723, "learning_rate": 1.547854568871192e-08, "loss": 0.06451228, "memory(GiB)": 13.7, "step": 104195, "train_speed(iter/s)": 1.529476 }, { "acc": 0.984375, "epoch": 48.83993438012655, "grad_norm": 4.230818748474121, "learning_rate": 1.5419658336646315e-08, "loss": 0.03396985, "memory(GiB)": 13.7, "step": 104200, "train_speed(iter/s)": 1.529479 }, { "acc": 0.98125, "epoch": 48.842277947035384, "grad_norm": 1.5791324377059937, "learning_rate": 1.5360890809535165e-08, "loss": 0.03222682, "memory(GiB)": 13.7, "step": 104205, "train_speed(iter/s)": 1.529481 }, { "acc": 0.97751942, "epoch": 48.844621513944226, "grad_norm": 2.5097672939300537, "learning_rate": 1.5302243108790534e-08, "loss": 0.05342277, "memory(GiB)": 13.7, "step": 104210, "train_speed(iter/s)": 1.529478 }, { "acc": 0.98125, "epoch": 48.84696508085306, "grad_norm": 5.833779335021973, "learning_rate": 1.5243715235822263e-08, "loss": 0.03888639, "memory(GiB)": 13.7, "step": 104215, "train_speed(iter/s)": 1.529477 }, { "acc": 0.97833338, "epoch": 48.849308647761895, "grad_norm": 3.5133090019226074, "learning_rate": 1.5185307192037978e-08, "loss": 0.05032259, "memory(GiB)": 13.7, "step": 104220, "train_speed(iter/s)": 1.529479 }, { "acc": 0.9895607, "epoch": 48.85165221467073, "grad_norm": 1.5262271165847778, "learning_rate": 1.5127018978840303e-08, "loss": 0.03911899, "memory(GiB)": 13.7, "step": 104225, "train_speed(iter/s)": 1.52948 }, { "acc": 0.98729172, "epoch": 48.85399578157956, "grad_norm": 0.9160811305046082, "learning_rate": 1.5068850597630752e-08, "loss": 0.02606165, "memory(GiB)": 13.7, "step": 104230, "train_speed(iter/s)": 1.529485 }, { "acc": 0.9863636, "epoch": 48.8563393484884, "grad_norm": 5.134131908416748, "learning_rate": 1.501080204980807e-08, "loss": 0.0270543, "memory(GiB)": 13.7, "step": 104235, "train_speed(iter/s)": 1.52949 }, { "acc": 0.98863087, "epoch": 48.85868291539723, "grad_norm": 3.0424318313598633, "learning_rate": 1.4952873336766556e-08, "loss": 0.03375562, "memory(GiB)": 13.7, "step": 104240, "train_speed(iter/s)": 1.529492 }, { "acc": 0.99291668, "epoch": 48.86102648230607, "grad_norm": 0.24261261522769928, "learning_rate": 1.48950644598994e-08, "loss": 0.01715386, "memory(GiB)": 13.7, "step": 104245, "train_speed(iter/s)": 1.529493 }, { "acc": 0.97562504, "epoch": 48.86337004921491, "grad_norm": 5.21726655960083, "learning_rate": 1.4837375420595909e-08, "loss": 0.03454449, "memory(GiB)": 13.7, "step": 104250, "train_speed(iter/s)": 1.529495 }, { "acc": 0.9895834, "epoch": 48.86571361612374, "grad_norm": 3.6478564739227295, "learning_rate": 1.4779806220243168e-08, "loss": 0.03799922, "memory(GiB)": 13.7, "step": 104255, "train_speed(iter/s)": 1.529501 }, { "acc": 0.97700758, "epoch": 48.868057183032576, "grad_norm": 3.958479881286621, "learning_rate": 1.4722356860224372e-08, "loss": 0.0585564, "memory(GiB)": 13.7, "step": 104260, "train_speed(iter/s)": 1.529502 }, { "acc": 0.98071432, "epoch": 48.87040074994141, "grad_norm": 2.5751523971557617, "learning_rate": 1.466502734191995e-08, "loss": 0.0326414, "memory(GiB)": 13.7, "step": 104265, "train_speed(iter/s)": 1.529502 }, { "acc": 0.97416668, "epoch": 48.872744316850245, "grad_norm": 4.8361945152282715, "learning_rate": 1.4607817666709217e-08, "loss": 0.07069996, "memory(GiB)": 13.7, "step": 104270, "train_speed(iter/s)": 1.529506 }, { "acc": 0.99419651, "epoch": 48.87508788375908, "grad_norm": 6.311830520629883, "learning_rate": 1.4550727835965932e-08, "loss": 0.05698837, "memory(GiB)": 13.7, "step": 104275, "train_speed(iter/s)": 1.52951 }, { "acc": 0.9885416, "epoch": 48.87743145066791, "grad_norm": 1.6273128986358643, "learning_rate": 1.4493757851063862e-08, "loss": 0.02505659, "memory(GiB)": 13.7, "step": 104280, "train_speed(iter/s)": 1.529513 }, { "acc": 0.98291664, "epoch": 48.879775017576755, "grad_norm": 0.0726630836725235, "learning_rate": 1.443690771337122e-08, "loss": 0.04734598, "memory(GiB)": 13.7, "step": 104285, "train_speed(iter/s)": 1.529516 }, { "acc": 0.99475269, "epoch": 48.88211858448559, "grad_norm": 1.2323824167251587, "learning_rate": 1.4380177424254553e-08, "loss": 0.02507204, "memory(GiB)": 13.7, "step": 104290, "train_speed(iter/s)": 1.529518 }, { "acc": 0.9816761, "epoch": 48.88446215139442, "grad_norm": 2.397808313369751, "learning_rate": 1.4323566985078187e-08, "loss": 0.05199036, "memory(GiB)": 13.7, "step": 104295, "train_speed(iter/s)": 1.52952 }, { "acc": 0.9895834, "epoch": 48.88680571830326, "grad_norm": 2.2358293533325195, "learning_rate": 1.4267076397202011e-08, "loss": 0.01789848, "memory(GiB)": 13.7, "step": 104300, "train_speed(iter/s)": 1.529522 }, { "acc": 0.98083324, "epoch": 48.88914928521209, "grad_norm": 2.604034900665283, "learning_rate": 1.4210705661984246e-08, "loss": 0.03496231, "memory(GiB)": 13.7, "step": 104305, "train_speed(iter/s)": 1.529526 }, { "acc": 0.98767862, "epoch": 48.891492852120926, "grad_norm": 3.955259323120117, "learning_rate": 1.4154454780780341e-08, "loss": 0.02668769, "memory(GiB)": 13.7, "step": 104310, "train_speed(iter/s)": 1.529526 }, { "acc": 0.98249998, "epoch": 48.89383641902976, "grad_norm": 0.940186083316803, "learning_rate": 1.40983237549413e-08, "loss": 0.0345757, "memory(GiB)": 13.7, "step": 104315, "train_speed(iter/s)": 1.529525 }, { "acc": 0.98798609, "epoch": 48.8961799859386, "grad_norm": 4.151477336883545, "learning_rate": 1.4042312585817576e-08, "loss": 0.04344939, "memory(GiB)": 13.7, "step": 104320, "train_speed(iter/s)": 1.529526 }, { "acc": 0.99192715, "epoch": 48.898523552847436, "grad_norm": 2.085968017578125, "learning_rate": 1.3986421274754624e-08, "loss": 0.03641185, "memory(GiB)": 13.7, "step": 104325, "train_speed(iter/s)": 1.529529 }, { "acc": 0.98467264, "epoch": 48.90086711975627, "grad_norm": 0.6825322508811951, "learning_rate": 1.3930649823096236e-08, "loss": 0.02790706, "memory(GiB)": 13.7, "step": 104330, "train_speed(iter/s)": 1.529531 }, { "acc": 0.98354168, "epoch": 48.903210686665105, "grad_norm": 4.9961347579956055, "learning_rate": 1.3874998232182868e-08, "loss": 0.0392907, "memory(GiB)": 13.7, "step": 104335, "train_speed(iter/s)": 1.529533 }, { "acc": 0.9916667, "epoch": 48.90555425357394, "grad_norm": 2.654589891433716, "learning_rate": 1.3819466503351654e-08, "loss": 0.02495955, "memory(GiB)": 13.7, "step": 104340, "train_speed(iter/s)": 1.529534 }, { "acc": 0.99196434, "epoch": 48.90789782048277, "grad_norm": 4.4513325691223145, "learning_rate": 1.3764054637938613e-08, "loss": 0.0453952, "memory(GiB)": 13.7, "step": 104345, "train_speed(iter/s)": 1.529539 }, { "acc": 0.97756367, "epoch": 48.91024138739161, "grad_norm": 2.9109723567962646, "learning_rate": 1.3708762637274766e-08, "loss": 0.04792781, "memory(GiB)": 13.7, "step": 104350, "train_speed(iter/s)": 1.529541 }, { "acc": 0.98968754, "epoch": 48.91258495430044, "grad_norm": 7.18223762512207, "learning_rate": 1.3653590502689478e-08, "loss": 0.03359788, "memory(GiB)": 13.7, "step": 104355, "train_speed(iter/s)": 1.529539 }, { "acc": 0.9930769, "epoch": 48.91492852120928, "grad_norm": 0.3709864318370819, "learning_rate": 1.3598538235508774e-08, "loss": 0.0287373, "memory(GiB)": 13.7, "step": 104360, "train_speed(iter/s)": 1.529537 }, { "acc": 0.99750004, "epoch": 48.91727208811812, "grad_norm": 2.132063388824463, "learning_rate": 1.3543605837055907e-08, "loss": 0.02998282, "memory(GiB)": 13.7, "step": 104365, "train_speed(iter/s)": 1.529543 }, { "acc": 0.98708324, "epoch": 48.91961565502695, "grad_norm": 0.0003786964516621083, "learning_rate": 1.3488793308651356e-08, "loss": 0.02533112, "memory(GiB)": 13.7, "step": 104370, "train_speed(iter/s)": 1.529546 }, { "acc": 0.9822916, "epoch": 48.921959221935786, "grad_norm": 2.917996406555176, "learning_rate": 1.3434100651612272e-08, "loss": 0.02536038, "memory(GiB)": 13.7, "step": 104375, "train_speed(iter/s)": 1.529545 }, { "acc": 0.98121986, "epoch": 48.92430278884462, "grad_norm": 5.544652938842773, "learning_rate": 1.3379527867254131e-08, "loss": 0.03315379, "memory(GiB)": 13.7, "step": 104380, "train_speed(iter/s)": 1.529547 }, { "acc": 0.97250004, "epoch": 48.926646355753455, "grad_norm": 0.013014939613640308, "learning_rate": 1.3325074956887983e-08, "loss": 0.03288813, "memory(GiB)": 13.7, "step": 104385, "train_speed(iter/s)": 1.529547 }, { "acc": 0.990625, "epoch": 48.92898992266229, "grad_norm": 5.299862384796143, "learning_rate": 1.32707419218232e-08, "loss": 0.04146809, "memory(GiB)": 13.7, "step": 104390, "train_speed(iter/s)": 1.529547 }, { "acc": 0.977985, "epoch": 48.93133348957113, "grad_norm": 5.629836082458496, "learning_rate": 1.3216528763364719e-08, "loss": 0.08643458, "memory(GiB)": 13.7, "step": 104395, "train_speed(iter/s)": 1.529549 }, { "acc": 0.97633924, "epoch": 48.933677056479965, "grad_norm": 1.337059736251831, "learning_rate": 1.3162435482816925e-08, "loss": 0.04031453, "memory(GiB)": 13.7, "step": 104400, "train_speed(iter/s)": 1.529552 }, { "acc": 0.97946434, "epoch": 48.9360206233888, "grad_norm": 2.747858762741089, "learning_rate": 1.3108462081479754e-08, "loss": 0.04195462, "memory(GiB)": 13.7, "step": 104405, "train_speed(iter/s)": 1.529552 }, { "acc": 0.97826633, "epoch": 48.938364190297634, "grad_norm": 0.10984058678150177, "learning_rate": 1.305460856064982e-08, "loss": 0.06294862, "memory(GiB)": 13.7, "step": 104410, "train_speed(iter/s)": 1.529549 }, { "acc": 0.98048611, "epoch": 48.94070775720647, "grad_norm": 2.9970273971557617, "learning_rate": 1.3000874921622071e-08, "loss": 0.04952572, "memory(GiB)": 13.7, "step": 104415, "train_speed(iter/s)": 1.529552 }, { "acc": 0.98883934, "epoch": 48.9430513241153, "grad_norm": 3.825718641281128, "learning_rate": 1.294726116568867e-08, "loss": 0.01798294, "memory(GiB)": 13.7, "step": 104420, "train_speed(iter/s)": 1.52955 }, { "acc": 0.98779764, "epoch": 48.94539489102414, "grad_norm": 3.6229348182678223, "learning_rate": 1.2893767294137356e-08, "loss": 0.02375346, "memory(GiB)": 13.7, "step": 104425, "train_speed(iter/s)": 1.529554 }, { "acc": 0.9864584, "epoch": 48.94773845793297, "grad_norm": 3.558051586151123, "learning_rate": 1.2840393308254186e-08, "loss": 0.03360307, "memory(GiB)": 13.7, "step": 104430, "train_speed(iter/s)": 1.529554 }, { "acc": 0.98687496, "epoch": 48.95008202484181, "grad_norm": 2.1662724018096924, "learning_rate": 1.2787139209322457e-08, "loss": 0.04498395, "memory(GiB)": 13.7, "step": 104435, "train_speed(iter/s)": 1.529555 }, { "acc": 0.9791667, "epoch": 48.95242559175065, "grad_norm": 1.1766752004623413, "learning_rate": 1.2734004998621563e-08, "loss": 0.04649909, "memory(GiB)": 13.7, "step": 104440, "train_speed(iter/s)": 1.529556 }, { "acc": 0.974757, "epoch": 48.95476915865948, "grad_norm": 4.351765155792236, "learning_rate": 1.2680990677429252e-08, "loss": 0.0508759, "memory(GiB)": 13.7, "step": 104445, "train_speed(iter/s)": 1.529559 }, { "acc": 0.97729168, "epoch": 48.957112725568315, "grad_norm": 3.606748342514038, "learning_rate": 1.262809624701993e-08, "loss": 0.03761332, "memory(GiB)": 13.7, "step": 104450, "train_speed(iter/s)": 1.529563 }, { "acc": 0.97974205, "epoch": 48.95945629247715, "grad_norm": 2.383573293685913, "learning_rate": 1.2575321708664676e-08, "loss": 0.04151014, "memory(GiB)": 13.7, "step": 104455, "train_speed(iter/s)": 1.529564 }, { "acc": 0.98520832, "epoch": 48.961799859385984, "grad_norm": 1.2712494134902954, "learning_rate": 1.2522667063632345e-08, "loss": 0.03393054, "memory(GiB)": 13.7, "step": 104460, "train_speed(iter/s)": 1.529571 }, { "acc": 0.98727684, "epoch": 48.96414342629482, "grad_norm": 4.028763771057129, "learning_rate": 1.247013231318791e-08, "loss": 0.02692011, "memory(GiB)": 13.7, "step": 104465, "train_speed(iter/s)": 1.529573 }, { "acc": 0.97458344, "epoch": 48.96648699320366, "grad_norm": 2.28798770904541, "learning_rate": 1.241771745859468e-08, "loss": 0.03873927, "memory(GiB)": 13.7, "step": 104470, "train_speed(iter/s)": 1.529574 }, { "acc": 0.97875004, "epoch": 48.968830560112494, "grad_norm": 4.712602138519287, "learning_rate": 1.2365422501112076e-08, "loss": 0.03439794, "memory(GiB)": 13.7, "step": 104475, "train_speed(iter/s)": 1.529576 }, { "acc": 0.96902418, "epoch": 48.97117412702133, "grad_norm": 4.308620929718018, "learning_rate": 1.2313247441997856e-08, "loss": 0.07464043, "memory(GiB)": 13.7, "step": 104480, "train_speed(iter/s)": 1.529579 }, { "acc": 0.97833338, "epoch": 48.97351769393016, "grad_norm": 4.327332019805908, "learning_rate": 1.226119228250534e-08, "loss": 0.03274084, "memory(GiB)": 13.7, "step": 104485, "train_speed(iter/s)": 1.529581 }, { "acc": 0.99080811, "epoch": 48.975861260839, "grad_norm": 2.5225143432617188, "learning_rate": 1.2209257023886726e-08, "loss": 0.02463522, "memory(GiB)": 13.7, "step": 104490, "train_speed(iter/s)": 1.529581 }, { "acc": 0.98354168, "epoch": 48.97820482774783, "grad_norm": 1.793297290802002, "learning_rate": 1.2157441667389231e-08, "loss": 0.03647383, "memory(GiB)": 13.7, "step": 104495, "train_speed(iter/s)": 1.52958 }, { "acc": 0.9833334, "epoch": 48.980548394656665, "grad_norm": 2.3358633518218994, "learning_rate": 1.2105746214258952e-08, "loss": 0.02321726, "memory(GiB)": 13.7, "step": 104500, "train_speed(iter/s)": 1.52958 }, { "acc": 0.99300594, "epoch": 48.9828919615655, "grad_norm": 2.6426844596862793, "learning_rate": 1.2054170665738664e-08, "loss": 0.02190489, "memory(GiB)": 13.7, "step": 104505, "train_speed(iter/s)": 1.529578 }, { "acc": 0.97791662, "epoch": 48.98523552847434, "grad_norm": 2.51631760597229, "learning_rate": 1.2002715023067248e-08, "loss": 0.04146225, "memory(GiB)": 13.7, "step": 104510, "train_speed(iter/s)": 1.52958 }, { "acc": 0.98083334, "epoch": 48.987579095383175, "grad_norm": 2.011866807937622, "learning_rate": 1.1951379287482483e-08, "loss": 0.027093, "memory(GiB)": 13.7, "step": 104515, "train_speed(iter/s)": 1.529585 }, { "acc": 0.9854167, "epoch": 48.98992266229201, "grad_norm": 0.003053609048947692, "learning_rate": 1.1900163460217698e-08, "loss": 0.02932625, "memory(GiB)": 13.7, "step": 104520, "train_speed(iter/s)": 1.529587 }, { "acc": 0.98467264, "epoch": 48.992266229200844, "grad_norm": 0.45765721797943115, "learning_rate": 1.1849067542504566e-08, "loss": 0.03004854, "memory(GiB)": 13.7, "step": 104525, "train_speed(iter/s)": 1.529586 }, { "acc": 0.98666668, "epoch": 48.99460979610968, "grad_norm": 2.6979384422302246, "learning_rate": 1.1798091535570313e-08, "loss": 0.0249349, "memory(GiB)": 13.7, "step": 104530, "train_speed(iter/s)": 1.529588 }, { "acc": 0.98187504, "epoch": 48.99695336301851, "grad_norm": 2.0529940128326416, "learning_rate": 1.1747235440641061e-08, "loss": 0.0374429, "memory(GiB)": 13.7, "step": 104535, "train_speed(iter/s)": 1.529589 }, { "acc": 0.9958333, "epoch": 48.99929692992735, "grad_norm": 0.9462826251983643, "learning_rate": 1.1696499258939042e-08, "loss": 0.00864565, "memory(GiB)": 13.7, "step": 104540, "train_speed(iter/s)": 1.52959 }, { "acc": 0.98624992, "epoch": 49.00164049683618, "grad_norm": 3.8294551372528076, "learning_rate": 1.1645882991683714e-08, "loss": 0.0400544, "memory(GiB)": 13.7, "step": 104545, "train_speed(iter/s)": 1.52958 }, { "acc": 0.99131947, "epoch": 49.00398406374502, "grad_norm": 3.47052264213562, "learning_rate": 1.1595386640091758e-08, "loss": 0.02298808, "memory(GiB)": 13.7, "step": 104550, "train_speed(iter/s)": 1.529577 }, { "acc": 0.99236107, "epoch": 49.00632763065386, "grad_norm": 2.9047582149505615, "learning_rate": 1.1545010205376528e-08, "loss": 0.01401495, "memory(GiB)": 13.7, "step": 104555, "train_speed(iter/s)": 1.529579 }, { "acc": 0.97833328, "epoch": 49.00867119756269, "grad_norm": 5.5088210105896, "learning_rate": 1.1494753688749712e-08, "loss": 0.07700101, "memory(GiB)": 13.7, "step": 104560, "train_speed(iter/s)": 1.529582 }, { "acc": 0.9984375, "epoch": 49.011014764471525, "grad_norm": 2.0925490856170654, "learning_rate": 1.1444617091418004e-08, "loss": 0.0372751, "memory(GiB)": 13.7, "step": 104565, "train_speed(iter/s)": 1.529583 }, { "acc": 0.99250002, "epoch": 49.01335833138036, "grad_norm": 2.269315481185913, "learning_rate": 1.139460041458809e-08, "loss": 0.01766077, "memory(GiB)": 13.7, "step": 104570, "train_speed(iter/s)": 1.529587 }, { "acc": 0.98916664, "epoch": 49.015701898289194, "grad_norm": 3.480801820755005, "learning_rate": 1.134470365946167e-08, "loss": 0.02711782, "memory(GiB)": 13.7, "step": 104575, "train_speed(iter/s)": 1.52959 }, { "acc": 0.97979164, "epoch": 49.01804546519803, "grad_norm": 2.335702657699585, "learning_rate": 1.1294926827237662e-08, "loss": 0.04178834, "memory(GiB)": 13.7, "step": 104580, "train_speed(iter/s)": 1.529589 }, { "acc": 0.98798618, "epoch": 49.02038903210687, "grad_norm": 2.619144916534424, "learning_rate": 1.124526991911277e-08, "loss": 0.06312186, "memory(GiB)": 13.7, "step": 104585, "train_speed(iter/s)": 1.529594 }, { "acc": 0.99375, "epoch": 49.022732599015704, "grad_norm": 4.373849868774414, "learning_rate": 1.1195732936280362e-08, "loss": 0.01801904, "memory(GiB)": 13.7, "step": 104590, "train_speed(iter/s)": 1.529599 }, { "acc": 0.9794445, "epoch": 49.02507616592454, "grad_norm": 4.1210150718688965, "learning_rate": 1.1146315879932146e-08, "loss": 0.0351493, "memory(GiB)": 13.7, "step": 104595, "train_speed(iter/s)": 1.529601 }, { "acc": 0.99125004, "epoch": 49.02741973283337, "grad_norm": 3.4654712677001953, "learning_rate": 1.1097018751254274e-08, "loss": 0.02625915, "memory(GiB)": 13.7, "step": 104600, "train_speed(iter/s)": 1.529602 }, { "acc": 0.97645836, "epoch": 49.02976329974221, "grad_norm": 4.486721038818359, "learning_rate": 1.1047841551433457e-08, "loss": 0.04333468, "memory(GiB)": 13.7, "step": 104605, "train_speed(iter/s)": 1.529606 }, { "acc": 0.98708324, "epoch": 49.03210686665104, "grad_norm": 3.1896040439605713, "learning_rate": 1.0998784281650299e-08, "loss": 0.02697754, "memory(GiB)": 13.7, "step": 104610, "train_speed(iter/s)": 1.529604 }, { "acc": 0.98095694, "epoch": 49.034450433559876, "grad_norm": 2.6858267784118652, "learning_rate": 1.094984694308485e-08, "loss": 0.03647729, "memory(GiB)": 13.7, "step": 104615, "train_speed(iter/s)": 1.529607 }, { "acc": 0.98849211, "epoch": 49.03679400046871, "grad_norm": 1.461692214012146, "learning_rate": 1.0901029536912715e-08, "loss": 0.04106709, "memory(GiB)": 13.7, "step": 104620, "train_speed(iter/s)": 1.52961 }, { "acc": 0.98812504, "epoch": 49.03913756737755, "grad_norm": 3.7894256114959717, "learning_rate": 1.0852332064308396e-08, "loss": 0.02094611, "memory(GiB)": 13.7, "step": 104625, "train_speed(iter/s)": 1.52961 }, { "acc": 0.98562498, "epoch": 49.041481134286386, "grad_norm": 0.04985501617193222, "learning_rate": 1.0803754526441397e-08, "loss": 0.02927371, "memory(GiB)": 13.7, "step": 104630, "train_speed(iter/s)": 1.529605 }, { "acc": 0.99080353, "epoch": 49.04382470119522, "grad_norm": 3.9912822246551514, "learning_rate": 1.0755296924479551e-08, "loss": 0.03461981, "memory(GiB)": 13.7, "step": 104635, "train_speed(iter/s)": 1.529607 }, { "acc": 0.98342266, "epoch": 49.046168268104054, "grad_norm": 1.8767272233963013, "learning_rate": 1.0706959259588481e-08, "loss": 0.03751328, "memory(GiB)": 13.7, "step": 104640, "train_speed(iter/s)": 1.529607 }, { "acc": 0.99291668, "epoch": 49.04851183501289, "grad_norm": 2.293718099594116, "learning_rate": 1.0658741532928805e-08, "loss": 0.0439185, "memory(GiB)": 13.7, "step": 104645, "train_speed(iter/s)": 1.529609 }, { "acc": 0.97613087, "epoch": 49.05085540192172, "grad_norm": 4.797766208648682, "learning_rate": 1.0610643745660036e-08, "loss": 0.04002482, "memory(GiB)": 13.7, "step": 104650, "train_speed(iter/s)": 1.529608 }, { "acc": 0.98270836, "epoch": 49.05319896883056, "grad_norm": 0.7842454314231873, "learning_rate": 1.0562665898938356e-08, "loss": 0.04343263, "memory(GiB)": 13.7, "step": 104655, "train_speed(iter/s)": 1.529613 }, { "acc": 0.98874998, "epoch": 49.0555425357394, "grad_norm": 2.0160794258117676, "learning_rate": 1.0514807993917174e-08, "loss": 0.02508175, "memory(GiB)": 13.7, "step": 104660, "train_speed(iter/s)": 1.529615 }, { "acc": 0.97854176, "epoch": 49.05788610264823, "grad_norm": 4.240291118621826, "learning_rate": 1.0467070031746561e-08, "loss": 0.04511137, "memory(GiB)": 13.7, "step": 104665, "train_speed(iter/s)": 1.529613 }, { "acc": 0.98725414, "epoch": 49.06022966955707, "grad_norm": 0.6833919882774353, "learning_rate": 1.0419452013573823e-08, "loss": 0.03734979, "memory(GiB)": 13.7, "step": 104670, "train_speed(iter/s)": 1.529613 }, { "acc": 0.97479162, "epoch": 49.0625732364659, "grad_norm": 0.9057836532592773, "learning_rate": 1.0371953940544036e-08, "loss": 0.04588552, "memory(GiB)": 13.7, "step": 104675, "train_speed(iter/s)": 1.529613 }, { "acc": 0.9927083, "epoch": 49.064916803374736, "grad_norm": 1.4734501838684082, "learning_rate": 1.0324575813798399e-08, "loss": 0.02367333, "memory(GiB)": 13.7, "step": 104680, "train_speed(iter/s)": 1.529613 }, { "acc": 0.9947916, "epoch": 49.06726037028357, "grad_norm": 3.6954421997070312, "learning_rate": 1.0277317634476438e-08, "loss": 0.04851321, "memory(GiB)": 13.7, "step": 104685, "train_speed(iter/s)": 1.529611 }, { "acc": 0.99135418, "epoch": 49.069603937192404, "grad_norm": 4.8341169357299805, "learning_rate": 1.0230179403713244e-08, "loss": 0.04111952, "memory(GiB)": 13.7, "step": 104690, "train_speed(iter/s)": 1.529613 }, { "acc": 0.98916664, "epoch": 49.07194750410124, "grad_norm": 0.8910704255104065, "learning_rate": 1.0183161122642242e-08, "loss": 0.02890015, "memory(GiB)": 13.7, "step": 104695, "train_speed(iter/s)": 1.529612 }, { "acc": 0.98792582, "epoch": 49.07429107101008, "grad_norm": 0.0009944095509126782, "learning_rate": 1.0136262792393528e-08, "loss": 0.06219156, "memory(GiB)": 13.7, "step": 104700, "train_speed(iter/s)": 1.529613 }, { "acc": 0.984375, "epoch": 49.076634637918914, "grad_norm": 2.617180824279785, "learning_rate": 1.0089484414094971e-08, "loss": 0.02672171, "memory(GiB)": 13.7, "step": 104705, "train_speed(iter/s)": 1.529614 }, { "acc": 0.95216351, "epoch": 49.07897820482775, "grad_norm": 4.805970668792725, "learning_rate": 1.0042825988869456e-08, "loss": 0.09492078, "memory(GiB)": 13.7, "step": 104710, "train_speed(iter/s)": 1.529616 }, { "acc": 0.996875, "epoch": 49.08132177173658, "grad_norm": 0.5186606049537659, "learning_rate": 9.996287517840408e-09, "loss": 0.01015964, "memory(GiB)": 13.7, "step": 104715, "train_speed(iter/s)": 1.529621 }, { "acc": 0.9947917, "epoch": 49.08366533864542, "grad_norm": 1.0848759412765503, "learning_rate": 9.949869002125162e-09, "loss": 0.02708628, "memory(GiB)": 13.7, "step": 104720, "train_speed(iter/s)": 1.529621 }, { "acc": 0.97948103, "epoch": 49.08600890555425, "grad_norm": 2.316798448562622, "learning_rate": 9.903570442839931e-09, "loss": 0.03366124, "memory(GiB)": 13.7, "step": 104725, "train_speed(iter/s)": 1.529623 }, { "acc": 0.99072914, "epoch": 49.088352472463086, "grad_norm": 2.3818018436431885, "learning_rate": 9.857391841097603e-09, "loss": 0.0422146, "memory(GiB)": 13.7, "step": 104730, "train_speed(iter/s)": 1.529625 }, { "acc": 0.98624992, "epoch": 49.09069603937193, "grad_norm": 0.00976604875177145, "learning_rate": 9.811333198007737e-09, "loss": 0.0184894, "memory(GiB)": 13.7, "step": 104735, "train_speed(iter/s)": 1.529625 }, { "acc": 0.98946428, "epoch": 49.09303960628076, "grad_norm": 4.87168550491333, "learning_rate": 9.76539451467767e-09, "loss": 0.02906895, "memory(GiB)": 13.7, "step": 104740, "train_speed(iter/s)": 1.529627 }, { "acc": 0.98708334, "epoch": 49.095383173189596, "grad_norm": 0.0008192823734134436, "learning_rate": 9.719575792211961e-09, "loss": 0.04184359, "memory(GiB)": 13.7, "step": 104745, "train_speed(iter/s)": 1.52963 }, { "acc": 0.9895834, "epoch": 49.09772674009843, "grad_norm": 1.0859556198120117, "learning_rate": 9.673877031711287e-09, "loss": 0.02280942, "memory(GiB)": 13.7, "step": 104750, "train_speed(iter/s)": 1.529626 }, { "acc": 0.99548607, "epoch": 49.100070307007265, "grad_norm": 1.0291403532028198, "learning_rate": 9.628298234275214e-09, "loss": 0.03484513, "memory(GiB)": 13.7, "step": 104755, "train_speed(iter/s)": 1.529628 }, { "acc": 0.99305553, "epoch": 49.1024138739161, "grad_norm": 0.21798355877399445, "learning_rate": 9.582839400997757e-09, "loss": 0.01116546, "memory(GiB)": 13.7, "step": 104760, "train_speed(iter/s)": 1.529628 }, { "acc": 0.990625, "epoch": 49.10475744082493, "grad_norm": 3.0205001831054688, "learning_rate": 9.537500532972933e-09, "loss": 0.01909352, "memory(GiB)": 13.7, "step": 104765, "train_speed(iter/s)": 1.529632 }, { "acc": 0.99333334, "epoch": 49.10710100773377, "grad_norm": 1.7598077058792114, "learning_rate": 9.49228163128976e-09, "loss": 0.07099306, "memory(GiB)": 13.7, "step": 104770, "train_speed(iter/s)": 1.529632 }, { "acc": 0.9875, "epoch": 49.10944457464261, "grad_norm": 1.8194000720977783, "learning_rate": 9.447182697035594e-09, "loss": 0.02604162, "memory(GiB)": 13.7, "step": 104775, "train_speed(iter/s)": 1.529636 }, { "acc": 0.98946428, "epoch": 49.11178814155144, "grad_norm": 2.8155770301818848, "learning_rate": 9.402203731293903e-09, "loss": 0.03014876, "memory(GiB)": 13.7, "step": 104780, "train_speed(iter/s)": 1.529641 }, { "acc": 0.99571428, "epoch": 49.11413170846028, "grad_norm": 2.115804672241211, "learning_rate": 9.357344735146492e-09, "loss": 0.02392915, "memory(GiB)": 13.7, "step": 104785, "train_speed(iter/s)": 1.529642 }, { "acc": 0.98883934, "epoch": 49.11647527536911, "grad_norm": 4.707930564880371, "learning_rate": 9.312605709671279e-09, "loss": 0.03741278, "memory(GiB)": 13.7, "step": 104790, "train_speed(iter/s)": 1.529644 }, { "acc": 0.9822916, "epoch": 49.118818842277946, "grad_norm": 3.3582065105438232, "learning_rate": 9.267986655943963e-09, "loss": 0.03566592, "memory(GiB)": 13.7, "step": 104795, "train_speed(iter/s)": 1.529648 }, { "acc": 0.99122906, "epoch": 49.12116240918678, "grad_norm": 0.03854374587535858, "learning_rate": 9.223487575036358e-09, "loss": 0.01365057, "memory(GiB)": 13.7, "step": 104800, "train_speed(iter/s)": 1.529649 }, { "acc": 0.98916664, "epoch": 49.123505976095615, "grad_norm": 4.380741596221924, "learning_rate": 9.179108468018612e-09, "loss": 0.03625883, "memory(GiB)": 13.7, "step": 104805, "train_speed(iter/s)": 1.529652 }, { "acc": 0.98812504, "epoch": 49.125849543004456, "grad_norm": 4.762993812561035, "learning_rate": 9.13484933595754e-09, "loss": 0.02123127, "memory(GiB)": 13.7, "step": 104810, "train_speed(iter/s)": 1.529654 }, { "acc": 0.97433605, "epoch": 49.12819310991329, "grad_norm": 3.147874593734741, "learning_rate": 9.090710179916634e-09, "loss": 0.06637155, "memory(GiB)": 13.7, "step": 104815, "train_speed(iter/s)": 1.529661 }, { "acc": 0.98812504, "epoch": 49.130536676822125, "grad_norm": 0.16150106489658356, "learning_rate": 9.046691000957154e-09, "loss": 0.02073309, "memory(GiB)": 13.7, "step": 104820, "train_speed(iter/s)": 1.529666 }, { "acc": 0.99250002, "epoch": 49.13288024373096, "grad_norm": 5.344255259842612e-05, "learning_rate": 9.00279180013649e-09, "loss": 0.04190096, "memory(GiB)": 13.7, "step": 104825, "train_speed(iter/s)": 1.529671 }, { "acc": 1.0, "epoch": 49.13522381063979, "grad_norm": 1.9682669639587402, "learning_rate": 8.959012578510908e-09, "loss": 0.01298151, "memory(GiB)": 13.7, "step": 104830, "train_speed(iter/s)": 1.529668 }, { "acc": 0.98673611, "epoch": 49.13756737754863, "grad_norm": 5.815371513366699, "learning_rate": 8.915353337131685e-09, "loss": 0.04411945, "memory(GiB)": 13.7, "step": 104835, "train_speed(iter/s)": 1.529675 }, { "acc": 0.978125, "epoch": 49.13991094445746, "grad_norm": 3.5529491901397705, "learning_rate": 8.871814077049543e-09, "loss": 0.05171816, "memory(GiB)": 13.7, "step": 104840, "train_speed(iter/s)": 1.529673 }, { "acc": 0.9885417, "epoch": 49.142254511366296, "grad_norm": 3.5649235248565674, "learning_rate": 8.828394799309651e-09, "loss": 0.0294856, "memory(GiB)": 13.7, "step": 104845, "train_speed(iter/s)": 1.529677 }, { "acc": 0.98260422, "epoch": 49.14459807827514, "grad_norm": 0.9141724705696106, "learning_rate": 8.785095504956072e-09, "loss": 0.03285499, "memory(GiB)": 13.7, "step": 104850, "train_speed(iter/s)": 1.529678 }, { "acc": 0.97726192, "epoch": 49.14694164518397, "grad_norm": 1.775970458984375, "learning_rate": 8.74191619503009e-09, "loss": 0.05914614, "memory(GiB)": 13.7, "step": 104855, "train_speed(iter/s)": 1.529683 }, { "acc": 0.98217258, "epoch": 49.149285212092806, "grad_norm": 3.322575569152832, "learning_rate": 8.698856870569102e-09, "loss": 0.03576128, "memory(GiB)": 13.7, "step": 104860, "train_speed(iter/s)": 1.529683 }, { "acc": 0.972822, "epoch": 49.15162877900164, "grad_norm": 6.443047523498535, "learning_rate": 8.655917532608846e-09, "loss": 0.04327208, "memory(GiB)": 13.7, "step": 104865, "train_speed(iter/s)": 1.529684 }, { "acc": 0.98915176, "epoch": 49.153972345910475, "grad_norm": 2.338242530822754, "learning_rate": 8.613098182180059e-09, "loss": 0.03487952, "memory(GiB)": 13.7, "step": 104870, "train_speed(iter/s)": 1.529685 }, { "acc": 0.98812504, "epoch": 49.15631591281931, "grad_norm": 4.9935712814331055, "learning_rate": 8.570398820312925e-09, "loss": 0.03033462, "memory(GiB)": 13.7, "step": 104875, "train_speed(iter/s)": 1.529688 }, { "acc": 0.9895833, "epoch": 49.15865947972814, "grad_norm": 0.0014905744465067983, "learning_rate": 8.527819448033744e-09, "loss": 0.01590476, "memory(GiB)": 13.7, "step": 104880, "train_speed(iter/s)": 1.529693 }, { "acc": 0.98458328, "epoch": 49.161003046636985, "grad_norm": 3.2030811309814453, "learning_rate": 8.485360066366039e-09, "loss": 0.05524668, "memory(GiB)": 13.7, "step": 104885, "train_speed(iter/s)": 1.529697 }, { "acc": 0.98308983, "epoch": 49.16334661354582, "grad_norm": 3.3578333854675293, "learning_rate": 8.44302067633e-09, "loss": 0.05019114, "memory(GiB)": 13.7, "step": 104890, "train_speed(iter/s)": 1.529694 }, { "acc": 0.9875, "epoch": 49.165690180454654, "grad_norm": 3.4378740787506104, "learning_rate": 8.400801278943604e-09, "loss": 0.03242692, "memory(GiB)": 13.7, "step": 104895, "train_speed(iter/s)": 1.529699 }, { "acc": 0.99229164, "epoch": 49.16803374736349, "grad_norm": 3.6656301021575928, "learning_rate": 8.358701875222045e-09, "loss": 0.01560802, "memory(GiB)": 13.7, "step": 104900, "train_speed(iter/s)": 1.529699 }, { "acc": 0.99074993, "epoch": 49.17037731427232, "grad_norm": 1.7782105207443237, "learning_rate": 8.316722466176082e-09, "loss": 0.02183592, "memory(GiB)": 13.7, "step": 104905, "train_speed(iter/s)": 1.529705 }, { "acc": 0.98633928, "epoch": 49.17272088118116, "grad_norm": 3.1609511375427246, "learning_rate": 8.274863052816472e-09, "loss": 0.05476795, "memory(GiB)": 13.7, "step": 104910, "train_speed(iter/s)": 1.52971 }, { "acc": 0.99174681, "epoch": 49.17506444808999, "grad_norm": 3.2849135398864746, "learning_rate": 8.23312363614731e-09, "loss": 0.03273807, "memory(GiB)": 13.7, "step": 104915, "train_speed(iter/s)": 1.529707 }, { "acc": 0.99508934, "epoch": 49.177408014998825, "grad_norm": 3.756580114364624, "learning_rate": 8.191504217173802e-09, "loss": 0.02166037, "memory(GiB)": 13.7, "step": 104920, "train_speed(iter/s)": 1.529709 }, { "acc": 0.99291668, "epoch": 49.17975158190767, "grad_norm": 0.8504369258880615, "learning_rate": 8.150004796895052e-09, "loss": 0.03248801, "memory(GiB)": 13.7, "step": 104925, "train_speed(iter/s)": 1.529708 }, { "acc": 0.99321432, "epoch": 49.1820951488165, "grad_norm": 0.004548956640064716, "learning_rate": 8.108625376309602e-09, "loss": 0.00988941, "memory(GiB)": 13.7, "step": 104930, "train_speed(iter/s)": 1.529711 }, { "acc": 0.98631945, "epoch": 49.184438715725335, "grad_norm": 3.6524717807769775, "learning_rate": 8.067365956411004e-09, "loss": 0.0355721, "memory(GiB)": 13.7, "step": 104935, "train_speed(iter/s)": 1.529719 }, { "acc": 0.98701925, "epoch": 49.18678228263417, "grad_norm": 0.8484195470809937, "learning_rate": 8.0262265381917e-09, "loss": 0.02767796, "memory(GiB)": 13.7, "step": 104940, "train_speed(iter/s)": 1.529724 }, { "acc": 0.98383923, "epoch": 49.189125849543004, "grad_norm": 2.065727710723877, "learning_rate": 7.985207122640241e-09, "loss": 0.04268257, "memory(GiB)": 13.7, "step": 104945, "train_speed(iter/s)": 1.529726 }, { "acc": 0.97875004, "epoch": 49.19146941645184, "grad_norm": 0.11755339056253433, "learning_rate": 7.944307710742962e-09, "loss": 0.03886267, "memory(GiB)": 13.7, "step": 104950, "train_speed(iter/s)": 1.529725 }, { "acc": 0.99035721, "epoch": 49.19381298336067, "grad_norm": 3.183455467224121, "learning_rate": 7.90352830348287e-09, "loss": 0.04056768, "memory(GiB)": 13.7, "step": 104955, "train_speed(iter/s)": 1.529729 }, { "acc": 0.9833334, "epoch": 49.196156550269514, "grad_norm": 4.1196136474609375, "learning_rate": 7.862868901839637e-09, "loss": 0.04360595, "memory(GiB)": 13.7, "step": 104960, "train_speed(iter/s)": 1.529729 }, { "acc": 0.97508926, "epoch": 49.19850011717835, "grad_norm": 2.85622239112854, "learning_rate": 7.822329506790717e-09, "loss": 0.04319229, "memory(GiB)": 13.7, "step": 104965, "train_speed(iter/s)": 1.529727 }, { "acc": 0.98018436, "epoch": 49.20084368408718, "grad_norm": 1.509247064590454, "learning_rate": 7.781910119311343e-09, "loss": 0.03207938, "memory(GiB)": 13.7, "step": 104970, "train_speed(iter/s)": 1.52973 }, { "acc": 0.98497477, "epoch": 49.20318725099602, "grad_norm": 3.7131574153900146, "learning_rate": 7.741610740372307e-09, "loss": 0.04160586, "memory(GiB)": 13.7, "step": 104975, "train_speed(iter/s)": 1.529731 }, { "acc": 0.98895292, "epoch": 49.20553081790485, "grad_norm": 2.4598498344421387, "learning_rate": 7.701431370942186e-09, "loss": 0.02609386, "memory(GiB)": 13.7, "step": 104980, "train_speed(iter/s)": 1.529732 }, { "acc": 0.99541664, "epoch": 49.207874384813685, "grad_norm": 0.0007303363527171314, "learning_rate": 7.661372011987329e-09, "loss": 0.02124287, "memory(GiB)": 13.7, "step": 104985, "train_speed(iter/s)": 1.529733 }, { "acc": 0.98460312, "epoch": 49.21021795172252, "grad_norm": 2.4010562896728516, "learning_rate": 7.6214326644702e-09, "loss": 0.03124895, "memory(GiB)": 13.7, "step": 104990, "train_speed(iter/s)": 1.529732 }, { "acc": 0.98132439, "epoch": 49.212561518631354, "grad_norm": 3.9194319248199463, "learning_rate": 7.581613329351052e-09, "loss": 0.05506539, "memory(GiB)": 13.7, "step": 104995, "train_speed(iter/s)": 1.529734 }, { "acc": 0.97904758, "epoch": 49.214905085540195, "grad_norm": 4.108042240142822, "learning_rate": 7.5419140075868e-09, "loss": 0.05085544, "memory(GiB)": 13.7, "step": 105000, "train_speed(iter/s)": 1.529733 }, { "acc": 0.98395834, "epoch": 49.21724865244903, "grad_norm": 5.160592555999756, "learning_rate": 7.502334700132138e-09, "loss": 0.02871893, "memory(GiB)": 13.7, "step": 105005, "train_speed(iter/s)": 1.529735 }, { "acc": 0.98062496, "epoch": 49.219592219357864, "grad_norm": 0.013725596480071545, "learning_rate": 7.462875407937879e-09, "loss": 0.03130301, "memory(GiB)": 13.7, "step": 105010, "train_speed(iter/s)": 1.529736 }, { "acc": 0.9895833, "epoch": 49.2219357862667, "grad_norm": 1.5801414251327515, "learning_rate": 7.423536131952611e-09, "loss": 0.02146957, "memory(GiB)": 13.7, "step": 105015, "train_speed(iter/s)": 1.529739 }, { "acc": 0.99011364, "epoch": 49.22427935317553, "grad_norm": 2.048954963684082, "learning_rate": 7.384316873121598e-09, "loss": 0.03463945, "memory(GiB)": 13.7, "step": 105020, "train_speed(iter/s)": 1.529744 }, { "acc": 0.9875, "epoch": 49.22662292008437, "grad_norm": 2.2397522926330566, "learning_rate": 7.345217632388433e-09, "loss": 0.02498957, "memory(GiB)": 13.7, "step": 105025, "train_speed(iter/s)": 1.529748 }, { "acc": 0.996875, "epoch": 49.2289664869932, "grad_norm": 0.0025017340667545795, "learning_rate": 7.3062384106922715e-09, "loss": 0.0165011, "memory(GiB)": 13.7, "step": 105030, "train_speed(iter/s)": 1.529749 }, { "acc": 0.97444448, "epoch": 49.231310053902035, "grad_norm": 1.8364286422729492, "learning_rate": 7.2673792089706025e-09, "loss": 0.05550981, "memory(GiB)": 13.7, "step": 105035, "train_speed(iter/s)": 1.529751 }, { "acc": 0.98715277, "epoch": 49.23365362081088, "grad_norm": 4.015614986419678, "learning_rate": 7.22864002815703e-09, "loss": 0.02992998, "memory(GiB)": 13.7, "step": 105040, "train_speed(iter/s)": 1.529755 }, { "acc": 0.9953125, "epoch": 49.23599718771971, "grad_norm": 1.111504077911377, "learning_rate": 7.190020869182385e-09, "loss": 0.02408005, "memory(GiB)": 13.7, "step": 105045, "train_speed(iter/s)": 1.529757 }, { "acc": 0.97621536, "epoch": 49.238340754628545, "grad_norm": 5.4000244140625, "learning_rate": 7.151521732975829e-09, "loss": 0.05473609, "memory(GiB)": 13.7, "step": 105050, "train_speed(iter/s)": 1.529756 }, { "acc": 0.9786397, "epoch": 49.24068432153738, "grad_norm": 3.5736029148101807, "learning_rate": 7.113142620462086e-09, "loss": 0.04682875, "memory(GiB)": 13.7, "step": 105055, "train_speed(iter/s)": 1.529755 }, { "acc": 0.97979164, "epoch": 49.243027888446214, "grad_norm": 2.8122098445892334, "learning_rate": 7.074883532563659e-09, "loss": 0.04329576, "memory(GiB)": 13.7, "step": 105060, "train_speed(iter/s)": 1.529759 }, { "acc": 0.99541664, "epoch": 49.24537145535505, "grad_norm": 0.8408856391906738, "learning_rate": 7.036744470200276e-09, "loss": 0.02856985, "memory(GiB)": 13.7, "step": 105065, "train_speed(iter/s)": 1.529762 }, { "acc": 0.9731945, "epoch": 49.24771502226388, "grad_norm": 5.228612899780273, "learning_rate": 6.998725434288888e-09, "loss": 0.05250832, "memory(GiB)": 13.7, "step": 105070, "train_speed(iter/s)": 1.529765 }, { "acc": 0.9885416, "epoch": 49.250058589172724, "grad_norm": 5.458358287811279, "learning_rate": 6.960826425743119e-09, "loss": 0.03534655, "memory(GiB)": 13.7, "step": 105075, "train_speed(iter/s)": 1.529763 }, { "acc": 0.99077454, "epoch": 49.25240215608156, "grad_norm": 0.9102423191070557, "learning_rate": 6.923047445474373e-09, "loss": 0.04098032, "memory(GiB)": 13.7, "step": 105080, "train_speed(iter/s)": 1.529768 }, { "acc": 0.98833332, "epoch": 49.25474572299039, "grad_norm": 3.1445436477661133, "learning_rate": 6.885388494390162e-09, "loss": 0.043763, "memory(GiB)": 13.7, "step": 105085, "train_speed(iter/s)": 1.52977 }, { "acc": 0.9840476, "epoch": 49.25708928989923, "grad_norm": 3.1157166957855225, "learning_rate": 6.847849573395785e-09, "loss": 0.03877613, "memory(GiB)": 13.7, "step": 105090, "train_speed(iter/s)": 1.529772 }, { "acc": 0.98675594, "epoch": 49.25943285680806, "grad_norm": 0.0019919173792004585, "learning_rate": 6.810430683394316e-09, "loss": 0.04560091, "memory(GiB)": 13.7, "step": 105095, "train_speed(iter/s)": 1.529776 }, { "acc": 0.984375, "epoch": 49.261776423716896, "grad_norm": 4.118205547332764, "learning_rate": 6.773131825283839e-09, "loss": 0.02124373, "memory(GiB)": 13.7, "step": 105100, "train_speed(iter/s)": 1.529778 }, { "acc": 0.97238102, "epoch": 49.26411999062573, "grad_norm": 9.79727554321289, "learning_rate": 6.735952999961877e-09, "loss": 0.05584344, "memory(GiB)": 13.7, "step": 105105, "train_speed(iter/s)": 1.529779 }, { "acc": 0.98708324, "epoch": 49.266463557534564, "grad_norm": 3.472743511199951, "learning_rate": 6.698894208321515e-09, "loss": 0.03044588, "memory(GiB)": 13.7, "step": 105110, "train_speed(iter/s)": 1.52978 }, { "acc": 0.9760416, "epoch": 49.268807124443406, "grad_norm": 0.0962706208229065, "learning_rate": 6.661955451254176e-09, "loss": 0.03675058, "memory(GiB)": 13.7, "step": 105115, "train_speed(iter/s)": 1.529781 }, { "acc": 0.9894886, "epoch": 49.27115069135224, "grad_norm": 2.1686503887176514, "learning_rate": 6.62513672964739e-09, "loss": 0.02136518, "memory(GiB)": 13.7, "step": 105120, "train_speed(iter/s)": 1.529782 }, { "acc": 0.98895836, "epoch": 49.273494258261074, "grad_norm": 3.8468689918518066, "learning_rate": 6.588438044385919e-09, "loss": 0.04080966, "memory(GiB)": 13.7, "step": 105125, "train_speed(iter/s)": 1.529784 }, { "acc": 0.99571428, "epoch": 49.27583782516991, "grad_norm": 1.6443208456039429, "learning_rate": 6.551859396352858e-09, "loss": 0.0149463, "memory(GiB)": 13.7, "step": 105130, "train_speed(iter/s)": 1.529785 }, { "acc": 0.98791676, "epoch": 49.27818139207874, "grad_norm": 0.002471707994118333, "learning_rate": 6.515400786425747e-09, "loss": 0.05943743, "memory(GiB)": 13.7, "step": 105135, "train_speed(iter/s)": 1.529785 }, { "acc": 0.98718748, "epoch": 49.28052495898758, "grad_norm": 3.2315096855163574, "learning_rate": 6.479062215482689e-09, "loss": 0.02011911, "memory(GiB)": 13.7, "step": 105140, "train_speed(iter/s)": 1.529785 }, { "acc": 0.99229164, "epoch": 49.28286852589641, "grad_norm": 1.9840489625930786, "learning_rate": 6.442843684395674e-09, "loss": 0.01617136, "memory(GiB)": 13.7, "step": 105145, "train_speed(iter/s)": 1.529789 }, { "acc": 0.98458338, "epoch": 49.28521209280525, "grad_norm": 1.1445845365524292, "learning_rate": 6.406745194036141e-09, "loss": 0.0400013, "memory(GiB)": 13.7, "step": 105150, "train_speed(iter/s)": 1.529787 }, { "acc": 0.98562498, "epoch": 49.28755565971409, "grad_norm": 0.9215478301048279, "learning_rate": 6.370766745271641e-09, "loss": 0.03256404, "memory(GiB)": 13.7, "step": 105155, "train_speed(iter/s)": 1.529788 }, { "acc": 0.98916664, "epoch": 49.28989922662292, "grad_norm": 2.3659231662750244, "learning_rate": 6.334908338966952e-09, "loss": 0.05001085, "memory(GiB)": 13.7, "step": 105160, "train_speed(iter/s)": 1.52979 }, { "acc": 0.98862171, "epoch": 49.292242793531756, "grad_norm": 1.8838870525360107, "learning_rate": 6.299169975984078e-09, "loss": 0.04541795, "memory(GiB)": 13.7, "step": 105165, "train_speed(iter/s)": 1.529789 }, { "acc": 0.97821426, "epoch": 49.29458636044059, "grad_norm": 2.013881206512451, "learning_rate": 6.26355165718169e-09, "loss": 0.04032998, "memory(GiB)": 13.7, "step": 105170, "train_speed(iter/s)": 1.52979 }, { "acc": 0.98612175, "epoch": 49.296929927349424, "grad_norm": 2.706740140914917, "learning_rate": 6.228053383416791e-09, "loss": 0.0308682, "memory(GiB)": 13.7, "step": 105175, "train_speed(iter/s)": 1.529795 }, { "acc": 0.99375, "epoch": 49.29927349425826, "grad_norm": 2.754581928253174, "learning_rate": 6.192675155541396e-09, "loss": 0.02547511, "memory(GiB)": 13.7, "step": 105180, "train_speed(iter/s)": 1.529795 }, { "acc": 0.97730484, "epoch": 49.30161706116709, "grad_norm": 6.014913558959961, "learning_rate": 6.157416974406404e-09, "loss": 0.04163695, "memory(GiB)": 13.7, "step": 105185, "train_speed(iter/s)": 1.529794 }, { "acc": 0.97537766, "epoch": 49.303960628075934, "grad_norm": 3.9059128761291504, "learning_rate": 6.12227884085994e-09, "loss": 0.07051434, "memory(GiB)": 13.7, "step": 105190, "train_speed(iter/s)": 1.529799 }, { "acc": 0.98812504, "epoch": 49.30630419498477, "grad_norm": 2.6963276863098145, "learning_rate": 6.0872607557456906e-09, "loss": 0.01679749, "memory(GiB)": 13.7, "step": 105195, "train_speed(iter/s)": 1.529797 }, { "acc": 0.982197, "epoch": 49.3086477618936, "grad_norm": 4.227380275726318, "learning_rate": 6.052362719905676e-09, "loss": 0.04199979, "memory(GiB)": 13.7, "step": 105200, "train_speed(iter/s)": 1.529796 }, { "acc": 0.96958332, "epoch": 49.31099132880244, "grad_norm": 4.7980170249938965, "learning_rate": 6.01758473417914e-09, "loss": 0.04992968, "memory(GiB)": 13.7, "step": 105205, "train_speed(iter/s)": 1.5298 }, { "acc": 0.9864584, "epoch": 49.31333489571127, "grad_norm": 0.5022289752960205, "learning_rate": 5.982926799401996e-09, "loss": 0.04452718, "memory(GiB)": 13.7, "step": 105210, "train_speed(iter/s)": 1.529802 }, { "acc": 0.98931551, "epoch": 49.315678462620106, "grad_norm": 3.0213263034820557, "learning_rate": 5.948388916406275e-09, "loss": 0.02625077, "memory(GiB)": 13.7, "step": 105215, "train_speed(iter/s)": 1.529805 }, { "acc": 0.97979164, "epoch": 49.31802202952894, "grad_norm": 4.253551006317139, "learning_rate": 5.9139710860234484e-09, "loss": 0.03476847, "memory(GiB)": 13.7, "step": 105220, "train_speed(iter/s)": 1.52981 }, { "acc": 0.98675594, "epoch": 49.32036559643778, "grad_norm": 2.518125057220459, "learning_rate": 5.879673309079998e-09, "loss": 0.05107075, "memory(GiB)": 13.7, "step": 105225, "train_speed(iter/s)": 1.529812 }, { "acc": 0.98187504, "epoch": 49.322709163346616, "grad_norm": 5.576558589935303, "learning_rate": 5.8454955864012884e-09, "loss": 0.04110654, "memory(GiB)": 13.7, "step": 105230, "train_speed(iter/s)": 1.529815 }, { "acc": 0.97785721, "epoch": 49.32505273025545, "grad_norm": 2.043713331222534, "learning_rate": 5.8114379188071396e-09, "loss": 0.05018665, "memory(GiB)": 13.7, "step": 105235, "train_speed(iter/s)": 1.529817 }, { "acc": 0.98477678, "epoch": 49.327396297164285, "grad_norm": 4.174931526184082, "learning_rate": 5.777500307117923e-09, "loss": 0.03704801, "memory(GiB)": 13.7, "step": 105240, "train_speed(iter/s)": 1.529817 }, { "acc": 0.97875004, "epoch": 49.32973986407312, "grad_norm": 1.8104572296142578, "learning_rate": 5.743682752148462e-09, "loss": 0.0360359, "memory(GiB)": 13.7, "step": 105245, "train_speed(iter/s)": 1.529815 }, { "acc": 0.98130951, "epoch": 49.33208343098195, "grad_norm": 3.3618667125701904, "learning_rate": 5.709985254711359e-09, "loss": 0.05146354, "memory(GiB)": 13.7, "step": 105250, "train_speed(iter/s)": 1.529816 }, { "acc": 0.97994051, "epoch": 49.33442699789079, "grad_norm": 1.9714332818984985, "learning_rate": 5.67640781561755e-09, "loss": 0.07144174, "memory(GiB)": 13.7, "step": 105255, "train_speed(iter/s)": 1.529814 }, { "acc": 0.98083334, "epoch": 49.33677056479962, "grad_norm": 6.523121356964111, "learning_rate": 5.64295043567353e-09, "loss": 0.03574992, "memory(GiB)": 13.7, "step": 105260, "train_speed(iter/s)": 1.529813 }, { "acc": 0.984375, "epoch": 49.33911413170846, "grad_norm": 5.163078784942627, "learning_rate": 5.609613115684133e-09, "loss": 0.01914892, "memory(GiB)": 13.7, "step": 105265, "train_speed(iter/s)": 1.529816 }, { "acc": 0.99196434, "epoch": 49.3414576986173, "grad_norm": 0.3021620810031891, "learning_rate": 5.576395856449192e-09, "loss": 0.03619926, "memory(GiB)": 13.7, "step": 105270, "train_speed(iter/s)": 1.529816 }, { "acc": 0.98916664, "epoch": 49.34380126552613, "grad_norm": 0.04307188093662262, "learning_rate": 5.543298658768544e-09, "loss": 0.02655545, "memory(GiB)": 13.7, "step": 105275, "train_speed(iter/s)": 1.52982 }, { "acc": 0.98999996, "epoch": 49.346144832434966, "grad_norm": 0.037509702146053314, "learning_rate": 5.510321523437585e-09, "loss": 0.02574703, "memory(GiB)": 13.7, "step": 105280, "train_speed(iter/s)": 1.529818 }, { "acc": 0.99499998, "epoch": 49.3484883993438, "grad_norm": 1.3537708520889282, "learning_rate": 5.477464451248379e-09, "loss": 0.02987204, "memory(GiB)": 13.7, "step": 105285, "train_speed(iter/s)": 1.529821 }, { "acc": 0.97081356, "epoch": 49.350831966252635, "grad_norm": 3.010655641555786, "learning_rate": 5.444727442990771e-09, "loss": 0.04972669, "memory(GiB)": 13.7, "step": 105290, "train_speed(iter/s)": 1.529824 }, { "acc": 0.97645836, "epoch": 49.35317553316147, "grad_norm": 2.516545057296753, "learning_rate": 5.412110499452385e-09, "loss": 0.04367551, "memory(GiB)": 13.7, "step": 105295, "train_speed(iter/s)": 1.529825 }, { "acc": 0.98145828, "epoch": 49.35551910007031, "grad_norm": 2.381850242614746, "learning_rate": 5.379613621416405e-09, "loss": 0.03803812, "memory(GiB)": 13.7, "step": 105300, "train_speed(iter/s)": 1.529828 }, { "acc": 0.99187498, "epoch": 49.357862666979145, "grad_norm": 1.9158949851989746, "learning_rate": 5.347236809664351e-09, "loss": 0.01456355, "memory(GiB)": 13.7, "step": 105305, "train_speed(iter/s)": 1.529833 }, { "acc": 0.98093262, "epoch": 49.36020623388798, "grad_norm": 1.8494927883148193, "learning_rate": 5.3149800649744095e-09, "loss": 0.03929372, "memory(GiB)": 13.7, "step": 105310, "train_speed(iter/s)": 1.529839 }, { "acc": 0.99375, "epoch": 49.36254980079681, "grad_norm": 0.017714377492666245, "learning_rate": 5.2828433881214405e-09, "loss": 0.01326993, "memory(GiB)": 13.7, "step": 105315, "train_speed(iter/s)": 1.529841 }, { "acc": 0.98604164, "epoch": 49.36489336770565, "grad_norm": 3.9593796730041504, "learning_rate": 5.250826779878635e-09, "loss": 0.02746733, "memory(GiB)": 13.7, "step": 105320, "train_speed(iter/s)": 1.529844 }, { "acc": 0.9895833, "epoch": 49.36723693461448, "grad_norm": 0.003404242917895317, "learning_rate": 5.218930241015304e-09, "loss": 0.04137955, "memory(GiB)": 13.7, "step": 105325, "train_speed(iter/s)": 1.529849 }, { "acc": 0.98910713, "epoch": 49.369580501523316, "grad_norm": 3.35898756980896, "learning_rate": 5.187153772298532e-09, "loss": 0.0430601, "memory(GiB)": 13.7, "step": 105330, "train_speed(iter/s)": 1.529854 }, { "acc": 0.98819447, "epoch": 49.37192406843215, "grad_norm": 3.830460786819458, "learning_rate": 5.155497374491521e-09, "loss": 0.02800454, "memory(GiB)": 13.7, "step": 105335, "train_speed(iter/s)": 1.529861 }, { "acc": 0.97300596, "epoch": 49.37426763534099, "grad_norm": 3.5117485523223877, "learning_rate": 5.123961048355256e-09, "loss": 0.02705639, "memory(GiB)": 13.7, "step": 105340, "train_speed(iter/s)": 1.529865 }, { "acc": 0.98791666, "epoch": 49.376611202249826, "grad_norm": 2.346503496170044, "learning_rate": 5.092544794647939e-09, "loss": 0.02549522, "memory(GiB)": 13.7, "step": 105345, "train_speed(iter/s)": 1.529868 }, { "acc": 0.99125004, "epoch": 49.37895476915866, "grad_norm": 0.08006930351257324, "learning_rate": 5.0612486141244486e-09, "loss": 0.01474522, "memory(GiB)": 13.7, "step": 105350, "train_speed(iter/s)": 1.529871 }, { "acc": 0.99458332, "epoch": 49.381298336067495, "grad_norm": 2.3541479110717773, "learning_rate": 5.03007250753744e-09, "loss": 0.03745808, "memory(GiB)": 13.7, "step": 105355, "train_speed(iter/s)": 1.529872 }, { "acc": 0.98572922, "epoch": 49.38364190297633, "grad_norm": 2.9243147373199463, "learning_rate": 4.9990164756356855e-09, "loss": 0.03137445, "memory(GiB)": 13.7, "step": 105360, "train_speed(iter/s)": 1.529873 }, { "acc": 0.9874054, "epoch": 49.38598546988516, "grad_norm": 2.4461076259613037, "learning_rate": 4.968080519166844e-09, "loss": 0.03064879, "memory(GiB)": 13.7, "step": 105365, "train_speed(iter/s)": 1.529873 }, { "acc": 0.990625, "epoch": 49.388329036794, "grad_norm": 2.4880220890045166, "learning_rate": 4.937264638873025e-09, "loss": 0.02678589, "memory(GiB)": 13.7, "step": 105370, "train_speed(iter/s)": 1.529876 }, { "acc": 0.996875, "epoch": 49.39067260370284, "grad_norm": 1.4543315172195435, "learning_rate": 4.906568835496339e-09, "loss": 0.02710643, "memory(GiB)": 13.7, "step": 105375, "train_speed(iter/s)": 1.529874 }, { "acc": 0.9944643, "epoch": 49.39301617061167, "grad_norm": 0.0016074541490525007, "learning_rate": 4.875993109773345e-09, "loss": 0.02404538, "memory(GiB)": 13.7, "step": 105380, "train_speed(iter/s)": 1.529875 }, { "acc": 0.98705359, "epoch": 49.39535973752051, "grad_norm": 0.022094624117016792, "learning_rate": 4.845537462440048e-09, "loss": 0.03545871, "memory(GiB)": 13.7, "step": 105385, "train_speed(iter/s)": 1.52988 }, { "acc": 0.98573866, "epoch": 49.39770330442934, "grad_norm": 2.6992580890655518, "learning_rate": 4.815201894227456e-09, "loss": 0.02743948, "memory(GiB)": 13.7, "step": 105390, "train_speed(iter/s)": 1.529881 }, { "acc": 0.98916664, "epoch": 49.400046871338176, "grad_norm": 3.147531747817993, "learning_rate": 4.784986405866023e-09, "loss": 0.01334784, "memory(GiB)": 13.7, "step": 105395, "train_speed(iter/s)": 1.52988 }, { "acc": 0.97809029, "epoch": 49.40239043824701, "grad_norm": 3.3800933361053467, "learning_rate": 4.7548909980806525e-09, "loss": 0.05861857, "memory(GiB)": 13.7, "step": 105400, "train_speed(iter/s)": 1.529881 }, { "acc": 0.98619051, "epoch": 49.404734005155845, "grad_norm": 3.4374892711639404, "learning_rate": 4.724915671595691e-09, "loss": 0.04758341, "memory(GiB)": 13.7, "step": 105405, "train_speed(iter/s)": 1.529878 }, { "acc": 0.98529758, "epoch": 49.40707757206468, "grad_norm": 2.2183480262756348, "learning_rate": 4.695060427131604e-09, "loss": 0.04677847, "memory(GiB)": 13.7, "step": 105410, "train_speed(iter/s)": 1.529881 }, { "acc": 0.9885416, "epoch": 49.40942113897352, "grad_norm": 2.120934247970581, "learning_rate": 4.665325265405522e-09, "loss": 0.02098329, "memory(GiB)": 13.7, "step": 105415, "train_speed(iter/s)": 1.529882 }, { "acc": 0.98152781, "epoch": 49.411764705882355, "grad_norm": 6.261257171630859, "learning_rate": 4.6357101871323564e-09, "loss": 0.03730503, "memory(GiB)": 13.7, "step": 105420, "train_speed(iter/s)": 1.529887 }, { "acc": 0.99562502, "epoch": 49.41410827279119, "grad_norm": 4.233724594116211, "learning_rate": 4.606215193024245e-09, "loss": 0.02919469, "memory(GiB)": 13.7, "step": 105425, "train_speed(iter/s)": 1.529887 }, { "acc": 0.9864583, "epoch": 49.416451839700024, "grad_norm": 2.315531015396118, "learning_rate": 4.576840283789995e-09, "loss": 0.02619671, "memory(GiB)": 13.7, "step": 105430, "train_speed(iter/s)": 1.529889 }, { "acc": 0.98743057, "epoch": 49.41879540660886, "grad_norm": 3.5231146812438965, "learning_rate": 4.547585460135637e-09, "loss": 0.04326523, "memory(GiB)": 13.7, "step": 105435, "train_speed(iter/s)": 1.529891 }, { "acc": 0.97104168, "epoch": 49.42113897351769, "grad_norm": 5.587645053863525, "learning_rate": 4.5184507227638716e-09, "loss": 0.04679952, "memory(GiB)": 13.7, "step": 105440, "train_speed(iter/s)": 1.529897 }, { "acc": 0.97383928, "epoch": 49.42348254042653, "grad_norm": 3.2162258625030518, "learning_rate": 4.489436072375735e-09, "loss": 0.04264036, "memory(GiB)": 13.7, "step": 105445, "train_speed(iter/s)": 1.529897 }, { "acc": 0.99008923, "epoch": 49.42582610733537, "grad_norm": 0.00340660591609776, "learning_rate": 4.460541509668377e-09, "loss": 0.05427774, "memory(GiB)": 13.7, "step": 105450, "train_speed(iter/s)": 1.529898 }, { "acc": 0.9947917, "epoch": 49.4281696742442, "grad_norm": 1.2535011768341064, "learning_rate": 4.431767035336728e-09, "loss": 0.02515016, "memory(GiB)": 13.7, "step": 105455, "train_speed(iter/s)": 1.5299 }, { "acc": 0.97562504, "epoch": 49.43051324115304, "grad_norm": 1.1771345138549805, "learning_rate": 4.403112650071278e-09, "loss": 0.05600444, "memory(GiB)": 13.7, "step": 105460, "train_speed(iter/s)": 1.529905 }, { "acc": 0.98543558, "epoch": 49.43285680806187, "grad_norm": 1.8702384233474731, "learning_rate": 4.374578354562516e-09, "loss": 0.03104105, "memory(GiB)": 13.7, "step": 105465, "train_speed(iter/s)": 1.529905 }, { "acc": 0.97622032, "epoch": 49.435200374970705, "grad_norm": 4.518957138061523, "learning_rate": 4.3461641494948275e-09, "loss": 0.04750777, "memory(GiB)": 13.7, "step": 105470, "train_speed(iter/s)": 1.529908 }, { "acc": 0.98625002, "epoch": 49.43754394187954, "grad_norm": 5.713766098022461, "learning_rate": 4.317870035551484e-09, "loss": 0.06782471, "memory(GiB)": 13.7, "step": 105475, "train_speed(iter/s)": 1.52991 }, { "acc": 0.97924824, "epoch": 49.439887508788374, "grad_norm": 5.200483322143555, "learning_rate": 4.289696013412986e-09, "loss": 0.07965543, "memory(GiB)": 13.7, "step": 105480, "train_speed(iter/s)": 1.52991 }, { "acc": 0.99333324, "epoch": 49.44223107569721, "grad_norm": 2.2162435054779053, "learning_rate": 4.261642083756502e-09, "loss": 0.04220968, "memory(GiB)": 13.7, "step": 105485, "train_speed(iter/s)": 1.52991 }, { "acc": 0.9802084, "epoch": 49.44457464260605, "grad_norm": 1.445582628250122, "learning_rate": 4.233708247255868e-09, "loss": 0.03913178, "memory(GiB)": 13.7, "step": 105490, "train_speed(iter/s)": 1.529912 }, { "acc": 0.9885417, "epoch": 49.446918209514884, "grad_norm": 2.344669818878174, "learning_rate": 4.205894504583259e-09, "loss": 0.02441026, "memory(GiB)": 13.7, "step": 105495, "train_speed(iter/s)": 1.529915 }, { "acc": 0.98812504, "epoch": 49.44926177642372, "grad_norm": 0.9655210971832275, "learning_rate": 4.178200856406961e-09, "loss": 0.04568062, "memory(GiB)": 13.7, "step": 105500, "train_speed(iter/s)": 1.529919 }, { "acc": 0.98321428, "epoch": 49.45160534333255, "grad_norm": 4.0071120262146, "learning_rate": 4.150627303391932e-09, "loss": 0.04275148, "memory(GiB)": 13.7, "step": 105505, "train_speed(iter/s)": 1.529918 }, { "acc": 0.9817709, "epoch": 49.45394891024139, "grad_norm": 3.2191500663757324, "learning_rate": 4.123173846202019e-09, "loss": 0.03321797, "memory(GiB)": 13.7, "step": 105510, "train_speed(iter/s)": 1.529924 }, { "acc": 0.984375, "epoch": 49.45629247715022, "grad_norm": 6.062455177307129, "learning_rate": 4.095840485496628e-09, "loss": 0.03867667, "memory(GiB)": 13.7, "step": 105515, "train_speed(iter/s)": 1.529924 }, { "acc": 0.98187504, "epoch": 49.458636044059055, "grad_norm": 4.859806060791016, "learning_rate": 4.0686272219323906e-09, "loss": 0.03382051, "memory(GiB)": 13.7, "step": 105520, "train_speed(iter/s)": 1.529931 }, { "acc": 0.97895832, "epoch": 49.46097961096789, "grad_norm": 4.458165645599365, "learning_rate": 4.041534056164272e-09, "loss": 0.06233672, "memory(GiB)": 13.7, "step": 105525, "train_speed(iter/s)": 1.529936 }, { "acc": 0.9875, "epoch": 49.46332317787673, "grad_norm": 2.4295105934143066, "learning_rate": 4.014560988842799e-09, "loss": 0.0482356, "memory(GiB)": 13.7, "step": 105530, "train_speed(iter/s)": 1.529937 }, { "acc": 0.98495045, "epoch": 49.465666744785565, "grad_norm": 2.106313943862915, "learning_rate": 3.987708020616831e-09, "loss": 0.04645086, "memory(GiB)": 13.7, "step": 105535, "train_speed(iter/s)": 1.529939 }, { "acc": 0.98080359, "epoch": 49.4680103116944, "grad_norm": 3.970041513442993, "learning_rate": 3.960975152131344e-09, "loss": 0.06035767, "memory(GiB)": 13.7, "step": 105540, "train_speed(iter/s)": 1.52994 }, { "acc": 0.98829823, "epoch": 49.470353878603234, "grad_norm": 1.6894882917404175, "learning_rate": 3.934362384029093e-09, "loss": 0.04636015, "memory(GiB)": 13.7, "step": 105545, "train_speed(iter/s)": 1.529941 }, { "acc": 0.98187504, "epoch": 49.47269744551207, "grad_norm": 1.9978076219558716, "learning_rate": 3.9078697169495015e-09, "loss": 0.02632701, "memory(GiB)": 13.7, "step": 105550, "train_speed(iter/s)": 1.529945 }, { "acc": 0.97937498, "epoch": 49.4750410124209, "grad_norm": 4.72913932800293, "learning_rate": 3.881497151529774e-09, "loss": 0.03884814, "memory(GiB)": 13.7, "step": 105555, "train_speed(iter/s)": 1.529947 }, { "acc": 0.98500004, "epoch": 49.47738457932974, "grad_norm": 5.021446704864502, "learning_rate": 3.8552446884043405e-09, "loss": 0.02416048, "memory(GiB)": 13.7, "step": 105560, "train_speed(iter/s)": 1.529944 }, { "acc": 0.97052078, "epoch": 49.47972814623858, "grad_norm": 0.9632467031478882, "learning_rate": 3.829112328202634e-09, "loss": 0.03750354, "memory(GiB)": 13.7, "step": 105565, "train_speed(iter/s)": 1.529946 }, { "acc": 0.99107141, "epoch": 49.48207171314741, "grad_norm": 1.9798938035964966, "learning_rate": 3.8031000715540874e-09, "loss": 0.0147529, "memory(GiB)": 13.7, "step": 105570, "train_speed(iter/s)": 1.529947 }, { "acc": 0.97875004, "epoch": 49.48441528005625, "grad_norm": 4.492392063140869, "learning_rate": 3.777207919083694e-09, "loss": 0.04489288, "memory(GiB)": 13.7, "step": 105575, "train_speed(iter/s)": 1.529949 }, { "acc": 0.9760417, "epoch": 49.48675884696508, "grad_norm": 4.785898208618164, "learning_rate": 3.751435871413672e-09, "loss": 0.04442214, "memory(GiB)": 13.7, "step": 105580, "train_speed(iter/s)": 1.529953 }, { "acc": 0.98666668, "epoch": 49.489102413873916, "grad_norm": 2.47337007522583, "learning_rate": 3.725783929163464e-09, "loss": 0.03409018, "memory(GiB)": 13.7, "step": 105585, "train_speed(iter/s)": 1.529958 }, { "acc": 0.98125, "epoch": 49.49144598078275, "grad_norm": 3.533146381378174, "learning_rate": 3.7002520929502914e-09, "loss": 0.05495738, "memory(GiB)": 13.7, "step": 105590, "train_speed(iter/s)": 1.52996 }, { "acc": 0.97875004, "epoch": 49.493789547691584, "grad_norm": 2.0574662685394287, "learning_rate": 3.674840363386382e-09, "loss": 0.05250808, "memory(GiB)": 13.7, "step": 105595, "train_speed(iter/s)": 1.529962 }, { "acc": 0.98500004, "epoch": 49.49613311460042, "grad_norm": 2.359208822250366, "learning_rate": 3.6495487410839617e-09, "loss": 0.04815642, "memory(GiB)": 13.7, "step": 105600, "train_speed(iter/s)": 1.52996 }, { "acc": 0.98604164, "epoch": 49.49847668150926, "grad_norm": 2.6411285400390625, "learning_rate": 3.624377226650817e-09, "loss": 0.03742591, "memory(GiB)": 13.7, "step": 105605, "train_speed(iter/s)": 1.529963 }, { "acc": 0.98812504, "epoch": 49.500820248418094, "grad_norm": 3.697932243347168, "learning_rate": 3.5993258206908495e-09, "loss": 0.03299521, "memory(GiB)": 13.7, "step": 105610, "train_speed(iter/s)": 1.529965 }, { "acc": 0.9854167, "epoch": 49.50316381532693, "grad_norm": 0.00349196488969028, "learning_rate": 3.574394523807404e-09, "loss": 0.03195259, "memory(GiB)": 13.7, "step": 105615, "train_speed(iter/s)": 1.52997 }, { "acc": 0.98041668, "epoch": 49.50550738223576, "grad_norm": 4.250580310821533, "learning_rate": 3.5495833365993873e-09, "loss": 0.03305295, "memory(GiB)": 13.7, "step": 105620, "train_speed(iter/s)": 1.529969 }, { "acc": 0.98597221, "epoch": 49.5078509491446, "grad_norm": 5.903295040130615, "learning_rate": 3.5248922596629284e-09, "loss": 0.04038002, "memory(GiB)": 13.7, "step": 105625, "train_speed(iter/s)": 1.529972 }, { "acc": 0.98161707, "epoch": 49.51019451605343, "grad_norm": 0.002500747097656131, "learning_rate": 3.5003212935919374e-09, "loss": 0.03159696, "memory(GiB)": 13.7, "step": 105630, "train_speed(iter/s)": 1.529974 }, { "acc": 0.98604164, "epoch": 49.512538082962266, "grad_norm": 0.695785403251648, "learning_rate": 3.475870438976994e-09, "loss": 0.01815197, "memory(GiB)": 13.7, "step": 105635, "train_speed(iter/s)": 1.529975 }, { "acc": 0.9765564, "epoch": 49.51488164987111, "grad_norm": 4.131628036499023, "learning_rate": 3.4515396964053483e-09, "loss": 0.03454853, "memory(GiB)": 13.7, "step": 105640, "train_speed(iter/s)": 1.529977 }, { "acc": 0.996875, "epoch": 49.51722521677994, "grad_norm": 2.5755345821380615, "learning_rate": 3.427329066462028e-09, "loss": 0.02445921, "memory(GiB)": 13.7, "step": 105645, "train_speed(iter/s)": 1.529978 }, { "acc": 0.98874998, "epoch": 49.519568783688776, "grad_norm": 0.01169303897768259, "learning_rate": 3.4032385497292876e-09, "loss": 0.02011897, "memory(GiB)": 13.7, "step": 105650, "train_speed(iter/s)": 1.529981 }, { "acc": 0.990625, "epoch": 49.52191235059761, "grad_norm": 4.210772514343262, "learning_rate": 3.379268146785495e-09, "loss": 0.01685124, "memory(GiB)": 13.7, "step": 105655, "train_speed(iter/s)": 1.529981 }, { "acc": 0.99130421, "epoch": 49.524255917506444, "grad_norm": 4.537995338439941, "learning_rate": 3.3554178582079086e-09, "loss": 0.02348317, "memory(GiB)": 13.7, "step": 105660, "train_speed(iter/s)": 1.529985 }, { "acc": 0.98764877, "epoch": 49.52659948441528, "grad_norm": 2.467503786087036, "learning_rate": 3.3316876845687916e-09, "loss": 0.03387706, "memory(GiB)": 13.7, "step": 105665, "train_speed(iter/s)": 1.529991 }, { "acc": 0.97010422, "epoch": 49.52894305132411, "grad_norm": 6.923072338104248, "learning_rate": 3.308077626438741e-09, "loss": 0.04973764, "memory(GiB)": 13.7, "step": 105670, "train_speed(iter/s)": 1.529994 }, { "acc": 0.98187504, "epoch": 49.53128661823295, "grad_norm": 2.7508950233459473, "learning_rate": 3.2845876843861344e-09, "loss": 0.03663941, "memory(GiB)": 13.7, "step": 105675, "train_speed(iter/s)": 1.529996 }, { "acc": 0.97527781, "epoch": 49.53363018514179, "grad_norm": 2.480985164642334, "learning_rate": 3.261217858974354e-09, "loss": 0.04361395, "memory(GiB)": 13.7, "step": 105680, "train_speed(iter/s)": 1.530001 }, { "acc": 0.9791666, "epoch": 49.53597375205062, "grad_norm": 4.662919998168945, "learning_rate": 3.23796815076567e-09, "loss": 0.02328342, "memory(GiB)": 13.7, "step": 105685, "train_speed(iter/s)": 1.530005 }, { "acc": 0.99350281, "epoch": 49.53831731895946, "grad_norm": 1.7548668384552002, "learning_rate": 3.2148385603190256e-09, "loss": 0.01998874, "memory(GiB)": 13.7, "step": 105690, "train_speed(iter/s)": 1.530006 }, { "acc": 0.99499998, "epoch": 49.54066088586829, "grad_norm": 0.5015860199928284, "learning_rate": 3.1918290881905867e-09, "loss": 0.02139858, "memory(GiB)": 13.7, "step": 105695, "train_speed(iter/s)": 1.530005 }, { "acc": 0.97993469, "epoch": 49.543004452777126, "grad_norm": 5.2240309715271, "learning_rate": 3.168939734933188e-09, "loss": 0.06065367, "memory(GiB)": 13.7, "step": 105700, "train_speed(iter/s)": 1.530008 }, { "acc": 0.97625008, "epoch": 49.54534801968596, "grad_norm": 3.5290615558624268, "learning_rate": 3.1461705010963355e-09, "loss": 0.03070513, "memory(GiB)": 13.7, "step": 105705, "train_speed(iter/s)": 1.530012 }, { "acc": 0.98988104, "epoch": 49.547691586594794, "grad_norm": 4.185137748718262, "learning_rate": 3.123521387228425e-09, "loss": 0.0398029, "memory(GiB)": 13.7, "step": 105710, "train_speed(iter/s)": 1.530009 }, { "acc": 0.97479172, "epoch": 49.550035153503636, "grad_norm": 4.020528316497803, "learning_rate": 3.1009923938734103e-09, "loss": 0.0486737, "memory(GiB)": 13.7, "step": 105715, "train_speed(iter/s)": 1.530011 }, { "acc": 0.9911397, "epoch": 49.55237872041247, "grad_norm": 3.997589588165283, "learning_rate": 3.078583521573027e-09, "loss": 0.03407873, "memory(GiB)": 13.7, "step": 105720, "train_speed(iter/s)": 1.530016 }, { "acc": 0.98208332, "epoch": 49.554722287321304, "grad_norm": 2.02345609664917, "learning_rate": 3.056294770865124e-09, "loss": 0.0256916, "memory(GiB)": 13.7, "step": 105725, "train_speed(iter/s)": 1.530019 }, { "acc": 0.99333334, "epoch": 49.55706585423014, "grad_norm": 0.001734266639687121, "learning_rate": 3.034126142286441e-09, "loss": 0.00822134, "memory(GiB)": 13.7, "step": 105730, "train_speed(iter/s)": 1.530021 }, { "acc": 0.98125, "epoch": 49.55940942113897, "grad_norm": 3.6693499088287354, "learning_rate": 3.012077636369276e-09, "loss": 0.03801863, "memory(GiB)": 13.7, "step": 105735, "train_speed(iter/s)": 1.530024 }, { "acc": 0.98187504, "epoch": 49.56175298804781, "grad_norm": 6.001129627227783, "learning_rate": 2.9901492536437073e-09, "loss": 0.03761212, "memory(GiB)": 13.7, "step": 105740, "train_speed(iter/s)": 1.530024 }, { "acc": 0.97529764, "epoch": 49.56409655495664, "grad_norm": 5.100326061248779, "learning_rate": 2.968340994637039e-09, "loss": 0.04028935, "memory(GiB)": 13.7, "step": 105745, "train_speed(iter/s)": 1.530027 }, { "acc": 0.98093748, "epoch": 49.566440121865476, "grad_norm": 4.161900043487549, "learning_rate": 2.9466528598732435e-09, "loss": 0.05379359, "memory(GiB)": 13.7, "step": 105750, "train_speed(iter/s)": 1.530031 }, { "acc": 0.9916667, "epoch": 49.56878368877432, "grad_norm": 0.00031619041692465544, "learning_rate": 2.9250848498735178e-09, "loss": 0.01954089, "memory(GiB)": 13.7, "step": 105755, "train_speed(iter/s)": 1.530031 }, { "acc": 0.99541664, "epoch": 49.57112725568315, "grad_norm": 2.387739658355713, "learning_rate": 2.903636965156285e-09, "loss": 0.04887355, "memory(GiB)": 13.7, "step": 105760, "train_speed(iter/s)": 1.530031 }, { "acc": 0.97723217, "epoch": 49.573470822591986, "grad_norm": 4.7823967933654785, "learning_rate": 2.882309206237747e-09, "loss": 0.04171026, "memory(GiB)": 13.7, "step": 105765, "train_speed(iter/s)": 1.530031 }, { "acc": 0.98611107, "epoch": 49.57581438950082, "grad_norm": 2.486978054046631, "learning_rate": 2.8611015736296654e-09, "loss": 0.01913379, "memory(GiB)": 13.7, "step": 105770, "train_speed(iter/s)": 1.530034 }, { "acc": 0.99236107, "epoch": 49.578157956409655, "grad_norm": 1.6759132146835327, "learning_rate": 2.840014067842137e-09, "loss": 0.0205896, "memory(GiB)": 13.7, "step": 105775, "train_speed(iter/s)": 1.530033 }, { "acc": 0.98361111, "epoch": 49.58050152331849, "grad_norm": 5.415189743041992, "learning_rate": 2.819046689382482e-09, "loss": 0.03316526, "memory(GiB)": 13.7, "step": 105780, "train_speed(iter/s)": 1.530034 }, { "acc": 0.98381948, "epoch": 49.58284509022732, "grad_norm": 4.017263889312744, "learning_rate": 2.798199438754137e-09, "loss": 0.04020042, "memory(GiB)": 13.7, "step": 105785, "train_speed(iter/s)": 1.530035 }, { "acc": 0.98916664, "epoch": 49.585188657136165, "grad_norm": 2.3926784992218018, "learning_rate": 2.7774723164583178e-09, "loss": 0.02572308, "memory(GiB)": 13.7, "step": 105790, "train_speed(iter/s)": 1.530035 }, { "acc": 0.9947917, "epoch": 49.587532224045, "grad_norm": 4.258945465087891, "learning_rate": 2.756865322992909e-09, "loss": 0.02143924, "memory(GiB)": 13.7, "step": 105795, "train_speed(iter/s)": 1.530037 }, { "acc": 0.9822917, "epoch": 49.58987579095383, "grad_norm": 1.9518136978149414, "learning_rate": 2.736378458854131e-09, "loss": 0.03683477, "memory(GiB)": 13.7, "step": 105800, "train_speed(iter/s)": 1.530038 }, { "acc": 0.9838542, "epoch": 49.59221935786267, "grad_norm": 5.12600564956665, "learning_rate": 2.716011724533763e-09, "loss": 0.05889747, "memory(GiB)": 13.7, "step": 105805, "train_speed(iter/s)": 1.530041 }, { "acc": 0.9854167, "epoch": 49.5945629247715, "grad_norm": 3.74501895904541, "learning_rate": 2.6957651205213648e-09, "loss": 0.04816037, "memory(GiB)": 13.7, "step": 105810, "train_speed(iter/s)": 1.530045 }, { "acc": 0.98705359, "epoch": 49.596906491680336, "grad_norm": 3.129401922225952, "learning_rate": 2.6756386473037205e-09, "loss": 0.03518116, "memory(GiB)": 13.7, "step": 105815, "train_speed(iter/s)": 1.530048 }, { "acc": 0.99457722, "epoch": 49.59925005858917, "grad_norm": 2.6746902465820312, "learning_rate": 2.655632305364839e-09, "loss": 0.02555705, "memory(GiB)": 13.7, "step": 105820, "train_speed(iter/s)": 1.530055 }, { "acc": 0.9765564, "epoch": 49.601593625498005, "grad_norm": 4.9121832847595215, "learning_rate": 2.6357460951848436e-09, "loss": 0.06092329, "memory(GiB)": 13.7, "step": 105825, "train_speed(iter/s)": 1.530055 }, { "acc": 0.9729167, "epoch": 49.603937192406846, "grad_norm": 3.354236125946045, "learning_rate": 2.615980017242747e-09, "loss": 0.05119537, "memory(GiB)": 13.7, "step": 105830, "train_speed(iter/s)": 1.530057 }, { "acc": 0.99041672, "epoch": 49.60628075931568, "grad_norm": 2.7055444717407227, "learning_rate": 2.5963340720125685e-09, "loss": 0.02164954, "memory(GiB)": 13.7, "step": 105835, "train_speed(iter/s)": 1.530058 }, { "acc": 0.9953125, "epoch": 49.608624326224515, "grad_norm": 0.0043946122750639915, "learning_rate": 2.5768082599677693e-09, "loss": 0.03010623, "memory(GiB)": 13.7, "step": 105840, "train_speed(iter/s)": 1.530064 }, { "acc": 0.99454861, "epoch": 49.61096789313335, "grad_norm": 1.6026601791381836, "learning_rate": 2.5574025815768174e-09, "loss": 0.02280937, "memory(GiB)": 13.7, "step": 105845, "train_speed(iter/s)": 1.530066 }, { "acc": 0.98312502, "epoch": 49.61331146004218, "grad_norm": 3.477536678314209, "learning_rate": 2.538117037306515e-09, "loss": 0.04751099, "memory(GiB)": 13.7, "step": 105850, "train_speed(iter/s)": 1.530068 }, { "acc": 0.9677084, "epoch": 49.61565502695102, "grad_norm": 4.389555931091309, "learning_rate": 2.5189516276203344e-09, "loss": 0.06673608, "memory(GiB)": 13.7, "step": 105855, "train_speed(iter/s)": 1.530068 }, { "acc": 0.984375, "epoch": 49.61799859385985, "grad_norm": 2.3129055500030518, "learning_rate": 2.4999063529784156e-09, "loss": 0.04635699, "memory(GiB)": 13.7, "step": 105860, "train_speed(iter/s)": 1.530069 }, { "acc": 0.98708334, "epoch": 49.62034216076869, "grad_norm": 0.0007392491097562015, "learning_rate": 2.480981213839236e-09, "loss": 0.01868168, "memory(GiB)": 13.7, "step": 105865, "train_speed(iter/s)": 1.530067 }, { "acc": 0.9958333, "epoch": 49.62268572767753, "grad_norm": 0.0748542845249176, "learning_rate": 2.462176210657941e-09, "loss": 0.02182618, "memory(GiB)": 13.7, "step": 105870, "train_speed(iter/s)": 1.530067 }, { "acc": 0.9889679, "epoch": 49.62502929458636, "grad_norm": 0.0068430230021476746, "learning_rate": 2.443491343885791e-09, "loss": 0.04045125, "memory(GiB)": 13.7, "step": 105875, "train_speed(iter/s)": 1.530068 }, { "acc": 0.97990532, "epoch": 49.627372861495196, "grad_norm": 0.9924229383468628, "learning_rate": 2.4249266139718266e-09, "loss": 0.03500389, "memory(GiB)": 13.7, "step": 105880, "train_speed(iter/s)": 1.530069 }, { "acc": 0.99125004, "epoch": 49.62971642840403, "grad_norm": 4.610205173492432, "learning_rate": 2.4064820213634232e-09, "loss": 0.02574834, "memory(GiB)": 13.7, "step": 105885, "train_speed(iter/s)": 1.530073 }, { "acc": 0.97979164, "epoch": 49.632059995312865, "grad_norm": 4.8196210861206055, "learning_rate": 2.3881575665029602e-09, "loss": 0.06402829, "memory(GiB)": 13.7, "step": 105890, "train_speed(iter/s)": 1.530074 }, { "acc": 0.99124994, "epoch": 49.6344035622217, "grad_norm": 3.6155316829681396, "learning_rate": 2.3699532498305966e-09, "loss": 0.03238963, "memory(GiB)": 13.7, "step": 105895, "train_speed(iter/s)": 1.530081 }, { "acc": 0.98916664, "epoch": 49.63674712913053, "grad_norm": 0.0029981154948472977, "learning_rate": 2.3518690717853828e-09, "loss": 0.02210419, "memory(GiB)": 13.7, "step": 105900, "train_speed(iter/s)": 1.530082 }, { "acc": 0.98724422, "epoch": 49.639090696039375, "grad_norm": 1.0903247594833374, "learning_rate": 2.333905032800261e-09, "loss": 0.05708148, "memory(GiB)": 13.7, "step": 105905, "train_speed(iter/s)": 1.530086 }, { "acc": 1.0, "epoch": 49.64143426294821, "grad_norm": 2.5961997509002686, "learning_rate": 2.3160611333081755e-09, "loss": 0.00915451, "memory(GiB)": 13.7, "step": 105910, "train_speed(iter/s)": 1.530087 }, { "acc": 0.98875008, "epoch": 49.643777829857044, "grad_norm": 5.127549648284912, "learning_rate": 2.2983373737376303e-09, "loss": 0.02642049, "memory(GiB)": 13.7, "step": 105915, "train_speed(iter/s)": 1.530089 }, { "acc": 0.98020296, "epoch": 49.64612139676588, "grad_norm": 3.173353672027588, "learning_rate": 2.2807337545143525e-09, "loss": 0.03885988, "memory(GiB)": 13.7, "step": 105920, "train_speed(iter/s)": 1.530092 }, { "acc": 0.9941761, "epoch": 49.64846496367471, "grad_norm": 2.355482339859009, "learning_rate": 2.2632502760618498e-09, "loss": 0.0167287, "memory(GiB)": 13.7, "step": 105925, "train_speed(iter/s)": 1.530096 }, { "acc": 0.9848959, "epoch": 49.65080853058355, "grad_norm": 1.5217617750167847, "learning_rate": 2.245886938800855e-09, "loss": 0.03391865, "memory(GiB)": 13.7, "step": 105930, "train_speed(iter/s)": 1.530099 }, { "acc": 0.98083334, "epoch": 49.65315209749238, "grad_norm": 1.2118043899536133, "learning_rate": 2.2286437431476596e-09, "loss": 0.04973413, "memory(GiB)": 13.7, "step": 105935, "train_speed(iter/s)": 1.530105 }, { "acc": 0.98874998, "epoch": 49.655495664401215, "grad_norm": 0.0027926915790885687, "learning_rate": 2.211520689517446e-09, "loss": 0.01602375, "memory(GiB)": 13.7, "step": 105940, "train_speed(iter/s)": 1.53011 }, { "acc": 0.99041662, "epoch": 49.65783923131006, "grad_norm": 3.530576467514038, "learning_rate": 2.1945177783220662e-09, "loss": 0.02358044, "memory(GiB)": 13.7, "step": 105945, "train_speed(iter/s)": 1.53011 }, { "acc": 0.9869792, "epoch": 49.66018279821889, "grad_norm": 3.8699634075164795, "learning_rate": 2.17763500996893e-09, "loss": 0.05119178, "memory(GiB)": 13.7, "step": 105950, "train_speed(iter/s)": 1.530115 }, { "acc": 0.98988094, "epoch": 49.662526365127725, "grad_norm": 0.1834723949432373, "learning_rate": 2.1608723848654495e-09, "loss": 0.04270341, "memory(GiB)": 13.7, "step": 105955, "train_speed(iter/s)": 1.530117 }, { "acc": 0.98167658, "epoch": 49.66486993203656, "grad_norm": 1.7770624160766602, "learning_rate": 2.144229903413485e-09, "loss": 0.02721832, "memory(GiB)": 13.7, "step": 105960, "train_speed(iter/s)": 1.530121 }, { "acc": 0.99321423, "epoch": 49.667213498945394, "grad_norm": 2.360842704772949, "learning_rate": 2.1277075660132322e-09, "loss": 0.01618513, "memory(GiB)": 13.7, "step": 105965, "train_speed(iter/s)": 1.530123 }, { "acc": 0.98599205, "epoch": 49.66955706585423, "grad_norm": 3.1448142528533936, "learning_rate": 2.1113053730621102e-09, "loss": 0.06390134, "memory(GiB)": 13.7, "step": 105970, "train_speed(iter/s)": 1.530129 }, { "acc": 0.98150291, "epoch": 49.67190063276306, "grad_norm": 5.670588970184326, "learning_rate": 2.0950233249542092e-09, "loss": 0.06553227, "memory(GiB)": 13.7, "step": 105975, "train_speed(iter/s)": 1.530132 }, { "acc": 0.98625002, "epoch": 49.674244199671904, "grad_norm": 3.271357536315918, "learning_rate": 2.078861422080845e-09, "loss": 0.03134517, "memory(GiB)": 13.7, "step": 105980, "train_speed(iter/s)": 1.530133 }, { "acc": 0.99548607, "epoch": 49.67658776658074, "grad_norm": 3.1507630348205566, "learning_rate": 2.062819664830555e-09, "loss": 0.02298271, "memory(GiB)": 13.7, "step": 105985, "train_speed(iter/s)": 1.530135 }, { "acc": 0.98999996, "epoch": 49.67893133348957, "grad_norm": 3.621708393096924, "learning_rate": 2.0468980535891034e-09, "loss": 0.0350587, "memory(GiB)": 13.7, "step": 105990, "train_speed(iter/s)": 1.530139 }, { "acc": 0.9916667, "epoch": 49.68127490039841, "grad_norm": 5.854923248291016, "learning_rate": 2.031096588738925e-09, "loss": 0.04176617, "memory(GiB)": 13.7, "step": 105995, "train_speed(iter/s)": 1.530143 }, { "acc": 0.98778839, "epoch": 49.68361846730724, "grad_norm": 3.037487268447876, "learning_rate": 2.015415270660232e-09, "loss": 0.03874201, "memory(GiB)": 13.7, "step": 106000, "train_speed(iter/s)": 1.530147 }, { "acc": 0.97437496, "epoch": 49.685962034216075, "grad_norm": 2.345973491668701, "learning_rate": 1.9998540997293526e-09, "loss": 0.0417579, "memory(GiB)": 13.7, "step": 106005, "train_speed(iter/s)": 1.530149 }, { "acc": 0.98187504, "epoch": 49.68830560112491, "grad_norm": 4.087911605834961, "learning_rate": 1.984413076320395e-09, "loss": 0.02789439, "memory(GiB)": 13.7, "step": 106010, "train_speed(iter/s)": 1.53015 }, { "acc": 0.98336315, "epoch": 49.690649168033744, "grad_norm": 2.6319169998168945, "learning_rate": 1.969092200805246e-09, "loss": 0.07618818, "memory(GiB)": 13.7, "step": 106015, "train_speed(iter/s)": 1.530148 }, { "acc": 1.0, "epoch": 49.692992734942585, "grad_norm": 2.2552413940429688, "learning_rate": 1.9538914735513534e-09, "loss": 0.01754282, "memory(GiB)": 13.7, "step": 106020, "train_speed(iter/s)": 1.53015 }, { "acc": 0.99375, "epoch": 49.69533630185142, "grad_norm": 2.48637056350708, "learning_rate": 1.9388108949244985e-09, "loss": 0.01430025, "memory(GiB)": 13.7, "step": 106025, "train_speed(iter/s)": 1.530154 }, { "acc": 0.98500004, "epoch": 49.697679868760254, "grad_norm": 5.625621795654297, "learning_rate": 1.923850465287133e-09, "loss": 0.05492247, "memory(GiB)": 13.7, "step": 106030, "train_speed(iter/s)": 1.530156 }, { "acc": 0.98458328, "epoch": 49.70002343566909, "grad_norm": 0.04934307187795639, "learning_rate": 1.909010184998379e-09, "loss": 0.02379879, "memory(GiB)": 13.7, "step": 106035, "train_speed(iter/s)": 1.530162 }, { "acc": 0.98421879, "epoch": 49.70236700257792, "grad_norm": 2.185934543609619, "learning_rate": 1.8942900544156912e-09, "loss": 0.0431128, "memory(GiB)": 13.7, "step": 106040, "train_speed(iter/s)": 1.530164 }, { "acc": 0.99508934, "epoch": 49.70471056948676, "grad_norm": 3.397183895111084, "learning_rate": 1.879690073893196e-09, "loss": 0.0341537, "memory(GiB)": 13.7, "step": 106045, "train_speed(iter/s)": 1.53017 }, { "acc": 0.97778845, "epoch": 49.70705413639559, "grad_norm": 2.906162977218628, "learning_rate": 1.8652102437805788e-09, "loss": 0.05766833, "memory(GiB)": 13.7, "step": 106050, "train_speed(iter/s)": 1.530175 }, { "acc": 0.99386368, "epoch": 49.70939770330443, "grad_norm": 3.29826021194458, "learning_rate": 1.8508505644264144e-09, "loss": 0.07449064, "memory(GiB)": 13.7, "step": 106055, "train_speed(iter/s)": 1.530178 }, { "acc": 0.9760417, "epoch": 49.71174127021327, "grad_norm": 2.6619272232055664, "learning_rate": 1.836611036175948e-09, "loss": 0.03403526, "memory(GiB)": 13.7, "step": 106060, "train_speed(iter/s)": 1.530178 }, { "acc": 0.97770834, "epoch": 49.7140848371221, "grad_norm": 2.7927982807159424, "learning_rate": 1.822491659371649e-09, "loss": 0.03857958, "memory(GiB)": 13.7, "step": 106065, "train_speed(iter/s)": 1.530184 }, { "acc": 0.99300594, "epoch": 49.716428404030935, "grad_norm": 2.786576986312866, "learning_rate": 1.8084924343526563e-09, "loss": 0.0310377, "memory(GiB)": 13.7, "step": 106070, "train_speed(iter/s)": 1.530188 }, { "acc": 0.98696423, "epoch": 49.71877197093977, "grad_norm": 0.0007892127032391727, "learning_rate": 1.7946133614558897e-09, "loss": 0.02185156, "memory(GiB)": 13.7, "step": 106075, "train_speed(iter/s)": 1.530191 }, { "acc": 0.98916664, "epoch": 49.721115537848604, "grad_norm": 2.51005482673645, "learning_rate": 1.780854441014382e-09, "loss": 0.03666386, "memory(GiB)": 13.7, "step": 106080, "train_speed(iter/s)": 1.530192 }, { "acc": 0.9864583, "epoch": 49.72345910475744, "grad_norm": 5.269067764282227, "learning_rate": 1.7672156733589472e-09, "loss": 0.03084814, "memory(GiB)": 13.7, "step": 106085, "train_speed(iter/s)": 1.530194 }, { "acc": 0.98916664, "epoch": 49.72580267166627, "grad_norm": 3.2253806591033936, "learning_rate": 1.7536970588176226e-09, "loss": 0.01596895, "memory(GiB)": 13.7, "step": 106090, "train_speed(iter/s)": 1.530198 }, { "acc": 0.96937504, "epoch": 49.728146238575114, "grad_norm": 6.086099147796631, "learning_rate": 1.7402985977151167e-09, "loss": 0.04151919, "memory(GiB)": 13.7, "step": 106095, "train_speed(iter/s)": 1.5302 }, { "acc": 0.98363094, "epoch": 49.73048980548395, "grad_norm": 4.162627220153809, "learning_rate": 1.7270202903739168e-09, "loss": 0.0427672, "memory(GiB)": 13.7, "step": 106100, "train_speed(iter/s)": 1.530201 }, { "acc": 0.98142853, "epoch": 49.73283337239278, "grad_norm": 2.349642038345337, "learning_rate": 1.713862137112625e-09, "loss": 0.03209261, "memory(GiB)": 13.7, "step": 106105, "train_speed(iter/s)": 1.530205 }, { "acc": 0.99020834, "epoch": 49.73517693930162, "grad_norm": 1.4121580123901367, "learning_rate": 1.7008241382476232e-09, "loss": 0.03557417, "memory(GiB)": 13.7, "step": 106110, "train_speed(iter/s)": 1.53021 }, { "acc": 0.98666668, "epoch": 49.73752050621045, "grad_norm": 2.065932273864746, "learning_rate": 1.6879062940930731e-09, "loss": 0.0335386, "memory(GiB)": 13.7, "step": 106115, "train_speed(iter/s)": 1.530214 }, { "acc": 0.98881941, "epoch": 49.739864073119286, "grad_norm": 2.555386543273926, "learning_rate": 1.675108604958696e-09, "loss": 0.02119231, "memory(GiB)": 13.7, "step": 106120, "train_speed(iter/s)": 1.530218 }, { "acc": 0.99375, "epoch": 49.74220764002812, "grad_norm": 4.004854202270508, "learning_rate": 1.662431071151993e-09, "loss": 0.03741562, "memory(GiB)": 13.7, "step": 106125, "train_speed(iter/s)": 1.53022 }, { "acc": 0.98606148, "epoch": 49.74455120693696, "grad_norm": 4.577651023864746, "learning_rate": 1.6498736929782447e-09, "loss": 0.09253427, "memory(GiB)": 13.7, "step": 106130, "train_speed(iter/s)": 1.530224 }, { "acc": 1.0, "epoch": 49.746894773845796, "grad_norm": 1.787040114402771, "learning_rate": 1.6374364707388467e-09, "loss": 0.01777034, "memory(GiB)": 13.7, "step": 106135, "train_speed(iter/s)": 1.530226 }, { "acc": 0.98395824, "epoch": 49.74923834075463, "grad_norm": 0.008635103702545166, "learning_rate": 1.625119404732974e-09, "loss": 0.03094538, "memory(GiB)": 13.7, "step": 106140, "train_speed(iter/s)": 1.53023 }, { "acc": 0.98298616, "epoch": 49.751581907663464, "grad_norm": 2.7383432388305664, "learning_rate": 1.6129224952570268e-09, "loss": 0.03188951, "memory(GiB)": 13.7, "step": 106145, "train_speed(iter/s)": 1.530233 }, { "acc": 0.98883924, "epoch": 49.7539254745723, "grad_norm": 3.2356650829315186, "learning_rate": 1.600845742603519e-09, "loss": 0.03259665, "memory(GiB)": 13.7, "step": 106150, "train_speed(iter/s)": 1.530235 }, { "acc": 0.98895836, "epoch": 49.75626904148113, "grad_norm": 3.3821370601654053, "learning_rate": 1.588889147062746e-09, "loss": 0.03227153, "memory(GiB)": 13.7, "step": 106155, "train_speed(iter/s)": 1.530239 }, { "acc": 0.98113098, "epoch": 49.75861260838997, "grad_norm": 0.006409638561308384, "learning_rate": 1.5770527089227806e-09, "loss": 0.02849848, "memory(GiB)": 13.7, "step": 106160, "train_speed(iter/s)": 1.530242 }, { "acc": 0.98604164, "epoch": 49.7609561752988, "grad_norm": 4.8123979568481445, "learning_rate": 1.5653364284678122e-09, "loss": 0.02323952, "memory(GiB)": 13.7, "step": 106165, "train_speed(iter/s)": 1.530242 }, { "acc": 0.99499998, "epoch": 49.76329974220764, "grad_norm": 2.3009183406829834, "learning_rate": 1.5537403059792541e-09, "loss": 0.03110667, "memory(GiB)": 13.7, "step": 106170, "train_speed(iter/s)": 1.530243 }, { "acc": 0.984375, "epoch": 49.76564330911648, "grad_norm": 4.8333048820495605, "learning_rate": 1.5422643417357445e-09, "loss": 0.04158514, "memory(GiB)": 13.7, "step": 106175, "train_speed(iter/s)": 1.530245 }, { "acc": 0.99375, "epoch": 49.76798687602531, "grad_norm": 3.3651881217956543, "learning_rate": 1.5309085360131457e-09, "loss": 0.01966401, "memory(GiB)": 13.7, "step": 106180, "train_speed(iter/s)": 1.530247 }, { "acc": 0.9875, "epoch": 49.770330442934146, "grad_norm": 1.9071394205093384, "learning_rate": 1.519672889085101e-09, "loss": 0.03487414, "memory(GiB)": 13.7, "step": 106185, "train_speed(iter/s)": 1.530248 }, { "acc": 0.99003468, "epoch": 49.77267400984298, "grad_norm": 2.978954315185547, "learning_rate": 1.5085574012213671e-09, "loss": 0.02713546, "memory(GiB)": 13.7, "step": 106190, "train_speed(iter/s)": 1.530246 }, { "acc": 0.99437504, "epoch": 49.775017576751814, "grad_norm": 1.059558629989624, "learning_rate": 1.4975620726883714e-09, "loss": 0.03831857, "memory(GiB)": 13.7, "step": 106195, "train_speed(iter/s)": 1.530248 }, { "acc": 0.9833333, "epoch": 49.77736114366065, "grad_norm": 1.124498963356018, "learning_rate": 1.4866869037514301e-09, "loss": 0.04296773, "memory(GiB)": 13.7, "step": 106200, "train_speed(iter/s)": 1.530248 }, { "acc": 0.9979167, "epoch": 49.77970471056949, "grad_norm": 0.9429436326026917, "learning_rate": 1.4759318946714198e-09, "loss": 0.01645365, "memory(GiB)": 13.7, "step": 106205, "train_speed(iter/s)": 1.530247 }, { "acc": 0.9791667, "epoch": 49.782048277478324, "grad_norm": 6.943064212799072, "learning_rate": 1.4652970457064417e-09, "loss": 0.04612891, "memory(GiB)": 13.7, "step": 106210, "train_speed(iter/s)": 1.530247 }, { "acc": 0.99107141, "epoch": 49.78439184438716, "grad_norm": 2.4502768516540527, "learning_rate": 1.4547823571134864e-09, "loss": 0.03016563, "memory(GiB)": 13.7, "step": 106215, "train_speed(iter/s)": 1.530254 }, { "acc": 0.98938494, "epoch": 49.78673541129599, "grad_norm": 2.2026360034942627, "learning_rate": 1.4443878291439943e-09, "loss": 0.0333005, "memory(GiB)": 13.7, "step": 106220, "train_speed(iter/s)": 1.530255 }, { "acc": 0.9927084, "epoch": 49.78907897820483, "grad_norm": 1.9135557413101196, "learning_rate": 1.434113462048296e-09, "loss": 0.02352839, "memory(GiB)": 13.7, "step": 106225, "train_speed(iter/s)": 1.530253 }, { "acc": 0.98395834, "epoch": 49.79142254511366, "grad_norm": 2.645089626312256, "learning_rate": 1.4239592560728359e-09, "loss": 0.02775678, "memory(GiB)": 13.7, "step": 106230, "train_speed(iter/s)": 1.530253 }, { "acc": 0.98250008, "epoch": 49.793766112022496, "grad_norm": 0.0003650069993454963, "learning_rate": 1.4139252114629492e-09, "loss": 0.03399268, "memory(GiB)": 13.7, "step": 106235, "train_speed(iter/s)": 1.530254 }, { "acc": 0.97939491, "epoch": 49.79610967893133, "grad_norm": 3.6365857124328613, "learning_rate": 1.4040113284584198e-09, "loss": 0.06865641, "memory(GiB)": 13.7, "step": 106240, "train_speed(iter/s)": 1.530258 }, { "acc": 0.99219704, "epoch": 49.79845324584017, "grad_norm": 4.591066837310791, "learning_rate": 1.394217607298477e-09, "loss": 0.02403499, "memory(GiB)": 13.7, "step": 106245, "train_speed(iter/s)": 1.530264 }, { "acc": 0.98383923, "epoch": 49.800796812749006, "grad_norm": 1.0658926963806152, "learning_rate": 1.3845440482179094e-09, "loss": 0.03373016, "memory(GiB)": 13.7, "step": 106250, "train_speed(iter/s)": 1.530264 }, { "acc": 0.99591351, "epoch": 49.80314037965784, "grad_norm": 2.768101453781128, "learning_rate": 1.3749906514498402e-09, "loss": 0.02990315, "memory(GiB)": 13.7, "step": 106255, "train_speed(iter/s)": 1.530267 }, { "acc": 0.98208332, "epoch": 49.805483946566675, "grad_norm": 4.225922584533691, "learning_rate": 1.3655574172235082e-09, "loss": 0.04501418, "memory(GiB)": 13.7, "step": 106260, "train_speed(iter/s)": 1.530273 }, { "acc": 0.98633928, "epoch": 49.80782751347551, "grad_norm": 0.4287029504776001, "learning_rate": 1.3562443457659309e-09, "loss": 0.02301048, "memory(GiB)": 13.7, "step": 106265, "train_speed(iter/s)": 1.530274 }, { "acc": 0.98552084, "epoch": 49.81017108038434, "grad_norm": 4.301940441131592, "learning_rate": 1.347051437300241e-09, "loss": 0.03478451, "memory(GiB)": 13.7, "step": 106270, "train_speed(iter/s)": 1.530275 }, { "acc": 0.99041672, "epoch": 49.81251464729318, "grad_norm": 1.920088529586792, "learning_rate": 1.3379786920484606e-09, "loss": 0.01892219, "memory(GiB)": 13.7, "step": 106275, "train_speed(iter/s)": 1.530276 }, { "acc": 0.98648815, "epoch": 49.81485821420202, "grad_norm": 0.0013564282562583685, "learning_rate": 1.3290261102276167e-09, "loss": 0.03085248, "memory(GiB)": 13.7, "step": 106280, "train_speed(iter/s)": 1.530277 }, { "acc": 0.98967266, "epoch": 49.81720178111085, "grad_norm": 3.12019681930542, "learning_rate": 1.3201936920541813e-09, "loss": 0.05292569, "memory(GiB)": 13.7, "step": 106285, "train_speed(iter/s)": 1.530281 }, { "acc": 0.99437504, "epoch": 49.81954534801969, "grad_norm": 2.977693557739258, "learning_rate": 1.3114814377390756e-09, "loss": 0.01705416, "memory(GiB)": 13.7, "step": 106290, "train_speed(iter/s)": 1.530284 }, { "acc": 0.96739578, "epoch": 49.82188891492852, "grad_norm": 5.499781131744385, "learning_rate": 1.3028893474926657e-09, "loss": 0.0567159, "memory(GiB)": 13.7, "step": 106295, "train_speed(iter/s)": 1.530287 }, { "acc": 0.97446432, "epoch": 49.824232481837356, "grad_norm": 5.251668930053711, "learning_rate": 1.294417421520878e-09, "loss": 0.06688461, "memory(GiB)": 13.7, "step": 106300, "train_speed(iter/s)": 1.53029 }, { "acc": 0.96784725, "epoch": 49.82657604874619, "grad_norm": 4.025205135345459, "learning_rate": 1.2860656600279721e-09, "loss": 0.04701822, "memory(GiB)": 13.7, "step": 106305, "train_speed(iter/s)": 1.530295 }, { "acc": 0.98003397, "epoch": 49.828919615655025, "grad_norm": 4.099106311798096, "learning_rate": 1.2778340632143238e-09, "loss": 0.05022945, "memory(GiB)": 13.7, "step": 106310, "train_speed(iter/s)": 1.530296 }, { "acc": 0.99508934, "epoch": 49.83126318256386, "grad_norm": 0.23627923429012299, "learning_rate": 1.2697226312775329e-09, "loss": 0.01898205, "memory(GiB)": 13.7, "step": 106315, "train_speed(iter/s)": 1.530299 }, { "acc": 0.9864584, "epoch": 49.8336067494727, "grad_norm": 0.9886932373046875, "learning_rate": 1.2617313644129792e-09, "loss": 0.03340774, "memory(GiB)": 13.7, "step": 106320, "train_speed(iter/s)": 1.5303 }, { "acc": 0.98968754, "epoch": 49.835950316381535, "grad_norm": 4.230598449707031, "learning_rate": 1.253860262812712e-09, "loss": 0.02854335, "memory(GiB)": 13.7, "step": 106325, "train_speed(iter/s)": 1.530305 }, { "acc": 0.98923607, "epoch": 49.83829388329037, "grad_norm": 1.1989834308624268, "learning_rate": 1.2461093266660058e-09, "loss": 0.03590034, "memory(GiB)": 13.7, "step": 106330, "train_speed(iter/s)": 1.530305 }, { "acc": 0.97889881, "epoch": 49.8406374501992, "grad_norm": 3.135981559753418, "learning_rate": 1.2384785561588037e-09, "loss": 0.05065863, "memory(GiB)": 13.7, "step": 106335, "train_speed(iter/s)": 1.530306 }, { "acc": 0.99071426, "epoch": 49.84298101710804, "grad_norm": 0.018355945125222206, "learning_rate": 1.2309679514748294e-09, "loss": 0.0229976, "memory(GiB)": 13.7, "step": 106340, "train_speed(iter/s)": 1.530306 }, { "acc": 0.98604164, "epoch": 49.84532458401687, "grad_norm": 3.305941343307495, "learning_rate": 1.2235775127944766e-09, "loss": 0.04561057, "memory(GiB)": 13.7, "step": 106345, "train_speed(iter/s)": 1.53031 }, { "acc": 0.98937492, "epoch": 49.847668150925706, "grad_norm": 0.5386454463005066, "learning_rate": 1.2163072402953626e-09, "loss": 0.01819093, "memory(GiB)": 13.7, "step": 106350, "train_speed(iter/s)": 1.530311 }, { "acc": 0.98583336, "epoch": 49.85001171783455, "grad_norm": 3.473740816116333, "learning_rate": 1.2091571341523303e-09, "loss": 0.03165138, "memory(GiB)": 13.7, "step": 106355, "train_speed(iter/s)": 1.530316 }, { "acc": 0.98812504, "epoch": 49.85235528474338, "grad_norm": 0.013652007095515728, "learning_rate": 1.202127194536892e-09, "loss": 0.02223344, "memory(GiB)": 13.7, "step": 106360, "train_speed(iter/s)": 1.530319 }, { "acc": 0.9755209, "epoch": 49.854698851652216, "grad_norm": 3.9349257946014404, "learning_rate": 1.195217421618895e-09, "loss": 0.05069721, "memory(GiB)": 13.7, "step": 106365, "train_speed(iter/s)": 1.530323 }, { "acc": 0.98017864, "epoch": 49.85704241856105, "grad_norm": 0.0009376846137456596, "learning_rate": 1.1884278155637458e-09, "loss": 0.05405651, "memory(GiB)": 13.7, "step": 106370, "train_speed(iter/s)": 1.530324 }, { "acc": 0.98104172, "epoch": 49.859385985469885, "grad_norm": 2.845344305038452, "learning_rate": 1.1817583765346307e-09, "loss": 0.04771601, "memory(GiB)": 13.7, "step": 106375, "train_speed(iter/s)": 1.530326 }, { "acc": 0.99333334, "epoch": 49.86172955237872, "grad_norm": 0.001846939674578607, "learning_rate": 1.1752091046919612e-09, "loss": 0.01189804, "memory(GiB)": 13.7, "step": 106380, "train_speed(iter/s)": 1.530328 }, { "acc": 0.98611116, "epoch": 49.86407311928755, "grad_norm": 3.6923861503601074, "learning_rate": 1.168780000193373e-09, "loss": 0.02542872, "memory(GiB)": 13.7, "step": 106385, "train_speed(iter/s)": 1.530329 }, { "acc": 0.98312492, "epoch": 49.86641668619639, "grad_norm": 2.6746513843536377, "learning_rate": 1.162471063193172e-09, "loss": 0.0319131, "memory(GiB)": 13.7, "step": 106390, "train_speed(iter/s)": 1.530332 }, { "acc": 0.98819447, "epoch": 49.86876025310523, "grad_norm": 3.4025626182556152, "learning_rate": 1.156282293842888e-09, "loss": 0.03070506, "memory(GiB)": 13.7, "step": 106395, "train_speed(iter/s)": 1.530334 }, { "acc": 0.98500004, "epoch": 49.87110382001406, "grad_norm": 6.166023254394531, "learning_rate": 1.1502136922918313e-09, "loss": 0.03276308, "memory(GiB)": 13.7, "step": 106400, "train_speed(iter/s)": 1.530333 }, { "acc": 0.98696423, "epoch": 49.8734473869229, "grad_norm": 3.4204461574554443, "learning_rate": 1.1442652586854266e-09, "loss": 0.0337153, "memory(GiB)": 13.7, "step": 106405, "train_speed(iter/s)": 1.530337 }, { "acc": 0.97937498, "epoch": 49.87579095383173, "grad_norm": 5.18102502822876, "learning_rate": 1.1384369931663232e-09, "loss": 0.06301689, "memory(GiB)": 13.7, "step": 106410, "train_speed(iter/s)": 1.530338 }, { "acc": 0.99750004, "epoch": 49.878134520740566, "grad_norm": 0.060255326330661774, "learning_rate": 1.1327288958755056e-09, "loss": 0.01853665, "memory(GiB)": 13.7, "step": 106415, "train_speed(iter/s)": 1.530339 }, { "acc": 1.0, "epoch": 49.8804780876494, "grad_norm": 3.835653305053711, "learning_rate": 1.1271409669495172e-09, "loss": 0.04648314, "memory(GiB)": 13.7, "step": 106420, "train_speed(iter/s)": 1.530342 }, { "acc": 0.984375, "epoch": 49.882821654558235, "grad_norm": 3.0591745376586914, "learning_rate": 1.1216732065226817e-09, "loss": 0.02953532, "memory(GiB)": 13.7, "step": 106425, "train_speed(iter/s)": 1.530341 }, { "acc": 0.9885417, "epoch": 49.885165221467076, "grad_norm": 3.692749261856079, "learning_rate": 1.1163256147271026e-09, "loss": 0.02757495, "memory(GiB)": 13.7, "step": 106430, "train_speed(iter/s)": 1.53034 }, { "acc": 0.97551479, "epoch": 49.88750878837591, "grad_norm": 3.9641494750976562, "learning_rate": 1.1110981916904425e-09, "loss": 0.04692543, "memory(GiB)": 13.7, "step": 106435, "train_speed(iter/s)": 1.530344 }, { "acc": 0.98666668, "epoch": 49.889852355284745, "grad_norm": 2.4267706871032715, "learning_rate": 1.1059909375386993e-09, "loss": 0.03470888, "memory(GiB)": 13.7, "step": 106440, "train_speed(iter/s)": 1.530348 }, { "acc": 0.98395834, "epoch": 49.89219592219358, "grad_norm": 2.5729963779449463, "learning_rate": 1.1010038523945402e-09, "loss": 0.03640411, "memory(GiB)": 13.7, "step": 106445, "train_speed(iter/s)": 1.530352 }, { "acc": 0.98500004, "epoch": 49.894539489102414, "grad_norm": 2.5243043899536133, "learning_rate": 1.0961369363778575e-09, "loss": 0.02713788, "memory(GiB)": 13.7, "step": 106450, "train_speed(iter/s)": 1.53035 }, { "acc": 0.98363094, "epoch": 49.89688305601125, "grad_norm": 5.68057107925415, "learning_rate": 1.091390189606323e-09, "loss": 0.03846409, "memory(GiB)": 13.7, "step": 106455, "train_speed(iter/s)": 1.530353 }, { "acc": 0.99258928, "epoch": 49.89922662292008, "grad_norm": 0.0003075890999753028, "learning_rate": 1.086763612192613e-09, "loss": 0.00695692, "memory(GiB)": 13.7, "step": 106460, "train_speed(iter/s)": 1.530357 }, { "acc": 0.97726288, "epoch": 49.90157018982892, "grad_norm": 2.4661705493927, "learning_rate": 1.082257204249404e-09, "loss": 0.05320812, "memory(GiB)": 13.7, "step": 106465, "train_speed(iter/s)": 1.530353 }, { "acc": 0.97403851, "epoch": 49.90391375673776, "grad_norm": 3.8840081691741943, "learning_rate": 1.0778709658843768e-09, "loss": 0.04218815, "memory(GiB)": 13.7, "step": 106470, "train_speed(iter/s)": 1.530353 }, { "acc": 0.98923607, "epoch": 49.90625732364659, "grad_norm": 3.277561664581299, "learning_rate": 1.0736048972024367e-09, "loss": 0.02455271, "memory(GiB)": 13.7, "step": 106475, "train_speed(iter/s)": 1.530356 }, { "acc": 0.98500004, "epoch": 49.90860089055543, "grad_norm": 3.2229583263397217, "learning_rate": 1.0694589983068246e-09, "loss": 0.03466161, "memory(GiB)": 13.7, "step": 106480, "train_speed(iter/s)": 1.530354 }, { "acc": 0.98859625, "epoch": 49.91094445746426, "grad_norm": 4.698862552642822, "learning_rate": 1.0654332692974505e-09, "loss": 0.0253016, "memory(GiB)": 13.7, "step": 106485, "train_speed(iter/s)": 1.530357 }, { "acc": 0.96886358, "epoch": 49.913288024373095, "grad_norm": 3.3183112144470215, "learning_rate": 1.0615277102703386e-09, "loss": 0.07995919, "memory(GiB)": 13.7, "step": 106490, "train_speed(iter/s)": 1.53036 }, { "acc": 0.99571428, "epoch": 49.91563159128193, "grad_norm": 4.00620698928833, "learning_rate": 1.057742321319849e-09, "loss": 0.03312875, "memory(GiB)": 13.7, "step": 106495, "train_speed(iter/s)": 1.53036 }, { "acc": 0.98673611, "epoch": 49.917975158190764, "grad_norm": 2.780751943588257, "learning_rate": 1.0540771025364557e-09, "loss": 0.03449116, "memory(GiB)": 13.7, "step": 106500, "train_speed(iter/s)": 1.530363 }, { "acc": 0.99255209, "epoch": 49.9203187250996, "grad_norm": 1.562835931777954, "learning_rate": 1.0505320540084123e-09, "loss": 0.0255367, "memory(GiB)": 13.7, "step": 106505, "train_speed(iter/s)": 1.530364 }, { "acc": 0.98883934, "epoch": 49.92266229200844, "grad_norm": 3.2056798934936523, "learning_rate": 1.047107175821753e-09, "loss": 0.0301393, "memory(GiB)": 13.7, "step": 106510, "train_speed(iter/s)": 1.530363 }, { "acc": 1.0, "epoch": 49.925005858917274, "grad_norm": 2.342634916305542, "learning_rate": 1.0438024680580704e-09, "loss": 0.00837331, "memory(GiB)": 13.7, "step": 106515, "train_speed(iter/s)": 1.530366 }, { "acc": 0.97633934, "epoch": 49.92734942582611, "grad_norm": 3.446415424346924, "learning_rate": 1.0406179307961826e-09, "loss": 0.06445888, "memory(GiB)": 13.7, "step": 106520, "train_speed(iter/s)": 1.530365 }, { "acc": 0.97979164, "epoch": 49.92969299273494, "grad_norm": 0.9398536682128906, "learning_rate": 1.0375535641143529e-09, "loss": 0.04447002, "memory(GiB)": 13.7, "step": 106525, "train_speed(iter/s)": 1.530365 }, { "acc": 0.97354164, "epoch": 49.93203655964378, "grad_norm": 5.373019695281982, "learning_rate": 1.0346093680841834e-09, "loss": 0.04386883, "memory(GiB)": 13.7, "step": 106530, "train_speed(iter/s)": 1.530369 }, { "acc": 0.98718748, "epoch": 49.93438012655261, "grad_norm": 4.385641098022461, "learning_rate": 1.0317853427783863e-09, "loss": 0.03274886, "memory(GiB)": 13.7, "step": 106535, "train_speed(iter/s)": 1.530372 }, { "acc": 0.98708334, "epoch": 49.936723693461445, "grad_norm": 4.195624828338623, "learning_rate": 1.0290814882635681e-09, "loss": 0.03449506, "memory(GiB)": 13.7, "step": 106540, "train_speed(iter/s)": 1.530369 }, { "acc": 0.98343754, "epoch": 49.93906726037029, "grad_norm": 3.866685390472412, "learning_rate": 1.026497804605226e-09, "loss": 0.05074332, "memory(GiB)": 13.7, "step": 106545, "train_speed(iter/s)": 1.53037 }, { "acc": 0.98291664, "epoch": 49.94141082727912, "grad_norm": 4.383640766143799, "learning_rate": 1.0240342918649706e-09, "loss": 0.04752546, "memory(GiB)": 13.7, "step": 106550, "train_speed(iter/s)": 1.530368 }, { "acc": 0.9919445, "epoch": 49.943754394187955, "grad_norm": 2.6219303607940674, "learning_rate": 1.0216909501027484e-09, "loss": 0.01552663, "memory(GiB)": 13.7, "step": 106555, "train_speed(iter/s)": 1.530367 }, { "acc": 0.96925592, "epoch": 49.94609796109679, "grad_norm": 4.989633560180664, "learning_rate": 1.0194677793740646e-09, "loss": 0.05150888, "memory(GiB)": 13.7, "step": 106560, "train_speed(iter/s)": 1.530365 }, { "acc": 0.99541664, "epoch": 49.948441528005624, "grad_norm": 2.132355213165283, "learning_rate": 1.0173647797333152e-09, "loss": 0.03001765, "memory(GiB)": 13.7, "step": 106565, "train_speed(iter/s)": 1.530363 }, { "acc": 0.98488102, "epoch": 49.95078509491446, "grad_norm": 2.6874594688415527, "learning_rate": 1.0153819512304548e-09, "loss": 0.02330735, "memory(GiB)": 13.7, "step": 106570, "train_speed(iter/s)": 1.530367 }, { "acc": 0.9864584, "epoch": 49.95312866182329, "grad_norm": 2.432219982147217, "learning_rate": 1.0135192939132182e-09, "loss": 0.04459006, "memory(GiB)": 13.7, "step": 106575, "train_speed(iter/s)": 1.530367 }, { "acc": 0.9947917, "epoch": 49.95547222873213, "grad_norm": 0.13517192006111145, "learning_rate": 1.0117768078260102e-09, "loss": 0.02694434, "memory(GiB)": 13.7, "step": 106580, "train_speed(iter/s)": 1.530367 }, { "acc": 0.98979168, "epoch": 49.95781579564097, "grad_norm": 0.000621846760623157, "learning_rate": 1.0101544930115698e-09, "loss": 0.03188076, "memory(GiB)": 13.7, "step": 106585, "train_speed(iter/s)": 1.53037 }, { "acc": 0.99187498, "epoch": 49.9601593625498, "grad_norm": 2.1511449813842773, "learning_rate": 1.0086523495081964e-09, "loss": 0.03065889, "memory(GiB)": 13.7, "step": 106590, "train_speed(iter/s)": 1.530371 }, { "acc": 0.9875, "epoch": 49.96250292945864, "grad_norm": 4.162737846374512, "learning_rate": 1.007270377351968e-09, "loss": 0.0229867, "memory(GiB)": 13.7, "step": 106595, "train_speed(iter/s)": 1.530376 }, { "acc": 0.97833328, "epoch": 49.96484649636747, "grad_norm": 2.989804983139038, "learning_rate": 1.0060085765767432e-09, "loss": 0.03178686, "memory(GiB)": 13.7, "step": 106600, "train_speed(iter/s)": 1.530377 }, { "acc": 0.98687506, "epoch": 49.967190063276306, "grad_norm": 3.0699188709259033, "learning_rate": 1.0048669472124953e-09, "loss": 0.06473111, "memory(GiB)": 13.7, "step": 106605, "train_speed(iter/s)": 1.53038 }, { "acc": 0.98687496, "epoch": 49.96953363018514, "grad_norm": 2.833855152130127, "learning_rate": 1.0038454892864217e-09, "loss": 0.02887144, "memory(GiB)": 13.7, "step": 106610, "train_speed(iter/s)": 1.530379 }, { "acc": 0.98416672, "epoch": 49.971877197093974, "grad_norm": 4.889809608459473, "learning_rate": 1.0029442028235001e-09, "loss": 0.03127842, "memory(GiB)": 13.7, "step": 106615, "train_speed(iter/s)": 1.530377 }, { "acc": 0.99333067, "epoch": 49.974220764002816, "grad_norm": 2.154264211654663, "learning_rate": 1.0021630878448226e-09, "loss": 0.01722035, "memory(GiB)": 13.7, "step": 106620, "train_speed(iter/s)": 1.530383 }, { "acc": 0.98071423, "epoch": 49.97656433091165, "grad_norm": 6.076335906982422, "learning_rate": 1.0015021443698162e-09, "loss": 0.06749645, "memory(GiB)": 13.7, "step": 106625, "train_speed(iter/s)": 1.530384 }, { "acc": 0.9926754, "epoch": 49.978907897820484, "grad_norm": 0.006512347608804703, "learning_rate": 1.0009613724140223e-09, "loss": 0.02353591, "memory(GiB)": 13.7, "step": 106630, "train_speed(iter/s)": 1.530391 }, { "acc": 0.98674145, "epoch": 49.98125146472932, "grad_norm": 2.867293357849121, "learning_rate": 1.0005407719907624e-09, "loss": 0.04650867, "memory(GiB)": 13.7, "step": 106635, "train_speed(iter/s)": 1.530394 }, { "acc": 0.99196434, "epoch": 49.98359503163815, "grad_norm": 2.457434892654419, "learning_rate": 1.0002403431094725e-09, "loss": 0.02562642, "memory(GiB)": 13.7, "step": 106640, "train_speed(iter/s)": 1.530398 }, { "acc": 0.98270292, "epoch": 49.98593859854699, "grad_norm": 2.286989688873291, "learning_rate": 1.0000600857779233e-09, "loss": 0.03623725, "memory(GiB)": 13.7, "step": 106645, "train_speed(iter/s)": 1.530403 }, { "acc": 0.99125004, "epoch": 49.98828216545582, "grad_norm": 4.097896099090576, "learning_rate": 1e-09, "loss": 0.03759271, "memory(GiB)": 13.7, "step": 106650, "train_speed(iter/s)": 1.530407 }, { "epoch": 49.98828216545582, "eval_acc": 0.779119174478991, "eval_loss": 1.2676383256912231, "eval_runtime": 143.4507, "eval_samples_per_second": 56.242, "eval_steps_per_second": 7.034, "step": 106650 } ], "logging_steps": 5, "max_steps": 106650, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0541409111533158e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }