|
{ |
|
"best_metric": 6.35734701, |
|
"best_model_checkpoint": "/home/yisiyang/outputland/qwen2-vl-2b-instruct/v7-20241111-031420/checkpoint-2000", |
|
"epoch": 6.315789473684211, |
|
"eval_steps": 2000, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"acc": 0.29797983, |
|
"epoch": 0.003157894736842105, |
|
"grad_norm": 189.54470825195312, |
|
"learning_rate": 0.0, |
|
"loss": 6.8490591, |
|
"memory(GiB)": 14.42, |
|
"step": 1, |
|
"train_speed(iter/s)": 0.079303 |
|
}, |
|
{ |
|
"acc": 0.30050507, |
|
"epoch": 0.015789473684210527, |
|
"grad_norm": 130.0618438720703, |
|
"learning_rate": 2.4122151757793534e-06, |
|
"loss": 5.23786497, |
|
"memory(GiB)": 14.42, |
|
"step": 5, |
|
"train_speed(iter/s)": 0.306791 |
|
}, |
|
{ |
|
"acc": 0.35050507, |
|
"epoch": 0.031578947368421054, |
|
"grad_norm": 32.736263275146484, |
|
"learning_rate": 3.4510997050164104e-06, |
|
"loss": 3.81935425, |
|
"memory(GiB)": 14.42, |
|
"step": 10, |
|
"train_speed(iter/s)": 0.475545 |
|
}, |
|
{ |
|
"acc": 0.37878788, |
|
"epoch": 0.04736842105263158, |
|
"grad_norm": 41.80891036987305, |
|
"learning_rate": 4.05880819719944e-06, |
|
"loss": 3.30803146, |
|
"memory(GiB)": 14.42, |
|
"step": 15, |
|
"train_speed(iter/s)": 0.579356 |
|
}, |
|
{ |
|
"acc": 0.44040408, |
|
"epoch": 0.06315789473684211, |
|
"grad_norm": 27.359540939331055, |
|
"learning_rate": 4.4899842342534665e-06, |
|
"loss": 3.16991081, |
|
"memory(GiB)": 14.42, |
|
"step": 20, |
|
"train_speed(iter/s)": 0.650113 |
|
}, |
|
{ |
|
"acc": 0.52828288, |
|
"epoch": 0.07894736842105263, |
|
"grad_norm": 26.735942840576172, |
|
"learning_rate": 4.824430351558707e-06, |
|
"loss": 3.03735027, |
|
"memory(GiB)": 14.42, |
|
"step": 25, |
|
"train_speed(iter/s)": 0.703029 |
|
}, |
|
{ |
|
"acc": 0.5636364, |
|
"epoch": 0.09473684210526316, |
|
"grad_norm": 32.92796325683594, |
|
"learning_rate": 5.097692726436498e-06, |
|
"loss": 2.92974243, |
|
"memory(GiB)": 14.42, |
|
"step": 30, |
|
"train_speed(iter/s)": 0.738904 |
|
}, |
|
{ |
|
"acc": 0.66464653, |
|
"epoch": 0.11052631578947368, |
|
"grad_norm": 25.728958129882812, |
|
"learning_rate": 5.328732772382501e-06, |
|
"loss": 2.82154236, |
|
"memory(GiB)": 14.42, |
|
"step": 35, |
|
"train_speed(iter/s)": 0.769629 |
|
}, |
|
{ |
|
"acc": 0.64444456, |
|
"epoch": 0.12631578947368421, |
|
"grad_norm": 23.986242294311523, |
|
"learning_rate": 5.5288687634905236e-06, |
|
"loss": 2.76429024, |
|
"memory(GiB)": 14.42, |
|
"step": 40, |
|
"train_speed(iter/s)": 0.789225 |
|
}, |
|
{ |
|
"acc": 0.68080816, |
|
"epoch": 0.14210526315789473, |
|
"grad_norm": 24.05732536315918, |
|
"learning_rate": 5.705401218619526e-06, |
|
"loss": 2.64311733, |
|
"memory(GiB)": 14.42, |
|
"step": 45, |
|
"train_speed(iter/s)": 0.806424 |
|
}, |
|
{ |
|
"acc": 0.6424243, |
|
"epoch": 0.15789473684210525, |
|
"grad_norm": 27.014245986938477, |
|
"learning_rate": 5.863314880795763e-06, |
|
"loss": 2.62187328, |
|
"memory(GiB)": 14.42, |
|
"step": 50, |
|
"train_speed(iter/s)": 0.821589 |
|
}, |
|
{ |
|
"acc": 0.65858588, |
|
"epoch": 0.1736842105263158, |
|
"grad_norm": 22.408689498901367, |
|
"learning_rate": 6.006165164335152e-06, |
|
"loss": 2.51736145, |
|
"memory(GiB)": 14.42, |
|
"step": 55, |
|
"train_speed(iter/s)": 0.838318 |
|
}, |
|
{ |
|
"acc": 0.64848495, |
|
"epoch": 0.18947368421052632, |
|
"grad_norm": 24.095256805419922, |
|
"learning_rate": 6.136577255673552e-06, |
|
"loss": 2.47515697, |
|
"memory(GiB)": 14.42, |
|
"step": 60, |
|
"train_speed(iter/s)": 0.853904 |
|
}, |
|
{ |
|
"acc": 0.66666675, |
|
"epoch": 0.20526315789473684, |
|
"grad_norm": 22.594839096069336, |
|
"learning_rate": 6.256544750330469e-06, |
|
"loss": 2.38148956, |
|
"memory(GiB)": 14.42, |
|
"step": 65, |
|
"train_speed(iter/s)": 0.865441 |
|
}, |
|
{ |
|
"acc": 0.6818182, |
|
"epoch": 0.22105263157894736, |
|
"grad_norm": 22.433921813964844, |
|
"learning_rate": 6.367617301619559e-06, |
|
"loss": 2.33378811, |
|
"memory(GiB)": 14.42, |
|
"step": 70, |
|
"train_speed(iter/s)": 0.875815 |
|
}, |
|
{ |
|
"acc": 0.67373748, |
|
"epoch": 0.23684210526315788, |
|
"grad_norm": 24.16921043395996, |
|
"learning_rate": 6.471023372978793e-06, |
|
"loss": 2.24571877, |
|
"memory(GiB)": 14.42, |
|
"step": 75, |
|
"train_speed(iter/s)": 0.884661 |
|
}, |
|
{ |
|
"acc": 0.66060615, |
|
"epoch": 0.25263157894736843, |
|
"grad_norm": 22.719818115234375, |
|
"learning_rate": 6.56775329272758e-06, |
|
"loss": 2.19932251, |
|
"memory(GiB)": 14.42, |
|
"step": 80, |
|
"train_speed(iter/s)": 0.894036 |
|
}, |
|
{ |
|
"acc": 0.67171721, |
|
"epoch": 0.26842105263157895, |
|
"grad_norm": 22.295177459716797, |
|
"learning_rate": 6.6586170853856745e-06, |
|
"loss": 2.10522652, |
|
"memory(GiB)": 14.42, |
|
"step": 85, |
|
"train_speed(iter/s)": 0.90264 |
|
}, |
|
{ |
|
"acc": 0.65252533, |
|
"epoch": 0.28421052631578947, |
|
"grad_norm": 25.903900146484375, |
|
"learning_rate": 6.744285747856582e-06, |
|
"loss": 2.0310627, |
|
"memory(GiB)": 14.42, |
|
"step": 90, |
|
"train_speed(iter/s)": 0.910435 |
|
}, |
|
{ |
|
"acc": 0.66565661, |
|
"epoch": 0.3, |
|
"grad_norm": 22.72349739074707, |
|
"learning_rate": 6.825321350816333e-06, |
|
"loss": 1.95369396, |
|
"memory(GiB)": 14.42, |
|
"step": 95, |
|
"train_speed(iter/s)": 0.916948 |
|
}, |
|
{ |
|
"acc": 0.68080816, |
|
"epoch": 0.3157894736842105, |
|
"grad_norm": 22.040664672851562, |
|
"learning_rate": 6.902199410032821e-06, |
|
"loss": 1.89046402, |
|
"memory(GiB)": 14.42, |
|
"step": 100, |
|
"train_speed(iter/s)": 0.923641 |
|
}, |
|
{ |
|
"acc": 0.66464653, |
|
"epoch": 0.33157894736842103, |
|
"grad_norm": 21.79152488708496, |
|
"learning_rate": 6.975325793802587e-06, |
|
"loss": 1.84504929, |
|
"memory(GiB)": 14.42, |
|
"step": 105, |
|
"train_speed(iter/s)": 0.92652 |
|
}, |
|
{ |
|
"acc": 0.67373743, |
|
"epoch": 0.3473684210526316, |
|
"grad_norm": 22.12148666381836, |
|
"learning_rate": 7.045049693572208e-06, |
|
"loss": 1.73611717, |
|
"memory(GiB)": 14.42, |
|
"step": 110, |
|
"train_speed(iter/s)": 0.931682 |
|
}, |
|
{ |
|
"acc": 0.69191928, |
|
"epoch": 0.3631578947368421, |
|
"grad_norm": 22.5858154296875, |
|
"learning_rate": 7.111673708972303e-06, |
|
"loss": 1.65465469, |
|
"memory(GiB)": 14.42, |
|
"step": 115, |
|
"train_speed(iter/s)": 0.937037 |
|
}, |
|
{ |
|
"acc": 0.6878788, |
|
"epoch": 0.37894736842105264, |
|
"grad_norm": 22.869487762451172, |
|
"learning_rate": 7.17546178491061e-06, |
|
"loss": 1.56676788, |
|
"memory(GiB)": 14.42, |
|
"step": 120, |
|
"train_speed(iter/s)": 0.942049 |
|
}, |
|
{ |
|
"acc": 0.68383846, |
|
"epoch": 0.39473684210526316, |
|
"grad_norm": 22.559728622436523, |
|
"learning_rate": 7.236645527338061e-06, |
|
"loss": 1.48099957, |
|
"memory(GiB)": 14.42, |
|
"step": 125, |
|
"train_speed(iter/s)": 0.946078 |
|
}, |
|
{ |
|
"acc": 0.68787889, |
|
"epoch": 0.4105263157894737, |
|
"grad_norm": 21.670761108398438, |
|
"learning_rate": 7.295429279567525e-06, |
|
"loss": 1.41184158, |
|
"memory(GiB)": 14.42, |
|
"step": 130, |
|
"train_speed(iter/s)": 0.948975 |
|
}, |
|
{ |
|
"acc": 0.6909091, |
|
"epoch": 0.4263157894736842, |
|
"grad_norm": 21.114803314208984, |
|
"learning_rate": 7.351994240039613e-06, |
|
"loss": 1.33319778, |
|
"memory(GiB)": 14.42, |
|
"step": 135, |
|
"train_speed(iter/s)": 0.95101 |
|
}, |
|
{ |
|
"acc": 0.6939395, |
|
"epoch": 0.4421052631578947, |
|
"grad_norm": 20.817777633666992, |
|
"learning_rate": 7.406501830856614e-06, |
|
"loss": 1.28563147, |
|
"memory(GiB)": 14.42, |
|
"step": 140, |
|
"train_speed(iter/s)": 0.951565 |
|
}, |
|
{ |
|
"acc": 0.68585868, |
|
"epoch": 0.45789473684210524, |
|
"grad_norm": 19.757150650024414, |
|
"learning_rate": 7.459096474945029e-06, |
|
"loss": 1.21721191, |
|
"memory(GiB)": 14.42, |
|
"step": 145, |
|
"train_speed(iter/s)": 0.952007 |
|
}, |
|
{ |
|
"acc": 0.69090915, |
|
"epoch": 0.47368421052631576, |
|
"grad_norm": 18.436542510986328, |
|
"learning_rate": 7.509907902215849e-06, |
|
"loss": 1.12313366, |
|
"memory(GiB)": 14.42, |
|
"step": 150, |
|
"train_speed(iter/s)": 0.952888 |
|
}, |
|
{ |
|
"acc": 0.68686876, |
|
"epoch": 0.48947368421052634, |
|
"grad_norm": 18.385173797607422, |
|
"learning_rate": 7.559053077443585e-06, |
|
"loss": 1.04606466, |
|
"memory(GiB)": 14.42, |
|
"step": 155, |
|
"train_speed(iter/s)": 0.95311 |
|
}, |
|
{ |
|
"acc": 0.68080816, |
|
"epoch": 0.5052631578947369, |
|
"grad_norm": 16.366899490356445, |
|
"learning_rate": 7.606637821964637e-06, |
|
"loss": 1.02951183, |
|
"memory(GiB)": 14.42, |
|
"step": 160, |
|
"train_speed(iter/s)": 0.953518 |
|
}, |
|
{ |
|
"acc": 0.68383846, |
|
"epoch": 0.5210526315789473, |
|
"grad_norm": 13.738570213317871, |
|
"learning_rate": 7.652758185755238e-06, |
|
"loss": 0.98384418, |
|
"memory(GiB)": 14.42, |
|
"step": 165, |
|
"train_speed(iter/s)": 0.954063 |
|
}, |
|
{ |
|
"acc": 0.66464653, |
|
"epoch": 0.5368421052631579, |
|
"grad_norm": 11.252880096435547, |
|
"learning_rate": 7.697501614622732e-06, |
|
"loss": 0.95045271, |
|
"memory(GiB)": 14.42, |
|
"step": 170, |
|
"train_speed(iter/s)": 0.954931 |
|
}, |
|
{ |
|
"acc": 0.68484855, |
|
"epoch": 0.5526315789473685, |
|
"grad_norm": 10.676709175109863, |
|
"learning_rate": 7.740947948161855e-06, |
|
"loss": 0.88134241, |
|
"memory(GiB)": 14.42, |
|
"step": 175, |
|
"train_speed(iter/s)": 0.956063 |
|
}, |
|
{ |
|
"acc": 0.68080816, |
|
"epoch": 0.5684210526315789, |
|
"grad_norm": 9.484841346740723, |
|
"learning_rate": 7.78317027709364e-06, |
|
"loss": 0.87568655, |
|
"memory(GiB)": 14.42, |
|
"step": 180, |
|
"train_speed(iter/s)": 0.957191 |
|
}, |
|
{ |
|
"acc": 0.68888893, |
|
"epoch": 0.5842105263157895, |
|
"grad_norm": 10.45778751373291, |
|
"learning_rate": 7.824235683113184e-06, |
|
"loss": 0.84243193, |
|
"memory(GiB)": 14.42, |
|
"step": 185, |
|
"train_speed(iter/s)": 0.956968 |
|
}, |
|
{ |
|
"acc": 0.68686876, |
|
"epoch": 0.6, |
|
"grad_norm": 8.84788990020752, |
|
"learning_rate": 7.86420588005339e-06, |
|
"loss": 0.82236614, |
|
"memory(GiB)": 14.42, |
|
"step": 190, |
|
"train_speed(iter/s)": 0.956966 |
|
}, |
|
{ |
|
"acc": 0.68686876, |
|
"epoch": 0.6157894736842106, |
|
"grad_norm": 5.3862409591674805, |
|
"learning_rate": 7.903137771750555e-06, |
|
"loss": 0.79296165, |
|
"memory(GiB)": 14.42, |
|
"step": 195, |
|
"train_speed(iter/s)": 0.956498 |
|
}, |
|
{ |
|
"acc": 0.68989902, |
|
"epoch": 0.631578947368421, |
|
"grad_norm": 7.4440436363220215, |
|
"learning_rate": 7.941083939269876e-06, |
|
"loss": 0.78682814, |
|
"memory(GiB)": 14.42, |
|
"step": 200, |
|
"train_speed(iter/s)": 0.956087 |
|
}, |
|
{ |
|
"acc": 0.70202026, |
|
"epoch": 0.6473684210526316, |
|
"grad_norm": 6.23380708694458, |
|
"learning_rate": 7.97809306796006e-06, |
|
"loss": 0.75604529, |
|
"memory(GiB)": 14.42, |
|
"step": 205, |
|
"train_speed(iter/s)": 0.958917 |
|
}, |
|
{ |
|
"acc": 0.70909095, |
|
"epoch": 0.6631578947368421, |
|
"grad_norm": 7.885345935821533, |
|
"learning_rate": 8.014210323039644e-06, |
|
"loss": 0.77311206, |
|
"memory(GiB)": 14.42, |
|
"step": 210, |
|
"train_speed(iter/s)": 0.961234 |
|
}, |
|
{ |
|
"acc": 0.70202026, |
|
"epoch": 0.6789473684210526, |
|
"grad_norm": 5.368066310882568, |
|
"learning_rate": 8.049477680983675e-06, |
|
"loss": 0.77599392, |
|
"memory(GiB)": 14.42, |
|
"step": 215, |
|
"train_speed(iter/s)": 0.963174 |
|
}, |
|
{ |
|
"acc": 0.69191928, |
|
"epoch": 0.6947368421052632, |
|
"grad_norm": 5.842986106872559, |
|
"learning_rate": 8.083934222809266e-06, |
|
"loss": 0.78730788, |
|
"memory(GiB)": 14.42, |
|
"step": 220, |
|
"train_speed(iter/s)": 0.964927 |
|
}, |
|
{ |
|
"acc": 0.7000001, |
|
"epoch": 0.7105263157894737, |
|
"grad_norm": 4.532631874084473, |
|
"learning_rate": 8.11761639439888e-06, |
|
"loss": 0.76723304, |
|
"memory(GiB)": 14.42, |
|
"step": 225, |
|
"train_speed(iter/s)": 0.966333 |
|
}, |
|
{ |
|
"acc": 0.69696975, |
|
"epoch": 0.7263157894736842, |
|
"grad_norm": 6.793882846832275, |
|
"learning_rate": 8.15055823820936e-06, |
|
"loss": 0.77692299, |
|
"memory(GiB)": 14.42, |
|
"step": 230, |
|
"train_speed(iter/s)": 0.967921 |
|
}, |
|
{ |
|
"acc": 0.68282833, |
|
"epoch": 0.7421052631578947, |
|
"grad_norm": 5.946906089782715, |
|
"learning_rate": 8.182791600059879e-06, |
|
"loss": 0.78708334, |
|
"memory(GiB)": 14.42, |
|
"step": 235, |
|
"train_speed(iter/s)": 0.968657 |
|
}, |
|
{ |
|
"acc": 0.69494953, |
|
"epoch": 0.7578947368421053, |
|
"grad_norm": 5.495345115661621, |
|
"learning_rate": 8.214346314147667e-06, |
|
"loss": 0.77794123, |
|
"memory(GiB)": 14.42, |
|
"step": 240, |
|
"train_speed(iter/s)": 0.969446 |
|
}, |
|
{ |
|
"acc": 0.70202026, |
|
"epoch": 0.7736842105263158, |
|
"grad_norm": 4.244708061218262, |
|
"learning_rate": 8.24525036898565e-06, |
|
"loss": 0.74534717, |
|
"memory(GiB)": 14.42, |
|
"step": 245, |
|
"train_speed(iter/s)": 0.971096 |
|
}, |
|
{ |
|
"acc": 0.68080816, |
|
"epoch": 0.7894736842105263, |
|
"grad_norm": 7.3419013023376465, |
|
"learning_rate": 8.275530056575115e-06, |
|
"loss": 0.79043388, |
|
"memory(GiB)": 14.42, |
|
"step": 250, |
|
"train_speed(iter/s)": 0.971701 |
|
}, |
|
{ |
|
"acc": 0.6818182, |
|
"epoch": 0.8052631578947368, |
|
"grad_norm": 5.537174701690674, |
|
"learning_rate": 8.30521010680576e-06, |
|
"loss": 0.76065493, |
|
"memory(GiB)": 14.42, |
|
"step": 255, |
|
"train_speed(iter/s)": 0.972913 |
|
}, |
|
{ |
|
"acc": 0.69595966, |
|
"epoch": 0.8210526315789474, |
|
"grad_norm": 6.621566295623779, |
|
"learning_rate": 8.334313808804581e-06, |
|
"loss": 0.73345542, |
|
"memory(GiB)": 14.42, |
|
"step": 260, |
|
"train_speed(iter/s)": 0.973338 |
|
}, |
|
{ |
|
"acc": 0.70101013, |
|
"epoch": 0.8368421052631579, |
|
"grad_norm": 5.5801239013671875, |
|
"learning_rate": 8.36286312072555e-06, |
|
"loss": 0.76861334, |
|
"memory(GiB)": 14.42, |
|
"step": 265, |
|
"train_speed(iter/s)": 0.9735 |
|
}, |
|
{ |
|
"acc": 0.69797988, |
|
"epoch": 0.8526315789473684, |
|
"grad_norm": 4.083832263946533, |
|
"learning_rate": 8.39087876927667e-06, |
|
"loss": 0.76421027, |
|
"memory(GiB)": 14.42, |
|
"step": 270, |
|
"train_speed(iter/s)": 0.972914 |
|
}, |
|
{ |
|
"acc": 0.67979808, |
|
"epoch": 0.868421052631579, |
|
"grad_norm": 8.106366157531738, |
|
"learning_rate": 8.418380340114505e-06, |
|
"loss": 0.7734849, |
|
"memory(GiB)": 14.42, |
|
"step": 275, |
|
"train_speed(iter/s)": 0.972258 |
|
}, |
|
{ |
|
"acc": 0.71010108, |
|
"epoch": 0.8842105263157894, |
|
"grad_norm": 5.391201972961426, |
|
"learning_rate": 8.445386360093671e-06, |
|
"loss": 0.75838871, |
|
"memory(GiB)": 14.42, |
|
"step": 280, |
|
"train_speed(iter/s)": 0.973168 |
|
}, |
|
{ |
|
"acc": 0.69898996, |
|
"epoch": 0.9, |
|
"grad_norm": 4.560817241668701, |
|
"learning_rate": 8.47191437223642e-06, |
|
"loss": 0.72944508, |
|
"memory(GiB)": 14.42, |
|
"step": 285, |
|
"train_speed(iter/s)": 0.974072 |
|
}, |
|
{ |
|
"acc": 0.71616168, |
|
"epoch": 0.9157894736842105, |
|
"grad_norm": 4.5082550048828125, |
|
"learning_rate": 8.497981004182087e-06, |
|
"loss": 0.72101011, |
|
"memory(GiB)": 14.42, |
|
"step": 290, |
|
"train_speed(iter/s)": 0.974336 |
|
}, |
|
{ |
|
"acc": 0.71111121, |
|
"epoch": 0.9315789473684211, |
|
"grad_norm": 5.37807035446167, |
|
"learning_rate": 8.523602030785273e-06, |
|
"loss": 0.72204466, |
|
"memory(GiB)": 14.42, |
|
"step": 295, |
|
"train_speed(iter/s)": 0.974602 |
|
}, |
|
{ |
|
"acc": 0.69494953, |
|
"epoch": 0.9473684210526315, |
|
"grad_norm": 7.141894340515137, |
|
"learning_rate": 8.548792431452906e-06, |
|
"loss": 0.7805851, |
|
"memory(GiB)": 14.42, |
|
"step": 300, |
|
"train_speed(iter/s)": 0.974531 |
|
}, |
|
{ |
|
"acc": 0.69595962, |
|
"epoch": 0.9631578947368421, |
|
"grad_norm": 5.165417194366455, |
|
"learning_rate": 8.573566442742008e-06, |
|
"loss": 0.77751379, |
|
"memory(GiB)": 14.42, |
|
"step": 305, |
|
"train_speed(iter/s)": 0.973871 |
|
}, |
|
{ |
|
"acc": 0.69898996, |
|
"epoch": 0.9789473684210527, |
|
"grad_norm": 5.182235240936279, |
|
"learning_rate": 8.597937606680642e-06, |
|
"loss": 0.75329895, |
|
"memory(GiB)": 14.42, |
|
"step": 310, |
|
"train_speed(iter/s)": 0.973247 |
|
}, |
|
{ |
|
"acc": 0.71313133, |
|
"epoch": 0.9947368421052631, |
|
"grad_norm": 4.949851036071777, |
|
"learning_rate": 8.621918815222675e-06, |
|
"loss": 0.72683144, |
|
"memory(GiB)": 14.42, |
|
"step": 315, |
|
"train_speed(iter/s)": 0.97287 |
|
}, |
|
{ |
|
"acc": 0.71616168, |
|
"epoch": 1.0105263157894737, |
|
"grad_norm": 6.737766742706299, |
|
"learning_rate": 8.645522351201692e-06, |
|
"loss": 0.76480436, |
|
"memory(GiB)": 14.42, |
|
"step": 320, |
|
"train_speed(iter/s)": 0.970956 |
|
}, |
|
{ |
|
"acc": 0.70303035, |
|
"epoch": 1.0263157894736843, |
|
"grad_norm": 4.715226173400879, |
|
"learning_rate": 8.668759926109822e-06, |
|
"loss": 0.73685551, |
|
"memory(GiB)": 14.42, |
|
"step": 325, |
|
"train_speed(iter/s)": 0.97194 |
|
}, |
|
{ |
|
"acc": 0.69797988, |
|
"epoch": 1.0421052631578946, |
|
"grad_norm": 7.166709899902344, |
|
"learning_rate": 8.691642714992295e-06, |
|
"loss": 0.73682857, |
|
"memory(GiB)": 14.42, |
|
"step": 330, |
|
"train_speed(iter/s)": 0.972694 |
|
}, |
|
{ |
|
"acc": 0.70404043, |
|
"epoch": 1.0578947368421052, |
|
"grad_norm": 4.959497928619385, |
|
"learning_rate": 8.714181388718075e-06, |
|
"loss": 0.7466671, |
|
"memory(GiB)": 14.42, |
|
"step": 335, |
|
"train_speed(iter/s)": 0.972539 |
|
}, |
|
{ |
|
"acc": 0.71313133, |
|
"epoch": 1.0736842105263158, |
|
"grad_norm": 4.548097133636475, |
|
"learning_rate": 8.736386143859788e-06, |
|
"loss": 0.72707992, |
|
"memory(GiB)": 14.42, |
|
"step": 340, |
|
"train_speed(iter/s)": 0.972893 |
|
}, |
|
{ |
|
"acc": 0.69494963, |
|
"epoch": 1.0894736842105264, |
|
"grad_norm": 5.036413192749023, |
|
"learning_rate": 8.75826673039239e-06, |
|
"loss": 0.76058822, |
|
"memory(GiB)": 14.42, |
|
"step": 345, |
|
"train_speed(iter/s)": 0.973963 |
|
}, |
|
{ |
|
"acc": 0.69899001, |
|
"epoch": 1.1052631578947367, |
|
"grad_norm": 4.9548420906066895, |
|
"learning_rate": 8.77983247739891e-06, |
|
"loss": 0.75256853, |
|
"memory(GiB)": 14.42, |
|
"step": 350, |
|
"train_speed(iter/s)": 0.975074 |
|
}, |
|
{ |
|
"acc": 0.71010108, |
|
"epoch": 1.1210526315789473, |
|
"grad_norm": 5.18994140625, |
|
"learning_rate": 8.801092316952921e-06, |
|
"loss": 0.7360672, |
|
"memory(GiB)": 14.42, |
|
"step": 355, |
|
"train_speed(iter/s)": 0.976375 |
|
}, |
|
{ |
|
"acc": 0.71111116, |
|
"epoch": 1.1368421052631579, |
|
"grad_norm": 3.965578317642212, |
|
"learning_rate": 8.822054806330697e-06, |
|
"loss": 0.7136857, |
|
"memory(GiB)": 14.42, |
|
"step": 360, |
|
"train_speed(iter/s)": 0.977264 |
|
}, |
|
{ |
|
"acc": 0.7060606, |
|
"epoch": 1.1526315789473685, |
|
"grad_norm": 5.464134216308594, |
|
"learning_rate": 8.842728148691392e-06, |
|
"loss": 0.7406353, |
|
"memory(GiB)": 14.42, |
|
"step": 365, |
|
"train_speed(iter/s)": 0.978313 |
|
}, |
|
{ |
|
"acc": 0.71111121, |
|
"epoch": 1.168421052631579, |
|
"grad_norm": 4.730813980102539, |
|
"learning_rate": 8.863120212350241e-06, |
|
"loss": 0.71864109, |
|
"memory(GiB)": 14.42, |
|
"step": 370, |
|
"train_speed(iter/s)": 0.978981 |
|
}, |
|
{ |
|
"acc": 0.69898996, |
|
"epoch": 1.1842105263157894, |
|
"grad_norm": 6.221624851226807, |
|
"learning_rate": 8.883238548758146e-06, |
|
"loss": 0.73387871, |
|
"memory(GiB)": 14.42, |
|
"step": 375, |
|
"train_speed(iter/s)": 0.979682 |
|
}, |
|
{ |
|
"acc": 0.73232336, |
|
"epoch": 1.2, |
|
"grad_norm": 5.355544567108154, |
|
"learning_rate": 8.903090409290447e-06, |
|
"loss": 0.70250816, |
|
"memory(GiB)": 14.42, |
|
"step": 380, |
|
"train_speed(iter/s)": 0.980355 |
|
}, |
|
{ |
|
"acc": 0.68989906, |
|
"epoch": 1.2157894736842105, |
|
"grad_norm": 5.793821811676025, |
|
"learning_rate": 8.922682760938298e-06, |
|
"loss": 0.75479689, |
|
"memory(GiB)": 14.42, |
|
"step": 385, |
|
"train_speed(iter/s)": 0.981586 |
|
}, |
|
{ |
|
"acc": 0.69696975, |
|
"epoch": 1.231578947368421, |
|
"grad_norm": 4.8115434646606445, |
|
"learning_rate": 8.942022300987612e-06, |
|
"loss": 0.7389276, |
|
"memory(GiB)": 14.42, |
|
"step": 390, |
|
"train_speed(iter/s)": 0.982499 |
|
}, |
|
{ |
|
"acc": 0.7181819, |
|
"epoch": 1.2473684210526317, |
|
"grad_norm": 3.446106195449829, |
|
"learning_rate": 8.961115470762944e-06, |
|
"loss": 0.71739759, |
|
"memory(GiB)": 14.42, |
|
"step": 395, |
|
"train_speed(iter/s)": 0.983313 |
|
}, |
|
{ |
|
"acc": 0.7181819, |
|
"epoch": 1.263157894736842, |
|
"grad_norm": 5.517762660980225, |
|
"learning_rate": 8.979968468506933e-06, |
|
"loss": 0.71014919, |
|
"memory(GiB)": 14.42, |
|
"step": 400, |
|
"train_speed(iter/s)": 0.98393 |
|
}, |
|
{ |
|
"acc": 0.72121215, |
|
"epoch": 1.2789473684210526, |
|
"grad_norm": 4.866208076477051, |
|
"learning_rate": 8.998587261459699e-06, |
|
"loss": 0.70516768, |
|
"memory(GiB)": 14.42, |
|
"step": 405, |
|
"train_speed(iter/s)": 0.984487 |
|
}, |
|
{ |
|
"acc": 0.69595966, |
|
"epoch": 1.2947368421052632, |
|
"grad_norm": 4.031857967376709, |
|
"learning_rate": 9.016977597197119e-06, |
|
"loss": 0.7197782, |
|
"memory(GiB)": 14.42, |
|
"step": 410, |
|
"train_speed(iter/s)": 0.984938 |
|
}, |
|
{ |
|
"acc": 0.71616168, |
|
"epoch": 1.3105263157894738, |
|
"grad_norm": 5.485307216644287, |
|
"learning_rate": 9.035145014281876e-06, |
|
"loss": 0.69396477, |
|
"memory(GiB)": 14.42, |
|
"step": 415, |
|
"train_speed(iter/s)": 0.985351 |
|
}, |
|
{ |
|
"acc": 0.70303035, |
|
"epoch": 1.3263157894736843, |
|
"grad_norm": 4.575260162353516, |
|
"learning_rate": 9.053094852276701e-06, |
|
"loss": 0.71491661, |
|
"memory(GiB)": 14.42, |
|
"step": 420, |
|
"train_speed(iter/s)": 0.98555 |
|
}, |
|
{ |
|
"acc": 0.71717176, |
|
"epoch": 1.3421052631578947, |
|
"grad_norm": 4.316519260406494, |
|
"learning_rate": 9.070832261165027e-06, |
|
"loss": 0.71715617, |
|
"memory(GiB)": 14.42, |
|
"step": 425, |
|
"train_speed(iter/s)": 0.98596 |
|
}, |
|
{ |
|
"acc": 0.69898996, |
|
"epoch": 1.3578947368421053, |
|
"grad_norm": 4.875655651092529, |
|
"learning_rate": 9.088362210220732e-06, |
|
"loss": 0.73308363, |
|
"memory(GiB)": 14.42, |
|
"step": 430, |
|
"train_speed(iter/s)": 0.985125 |
|
}, |
|
{ |
|
"acc": 0.70505056, |
|
"epoch": 1.3736842105263158, |
|
"grad_norm": 5.2696852684021, |
|
"learning_rate": 9.105689496365117e-06, |
|
"loss": 0.74291925, |
|
"memory(GiB)": 14.42, |
|
"step": 435, |
|
"train_speed(iter/s)": 0.98451 |
|
}, |
|
{ |
|
"acc": 0.71313133, |
|
"epoch": 1.3894736842105262, |
|
"grad_norm": 5.138822078704834, |
|
"learning_rate": 9.122818752046321e-06, |
|
"loss": 0.72327461, |
|
"memory(GiB)": 14.42, |
|
"step": 440, |
|
"train_speed(iter/s)": 0.984452 |
|
}, |
|
{ |
|
"acc": 0.72222228, |
|
"epoch": 1.4052631578947368, |
|
"grad_norm": 3.904170036315918, |
|
"learning_rate": 9.13975445267355e-06, |
|
"loss": 0.7007791, |
|
"memory(GiB)": 14.42, |
|
"step": 445, |
|
"train_speed(iter/s)": 0.984092 |
|
}, |
|
{ |
|
"acc": 0.71414146, |
|
"epoch": 1.4210526315789473, |
|
"grad_norm": 5.243173599243164, |
|
"learning_rate": 9.156500923635937e-06, |
|
"loss": 0.69670939, |
|
"memory(GiB)": 14.42, |
|
"step": 450, |
|
"train_speed(iter/s)": 0.984038 |
|
}, |
|
{ |
|
"acc": 0.70808086, |
|
"epoch": 1.436842105263158, |
|
"grad_norm": 5.191768169403076, |
|
"learning_rate": 9.173062346933617e-06, |
|
"loss": 0.74531546, |
|
"memory(GiB)": 14.42, |
|
"step": 455, |
|
"train_speed(iter/s)": 0.983589 |
|
}, |
|
{ |
|
"acc": 0.69797988, |
|
"epoch": 1.4526315789473685, |
|
"grad_norm": 6.049670696258545, |
|
"learning_rate": 9.189442767446416e-06, |
|
"loss": 0.70544295, |
|
"memory(GiB)": 14.42, |
|
"step": 460, |
|
"train_speed(iter/s)": 0.983483 |
|
}, |
|
{ |
|
"acc": 0.72828288, |
|
"epoch": 1.4684210526315788, |
|
"grad_norm": 4.956687927246094, |
|
"learning_rate": 9.205646098863671e-06, |
|
"loss": 0.69190674, |
|
"memory(GiB)": 14.42, |
|
"step": 465, |
|
"train_speed(iter/s)": 0.983203 |
|
}, |
|
{ |
|
"acc": 0.71414146, |
|
"epoch": 1.4842105263157894, |
|
"grad_norm": 4.949287414550781, |
|
"learning_rate": 9.221676129296936e-06, |
|
"loss": 0.71432433, |
|
"memory(GiB)": 14.42, |
|
"step": 470, |
|
"train_speed(iter/s)": 0.982974 |
|
}, |
|
{ |
|
"acc": 0.71010103, |
|
"epoch": 1.5, |
|
"grad_norm": 4.197946071624756, |
|
"learning_rate": 9.237536526595687e-06, |
|
"loss": 0.70365272, |
|
"memory(GiB)": 14.42, |
|
"step": 475, |
|
"train_speed(iter/s)": 0.983499 |
|
}, |
|
{ |
|
"acc": 0.72222228, |
|
"epoch": 1.5157894736842106, |
|
"grad_norm": 3.9227776527404785, |
|
"learning_rate": 9.253230843384724e-06, |
|
"loss": 0.68840942, |
|
"memory(GiB)": 14.42, |
|
"step": 480, |
|
"train_speed(iter/s)": 0.984297 |
|
}, |
|
{ |
|
"acc": 0.70101013, |
|
"epoch": 1.5315789473684212, |
|
"grad_norm": 5.360828876495361, |
|
"learning_rate": 9.268762521840534e-06, |
|
"loss": 0.71461992, |
|
"memory(GiB)": 14.42, |
|
"step": 485, |
|
"train_speed(iter/s)": 0.985036 |
|
}, |
|
{ |
|
"acc": 0.71515155, |
|
"epoch": 1.5473684210526315, |
|
"grad_norm": 3.924023151397705, |
|
"learning_rate": 9.284134898222705e-06, |
|
"loss": 0.70787678, |
|
"memory(GiB)": 14.42, |
|
"step": 490, |
|
"train_speed(iter/s)": 0.985206 |
|
}, |
|
{ |
|
"acc": 0.72121215, |
|
"epoch": 1.563157894736842, |
|
"grad_norm": 4.619671821594238, |
|
"learning_rate": 9.299351207175325e-06, |
|
"loss": 0.70242157, |
|
"memory(GiB)": 14.42, |
|
"step": 495, |
|
"train_speed(iter/s)": 0.984921 |
|
}, |
|
{ |
|
"acc": 0.70707073, |
|
"epoch": 1.5789473684210527, |
|
"grad_norm": 3.776333808898926, |
|
"learning_rate": 9.314414585812172e-06, |
|
"loss": 0.71011858, |
|
"memory(GiB)": 14.42, |
|
"step": 500, |
|
"train_speed(iter/s)": 0.984704 |
|
}, |
|
{ |
|
"acc": 0.71515155, |
|
"epoch": 1.594736842105263, |
|
"grad_norm": 4.775974273681641, |
|
"learning_rate": 9.329328077598717e-06, |
|
"loss": 0.72129836, |
|
"memory(GiB)": 14.42, |
|
"step": 505, |
|
"train_speed(iter/s)": 0.984498 |
|
}, |
|
{ |
|
"acc": 0.70303035, |
|
"epoch": 1.6105263157894738, |
|
"grad_norm": 4.790719032287598, |
|
"learning_rate": 9.344094636042818e-06, |
|
"loss": 0.71535187, |
|
"memory(GiB)": 14.42, |
|
"step": 510, |
|
"train_speed(iter/s)": 0.984973 |
|
}, |
|
{ |
|
"acc": 0.70909095, |
|
"epoch": 1.6263157894736842, |
|
"grad_norm": 5.532032012939453, |
|
"learning_rate": 9.358717128205423e-06, |
|
"loss": 0.72311149, |
|
"memory(GiB)": 14.42, |
|
"step": 515, |
|
"train_speed(iter/s)": 0.985246 |
|
}, |
|
{ |
|
"acc": 0.71313133, |
|
"epoch": 1.6421052631578947, |
|
"grad_norm": 4.431013107299805, |
|
"learning_rate": 9.373198338041638e-06, |
|
"loss": 0.70520873, |
|
"memory(GiB)": 14.42, |
|
"step": 520, |
|
"train_speed(iter/s)": 0.985808 |
|
}, |
|
{ |
|
"acc": 0.71313133, |
|
"epoch": 1.6578947368421053, |
|
"grad_norm": 5.026467800140381, |
|
"learning_rate": 9.38754096958194e-06, |
|
"loss": 0.71683903, |
|
"memory(GiB)": 14.42, |
|
"step": 525, |
|
"train_speed(iter/s)": 0.986072 |
|
}, |
|
{ |
|
"acc": 0.71313133, |
|
"epoch": 1.6736842105263157, |
|
"grad_norm": 4.701731204986572, |
|
"learning_rate": 9.401747649962608e-06, |
|
"loss": 0.68079748, |
|
"memory(GiB)": 14.42, |
|
"step": 530, |
|
"train_speed(iter/s)": 0.986402 |
|
}, |
|
{ |
|
"acc": 0.72020206, |
|
"epoch": 1.6894736842105265, |
|
"grad_norm": 3.6100666522979736, |
|
"learning_rate": 9.415820932313868e-06, |
|
"loss": 0.7060638, |
|
"memory(GiB)": 14.42, |
|
"step": 535, |
|
"train_speed(iter/s)": 0.987056 |
|
}, |
|
{ |
|
"acc": 0.73535361, |
|
"epoch": 1.7052631578947368, |
|
"grad_norm": 3.5025079250335693, |
|
"learning_rate": 9.429763298513726e-06, |
|
"loss": 0.70274849, |
|
"memory(GiB)": 14.42, |
|
"step": 540, |
|
"train_speed(iter/s)": 0.987628 |
|
}, |
|
{ |
|
"acc": 0.7272728, |
|
"epoch": 1.7210526315789474, |
|
"grad_norm": 4.738600254058838, |
|
"learning_rate": 9.443577161814856e-06, |
|
"loss": 0.69971213, |
|
"memory(GiB)": 14.42, |
|
"step": 545, |
|
"train_speed(iter/s)": 0.988328 |
|
}, |
|
{ |
|
"acc": 0.7272728, |
|
"epoch": 1.736842105263158, |
|
"grad_norm": 4.521397113800049, |
|
"learning_rate": 9.45726486935156e-06, |
|
"loss": 0.68742881, |
|
"memory(GiB)": 14.42, |
|
"step": 550, |
|
"train_speed(iter/s)": 0.988822 |
|
}, |
|
{ |
|
"acc": 0.71111116, |
|
"epoch": 1.7526315789473683, |
|
"grad_norm": 5.196502685546875, |
|
"learning_rate": 9.470828704533272e-06, |
|
"loss": 0.69165187, |
|
"memory(GiB)": 14.42, |
|
"step": 555, |
|
"train_speed(iter/s)": 0.989364 |
|
}, |
|
{ |
|
"acc": 0.72525253, |
|
"epoch": 1.768421052631579, |
|
"grad_norm": 5.465752124786377, |
|
"learning_rate": 9.484270889330728e-06, |
|
"loss": 0.70788536, |
|
"memory(GiB)": 14.42, |
|
"step": 560, |
|
"train_speed(iter/s)": 0.989796 |
|
}, |
|
{ |
|
"acc": 0.71414146, |
|
"epoch": 1.7842105263157895, |
|
"grad_norm": 6.159824848175049, |
|
"learning_rate": 9.497593586460533e-06, |
|
"loss": 0.70432334, |
|
"memory(GiB)": 14.42, |
|
"step": 565, |
|
"train_speed(iter/s)": 0.990473 |
|
}, |
|
{ |
|
"acc": 0.7060607, |
|
"epoch": 1.8, |
|
"grad_norm": 4.181656837463379, |
|
"learning_rate": 9.510798901473478e-06, |
|
"loss": 0.70204101, |
|
"memory(GiB)": 14.42, |
|
"step": 570, |
|
"train_speed(iter/s)": 0.991023 |
|
}, |
|
{ |
|
"acc": 0.7272728, |
|
"epoch": 1.8157894736842106, |
|
"grad_norm": 4.163989067077637, |
|
"learning_rate": 9.523888884751656e-06, |
|
"loss": 0.70399027, |
|
"memory(GiB)": 14.42, |
|
"step": 575, |
|
"train_speed(iter/s)": 0.991508 |
|
}, |
|
{ |
|
"acc": 0.71919203, |
|
"epoch": 1.831578947368421, |
|
"grad_norm": 4.880708694458008, |
|
"learning_rate": 9.536865533419144e-06, |
|
"loss": 0.67688112, |
|
"memory(GiB)": 14.42, |
|
"step": 580, |
|
"train_speed(iter/s)": 0.991861 |
|
}, |
|
{ |
|
"acc": 0.7363637, |
|
"epoch": 1.8473684210526315, |
|
"grad_norm": 4.401225566864014, |
|
"learning_rate": 9.549730793170642e-06, |
|
"loss": 0.68770628, |
|
"memory(GiB)": 14.42, |
|
"step": 585, |
|
"train_speed(iter/s)": 0.992325 |
|
}, |
|
{ |
|
"acc": 0.7242425, |
|
"epoch": 1.8631578947368421, |
|
"grad_norm": 4.399166107177734, |
|
"learning_rate": 9.562486560022329e-06, |
|
"loss": 0.6890409, |
|
"memory(GiB)": 14.42, |
|
"step": 590, |
|
"train_speed(iter/s)": 0.992375 |
|
}, |
|
{ |
|
"acc": 0.73939395, |
|
"epoch": 1.8789473684210525, |
|
"grad_norm": 3.671194314956665, |
|
"learning_rate": 9.575134681988824e-06, |
|
"loss": 0.6716217, |
|
"memory(GiB)": 14.42, |
|
"step": 595, |
|
"train_speed(iter/s)": 0.992404 |
|
}, |
|
{ |
|
"acc": 0.72525253, |
|
"epoch": 1.8947368421052633, |
|
"grad_norm": 4.786886215209961, |
|
"learning_rate": 9.587676960689963e-06, |
|
"loss": 0.67677307, |
|
"memory(GiB)": 14.42, |
|
"step": 600, |
|
"train_speed(iter/s)": 0.992267 |
|
}, |
|
{ |
|
"acc": 0.7303031, |
|
"epoch": 1.9105263157894736, |
|
"grad_norm": 4.316333293914795, |
|
"learning_rate": 9.60011515289095e-06, |
|
"loss": 0.68564234, |
|
"memory(GiB)": 14.42, |
|
"step": 605, |
|
"train_speed(iter/s)": 0.992113 |
|
}, |
|
{ |
|
"acc": 0.73131313, |
|
"epoch": 1.9263157894736842, |
|
"grad_norm": 6.806692123413086, |
|
"learning_rate": 9.612450971979065e-06, |
|
"loss": 0.70519056, |
|
"memory(GiB)": 14.42, |
|
"step": 610, |
|
"train_speed(iter/s)": 0.991945 |
|
}, |
|
{ |
|
"acc": 0.71717176, |
|
"epoch": 1.9421052631578948, |
|
"grad_norm": 4.798392295837402, |
|
"learning_rate": 9.624686089380146e-06, |
|
"loss": 0.70453787, |
|
"memory(GiB)": 14.42, |
|
"step": 615, |
|
"train_speed(iter/s)": 0.991941 |
|
}, |
|
{ |
|
"acc": 0.72626266, |
|
"epoch": 1.9578947368421051, |
|
"grad_norm": 5.573512554168701, |
|
"learning_rate": 9.6368221359177e-06, |
|
"loss": 0.69707575, |
|
"memory(GiB)": 14.42, |
|
"step": 620, |
|
"train_speed(iter/s)": 0.991755 |
|
}, |
|
{ |
|
"acc": 0.72525253, |
|
"epoch": 1.973684210526316, |
|
"grad_norm": 5.02384090423584, |
|
"learning_rate": 9.648860703117414e-06, |
|
"loss": 0.693221, |
|
"memory(GiB)": 14.42, |
|
"step": 625, |
|
"train_speed(iter/s)": 0.992196 |
|
}, |
|
{ |
|
"acc": 0.72323241, |
|
"epoch": 1.9894736842105263, |
|
"grad_norm": 4.060290336608887, |
|
"learning_rate": 9.660803344459732e-06, |
|
"loss": 0.71005087, |
|
"memory(GiB)": 14.42, |
|
"step": 630, |
|
"train_speed(iter/s)": 0.992576 |
|
}, |
|
{ |
|
"acc": 0.73131313, |
|
"epoch": 2.0052631578947366, |
|
"grad_norm": 4.20712423324585, |
|
"learning_rate": 9.672651576582882e-06, |
|
"loss": 0.6809638, |
|
"memory(GiB)": 14.42, |
|
"step": 635, |
|
"train_speed(iter/s)": 0.992308 |
|
}, |
|
{ |
|
"acc": 0.74141417, |
|
"epoch": 2.0210526315789474, |
|
"grad_norm": 4.575295448303223, |
|
"learning_rate": 9.68440688043875e-06, |
|
"loss": 0.67519088, |
|
"memory(GiB)": 14.42, |
|
"step": 640, |
|
"train_speed(iter/s)": 0.992596 |
|
}, |
|
{ |
|
"acc": 0.72828288, |
|
"epoch": 2.036842105263158, |
|
"grad_norm": 4.49033784866333, |
|
"learning_rate": 9.696070702403762e-06, |
|
"loss": 0.68399596, |
|
"memory(GiB)": 14.42, |
|
"step": 645, |
|
"train_speed(iter/s)": 0.993133 |
|
}, |
|
{ |
|
"acc": 0.71717176, |
|
"epoch": 2.0526315789473686, |
|
"grad_norm": 6.009328842163086, |
|
"learning_rate": 9.707644455346878e-06, |
|
"loss": 0.67610931, |
|
"memory(GiB)": 14.42, |
|
"step": 650, |
|
"train_speed(iter/s)": 0.993511 |
|
}, |
|
{ |
|
"acc": 0.72020206, |
|
"epoch": 2.068421052631579, |
|
"grad_norm": 5.208271503448486, |
|
"learning_rate": 9.719129519656674e-06, |
|
"loss": 0.68497334, |
|
"memory(GiB)": 14.42, |
|
"step": 655, |
|
"train_speed(iter/s)": 0.993788 |
|
}, |
|
{ |
|
"acc": 0.73131323, |
|
"epoch": 2.0842105263157893, |
|
"grad_norm": 4.493729591369629, |
|
"learning_rate": 9.730527244229352e-06, |
|
"loss": 0.65444393, |
|
"memory(GiB)": 14.42, |
|
"step": 660, |
|
"train_speed(iter/s)": 0.993916 |
|
}, |
|
{ |
|
"acc": 0.75050507, |
|
"epoch": 2.1, |
|
"grad_norm": 3.5913338661193848, |
|
"learning_rate": 9.741838947419482e-06, |
|
"loss": 0.64511662, |
|
"memory(GiB)": 14.42, |
|
"step": 665, |
|
"train_speed(iter/s)": 0.99447 |
|
}, |
|
{ |
|
"acc": 0.73232327, |
|
"epoch": 2.1157894736842104, |
|
"grad_norm": 4.7521138191223145, |
|
"learning_rate": 9.753065917955132e-06, |
|
"loss": 0.67699757, |
|
"memory(GiB)": 14.42, |
|
"step": 670, |
|
"train_speed(iter/s)": 0.994865 |
|
}, |
|
{ |
|
"acc": 0.71919203, |
|
"epoch": 2.1315789473684212, |
|
"grad_norm": 3.9687211513519287, |
|
"learning_rate": 9.764209415818967e-06, |
|
"loss": 0.67539482, |
|
"memory(GiB)": 14.42, |
|
"step": 675, |
|
"train_speed(iter/s)": 0.994992 |
|
}, |
|
{ |
|
"acc": 0.7242425, |
|
"epoch": 2.1473684210526316, |
|
"grad_norm": 4.969510555267334, |
|
"learning_rate": 9.775270673096843e-06, |
|
"loss": 0.69655457, |
|
"memory(GiB)": 14.42, |
|
"step": 680, |
|
"train_speed(iter/s)": 0.995448 |
|
}, |
|
{ |
|
"acc": 0.75050507, |
|
"epoch": 2.163157894736842, |
|
"grad_norm": 4.0912275314331055, |
|
"learning_rate": 9.786250894795326e-06, |
|
"loss": 0.66344948, |
|
"memory(GiB)": 14.42, |
|
"step": 685, |
|
"train_speed(iter/s)": 0.995672 |
|
}, |
|
{ |
|
"acc": 0.73434348, |
|
"epoch": 2.1789473684210527, |
|
"grad_norm": 3.628950595855713, |
|
"learning_rate": 9.797151259629445e-06, |
|
"loss": 0.66907301, |
|
"memory(GiB)": 14.42, |
|
"step": 690, |
|
"train_speed(iter/s)": 0.995844 |
|
}, |
|
{ |
|
"acc": 0.74141417, |
|
"epoch": 2.194736842105263, |
|
"grad_norm": 4.280593395233154, |
|
"learning_rate": 9.807972920782061e-06, |
|
"loss": 0.65676284, |
|
"memory(GiB)": 14.42, |
|
"step": 695, |
|
"train_speed(iter/s)": 0.996132 |
|
}, |
|
{ |
|
"acc": 0.73131323, |
|
"epoch": 2.2105263157894735, |
|
"grad_norm": 4.347470760345459, |
|
"learning_rate": 9.818717006635969e-06, |
|
"loss": 0.68502617, |
|
"memory(GiB)": 14.42, |
|
"step": 700, |
|
"train_speed(iter/s)": 0.996579 |
|
}, |
|
{ |
|
"acc": 0.75353541, |
|
"epoch": 2.2263157894736842, |
|
"grad_norm": 4.028265953063965, |
|
"learning_rate": 9.829384621479966e-06, |
|
"loss": 0.64554987, |
|
"memory(GiB)": 14.42, |
|
"step": 705, |
|
"train_speed(iter/s)": 0.996871 |
|
}, |
|
{ |
|
"acc": 0.7333334, |
|
"epoch": 2.2421052631578946, |
|
"grad_norm": 5.8726959228515625, |
|
"learning_rate": 9.839976846189978e-06, |
|
"loss": 0.67025905, |
|
"memory(GiB)": 14.42, |
|
"step": 710, |
|
"train_speed(iter/s)": 0.997007 |
|
}, |
|
{ |
|
"acc": 0.74040413, |
|
"epoch": 2.2578947368421054, |
|
"grad_norm": 5.193964004516602, |
|
"learning_rate": 9.850494738886266e-06, |
|
"loss": 0.6601409, |
|
"memory(GiB)": 14.42, |
|
"step": 715, |
|
"train_speed(iter/s)": 0.996822 |
|
}, |
|
{ |
|
"acc": 0.73232327, |
|
"epoch": 2.2736842105263158, |
|
"grad_norm": 6.3874711990356445, |
|
"learning_rate": 9.860939335567754e-06, |
|
"loss": 0.65816932, |
|
"memory(GiB)": 14.42, |
|
"step": 720, |
|
"train_speed(iter/s)": 0.996759 |
|
}, |
|
{ |
|
"acc": 0.73131323, |
|
"epoch": 2.2894736842105265, |
|
"grad_norm": 5.662936210632324, |
|
"learning_rate": 9.871311650724383e-06, |
|
"loss": 0.66208439, |
|
"memory(GiB)": 14.42, |
|
"step": 725, |
|
"train_speed(iter/s)": 0.996521 |
|
}, |
|
{ |
|
"acc": 0.7363637, |
|
"epoch": 2.305263157894737, |
|
"grad_norm": 5.146431922912598, |
|
"learning_rate": 9.881612677928449e-06, |
|
"loss": 0.65422068, |
|
"memory(GiB)": 14.42, |
|
"step": 730, |
|
"train_speed(iter/s)": 0.996472 |
|
}, |
|
{ |
|
"acc": 0.71919203, |
|
"epoch": 2.3210526315789473, |
|
"grad_norm": 5.648817539215088, |
|
"learning_rate": 9.891843390405736e-06, |
|
"loss": 0.67373962, |
|
"memory(GiB)": 14.42, |
|
"step": 735, |
|
"train_speed(iter/s)": 0.99626 |
|
}, |
|
{ |
|
"acc": 0.7363637, |
|
"epoch": 2.336842105263158, |
|
"grad_norm": 4.64543342590332, |
|
"learning_rate": 9.902004741587298e-06, |
|
"loss": 0.66676788, |
|
"memory(GiB)": 14.42, |
|
"step": 740, |
|
"train_speed(iter/s)": 0.996012 |
|
}, |
|
{ |
|
"acc": 0.74141421, |
|
"epoch": 2.3526315789473684, |
|
"grad_norm": 4.293550968170166, |
|
"learning_rate": 9.912097665642666e-06, |
|
"loss": 0.68388977, |
|
"memory(GiB)": 14.42, |
|
"step": 745, |
|
"train_speed(iter/s)": 0.995772 |
|
}, |
|
{ |
|
"acc": 0.73434348, |
|
"epoch": 2.3684210526315788, |
|
"grad_norm": 4.982192516326904, |
|
"learning_rate": 9.922123077995203e-06, |
|
"loss": 0.68120513, |
|
"memory(GiB)": 14.42, |
|
"step": 750, |
|
"train_speed(iter/s)": 0.996044 |
|
}, |
|
{ |
|
"acc": 0.7212122, |
|
"epoch": 2.3842105263157896, |
|
"grad_norm": 4.732614040374756, |
|
"learning_rate": 9.932081875820369e-06, |
|
"loss": 0.67536116, |
|
"memory(GiB)": 14.42, |
|
"step": 755, |
|
"train_speed(iter/s)": 0.996511 |
|
}, |
|
{ |
|
"acc": 0.7303031, |
|
"epoch": 2.4, |
|
"grad_norm": 4.1169867515563965, |
|
"learning_rate": 9.941974938527504e-06, |
|
"loss": 0.6712615, |
|
"memory(GiB)": 14.42, |
|
"step": 760, |
|
"train_speed(iter/s)": 0.996962 |
|
}, |
|
{ |
|
"acc": 0.73131323, |
|
"epoch": 2.4157894736842107, |
|
"grad_norm": 4.577713966369629, |
|
"learning_rate": 9.951803128225848e-06, |
|
"loss": 0.69072509, |
|
"memory(GiB)": 14.42, |
|
"step": 765, |
|
"train_speed(iter/s)": 0.997234 |
|
}, |
|
{ |
|
"acc": 0.73131323, |
|
"epoch": 2.431578947368421, |
|
"grad_norm": 4.2971367835998535, |
|
"learning_rate": 9.961567290175355e-06, |
|
"loss": 0.68363256, |
|
"memory(GiB)": 14.42, |
|
"step": 770, |
|
"train_speed(iter/s)": 0.997844 |
|
}, |
|
{ |
|
"acc": 0.74949493, |
|
"epoch": 2.4473684210526314, |
|
"grad_norm": 5.387909889221191, |
|
"learning_rate": 9.97126825322294e-06, |
|
"loss": 0.65476675, |
|
"memory(GiB)": 14.42, |
|
"step": 775, |
|
"train_speed(iter/s)": 0.998001 |
|
}, |
|
{ |
|
"acc": 0.74747481, |
|
"epoch": 2.463157894736842, |
|
"grad_norm": 5.511860370635986, |
|
"learning_rate": 9.980906830224669e-06, |
|
"loss": 0.66419239, |
|
"memory(GiB)": 14.42, |
|
"step": 780, |
|
"train_speed(iter/s)": 0.998258 |
|
}, |
|
{ |
|
"acc": 0.75656567, |
|
"epoch": 2.4789473684210526, |
|
"grad_norm": 4.687104225158691, |
|
"learning_rate": 9.990483818454497e-06, |
|
"loss": 0.62274342, |
|
"memory(GiB)": 14.42, |
|
"step": 785, |
|
"train_speed(iter/s)": 0.998602 |
|
}, |
|
{ |
|
"acc": 0.74444447, |
|
"epoch": 2.4947368421052634, |
|
"grad_norm": 4.910262107849121, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6592267, |
|
"memory(GiB)": 14.42, |
|
"step": 790, |
|
"train_speed(iter/s)": 0.998998 |
|
}, |
|
{ |
|
"acc": 0.7606061, |
|
"epoch": 2.5105263157894737, |
|
"grad_norm": 4.4709601402282715, |
|
"learning_rate": 9.99999726236902e-06, |
|
"loss": 0.65275908, |
|
"memory(GiB)": 14.42, |
|
"step": 795, |
|
"train_speed(iter/s)": 0.999322 |
|
}, |
|
{ |
|
"acc": 0.72525253, |
|
"epoch": 2.526315789473684, |
|
"grad_norm": 4.263107776641846, |
|
"learning_rate": 9.999989049479075e-06, |
|
"loss": 0.67762294, |
|
"memory(GiB)": 14.42, |
|
"step": 800, |
|
"train_speed(iter/s)": 0.999649 |
|
}, |
|
{ |
|
"acc": 0.74747477, |
|
"epoch": 2.542105263157895, |
|
"grad_norm": 4.175100326538086, |
|
"learning_rate": 9.999975361339162e-06, |
|
"loss": 0.64345884, |
|
"memory(GiB)": 14.42, |
|
"step": 805, |
|
"train_speed(iter/s)": 0.999975 |
|
}, |
|
{ |
|
"acc": 0.75050507, |
|
"epoch": 2.557894736842105, |
|
"grad_norm": 4.516010284423828, |
|
"learning_rate": 9.99995619796427e-06, |
|
"loss": 0.64713798, |
|
"memory(GiB)": 14.42, |
|
"step": 810, |
|
"train_speed(iter/s)": 1.000482 |
|
}, |
|
{ |
|
"acc": 0.7333334, |
|
"epoch": 2.5736842105263156, |
|
"grad_norm": 5.067887306213379, |
|
"learning_rate": 9.999931559375387e-06, |
|
"loss": 0.66570234, |
|
"memory(GiB)": 14.42, |
|
"step": 815, |
|
"train_speed(iter/s)": 1.000429 |
|
}, |
|
{ |
|
"acc": 0.7484849, |
|
"epoch": 2.5894736842105264, |
|
"grad_norm": 5.1078996658325195, |
|
"learning_rate": 9.999901445599495e-06, |
|
"loss": 0.6631753, |
|
"memory(GiB)": 14.42, |
|
"step": 820, |
|
"train_speed(iter/s)": 1.000908 |
|
}, |
|
{ |
|
"acc": 0.74141417, |
|
"epoch": 2.6052631578947367, |
|
"grad_norm": 3.582270860671997, |
|
"learning_rate": 9.999865856669573e-06, |
|
"loss": 0.65539184, |
|
"memory(GiB)": 14.42, |
|
"step": 825, |
|
"train_speed(iter/s)": 1.001101 |
|
}, |
|
{ |
|
"acc": 0.74646468, |
|
"epoch": 2.6210526315789475, |
|
"grad_norm": 4.469554901123047, |
|
"learning_rate": 9.999824792624602e-06, |
|
"loss": 0.66321173, |
|
"memory(GiB)": 14.42, |
|
"step": 830, |
|
"train_speed(iter/s)": 1.001374 |
|
}, |
|
{ |
|
"acc": 0.75050507, |
|
"epoch": 2.636842105263158, |
|
"grad_norm": 4.35246467590332, |
|
"learning_rate": 9.999778253509548e-06, |
|
"loss": 0.63566313, |
|
"memory(GiB)": 14.42, |
|
"step": 835, |
|
"train_speed(iter/s)": 1.001339 |
|
}, |
|
{ |
|
"acc": 0.7636364, |
|
"epoch": 2.6526315789473687, |
|
"grad_norm": 4.282431125640869, |
|
"learning_rate": 9.99972623937538e-06, |
|
"loss": 0.64590969, |
|
"memory(GiB)": 14.42, |
|
"step": 840, |
|
"train_speed(iter/s)": 1.000982 |
|
}, |
|
{ |
|
"acc": 0.75050507, |
|
"epoch": 2.668421052631579, |
|
"grad_norm": 4.943569183349609, |
|
"learning_rate": 9.999668750279061e-06, |
|
"loss": 0.6387764, |
|
"memory(GiB)": 14.42, |
|
"step": 845, |
|
"train_speed(iter/s)": 1.000657 |
|
}, |
|
{ |
|
"acc": 0.7454546, |
|
"epoch": 2.6842105263157894, |
|
"grad_norm": 5.16862154006958, |
|
"learning_rate": 9.999605786283555e-06, |
|
"loss": 0.65590019, |
|
"memory(GiB)": 14.42, |
|
"step": 850, |
|
"train_speed(iter/s)": 1.000469 |
|
}, |
|
{ |
|
"acc": 0.75353541, |
|
"epoch": 2.7, |
|
"grad_norm": 6.1622633934021, |
|
"learning_rate": 9.999537347457813e-06, |
|
"loss": 0.63483958, |
|
"memory(GiB)": 14.42, |
|
"step": 855, |
|
"train_speed(iter/s)": 1.000055 |
|
}, |
|
{ |
|
"acc": 0.73737373, |
|
"epoch": 2.7157894736842105, |
|
"grad_norm": 4.183565139770508, |
|
"learning_rate": 9.99946343387679e-06, |
|
"loss": 0.65940695, |
|
"memory(GiB)": 14.42, |
|
"step": 860, |
|
"train_speed(iter/s)": 1.000002 |
|
}, |
|
{ |
|
"acc": 0.72525253, |
|
"epoch": 2.731578947368421, |
|
"grad_norm": 5.071559906005859, |
|
"learning_rate": 9.999384045621432e-06, |
|
"loss": 0.65910063, |
|
"memory(GiB)": 14.42, |
|
"step": 865, |
|
"train_speed(iter/s)": 0.999665 |
|
}, |
|
{ |
|
"acc": 0.73535361, |
|
"epoch": 2.7473684210526317, |
|
"grad_norm": 5.395997524261475, |
|
"learning_rate": 9.999299182778683e-06, |
|
"loss": 0.66515555, |
|
"memory(GiB)": 14.42, |
|
"step": 870, |
|
"train_speed(iter/s)": 0.999492 |
|
}, |
|
{ |
|
"acc": 0.7575758, |
|
"epoch": 2.763157894736842, |
|
"grad_norm": 3.9158406257629395, |
|
"learning_rate": 9.99920884544148e-06, |
|
"loss": 0.6367353, |
|
"memory(GiB)": 14.42, |
|
"step": 875, |
|
"train_speed(iter/s)": 0.999322 |
|
}, |
|
{ |
|
"acc": 0.74949503, |
|
"epoch": 2.7789473684210524, |
|
"grad_norm": 4.631548881530762, |
|
"learning_rate": 9.999113033708756e-06, |
|
"loss": 0.64674835, |
|
"memory(GiB)": 14.42, |
|
"step": 880, |
|
"train_speed(iter/s)": 0.9996 |
|
}, |
|
{ |
|
"acc": 0.76464653, |
|
"epoch": 2.794736842105263, |
|
"grad_norm": 4.38846492767334, |
|
"learning_rate": 9.999011747685443e-06, |
|
"loss": 0.60613623, |
|
"memory(GiB)": 14.42, |
|
"step": 885, |
|
"train_speed(iter/s)": 0.999923 |
|
}, |
|
{ |
|
"acc": 0.75858588, |
|
"epoch": 2.8105263157894735, |
|
"grad_norm": 5.623709201812744, |
|
"learning_rate": 9.998904987482465e-06, |
|
"loss": 0.61659794, |
|
"memory(GiB)": 14.42, |
|
"step": 890, |
|
"train_speed(iter/s)": 1.000227 |
|
}, |
|
{ |
|
"acc": 0.7636364, |
|
"epoch": 2.8263157894736843, |
|
"grad_norm": 4.804795265197754, |
|
"learning_rate": 9.998792753216741e-06, |
|
"loss": 0.62756157, |
|
"memory(GiB)": 14.42, |
|
"step": 895, |
|
"train_speed(iter/s)": 1.000273 |
|
}, |
|
{ |
|
"acc": 0.7666667, |
|
"epoch": 2.8421052631578947, |
|
"grad_norm": 5.261493682861328, |
|
"learning_rate": 9.998675045011186e-06, |
|
"loss": 0.60897455, |
|
"memory(GiB)": 14.42, |
|
"step": 900, |
|
"train_speed(iter/s)": 1.000569 |
|
}, |
|
{ |
|
"acc": 0.7454546, |
|
"epoch": 2.8578947368421055, |
|
"grad_norm": 4.73408317565918, |
|
"learning_rate": 9.998551862994711e-06, |
|
"loss": 0.62260122, |
|
"memory(GiB)": 14.42, |
|
"step": 905, |
|
"train_speed(iter/s)": 1.000726 |
|
}, |
|
{ |
|
"acc": 0.76868696, |
|
"epoch": 2.873684210526316, |
|
"grad_norm": 5.430922031402588, |
|
"learning_rate": 9.998423207302215e-06, |
|
"loss": 0.62232122, |
|
"memory(GiB)": 14.42, |
|
"step": 910, |
|
"train_speed(iter/s)": 1.000597 |
|
}, |
|
{ |
|
"acc": 0.74646468, |
|
"epoch": 2.889473684210526, |
|
"grad_norm": 5.249138355255127, |
|
"learning_rate": 9.998289078074605e-06, |
|
"loss": 0.62488241, |
|
"memory(GiB)": 14.42, |
|
"step": 915, |
|
"train_speed(iter/s)": 1.000763 |
|
}, |
|
{ |
|
"acc": 0.7666667, |
|
"epoch": 2.905263157894737, |
|
"grad_norm": 4.930274963378906, |
|
"learning_rate": 9.998149475458766e-06, |
|
"loss": 0.61784182, |
|
"memory(GiB)": 14.42, |
|
"step": 920, |
|
"train_speed(iter/s)": 1.000804 |
|
}, |
|
{ |
|
"acc": 0.7575758, |
|
"epoch": 2.9210526315789473, |
|
"grad_norm": 5.189949989318848, |
|
"learning_rate": 9.998004399607592e-06, |
|
"loss": 0.61897421, |
|
"memory(GiB)": 14.42, |
|
"step": 925, |
|
"train_speed(iter/s)": 1.000775 |
|
}, |
|
{ |
|
"acc": 0.76464648, |
|
"epoch": 2.9368421052631577, |
|
"grad_norm": 5.007906913757324, |
|
"learning_rate": 9.997853850679959e-06, |
|
"loss": 0.61194296, |
|
"memory(GiB)": 14.42, |
|
"step": 930, |
|
"train_speed(iter/s)": 1.000594 |
|
}, |
|
{ |
|
"acc": 0.75353537, |
|
"epoch": 2.9526315789473685, |
|
"grad_norm": 4.766158103942871, |
|
"learning_rate": 9.997697828840749e-06, |
|
"loss": 0.64542856, |
|
"memory(GiB)": 14.42, |
|
"step": 935, |
|
"train_speed(iter/s)": 1.000421 |
|
}, |
|
{ |
|
"acc": 0.76161613, |
|
"epoch": 2.968421052631579, |
|
"grad_norm": 4.673051834106445, |
|
"learning_rate": 9.997536334260826e-06, |
|
"loss": 0.6193069, |
|
"memory(GiB)": 14.42, |
|
"step": 940, |
|
"train_speed(iter/s)": 1.000437 |
|
}, |
|
{ |
|
"acc": 0.76262627, |
|
"epoch": 2.984210526315789, |
|
"grad_norm": 5.344494342803955, |
|
"learning_rate": 9.997369367117051e-06, |
|
"loss": 0.62504797, |
|
"memory(GiB)": 14.42, |
|
"step": 945, |
|
"train_speed(iter/s)": 1.000649 |
|
}, |
|
{ |
|
"acc": 0.7454546, |
|
"epoch": 3.0, |
|
"grad_norm": 5.629596710205078, |
|
"learning_rate": 9.997196927592286e-06, |
|
"loss": 0.63563552, |
|
"memory(GiB)": 14.42, |
|
"step": 950, |
|
"train_speed(iter/s)": 1.000471 |
|
}, |
|
{ |
|
"acc": 0.7545455, |
|
"epoch": 3.0157894736842104, |
|
"grad_norm": 5.282176971435547, |
|
"learning_rate": 9.997019015875379e-06, |
|
"loss": 0.62507687, |
|
"memory(GiB)": 14.42, |
|
"step": 955, |
|
"train_speed(iter/s)": 1.000304 |
|
}, |
|
{ |
|
"acc": 0.76060615, |
|
"epoch": 3.031578947368421, |
|
"grad_norm": 4.46095085144043, |
|
"learning_rate": 9.996835632161168e-06, |
|
"loss": 0.61408606, |
|
"memory(GiB)": 14.42, |
|
"step": 960, |
|
"train_speed(iter/s)": 1.000621 |
|
}, |
|
{ |
|
"acc": 0.75050507, |
|
"epoch": 3.0473684210526315, |
|
"grad_norm": 6.5806660652160645, |
|
"learning_rate": 9.996646776650492e-06, |
|
"loss": 0.64194403, |
|
"memory(GiB)": 14.42, |
|
"step": 965, |
|
"train_speed(iter/s)": 1.000733 |
|
}, |
|
{ |
|
"acc": 0.7575758, |
|
"epoch": 3.0631578947368423, |
|
"grad_norm": 4.796421051025391, |
|
"learning_rate": 9.996452449550176e-06, |
|
"loss": 0.63968344, |
|
"memory(GiB)": 14.42, |
|
"step": 970, |
|
"train_speed(iter/s)": 1.000961 |
|
}, |
|
{ |
|
"acc": 0.75151515, |
|
"epoch": 3.0789473684210527, |
|
"grad_norm": 5.288400650024414, |
|
"learning_rate": 9.99625265107304e-06, |
|
"loss": 0.61021528, |
|
"memory(GiB)": 14.42, |
|
"step": 975, |
|
"train_speed(iter/s)": 1.001195 |
|
}, |
|
{ |
|
"acc": 0.77373743, |
|
"epoch": 3.094736842105263, |
|
"grad_norm": 4.586065292358398, |
|
"learning_rate": 9.996047381437895e-06, |
|
"loss": 0.60301132, |
|
"memory(GiB)": 14.42, |
|
"step": 980, |
|
"train_speed(iter/s)": 1.001461 |
|
}, |
|
{ |
|
"acc": 0.76565661, |
|
"epoch": 3.110526315789474, |
|
"grad_norm": 4.384403228759766, |
|
"learning_rate": 9.995836640869548e-06, |
|
"loss": 0.63142433, |
|
"memory(GiB)": 14.42, |
|
"step": 985, |
|
"train_speed(iter/s)": 1.001819 |
|
}, |
|
{ |
|
"acc": 0.75959601, |
|
"epoch": 3.126315789473684, |
|
"grad_norm": 4.395773887634277, |
|
"learning_rate": 9.995620429598791e-06, |
|
"loss": 0.63195262, |
|
"memory(GiB)": 14.42, |
|
"step": 990, |
|
"train_speed(iter/s)": 1.001857 |
|
}, |
|
{ |
|
"acc": 0.75858588, |
|
"epoch": 3.1421052631578945, |
|
"grad_norm": 4.790668964385986, |
|
"learning_rate": 9.995398747862412e-06, |
|
"loss": 0.60636911, |
|
"memory(GiB)": 14.42, |
|
"step": 995, |
|
"train_speed(iter/s)": 1.002024 |
|
}, |
|
{ |
|
"acc": 0.77171721, |
|
"epoch": 3.1578947368421053, |
|
"grad_norm": 4.812241554260254, |
|
"learning_rate": 9.995171595903185e-06, |
|
"loss": 0.61093168, |
|
"memory(GiB)": 14.42, |
|
"step": 1000, |
|
"train_speed(iter/s)": 1.002015 |
|
}, |
|
{ |
|
"acc": 0.77676773, |
|
"epoch": 3.1736842105263157, |
|
"grad_norm": 7.178610324859619, |
|
"learning_rate": 9.994938973969881e-06, |
|
"loss": 0.59977636, |
|
"memory(GiB)": 14.42, |
|
"step": 1005, |
|
"train_speed(iter/s)": 1.002083 |
|
}, |
|
{ |
|
"acc": 0.74747481, |
|
"epoch": 3.1894736842105265, |
|
"grad_norm": 4.942941665649414, |
|
"learning_rate": 9.99470088231726e-06, |
|
"loss": 0.62871065, |
|
"memory(GiB)": 14.42, |
|
"step": 1010, |
|
"train_speed(iter/s)": 1.001901 |
|
}, |
|
{ |
|
"acc": 0.77272735, |
|
"epoch": 3.205263157894737, |
|
"grad_norm": 6.371291160583496, |
|
"learning_rate": 9.994457321206067e-06, |
|
"loss": 0.63051238, |
|
"memory(GiB)": 14.42, |
|
"step": 1015, |
|
"train_speed(iter/s)": 1.001826 |
|
}, |
|
{ |
|
"acc": 0.76565661, |
|
"epoch": 3.221052631578947, |
|
"grad_norm": 6.595971584320068, |
|
"learning_rate": 9.994208290903044e-06, |
|
"loss": 0.60574374, |
|
"memory(GiB)": 14.42, |
|
"step": 1020, |
|
"train_speed(iter/s)": 1.001531 |
|
}, |
|
{ |
|
"acc": 0.77474747, |
|
"epoch": 3.236842105263158, |
|
"grad_norm": 6.768241882324219, |
|
"learning_rate": 9.993953791680919e-06, |
|
"loss": 0.6086751, |
|
"memory(GiB)": 14.42, |
|
"step": 1025, |
|
"train_speed(iter/s)": 1.001264 |
|
}, |
|
{ |
|
"acc": 0.76060615, |
|
"epoch": 3.2526315789473683, |
|
"grad_norm": 5.642907619476318, |
|
"learning_rate": 9.993693823818408e-06, |
|
"loss": 0.61570983, |
|
"memory(GiB)": 14.42, |
|
"step": 1030, |
|
"train_speed(iter/s)": 1.001062 |
|
}, |
|
{ |
|
"acc": 0.7757576, |
|
"epoch": 3.268421052631579, |
|
"grad_norm": 5.4705810546875, |
|
"learning_rate": 9.99342838760022e-06, |
|
"loss": 0.57923889, |
|
"memory(GiB)": 14.42, |
|
"step": 1035, |
|
"train_speed(iter/s)": 1.0012 |
|
}, |
|
{ |
|
"acc": 0.7787879, |
|
"epoch": 3.2842105263157895, |
|
"grad_norm": 5.741537570953369, |
|
"learning_rate": 9.993157483317049e-06, |
|
"loss": 0.59299955, |
|
"memory(GiB)": 14.42, |
|
"step": 1040, |
|
"train_speed(iter/s)": 1.001369 |
|
}, |
|
{ |
|
"acc": 0.77373743, |
|
"epoch": 3.3, |
|
"grad_norm": 6.200732707977295, |
|
"learning_rate": 9.99288111126558e-06, |
|
"loss": 0.6062952, |
|
"memory(GiB)": 14.42, |
|
"step": 1045, |
|
"train_speed(iter/s)": 1.001563 |
|
}, |
|
{ |
|
"acc": 0.77272735, |
|
"epoch": 3.3157894736842106, |
|
"grad_norm": 5.042914867401123, |
|
"learning_rate": 9.992599271748487e-06, |
|
"loss": 0.59764023, |
|
"memory(GiB)": 14.42, |
|
"step": 1050, |
|
"train_speed(iter/s)": 1.001803 |
|
}, |
|
{ |
|
"acc": 0.76161623, |
|
"epoch": 3.331578947368421, |
|
"grad_norm": 4.9619293212890625, |
|
"learning_rate": 9.992311965074426e-06, |
|
"loss": 0.60689812, |
|
"memory(GiB)": 14.42, |
|
"step": 1055, |
|
"train_speed(iter/s)": 1.001739 |
|
}, |
|
{ |
|
"acc": 0.77070713, |
|
"epoch": 3.3473684210526318, |
|
"grad_norm": 4.4396820068359375, |
|
"learning_rate": 9.992019191558047e-06, |
|
"loss": 0.59743018, |
|
"memory(GiB)": 14.42, |
|
"step": 1060, |
|
"train_speed(iter/s)": 1.001876 |
|
}, |
|
{ |
|
"acc": 0.76161623, |
|
"epoch": 3.363157894736842, |
|
"grad_norm": 4.762928009033203, |
|
"learning_rate": 9.991720951519983e-06, |
|
"loss": 0.60235682, |
|
"memory(GiB)": 14.42, |
|
"step": 1065, |
|
"train_speed(iter/s)": 1.002076 |
|
}, |
|
{ |
|
"acc": 0.79595962, |
|
"epoch": 3.3789473684210525, |
|
"grad_norm": 6.026463508605957, |
|
"learning_rate": 9.991417245286857e-06, |
|
"loss": 0.56530128, |
|
"memory(GiB)": 14.42, |
|
"step": 1070, |
|
"train_speed(iter/s)": 1.002259 |
|
}, |
|
{ |
|
"acc": 0.7666667, |
|
"epoch": 3.3947368421052633, |
|
"grad_norm": 5.576995372772217, |
|
"learning_rate": 9.991108073191275e-06, |
|
"loss": 0.60584636, |
|
"memory(GiB)": 14.42, |
|
"step": 1075, |
|
"train_speed(iter/s)": 1.002387 |
|
}, |
|
{ |
|
"acc": 0.76565657, |
|
"epoch": 3.4105263157894736, |
|
"grad_norm": 5.6912336349487305, |
|
"learning_rate": 9.990793435571831e-06, |
|
"loss": 0.60695724, |
|
"memory(GiB)": 14.42, |
|
"step": 1080, |
|
"train_speed(iter/s)": 1.002498 |
|
}, |
|
{ |
|
"acc": 0.77070708, |
|
"epoch": 3.4263157894736844, |
|
"grad_norm": 7.098496913909912, |
|
"learning_rate": 9.990473332773104e-06, |
|
"loss": 0.61024132, |
|
"memory(GiB)": 14.42, |
|
"step": 1085, |
|
"train_speed(iter/s)": 1.002286 |
|
}, |
|
{ |
|
"acc": 0.77474747, |
|
"epoch": 3.442105263157895, |
|
"grad_norm": 5.199113845825195, |
|
"learning_rate": 9.990147765145657e-06, |
|
"loss": 0.57286983, |
|
"memory(GiB)": 14.42, |
|
"step": 1090, |
|
"train_speed(iter/s)": 1.002033 |
|
}, |
|
{ |
|
"acc": 0.77171721, |
|
"epoch": 3.457894736842105, |
|
"grad_norm": 5.377429485321045, |
|
"learning_rate": 9.989816733046042e-06, |
|
"loss": 0.59407434, |
|
"memory(GiB)": 14.42, |
|
"step": 1095, |
|
"train_speed(iter/s)": 1.002361 |
|
}, |
|
{ |
|
"acc": 0.78585863, |
|
"epoch": 3.473684210526316, |
|
"grad_norm": 8.064284324645996, |
|
"learning_rate": 9.98948023683679e-06, |
|
"loss": 0.59351382, |
|
"memory(GiB)": 14.42, |
|
"step": 1100, |
|
"train_speed(iter/s)": 1.00254 |
|
}, |
|
{ |
|
"acc": 0.8151516, |
|
"epoch": 3.4894736842105263, |
|
"grad_norm": 5.125907897949219, |
|
"learning_rate": 9.989138276886422e-06, |
|
"loss": 0.55623984, |
|
"memory(GiB)": 14.42, |
|
"step": 1105, |
|
"train_speed(iter/s)": 1.002758 |
|
}, |
|
{ |
|
"acc": 0.7818182, |
|
"epoch": 3.5052631578947366, |
|
"grad_norm": 5.503304481506348, |
|
"learning_rate": 9.988790853569435e-06, |
|
"loss": 0.58769279, |
|
"memory(GiB)": 14.42, |
|
"step": 1110, |
|
"train_speed(iter/s)": 1.002692 |
|
}, |
|
{ |
|
"acc": 0.79494948, |
|
"epoch": 3.5210526315789474, |
|
"grad_norm": 5.440110683441162, |
|
"learning_rate": 9.988437967266318e-06, |
|
"loss": 0.54681435, |
|
"memory(GiB)": 14.42, |
|
"step": 1115, |
|
"train_speed(iter/s)": 1.002763 |
|
}, |
|
{ |
|
"acc": 0.7787879, |
|
"epoch": 3.536842105263158, |
|
"grad_norm": 6.168666362762451, |
|
"learning_rate": 9.98807961836354e-06, |
|
"loss": 0.57537584, |
|
"memory(GiB)": 14.42, |
|
"step": 1120, |
|
"train_speed(iter/s)": 1.00306 |
|
}, |
|
{ |
|
"acc": 0.78383842, |
|
"epoch": 3.5526315789473686, |
|
"grad_norm": 6.1219000816345215, |
|
"learning_rate": 9.987715807253544e-06, |
|
"loss": 0.56294775, |
|
"memory(GiB)": 14.42, |
|
"step": 1125, |
|
"train_speed(iter/s)": 1.003329 |
|
}, |
|
{ |
|
"acc": 0.79898987, |
|
"epoch": 3.568421052631579, |
|
"grad_norm": 7.895573139190674, |
|
"learning_rate": 9.987346534334769e-06, |
|
"loss": 0.56549187, |
|
"memory(GiB)": 14.42, |
|
"step": 1130, |
|
"train_speed(iter/s)": 1.003635 |
|
}, |
|
{ |
|
"acc": 0.76565661, |
|
"epoch": 3.5842105263157897, |
|
"grad_norm": 7.460114002227783, |
|
"learning_rate": 9.986971800011624e-06, |
|
"loss": 0.59118419, |
|
"memory(GiB)": 14.42, |
|
"step": 1135, |
|
"train_speed(iter/s)": 1.003749 |
|
}, |
|
{ |
|
"acc": 0.78585863, |
|
"epoch": 3.6, |
|
"grad_norm": 5.666573524475098, |
|
"learning_rate": 9.986591604694506e-06, |
|
"loss": 0.55221477, |
|
"memory(GiB)": 14.42, |
|
"step": 1140, |
|
"train_speed(iter/s)": 1.004019 |
|
}, |
|
{ |
|
"acc": 0.8060606, |
|
"epoch": 3.6157894736842104, |
|
"grad_norm": 5.389659404754639, |
|
"learning_rate": 9.986205948799792e-06, |
|
"loss": 0.55691719, |
|
"memory(GiB)": 14.42, |
|
"step": 1145, |
|
"train_speed(iter/s)": 1.004229 |
|
}, |
|
{ |
|
"acc": 0.7757576, |
|
"epoch": 3.6315789473684212, |
|
"grad_norm": 6.799347877502441, |
|
"learning_rate": 9.985814832749832e-06, |
|
"loss": 0.61102791, |
|
"memory(GiB)": 14.42, |
|
"step": 1150, |
|
"train_speed(iter/s)": 1.004461 |
|
}, |
|
{ |
|
"acc": 0.7787879, |
|
"epoch": 3.6473684210526316, |
|
"grad_norm": 5.257281303405762, |
|
"learning_rate": 9.98541825697297e-06, |
|
"loss": 0.57405472, |
|
"memory(GiB)": 14.42, |
|
"step": 1155, |
|
"train_speed(iter/s)": 1.004366 |
|
}, |
|
{ |
|
"acc": 0.7757576, |
|
"epoch": 3.663157894736842, |
|
"grad_norm": 5.32186222076416, |
|
"learning_rate": 9.98501622190351e-06, |
|
"loss": 0.58651361, |
|
"memory(GiB)": 14.42, |
|
"step": 1160, |
|
"train_speed(iter/s)": 1.004375 |
|
}, |
|
{ |
|
"acc": 0.79191923, |
|
"epoch": 3.6789473684210527, |
|
"grad_norm": 6.293529987335205, |
|
"learning_rate": 9.984608727981756e-06, |
|
"loss": 0.54780645, |
|
"memory(GiB)": 14.42, |
|
"step": 1165, |
|
"train_speed(iter/s)": 1.004371 |
|
}, |
|
{ |
|
"acc": 0.78888893, |
|
"epoch": 3.694736842105263, |
|
"grad_norm": 5.983913898468018, |
|
"learning_rate": 9.984195775653973e-06, |
|
"loss": 0.56223335, |
|
"memory(GiB)": 14.42, |
|
"step": 1170, |
|
"train_speed(iter/s)": 1.004242 |
|
}, |
|
{ |
|
"acc": 0.78585863, |
|
"epoch": 3.7105263157894735, |
|
"grad_norm": 6.722934722900391, |
|
"learning_rate": 9.983777365372412e-06, |
|
"loss": 0.56356654, |
|
"memory(GiB)": 14.42, |
|
"step": 1175, |
|
"train_speed(iter/s)": 1.004355 |
|
}, |
|
{ |
|
"acc": 0.78080816, |
|
"epoch": 3.7263157894736842, |
|
"grad_norm": 6.902647495269775, |
|
"learning_rate": 9.983353497595303e-06, |
|
"loss": 0.56628561, |
|
"memory(GiB)": 14.42, |
|
"step": 1180, |
|
"train_speed(iter/s)": 1.004572 |
|
}, |
|
{ |
|
"acc": 0.80505056, |
|
"epoch": 3.7421052631578946, |
|
"grad_norm": 6.423276901245117, |
|
"learning_rate": 9.982924172786847e-06, |
|
"loss": 0.51588736, |
|
"memory(GiB)": 14.42, |
|
"step": 1185, |
|
"train_speed(iter/s)": 1.004765 |
|
}, |
|
{ |
|
"acc": 0.78888893, |
|
"epoch": 3.7578947368421054, |
|
"grad_norm": 7.339409828186035, |
|
"learning_rate": 9.982489391417225e-06, |
|
"loss": 0.55465469, |
|
"memory(GiB)": 14.42, |
|
"step": 1190, |
|
"train_speed(iter/s)": 1.004517 |
|
}, |
|
{ |
|
"acc": 0.77979803, |
|
"epoch": 3.7736842105263158, |
|
"grad_norm": 5.153477191925049, |
|
"learning_rate": 9.982049153962591e-06, |
|
"loss": 0.57469625, |
|
"memory(GiB)": 14.42, |
|
"step": 1195, |
|
"train_speed(iter/s)": 1.004249 |
|
}, |
|
{ |
|
"acc": 0.78989902, |
|
"epoch": 3.7894736842105265, |
|
"grad_norm": 6.078983306884766, |
|
"learning_rate": 9.98160346090508e-06, |
|
"loss": 0.56436605, |
|
"memory(GiB)": 14.42, |
|
"step": 1200, |
|
"train_speed(iter/s)": 1.004159 |
|
}, |
|
{ |
|
"acc": 0.80505056, |
|
"epoch": 3.805263157894737, |
|
"grad_norm": 6.977550029754639, |
|
"learning_rate": 9.981152312732795e-06, |
|
"loss": 0.533461, |
|
"memory(GiB)": 14.42, |
|
"step": 1205, |
|
"train_speed(iter/s)": 1.004298 |
|
}, |
|
{ |
|
"acc": 0.80101013, |
|
"epoch": 3.8210526315789473, |
|
"grad_norm": 5.750452518463135, |
|
"learning_rate": 9.980695709939817e-06, |
|
"loss": 0.53172665, |
|
"memory(GiB)": 14.42, |
|
"step": 1210, |
|
"train_speed(iter/s)": 1.004419 |
|
}, |
|
{ |
|
"acc": 0.79292927, |
|
"epoch": 3.836842105263158, |
|
"grad_norm": 8.136344909667969, |
|
"learning_rate": 9.980233653026204e-06, |
|
"loss": 0.56949782, |
|
"memory(GiB)": 14.42, |
|
"step": 1215, |
|
"train_speed(iter/s)": 1.004574 |
|
}, |
|
{ |
|
"acc": 0.79898987, |
|
"epoch": 3.8526315789473684, |
|
"grad_norm": 6.550292015075684, |
|
"learning_rate": 9.979766142497977e-06, |
|
"loss": 0.57330713, |
|
"memory(GiB)": 14.42, |
|
"step": 1220, |
|
"train_speed(iter/s)": 1.004753 |
|
}, |
|
{ |
|
"acc": 0.79292936, |
|
"epoch": 3.8684210526315788, |
|
"grad_norm": 6.593601703643799, |
|
"learning_rate": 9.979293178867138e-06, |
|
"loss": 0.55137663, |
|
"memory(GiB)": 14.42, |
|
"step": 1225, |
|
"train_speed(iter/s)": 1.005004 |
|
}, |
|
{ |
|
"acc": 0.80505056, |
|
"epoch": 3.8842105263157896, |
|
"grad_norm": 6.004925727844238, |
|
"learning_rate": 9.978814762651657e-06, |
|
"loss": 0.522402, |
|
"memory(GiB)": 14.42, |
|
"step": 1230, |
|
"train_speed(iter/s)": 1.005125 |
|
}, |
|
{ |
|
"acc": 0.8060606, |
|
"epoch": 3.9, |
|
"grad_norm": 6.588228702545166, |
|
"learning_rate": 9.978330894375483e-06, |
|
"loss": 0.53442154, |
|
"memory(GiB)": 14.42, |
|
"step": 1235, |
|
"train_speed(iter/s)": 1.005384 |
|
}, |
|
{ |
|
"acc": 0.80707073, |
|
"epoch": 3.9157894736842103, |
|
"grad_norm": 6.131303787231445, |
|
"learning_rate": 9.977841574568525e-06, |
|
"loss": 0.55704346, |
|
"memory(GiB)": 14.42, |
|
"step": 1240, |
|
"train_speed(iter/s)": 1.005664 |
|
}, |
|
{ |
|
"acc": 0.79191923, |
|
"epoch": 3.931578947368421, |
|
"grad_norm": 8.15088176727295, |
|
"learning_rate": 9.977346803766668e-06, |
|
"loss": 0.55681534, |
|
"memory(GiB)": 14.42, |
|
"step": 1245, |
|
"train_speed(iter/s)": 1.005903 |
|
}, |
|
{ |
|
"acc": 0.82020206, |
|
"epoch": 3.9473684210526314, |
|
"grad_norm": 5.325417518615723, |
|
"learning_rate": 9.976846582511768e-06, |
|
"loss": 0.5185771, |
|
"memory(GiB)": 14.42, |
|
"step": 1250, |
|
"train_speed(iter/s)": 1.00618 |
|
}, |
|
{ |
|
"acc": 0.80404043, |
|
"epoch": 3.963157894736842, |
|
"grad_norm": 5.654501914978027, |
|
"learning_rate": 9.976340911351648e-06, |
|
"loss": 0.52004037, |
|
"memory(GiB)": 14.42, |
|
"step": 1255, |
|
"train_speed(iter/s)": 1.006248 |
|
}, |
|
{ |
|
"acc": 0.79191923, |
|
"epoch": 3.9789473684210526, |
|
"grad_norm": 6.57743501663208, |
|
"learning_rate": 9.975829790840095e-06, |
|
"loss": 0.56735611, |
|
"memory(GiB)": 14.42, |
|
"step": 1260, |
|
"train_speed(iter/s)": 1.006459 |
|
}, |
|
{ |
|
"acc": 0.79191923, |
|
"epoch": 3.9947368421052634, |
|
"grad_norm": 5.604462146759033, |
|
"learning_rate": 9.975313221536876e-06, |
|
"loss": 0.54044685, |
|
"memory(GiB)": 14.42, |
|
"step": 1265, |
|
"train_speed(iter/s)": 1.006423 |
|
}, |
|
{ |
|
"acc": 0.79797983, |
|
"epoch": 4.010526315789473, |
|
"grad_norm": 6.081342697143555, |
|
"learning_rate": 9.974791204007714e-06, |
|
"loss": 0.55837898, |
|
"memory(GiB)": 14.42, |
|
"step": 1270, |
|
"train_speed(iter/s)": 1.005996 |
|
}, |
|
{ |
|
"acc": 0.80808086, |
|
"epoch": 4.026315789473684, |
|
"grad_norm": 6.739323616027832, |
|
"learning_rate": 9.974263738824304e-06, |
|
"loss": 0.49736958, |
|
"memory(GiB)": 14.42, |
|
"step": 1275, |
|
"train_speed(iter/s)": 1.005807 |
|
}, |
|
{ |
|
"acc": 0.81717176, |
|
"epoch": 4.042105263157895, |
|
"grad_norm": 6.606033802032471, |
|
"learning_rate": 9.973730826564306e-06, |
|
"loss": 0.50827093, |
|
"memory(GiB)": 14.42, |
|
"step": 1280, |
|
"train_speed(iter/s)": 1.005844 |
|
}, |
|
{ |
|
"acc": 0.79797983, |
|
"epoch": 4.057894736842106, |
|
"grad_norm": 7.347227096557617, |
|
"learning_rate": 9.973192467811341e-06, |
|
"loss": 0.5242722, |
|
"memory(GiB)": 14.42, |
|
"step": 1285, |
|
"train_speed(iter/s)": 1.006037 |
|
}, |
|
{ |
|
"acc": 0.82323236, |
|
"epoch": 4.073684210526316, |
|
"grad_norm": 6.304427146911621, |
|
"learning_rate": 9.972648663155006e-06, |
|
"loss": 0.49846554, |
|
"memory(GiB)": 14.42, |
|
"step": 1290, |
|
"train_speed(iter/s)": 1.006178 |
|
}, |
|
{ |
|
"acc": 0.81010103, |
|
"epoch": 4.089473684210526, |
|
"grad_norm": 7.817327976226807, |
|
"learning_rate": 9.97209941319085e-06, |
|
"loss": 0.5091485, |
|
"memory(GiB)": 14.42, |
|
"step": 1295, |
|
"train_speed(iter/s)": 1.006371 |
|
}, |
|
{ |
|
"acc": 0.82222214, |
|
"epoch": 4.105263157894737, |
|
"grad_norm": 7.094549179077148, |
|
"learning_rate": 9.971544718520392e-06, |
|
"loss": 0.49558301, |
|
"memory(GiB)": 14.42, |
|
"step": 1300, |
|
"train_speed(iter/s)": 1.006509 |
|
}, |
|
{ |
|
"acc": 0.81313133, |
|
"epoch": 4.121052631578947, |
|
"grad_norm": 8.023250579833984, |
|
"learning_rate": 9.970984579751112e-06, |
|
"loss": 0.50098181, |
|
"memory(GiB)": 14.42, |
|
"step": 1305, |
|
"train_speed(iter/s)": 1.006667 |
|
}, |
|
{ |
|
"acc": 0.8333334, |
|
"epoch": 4.136842105263158, |
|
"grad_norm": 7.398085117340088, |
|
"learning_rate": 9.970418997496456e-06, |
|
"loss": 0.48294721, |
|
"memory(GiB)": 14.42, |
|
"step": 1310, |
|
"train_speed(iter/s)": 1.006569 |
|
}, |
|
{ |
|
"acc": 0.8181819, |
|
"epoch": 4.152631578947369, |
|
"grad_norm": 7.375761032104492, |
|
"learning_rate": 9.969847972375823e-06, |
|
"loss": 0.50575571, |
|
"memory(GiB)": 14.42, |
|
"step": 1315, |
|
"train_speed(iter/s)": 1.006525 |
|
}, |
|
{ |
|
"acc": 0.78888893, |
|
"epoch": 4.168421052631579, |
|
"grad_norm": 6.804074287414551, |
|
"learning_rate": 9.96927150501458e-06, |
|
"loss": 0.55558271, |
|
"memory(GiB)": 14.42, |
|
"step": 1320, |
|
"train_speed(iter/s)": 1.006562 |
|
}, |
|
{ |
|
"acc": 0.8151516, |
|
"epoch": 4.184210526315789, |
|
"grad_norm": 6.9089131355285645, |
|
"learning_rate": 9.968689596044054e-06, |
|
"loss": 0.50905294, |
|
"memory(GiB)": 14.42, |
|
"step": 1325, |
|
"train_speed(iter/s)": 1.006623 |
|
}, |
|
{ |
|
"acc": 0.81717176, |
|
"epoch": 4.2, |
|
"grad_norm": 7.5984086990356445, |
|
"learning_rate": 9.968102246101527e-06, |
|
"loss": 0.50087404, |
|
"memory(GiB)": 14.42, |
|
"step": 1330, |
|
"train_speed(iter/s)": 1.006669 |
|
}, |
|
{ |
|
"acc": 0.81616163, |
|
"epoch": 4.215789473684211, |
|
"grad_norm": 6.235317707061768, |
|
"learning_rate": 9.967509455830244e-06, |
|
"loss": 0.48131518, |
|
"memory(GiB)": 14.42, |
|
"step": 1335, |
|
"train_speed(iter/s)": 1.006806 |
|
}, |
|
{ |
|
"acc": 0.81717176, |
|
"epoch": 4.231578947368421, |
|
"grad_norm": 7.801043510437012, |
|
"learning_rate": 9.966911225879407e-06, |
|
"loss": 0.48789158, |
|
"memory(GiB)": 14.42, |
|
"step": 1340, |
|
"train_speed(iter/s)": 1.006723 |
|
}, |
|
{ |
|
"acc": 0.80505056, |
|
"epoch": 4.247368421052632, |
|
"grad_norm": 7.775477409362793, |
|
"learning_rate": 9.966307556904172e-06, |
|
"loss": 0.50765548, |
|
"memory(GiB)": 14.42, |
|
"step": 1345, |
|
"train_speed(iter/s)": 1.006702 |
|
}, |
|
{ |
|
"acc": 0.8121212, |
|
"epoch": 4.2631578947368425, |
|
"grad_norm": 7.874118804931641, |
|
"learning_rate": 9.965698449565654e-06, |
|
"loss": 0.47103491, |
|
"memory(GiB)": 14.42, |
|
"step": 1350, |
|
"train_speed(iter/s)": 1.006579 |
|
}, |
|
{ |
|
"acc": 0.82828283, |
|
"epoch": 4.278947368421052, |
|
"grad_norm": 7.379849433898926, |
|
"learning_rate": 9.965083904530928e-06, |
|
"loss": 0.48206449, |
|
"memory(GiB)": 14.42, |
|
"step": 1355, |
|
"train_speed(iter/s)": 1.006487 |
|
}, |
|
{ |
|
"acc": 0.8242425, |
|
"epoch": 4.294736842105263, |
|
"grad_norm": 8.313944816589355, |
|
"learning_rate": 9.964463922473016e-06, |
|
"loss": 0.50168781, |
|
"memory(GiB)": 14.42, |
|
"step": 1360, |
|
"train_speed(iter/s)": 1.006278 |
|
}, |
|
{ |
|
"acc": 0.83535366, |
|
"epoch": 4.310526315789474, |
|
"grad_norm": 6.975119113922119, |
|
"learning_rate": 9.963838504070901e-06, |
|
"loss": 0.48316975, |
|
"memory(GiB)": 14.42, |
|
"step": 1365, |
|
"train_speed(iter/s)": 1.006081 |
|
}, |
|
{ |
|
"acc": 0.82626266, |
|
"epoch": 4.326315789473684, |
|
"grad_norm": 7.731008052825928, |
|
"learning_rate": 9.96320765000952e-06, |
|
"loss": 0.50050735, |
|
"memory(GiB)": 14.42, |
|
"step": 1370, |
|
"train_speed(iter/s)": 1.005849 |
|
}, |
|
{ |
|
"acc": 0.82828293, |
|
"epoch": 4.342105263157895, |
|
"grad_norm": 7.532197952270508, |
|
"learning_rate": 9.962571360979756e-06, |
|
"loss": 0.48054743, |
|
"memory(GiB)": 14.42, |
|
"step": 1375, |
|
"train_speed(iter/s)": 1.005899 |
|
}, |
|
{ |
|
"acc": 0.82929296, |
|
"epoch": 4.3578947368421055, |
|
"grad_norm": 6.518101215362549, |
|
"learning_rate": 9.961929637678449e-06, |
|
"loss": 0.47691903, |
|
"memory(GiB)": 14.42, |
|
"step": 1380, |
|
"train_speed(iter/s)": 1.006048 |
|
}, |
|
{ |
|
"acc": 0.83737373, |
|
"epoch": 4.373684210526315, |
|
"grad_norm": 6.837509632110596, |
|
"learning_rate": 9.961282480808391e-06, |
|
"loss": 0.48503637, |
|
"memory(GiB)": 14.42, |
|
"step": 1385, |
|
"train_speed(iter/s)": 1.006208 |
|
}, |
|
{ |
|
"acc": 0.79191923, |
|
"epoch": 4.389473684210526, |
|
"grad_norm": 7.671966552734375, |
|
"learning_rate": 9.960629891078325e-06, |
|
"loss": 0.53532209, |
|
"memory(GiB)": 14.42, |
|
"step": 1390, |
|
"train_speed(iter/s)": 1.006186 |
|
}, |
|
{ |
|
"acc": 0.83131313, |
|
"epoch": 4.405263157894737, |
|
"grad_norm": 6.7305006980896, |
|
"learning_rate": 9.95997186920294e-06, |
|
"loss": 0.47744513, |
|
"memory(GiB)": 14.42, |
|
"step": 1395, |
|
"train_speed(iter/s)": 1.006212 |
|
}, |
|
{ |
|
"acc": 0.82626266, |
|
"epoch": 4.421052631578947, |
|
"grad_norm": 6.894629955291748, |
|
"learning_rate": 9.959308415902876e-06, |
|
"loss": 0.46218028, |
|
"memory(GiB)": 14.42, |
|
"step": 1400, |
|
"train_speed(iter/s)": 1.006345 |
|
}, |
|
{ |
|
"acc": 0.8181819, |
|
"epoch": 4.436842105263158, |
|
"grad_norm": 7.045905113220215, |
|
"learning_rate": 9.958639531904725e-06, |
|
"loss": 0.46672087, |
|
"memory(GiB)": 14.42, |
|
"step": 1405, |
|
"train_speed(iter/s)": 1.006472 |
|
}, |
|
{ |
|
"acc": 0.8303031, |
|
"epoch": 4.4526315789473685, |
|
"grad_norm": 7.43247652053833, |
|
"learning_rate": 9.957965217941023e-06, |
|
"loss": 0.47453899, |
|
"memory(GiB)": 14.42, |
|
"step": 1410, |
|
"train_speed(iter/s)": 1.006343 |
|
}, |
|
{ |
|
"acc": 0.8363636, |
|
"epoch": 4.468421052631579, |
|
"grad_norm": 7.158817768096924, |
|
"learning_rate": 9.957285474750248e-06, |
|
"loss": 0.47886271, |
|
"memory(GiB)": 14.42, |
|
"step": 1415, |
|
"train_speed(iter/s)": 1.00641 |
|
}, |
|
{ |
|
"acc": 0.82626266, |
|
"epoch": 4.484210526315789, |
|
"grad_norm": 6.566321849822998, |
|
"learning_rate": 9.956600303076834e-06, |
|
"loss": 0.45864029, |
|
"memory(GiB)": 14.42, |
|
"step": 1420, |
|
"train_speed(iter/s)": 1.006614 |
|
}, |
|
{ |
|
"acc": 0.83838387, |
|
"epoch": 4.5, |
|
"grad_norm": 7.215709686279297, |
|
"learning_rate": 9.955909703671154e-06, |
|
"loss": 0.46511064, |
|
"memory(GiB)": 14.42, |
|
"step": 1425, |
|
"train_speed(iter/s)": 1.006816 |
|
}, |
|
{ |
|
"acc": 0.82525253, |
|
"epoch": 4.515789473684211, |
|
"grad_norm": 8.241830825805664, |
|
"learning_rate": 9.955213677289523e-06, |
|
"loss": 0.49213314, |
|
"memory(GiB)": 14.42, |
|
"step": 1430, |
|
"train_speed(iter/s)": 1.006992 |
|
}, |
|
{ |
|
"acc": 0.85959597, |
|
"epoch": 4.531578947368421, |
|
"grad_norm": 7.597438335418701, |
|
"learning_rate": 9.954512224694207e-06, |
|
"loss": 0.42885356, |
|
"memory(GiB)": 14.42, |
|
"step": 1435, |
|
"train_speed(iter/s)": 1.006947 |
|
}, |
|
{ |
|
"acc": 0.8333334, |
|
"epoch": 4.5473684210526315, |
|
"grad_norm": 7.716129779815674, |
|
"learning_rate": 9.953805346653407e-06, |
|
"loss": 0.48312497, |
|
"memory(GiB)": 14.42, |
|
"step": 1440, |
|
"train_speed(iter/s)": 1.007053 |
|
}, |
|
{ |
|
"acc": 0.8272727, |
|
"epoch": 4.563157894736842, |
|
"grad_norm": 7.242175579071045, |
|
"learning_rate": 9.953093043941271e-06, |
|
"loss": 0.46601577, |
|
"memory(GiB)": 14.42, |
|
"step": 1445, |
|
"train_speed(iter/s)": 1.006919 |
|
}, |
|
{ |
|
"acc": 0.8272728, |
|
"epoch": 4.578947368421053, |
|
"grad_norm": 8.561416625976562, |
|
"learning_rate": 9.952375317337885e-06, |
|
"loss": 0.47396536, |
|
"memory(GiB)": 14.42, |
|
"step": 1450, |
|
"train_speed(iter/s)": 1.006824 |
|
}, |
|
{ |
|
"acc": 0.82929296, |
|
"epoch": 4.594736842105263, |
|
"grad_norm": 7.143245220184326, |
|
"learning_rate": 9.951652167629274e-06, |
|
"loss": 0.47127285, |
|
"memory(GiB)": 14.42, |
|
"step": 1455, |
|
"train_speed(iter/s)": 1.006945 |
|
}, |
|
{ |
|
"acc": 0.8303031, |
|
"epoch": 4.610526315789474, |
|
"grad_norm": 7.557952880859375, |
|
"learning_rate": 9.950923595607407e-06, |
|
"loss": 0.47242546, |
|
"memory(GiB)": 14.42, |
|
"step": 1460, |
|
"train_speed(iter/s)": 1.006993 |
|
}, |
|
{ |
|
"acc": 0.83131313, |
|
"epoch": 4.626315789473685, |
|
"grad_norm": 6.552892208099365, |
|
"learning_rate": 9.950189602070185e-06, |
|
"loss": 0.45216031, |
|
"memory(GiB)": 14.42, |
|
"step": 1465, |
|
"train_speed(iter/s)": 1.007145 |
|
}, |
|
{ |
|
"acc": 0.87070704, |
|
"epoch": 4.6421052631578945, |
|
"grad_norm": 6.04880428314209, |
|
"learning_rate": 9.949450187821455e-06, |
|
"loss": 0.36264579, |
|
"memory(GiB)": 14.42, |
|
"step": 1470, |
|
"train_speed(iter/s)": 1.007325 |
|
}, |
|
{ |
|
"acc": 0.85252533, |
|
"epoch": 4.657894736842105, |
|
"grad_norm": 8.390496253967285, |
|
"learning_rate": 9.94870535367099e-06, |
|
"loss": 0.42680759, |
|
"memory(GiB)": 14.42, |
|
"step": 1475, |
|
"train_speed(iter/s)": 1.007556 |
|
}, |
|
{ |
|
"acc": 0.84343443, |
|
"epoch": 4.673684210526316, |
|
"grad_norm": 8.083012580871582, |
|
"learning_rate": 9.947955100434505e-06, |
|
"loss": 0.45011797, |
|
"memory(GiB)": 14.42, |
|
"step": 1480, |
|
"train_speed(iter/s)": 1.007794 |
|
}, |
|
{ |
|
"acc": 0.83535357, |
|
"epoch": 4.689473684210526, |
|
"grad_norm": 7.6660590171813965, |
|
"learning_rate": 9.947199428933652e-06, |
|
"loss": 0.47048602, |
|
"memory(GiB)": 14.42, |
|
"step": 1485, |
|
"train_speed(iter/s)": 1.008031 |
|
}, |
|
{ |
|
"acc": 0.82323227, |
|
"epoch": 4.705263157894737, |
|
"grad_norm": 7.676632881164551, |
|
"learning_rate": 9.94643833999601e-06, |
|
"loss": 0.48105483, |
|
"memory(GiB)": 14.42, |
|
"step": 1490, |
|
"train_speed(iter/s)": 1.008291 |
|
}, |
|
{ |
|
"acc": 0.83434353, |
|
"epoch": 4.721052631578948, |
|
"grad_norm": 7.969620704650879, |
|
"learning_rate": 9.945671834455097e-06, |
|
"loss": 0.46031218, |
|
"memory(GiB)": 14.42, |
|
"step": 1495, |
|
"train_speed(iter/s)": 1.008525 |
|
}, |
|
{ |
|
"acc": 0.82020206, |
|
"epoch": 4.7368421052631575, |
|
"grad_norm": 9.78809928894043, |
|
"learning_rate": 9.94489991315036e-06, |
|
"loss": 0.49499507, |
|
"memory(GiB)": 14.42, |
|
"step": 1500, |
|
"train_speed(iter/s)": 1.008732 |
|
}, |
|
{ |
|
"acc": 0.8303031, |
|
"epoch": 4.752631578947368, |
|
"grad_norm": 7.448624134063721, |
|
"learning_rate": 9.944122576927178e-06, |
|
"loss": 0.43737659, |
|
"memory(GiB)": 14.42, |
|
"step": 1505, |
|
"train_speed(iter/s)": 1.00888 |
|
}, |
|
{ |
|
"acc": 0.8363636, |
|
"epoch": 4.768421052631579, |
|
"grad_norm": 7.217111110687256, |
|
"learning_rate": 9.943339826636857e-06, |
|
"loss": 0.43941054, |
|
"memory(GiB)": 14.42, |
|
"step": 1510, |
|
"train_speed(iter/s)": 1.008827 |
|
}, |
|
{ |
|
"acc": 0.84949493, |
|
"epoch": 4.784210526315789, |
|
"grad_norm": 7.279267311096191, |
|
"learning_rate": 9.942551663136639e-06, |
|
"loss": 0.41463737, |
|
"memory(GiB)": 14.42, |
|
"step": 1515, |
|
"train_speed(iter/s)": 1.008971 |
|
}, |
|
{ |
|
"acc": 0.8363636, |
|
"epoch": 4.8, |
|
"grad_norm": 8.194135665893555, |
|
"learning_rate": 9.941758087289692e-06, |
|
"loss": 0.4422389, |
|
"memory(GiB)": 14.42, |
|
"step": 1520, |
|
"train_speed(iter/s)": 1.008925 |
|
}, |
|
{ |
|
"acc": 0.82929296, |
|
"epoch": 4.815789473684211, |
|
"grad_norm": 7.623429775238037, |
|
"learning_rate": 9.940959099965104e-06, |
|
"loss": 0.47428942, |
|
"memory(GiB)": 14.42, |
|
"step": 1525, |
|
"train_speed(iter/s)": 1.009105 |
|
}, |
|
{ |
|
"acc": 0.8484848, |
|
"epoch": 4.831578947368421, |
|
"grad_norm": 9.262815475463867, |
|
"learning_rate": 9.9401547020379e-06, |
|
"loss": 0.41575365, |
|
"memory(GiB)": 14.42, |
|
"step": 1530, |
|
"train_speed(iter/s)": 1.009319 |
|
}, |
|
{ |
|
"acc": 0.8484848, |
|
"epoch": 4.847368421052631, |
|
"grad_norm": 8.296936988830566, |
|
"learning_rate": 9.939344894389026e-06, |
|
"loss": 0.47690134, |
|
"memory(GiB)": 14.42, |
|
"step": 1535, |
|
"train_speed(iter/s)": 1.009534 |
|
}, |
|
{ |
|
"acc": 0.84343433, |
|
"epoch": 4.863157894736842, |
|
"grad_norm": 7.545134544372559, |
|
"learning_rate": 9.938529677905353e-06, |
|
"loss": 0.44248581, |
|
"memory(GiB)": 14.42, |
|
"step": 1540, |
|
"train_speed(iter/s)": 1.009587 |
|
}, |
|
{ |
|
"acc": 0.86868687, |
|
"epoch": 4.878947368421053, |
|
"grad_norm": 7.578943252563477, |
|
"learning_rate": 9.93770905347967e-06, |
|
"loss": 0.40284214, |
|
"memory(GiB)": 14.42, |
|
"step": 1545, |
|
"train_speed(iter/s)": 1.009501 |
|
}, |
|
{ |
|
"acc": 0.8545455, |
|
"epoch": 4.894736842105263, |
|
"grad_norm": 7.998993396759033, |
|
"learning_rate": 9.9368830220107e-06, |
|
"loss": 0.40931978, |
|
"memory(GiB)": 14.42, |
|
"step": 1550, |
|
"train_speed(iter/s)": 1.009094 |
|
}, |
|
{ |
|
"acc": 0.83535347, |
|
"epoch": 4.910526315789474, |
|
"grad_norm": 6.669436454772949, |
|
"learning_rate": 9.936051584403078e-06, |
|
"loss": 0.43530879, |
|
"memory(GiB)": 14.42, |
|
"step": 1555, |
|
"train_speed(iter/s)": 1.00892 |
|
}, |
|
{ |
|
"acc": 0.82929296, |
|
"epoch": 4.926315789473684, |
|
"grad_norm": 8.15095329284668, |
|
"learning_rate": 9.935214741567361e-06, |
|
"loss": 0.44833851, |
|
"memory(GiB)": 14.42, |
|
"step": 1560, |
|
"train_speed(iter/s)": 1.008858 |
|
}, |
|
{ |
|
"acc": 0.86565657, |
|
"epoch": 4.942105263157895, |
|
"grad_norm": 10.634395599365234, |
|
"learning_rate": 9.934372494420032e-06, |
|
"loss": 0.40384045, |
|
"memory(GiB)": 14.42, |
|
"step": 1565, |
|
"train_speed(iter/s)": 1.008755 |
|
}, |
|
{ |
|
"acc": 0.85656567, |
|
"epoch": 4.957894736842105, |
|
"grad_norm": 8.751354217529297, |
|
"learning_rate": 9.933524843883484e-06, |
|
"loss": 0.40516777, |
|
"memory(GiB)": 14.42, |
|
"step": 1570, |
|
"train_speed(iter/s)": 1.008868 |
|
}, |
|
{ |
|
"acc": 0.8575758, |
|
"epoch": 4.973684210526316, |
|
"grad_norm": 9.245224952697754, |
|
"learning_rate": 9.932671790886033e-06, |
|
"loss": 0.42211332, |
|
"memory(GiB)": 14.42, |
|
"step": 1575, |
|
"train_speed(iter/s)": 1.008875 |
|
}, |
|
{ |
|
"acc": 0.8515152, |
|
"epoch": 4.989473684210527, |
|
"grad_norm": 8.566627502441406, |
|
"learning_rate": 9.93181333636191e-06, |
|
"loss": 0.43648109, |
|
"memory(GiB)": 14.42, |
|
"step": 1580, |
|
"train_speed(iter/s)": 1.008726 |
|
}, |
|
{ |
|
"acc": 0.87171726, |
|
"epoch": 5.005263157894737, |
|
"grad_norm": 8.230137825012207, |
|
"learning_rate": 9.930949481251263e-06, |
|
"loss": 0.38181038, |
|
"memory(GiB)": 14.42, |
|
"step": 1585, |
|
"train_speed(iter/s)": 1.008325 |
|
}, |
|
{ |
|
"acc": 0.869697, |
|
"epoch": 5.021052631578947, |
|
"grad_norm": 8.635025978088379, |
|
"learning_rate": 9.930080226500154e-06, |
|
"loss": 0.34935093, |
|
"memory(GiB)": 14.42, |
|
"step": 1590, |
|
"train_speed(iter/s)": 1.008136 |
|
}, |
|
{ |
|
"acc": 0.85555553, |
|
"epoch": 5.036842105263158, |
|
"grad_norm": 8.816386222839355, |
|
"learning_rate": 9.929205573060552e-06, |
|
"loss": 0.4156651, |
|
"memory(GiB)": 14.42, |
|
"step": 1595, |
|
"train_speed(iter/s)": 1.008151 |
|
}, |
|
{ |
|
"acc": 0.85959597, |
|
"epoch": 5.052631578947368, |
|
"grad_norm": 7.993004322052002, |
|
"learning_rate": 9.928325521890351e-06, |
|
"loss": 0.38215671, |
|
"memory(GiB)": 14.42, |
|
"step": 1600, |
|
"train_speed(iter/s)": 1.008242 |
|
}, |
|
{ |
|
"acc": 0.8575758, |
|
"epoch": 5.068421052631579, |
|
"grad_norm": 9.520785331726074, |
|
"learning_rate": 9.927440073953346e-06, |
|
"loss": 0.42380819, |
|
"memory(GiB)": 14.42, |
|
"step": 1605, |
|
"train_speed(iter/s)": 1.008391 |
|
}, |
|
{ |
|
"acc": 0.83737373, |
|
"epoch": 5.08421052631579, |
|
"grad_norm": 9.796367645263672, |
|
"learning_rate": 9.926549230219245e-06, |
|
"loss": 0.42773304, |
|
"memory(GiB)": 14.42, |
|
"step": 1610, |
|
"train_speed(iter/s)": 1.008415 |
|
}, |
|
{ |
|
"acc": 0.85858593, |
|
"epoch": 5.1, |
|
"grad_norm": 8.989090919494629, |
|
"learning_rate": 9.925652991663668e-06, |
|
"loss": 0.41599984, |
|
"memory(GiB)": 14.42, |
|
"step": 1615, |
|
"train_speed(iter/s)": 1.008499 |
|
}, |
|
{ |
|
"acc": 0.86565666, |
|
"epoch": 5.11578947368421, |
|
"grad_norm": 8.325226783752441, |
|
"learning_rate": 9.924751359268142e-06, |
|
"loss": 0.37139072, |
|
"memory(GiB)": 14.42, |
|
"step": 1620, |
|
"train_speed(iter/s)": 1.008669 |
|
}, |
|
{ |
|
"acc": 0.8787879, |
|
"epoch": 5.131578947368421, |
|
"grad_norm": 9.19404125213623, |
|
"learning_rate": 9.923844334020099e-06, |
|
"loss": 0.33956528, |
|
"memory(GiB)": 14.42, |
|
"step": 1625, |
|
"train_speed(iter/s)": 1.008823 |
|
}, |
|
{ |
|
"acc": 0.84343433, |
|
"epoch": 5.147368421052631, |
|
"grad_norm": 6.872360706329346, |
|
"learning_rate": 9.922931916912877e-06, |
|
"loss": 0.4026576, |
|
"memory(GiB)": 14.42, |
|
"step": 1630, |
|
"train_speed(iter/s)": 1.008627 |
|
}, |
|
{ |
|
"acc": 0.8727273, |
|
"epoch": 5.163157894736842, |
|
"grad_norm": 8.282492637634277, |
|
"learning_rate": 9.922014108945724e-06, |
|
"loss": 0.35363464, |
|
"memory(GiB)": 14.42, |
|
"step": 1635, |
|
"train_speed(iter/s)": 1.008475 |
|
}, |
|
{ |
|
"acc": 0.869697, |
|
"epoch": 5.178947368421053, |
|
"grad_norm": 9.004985809326172, |
|
"learning_rate": 9.921090911123785e-06, |
|
"loss": 0.37809303, |
|
"memory(GiB)": 14.42, |
|
"step": 1640, |
|
"train_speed(iter/s)": 1.008427 |
|
}, |
|
{ |
|
"acc": 0.86161613, |
|
"epoch": 5.1947368421052635, |
|
"grad_norm": 10.675838470458984, |
|
"learning_rate": 9.920162324458115e-06, |
|
"loss": 0.39833202, |
|
"memory(GiB)": 14.42, |
|
"step": 1645, |
|
"train_speed(iter/s)": 1.008556 |
|
}, |
|
{ |
|
"acc": 0.86767683, |
|
"epoch": 5.2105263157894735, |
|
"grad_norm": 9.150327682495117, |
|
"learning_rate": 9.919228349965663e-06, |
|
"loss": 0.40053234, |
|
"memory(GiB)": 14.42, |
|
"step": 1650, |
|
"train_speed(iter/s)": 1.008788 |
|
}, |
|
{ |
|
"acc": 0.87070713, |
|
"epoch": 5.226315789473684, |
|
"grad_norm": 8.315364837646484, |
|
"learning_rate": 9.918288988669284e-06, |
|
"loss": 0.38295619, |
|
"memory(GiB)": 14.42, |
|
"step": 1655, |
|
"train_speed(iter/s)": 1.009029 |
|
}, |
|
{ |
|
"acc": 0.84949493, |
|
"epoch": 5.242105263157895, |
|
"grad_norm": 10.003687858581543, |
|
"learning_rate": 9.91734424159773e-06, |
|
"loss": 0.39980845, |
|
"memory(GiB)": 14.42, |
|
"step": 1660, |
|
"train_speed(iter/s)": 1.009171 |
|
}, |
|
{ |
|
"acc": 0.86868687, |
|
"epoch": 5.257894736842105, |
|
"grad_norm": 6.885008335113525, |
|
"learning_rate": 9.916394109785654e-06, |
|
"loss": 0.35017097, |
|
"memory(GiB)": 14.42, |
|
"step": 1665, |
|
"train_speed(iter/s)": 1.009269 |
|
}, |
|
{ |
|
"acc": 0.869697, |
|
"epoch": 5.273684210526316, |
|
"grad_norm": 9.228471755981445, |
|
"learning_rate": 9.9154385942736e-06, |
|
"loss": 0.3592062, |
|
"memory(GiB)": 14.42, |
|
"step": 1670, |
|
"train_speed(iter/s)": 1.00922 |
|
}, |
|
{ |
|
"acc": 0.86161613, |
|
"epoch": 5.2894736842105265, |
|
"grad_norm": 9.5510835647583, |
|
"learning_rate": 9.914477696108015e-06, |
|
"loss": 0.3855813, |
|
"memory(GiB)": 14.42, |
|
"step": 1675, |
|
"train_speed(iter/s)": 1.008977 |
|
}, |
|
{ |
|
"acc": 0.88383846, |
|
"epoch": 5.3052631578947365, |
|
"grad_norm": 7.740030765533447, |
|
"learning_rate": 9.91351141634124e-06, |
|
"loss": 0.34937143, |
|
"memory(GiB)": 14.42, |
|
"step": 1680, |
|
"train_speed(iter/s)": 1.008844 |
|
}, |
|
{ |
|
"acc": 0.8575758, |
|
"epoch": 5.321052631578947, |
|
"grad_norm": 9.399805068969727, |
|
"learning_rate": 9.912539756031506e-06, |
|
"loss": 0.38550258, |
|
"memory(GiB)": 14.42, |
|
"step": 1685, |
|
"train_speed(iter/s)": 1.008777 |
|
}, |
|
{ |
|
"acc": 0.86868687, |
|
"epoch": 5.336842105263158, |
|
"grad_norm": 9.374094009399414, |
|
"learning_rate": 9.911562716242933e-06, |
|
"loss": 0.37300575, |
|
"memory(GiB)": 14.42, |
|
"step": 1690, |
|
"train_speed(iter/s)": 1.008623 |
|
}, |
|
{ |
|
"acc": 0.85353537, |
|
"epoch": 5.352631578947369, |
|
"grad_norm": 8.98189640045166, |
|
"learning_rate": 9.910580298045547e-06, |
|
"loss": 0.39055982, |
|
"memory(GiB)": 14.42, |
|
"step": 1695, |
|
"train_speed(iter/s)": 1.008438 |
|
}, |
|
{ |
|
"acc": 0.85656567, |
|
"epoch": 5.368421052631579, |
|
"grad_norm": 7.874349594116211, |
|
"learning_rate": 9.90959250251525e-06, |
|
"loss": 0.38435912, |
|
"memory(GiB)": 14.42, |
|
"step": 1700, |
|
"train_speed(iter/s)": 1.008244 |
|
}, |
|
{ |
|
"acc": 0.87171717, |
|
"epoch": 5.38421052631579, |
|
"grad_norm": 8.327530860900879, |
|
"learning_rate": 9.908599330733839e-06, |
|
"loss": 0.35224979, |
|
"memory(GiB)": 14.42, |
|
"step": 1705, |
|
"train_speed(iter/s)": 1.008096 |
|
}, |
|
{ |
|
"acc": 0.87474747, |
|
"epoch": 5.4, |
|
"grad_norm": 9.212203979492188, |
|
"learning_rate": 9.907600783788997e-06, |
|
"loss": 0.35220504, |
|
"memory(GiB)": 14.42, |
|
"step": 1710, |
|
"train_speed(iter/s)": 1.008077 |
|
}, |
|
{ |
|
"acc": 0.869697, |
|
"epoch": 5.41578947368421, |
|
"grad_norm": 10.622110366821289, |
|
"learning_rate": 9.906596862774297e-06, |
|
"loss": 0.38187442, |
|
"memory(GiB)": 14.42, |
|
"step": 1715, |
|
"train_speed(iter/s)": 1.008041 |
|
}, |
|
{ |
|
"acc": 0.869697, |
|
"epoch": 5.431578947368421, |
|
"grad_norm": 8.22000503540039, |
|
"learning_rate": 9.905587568789192e-06, |
|
"loss": 0.35868003, |
|
"memory(GiB)": 14.42, |
|
"step": 1720, |
|
"train_speed(iter/s)": 1.00814 |
|
}, |
|
{ |
|
"acc": 0.87171726, |
|
"epoch": 5.447368421052632, |
|
"grad_norm": 9.036218643188477, |
|
"learning_rate": 9.904572902939023e-06, |
|
"loss": 0.33650646, |
|
"memory(GiB)": 14.42, |
|
"step": 1725, |
|
"train_speed(iter/s)": 1.008217 |
|
}, |
|
{ |
|
"acc": 0.86565657, |
|
"epoch": 5.463157894736842, |
|
"grad_norm": 8.017346382141113, |
|
"learning_rate": 9.903552866335014e-06, |
|
"loss": 0.39399965, |
|
"memory(GiB)": 14.42, |
|
"step": 1730, |
|
"train_speed(iter/s)": 1.008266 |
|
}, |
|
{ |
|
"acc": 0.8878788, |
|
"epoch": 5.478947368421053, |
|
"grad_norm": 7.885061740875244, |
|
"learning_rate": 9.902527460094271e-06, |
|
"loss": 0.32297885, |
|
"memory(GiB)": 14.42, |
|
"step": 1735, |
|
"train_speed(iter/s)": 1.008123 |
|
}, |
|
{ |
|
"acc": 0.88383846, |
|
"epoch": 5.494736842105263, |
|
"grad_norm": 6.989620208740234, |
|
"learning_rate": 9.90149668533978e-06, |
|
"loss": 0.32285008, |
|
"memory(GiB)": 14.42, |
|
"step": 1740, |
|
"train_speed(iter/s)": 1.007983 |
|
}, |
|
{ |
|
"acc": 0.8757576, |
|
"epoch": 5.510526315789473, |
|
"grad_norm": 10.041963577270508, |
|
"learning_rate": 9.900460543200403e-06, |
|
"loss": 0.36242232, |
|
"memory(GiB)": 14.42, |
|
"step": 1745, |
|
"train_speed(iter/s)": 1.007954 |
|
}, |
|
{ |
|
"acc": 0.86868687, |
|
"epoch": 5.526315789473684, |
|
"grad_norm": 8.784823417663574, |
|
"learning_rate": 9.899419034810887e-06, |
|
"loss": 0.3428535, |
|
"memory(GiB)": 14.42, |
|
"step": 1750, |
|
"train_speed(iter/s)": 1.008053 |
|
}, |
|
{ |
|
"acc": 0.88585854, |
|
"epoch": 5.542105263157895, |
|
"grad_norm": 8.738446235656738, |
|
"learning_rate": 9.898372161311851e-06, |
|
"loss": 0.34290485, |
|
"memory(GiB)": 14.42, |
|
"step": 1755, |
|
"train_speed(iter/s)": 1.00821 |
|
}, |
|
{ |
|
"acc": 0.8848485, |
|
"epoch": 5.557894736842105, |
|
"grad_norm": 7.158604145050049, |
|
"learning_rate": 9.897319923849792e-06, |
|
"loss": 0.34065032, |
|
"memory(GiB)": 14.42, |
|
"step": 1760, |
|
"train_speed(iter/s)": 1.008285 |
|
}, |
|
{ |
|
"acc": 0.86565666, |
|
"epoch": 5.573684210526316, |
|
"grad_norm": 9.903304100036621, |
|
"learning_rate": 9.896262323577079e-06, |
|
"loss": 0.39218345, |
|
"memory(GiB)": 14.42, |
|
"step": 1765, |
|
"train_speed(iter/s)": 1.008471 |
|
}, |
|
{ |
|
"acc": 0.89595966, |
|
"epoch": 5.589473684210526, |
|
"grad_norm": 8.26339340209961, |
|
"learning_rate": 9.895199361651956e-06, |
|
"loss": 0.28524117, |
|
"memory(GiB)": 14.42, |
|
"step": 1770, |
|
"train_speed(iter/s)": 1.008633 |
|
}, |
|
{ |
|
"acc": 0.89191914, |
|
"epoch": 5.605263157894737, |
|
"grad_norm": 8.691139221191406, |
|
"learning_rate": 9.894131039238538e-06, |
|
"loss": 0.31279945, |
|
"memory(GiB)": 14.42, |
|
"step": 1775, |
|
"train_speed(iter/s)": 1.008776 |
|
}, |
|
{ |
|
"acc": 0.87878799, |
|
"epoch": 5.621052631578947, |
|
"grad_norm": 10.063474655151367, |
|
"learning_rate": 9.893057357506812e-06, |
|
"loss": 0.35178609, |
|
"memory(GiB)": 14.42, |
|
"step": 1780, |
|
"train_speed(iter/s)": 1.008879 |
|
}, |
|
{ |
|
"acc": 0.8848485, |
|
"epoch": 5.636842105263158, |
|
"grad_norm": 11.359123229980469, |
|
"learning_rate": 9.891978317632631e-06, |
|
"loss": 0.34196472, |
|
"memory(GiB)": 14.42, |
|
"step": 1785, |
|
"train_speed(iter/s)": 1.009038 |
|
}, |
|
{ |
|
"acc": 0.88989897, |
|
"epoch": 5.652631578947369, |
|
"grad_norm": 9.00007152557373, |
|
"learning_rate": 9.890893920797723e-06, |
|
"loss": 0.32037597, |
|
"memory(GiB)": 14.42, |
|
"step": 1790, |
|
"train_speed(iter/s)": 1.00922 |
|
}, |
|
{ |
|
"acc": 0.88383846, |
|
"epoch": 5.668421052631579, |
|
"grad_norm": 8.761248588562012, |
|
"learning_rate": 9.889804168189673e-06, |
|
"loss": 0.33403468, |
|
"memory(GiB)": 14.42, |
|
"step": 1795, |
|
"train_speed(iter/s)": 1.009263 |
|
}, |
|
{ |
|
"acc": 0.88282833, |
|
"epoch": 5.684210526315789, |
|
"grad_norm": 8.742795944213867, |
|
"learning_rate": 9.88870906100194e-06, |
|
"loss": 0.33450744, |
|
"memory(GiB)": 14.42, |
|
"step": 1800, |
|
"train_speed(iter/s)": 1.00941 |
|
}, |
|
{ |
|
"acc": 0.87070713, |
|
"epoch": 5.7, |
|
"grad_norm": 8.495540618896484, |
|
"learning_rate": 9.88760860043384e-06, |
|
"loss": 0.36525402, |
|
"memory(GiB)": 14.42, |
|
"step": 1805, |
|
"train_speed(iter/s)": 1.009578 |
|
}, |
|
{ |
|
"acc": 0.8818182, |
|
"epoch": 5.715789473684211, |
|
"grad_norm": 8.872817993164062, |
|
"learning_rate": 9.88650278769056e-06, |
|
"loss": 0.33951588, |
|
"memory(GiB)": 14.42, |
|
"step": 1810, |
|
"train_speed(iter/s)": 1.009712 |
|
}, |
|
{ |
|
"acc": 0.88484859, |
|
"epoch": 5.731578947368421, |
|
"grad_norm": 9.544350624084473, |
|
"learning_rate": 9.885391623983142e-06, |
|
"loss": 0.32934637, |
|
"memory(GiB)": 14.42, |
|
"step": 1815, |
|
"train_speed(iter/s)": 1.009819 |
|
}, |
|
{ |
|
"acc": 0.89797983, |
|
"epoch": 5.747368421052632, |
|
"grad_norm": 7.9700117111206055, |
|
"learning_rate": 9.884275110528489e-06, |
|
"loss": 0.29957988, |
|
"memory(GiB)": 14.42, |
|
"step": 1820, |
|
"train_speed(iter/s)": 1.009811 |
|
}, |
|
{ |
|
"acc": 0.87171717, |
|
"epoch": 5.7631578947368425, |
|
"grad_norm": 9.927080154418945, |
|
"learning_rate": 9.883153248549367e-06, |
|
"loss": 0.35283065, |
|
"memory(GiB)": 14.42, |
|
"step": 1825, |
|
"train_speed(iter/s)": 1.009745 |
|
}, |
|
{ |
|
"acc": 0.8878788, |
|
"epoch": 5.778947368421052, |
|
"grad_norm": 8.701126098632812, |
|
"learning_rate": 9.882026039274394e-06, |
|
"loss": 0.30775204, |
|
"memory(GiB)": 14.42, |
|
"step": 1830, |
|
"train_speed(iter/s)": 1.009604 |
|
}, |
|
{ |
|
"acc": 0.87979803, |
|
"epoch": 5.794736842105263, |
|
"grad_norm": 9.421489715576172, |
|
"learning_rate": 9.880893483938046e-06, |
|
"loss": 0.3247303, |
|
"memory(GiB)": 14.42, |
|
"step": 1835, |
|
"train_speed(iter/s)": 1.009598 |
|
}, |
|
{ |
|
"acc": 0.87171717, |
|
"epoch": 5.810526315789474, |
|
"grad_norm": 11.115700721740723, |
|
"learning_rate": 9.879755583780655e-06, |
|
"loss": 0.34211159, |
|
"memory(GiB)": 14.42, |
|
"step": 1840, |
|
"train_speed(iter/s)": 1.009748 |
|
}, |
|
{ |
|
"acc": 0.89898987, |
|
"epoch": 5.826315789473684, |
|
"grad_norm": 8.85307502746582, |
|
"learning_rate": 9.878612340048409e-06, |
|
"loss": 0.28415699, |
|
"memory(GiB)": 14.42, |
|
"step": 1845, |
|
"train_speed(iter/s)": 1.009816 |
|
}, |
|
{ |
|
"acc": 0.89898987, |
|
"epoch": 5.842105263157895, |
|
"grad_norm": 8.639701843261719, |
|
"learning_rate": 9.877463753993341e-06, |
|
"loss": 0.29079266, |
|
"memory(GiB)": 14.42, |
|
"step": 1850, |
|
"train_speed(iter/s)": 1.009713 |
|
}, |
|
{ |
|
"acc": 0.90808086, |
|
"epoch": 5.8578947368421055, |
|
"grad_norm": 7.4572319984436035, |
|
"learning_rate": 9.876309826873343e-06, |
|
"loss": 0.27632694, |
|
"memory(GiB)": 14.42, |
|
"step": 1855, |
|
"train_speed(iter/s)": 1.009548 |
|
}, |
|
{ |
|
"acc": 0.88888893, |
|
"epoch": 5.873684210526315, |
|
"grad_norm": 9.328161239624023, |
|
"learning_rate": 9.875150559952146e-06, |
|
"loss": 0.31321073, |
|
"memory(GiB)": 14.42, |
|
"step": 1860, |
|
"train_speed(iter/s)": 1.009361 |
|
}, |
|
{ |
|
"acc": 0.88686876, |
|
"epoch": 5.889473684210526, |
|
"grad_norm": 8.32144832611084, |
|
"learning_rate": 9.873985954499339e-06, |
|
"loss": 0.32595744, |
|
"memory(GiB)": 14.42, |
|
"step": 1865, |
|
"train_speed(iter/s)": 1.009233 |
|
}, |
|
{ |
|
"acc": 0.89898987, |
|
"epoch": 5.905263157894737, |
|
"grad_norm": 8.621851921081543, |
|
"learning_rate": 9.872816011790353e-06, |
|
"loss": 0.28815918, |
|
"memory(GiB)": 14.42, |
|
"step": 1870, |
|
"train_speed(iter/s)": 1.009126 |
|
}, |
|
{ |
|
"acc": 0.88282833, |
|
"epoch": 5.921052631578947, |
|
"grad_norm": 8.001331329345703, |
|
"learning_rate": 9.871640733106467e-06, |
|
"loss": 0.32237473, |
|
"memory(GiB)": 14.42, |
|
"step": 1875, |
|
"train_speed(iter/s)": 1.009003 |
|
}, |
|
{ |
|
"acc": 0.90202026, |
|
"epoch": 5.936842105263158, |
|
"grad_norm": 9.54694652557373, |
|
"learning_rate": 9.870460119734797e-06, |
|
"loss": 0.28403633, |
|
"memory(GiB)": 14.42, |
|
"step": 1880, |
|
"train_speed(iter/s)": 1.008879 |
|
}, |
|
{ |
|
"acc": 0.89797974, |
|
"epoch": 5.9526315789473685, |
|
"grad_norm": 8.340572357177734, |
|
"learning_rate": 9.869274172968306e-06, |
|
"loss": 0.29031484, |
|
"memory(GiB)": 14.42, |
|
"step": 1885, |
|
"train_speed(iter/s)": 1.008727 |
|
}, |
|
{ |
|
"acc": 0.89494953, |
|
"epoch": 5.968421052631579, |
|
"grad_norm": 8.185052871704102, |
|
"learning_rate": 9.868082894105802e-06, |
|
"loss": 0.27898729, |
|
"memory(GiB)": 14.42, |
|
"step": 1890, |
|
"train_speed(iter/s)": 1.008673 |
|
}, |
|
{ |
|
"acc": 0.88080807, |
|
"epoch": 5.984210526315789, |
|
"grad_norm": 9.495261192321777, |
|
"learning_rate": 9.866886284451926e-06, |
|
"loss": 0.32206149, |
|
"memory(GiB)": 14.42, |
|
"step": 1895, |
|
"train_speed(iter/s)": 1.00858 |
|
}, |
|
{ |
|
"acc": 0.89292936, |
|
"epoch": 6.0, |
|
"grad_norm": 10.085763931274414, |
|
"learning_rate": 9.865684345317158e-06, |
|
"loss": 0.32589726, |
|
"memory(GiB)": 14.42, |
|
"step": 1900, |
|
"train_speed(iter/s)": 1.008557 |
|
}, |
|
{ |
|
"acc": 0.8939394, |
|
"epoch": 6.015789473684211, |
|
"grad_norm": 7.984169960021973, |
|
"learning_rate": 9.864477078017818e-06, |
|
"loss": 0.29888535, |
|
"memory(GiB)": 14.42, |
|
"step": 1905, |
|
"train_speed(iter/s)": 1.008474 |
|
}, |
|
{ |
|
"acc": 0.88383846, |
|
"epoch": 6.031578947368421, |
|
"grad_norm": 8.458024978637695, |
|
"learning_rate": 9.863264483876056e-06, |
|
"loss": 0.28832068, |
|
"memory(GiB)": 14.42, |
|
"step": 1910, |
|
"train_speed(iter/s)": 1.008555 |
|
}, |
|
{ |
|
"acc": 0.91111107, |
|
"epoch": 6.0473684210526315, |
|
"grad_norm": 8.8190279006958, |
|
"learning_rate": 9.862046564219863e-06, |
|
"loss": 0.26591012, |
|
"memory(GiB)": 14.42, |
|
"step": 1915, |
|
"train_speed(iter/s)": 1.008617 |
|
}, |
|
{ |
|
"acc": 0.9060606, |
|
"epoch": 6.063157894736842, |
|
"grad_norm": 10.704242706298828, |
|
"learning_rate": 9.860823320383057e-06, |
|
"loss": 0.30131092, |
|
"memory(GiB)": 14.42, |
|
"step": 1920, |
|
"train_speed(iter/s)": 1.008617 |
|
}, |
|
{ |
|
"acc": 0.88989906, |
|
"epoch": 6.078947368421052, |
|
"grad_norm": 7.7713541984558105, |
|
"learning_rate": 9.859594753705287e-06, |
|
"loss": 0.31342063, |
|
"memory(GiB)": 14.42, |
|
"step": 1925, |
|
"train_speed(iter/s)": 1.008657 |
|
}, |
|
{ |
|
"acc": 0.8969697, |
|
"epoch": 6.094736842105263, |
|
"grad_norm": 9.331098556518555, |
|
"learning_rate": 9.858360865532031e-06, |
|
"loss": 0.30202923, |
|
"memory(GiB)": 14.42, |
|
"step": 1930, |
|
"train_speed(iter/s)": 1.008653 |
|
}, |
|
{ |
|
"acc": 0.90101013, |
|
"epoch": 6.110526315789474, |
|
"grad_norm": 8.15951919555664, |
|
"learning_rate": 9.857121657214598e-06, |
|
"loss": 0.2731153, |
|
"memory(GiB)": 14.42, |
|
"step": 1935, |
|
"train_speed(iter/s)": 1.008746 |
|
}, |
|
{ |
|
"acc": 0.9151516, |
|
"epoch": 6.126315789473685, |
|
"grad_norm": 8.893780708312988, |
|
"learning_rate": 9.855877130110123e-06, |
|
"loss": 0.23995247, |
|
"memory(GiB)": 14.42, |
|
"step": 1940, |
|
"train_speed(iter/s)": 1.008941 |
|
}, |
|
{ |
|
"acc": 0.88686867, |
|
"epoch": 6.1421052631578945, |
|
"grad_norm": 5.452535629272461, |
|
"learning_rate": 9.854627285581564e-06, |
|
"loss": 0.28711867, |
|
"memory(GiB)": 14.42, |
|
"step": 1945, |
|
"train_speed(iter/s)": 1.009073 |
|
}, |
|
{ |
|
"acc": 0.91111107, |
|
"epoch": 6.157894736842105, |
|
"grad_norm": 11.006546020507812, |
|
"learning_rate": 9.853372124997702e-06, |
|
"loss": 0.27313223, |
|
"memory(GiB)": 14.42, |
|
"step": 1950, |
|
"train_speed(iter/s)": 1.009118 |
|
}, |
|
{ |
|
"acc": 0.88989906, |
|
"epoch": 6.173684210526316, |
|
"grad_norm": 7.908746242523193, |
|
"learning_rate": 9.852111649733143e-06, |
|
"loss": 0.32192535, |
|
"memory(GiB)": 14.42, |
|
"step": 1955, |
|
"train_speed(iter/s)": 1.009218 |
|
}, |
|
{ |
|
"acc": 0.89898987, |
|
"epoch": 6.189473684210526, |
|
"grad_norm": 7.4186692237854, |
|
"learning_rate": 9.85084586116831e-06, |
|
"loss": 0.28101387, |
|
"memory(GiB)": 14.42, |
|
"step": 1960, |
|
"train_speed(iter/s)": 1.009292 |
|
}, |
|
{ |
|
"acc": 0.9, |
|
"epoch": 6.205263157894737, |
|
"grad_norm": 8.2991943359375, |
|
"learning_rate": 9.849574760689445e-06, |
|
"loss": 0.30966353, |
|
"memory(GiB)": 14.42, |
|
"step": 1965, |
|
"train_speed(iter/s)": 1.009205 |
|
}, |
|
{ |
|
"acc": 0.9181819, |
|
"epoch": 6.221052631578948, |
|
"grad_norm": 7.403661727905273, |
|
"learning_rate": 9.848298349688609e-06, |
|
"loss": 0.24571998, |
|
"memory(GiB)": 14.42, |
|
"step": 1970, |
|
"train_speed(iter/s)": 1.009147 |
|
}, |
|
{ |
|
"acc": 0.91313133, |
|
"epoch": 6.2368421052631575, |
|
"grad_norm": 7.864448070526123, |
|
"learning_rate": 9.847016629563683e-06, |
|
"loss": 0.26097565, |
|
"memory(GiB)": 14.42, |
|
"step": 1975, |
|
"train_speed(iter/s)": 1.00915 |
|
}, |
|
{ |
|
"acc": 0.9090909, |
|
"epoch": 6.252631578947368, |
|
"grad_norm": 10.98455810546875, |
|
"learning_rate": 9.845729601718354e-06, |
|
"loss": 0.2881371, |
|
"memory(GiB)": 14.42, |
|
"step": 1980, |
|
"train_speed(iter/s)": 1.009002 |
|
}, |
|
{ |
|
"acc": 0.9121212, |
|
"epoch": 6.268421052631579, |
|
"grad_norm": 8.784940719604492, |
|
"learning_rate": 9.844437267562127e-06, |
|
"loss": 0.24309015, |
|
"memory(GiB)": 14.42, |
|
"step": 1985, |
|
"train_speed(iter/s)": 1.008873 |
|
}, |
|
{ |
|
"acc": 0.91616163, |
|
"epoch": 6.284210526315789, |
|
"grad_norm": 7.331908226013184, |
|
"learning_rate": 9.84313962851032e-06, |
|
"loss": 0.24364107, |
|
"memory(GiB)": 14.42, |
|
"step": 1990, |
|
"train_speed(iter/s)": 1.008851 |
|
}, |
|
{ |
|
"acc": 0.91010113, |
|
"epoch": 6.3, |
|
"grad_norm": 10.048667907714844, |
|
"learning_rate": 9.841836685984052e-06, |
|
"loss": 0.24012024, |
|
"memory(GiB)": 14.42, |
|
"step": 1995, |
|
"train_speed(iter/s)": 1.008919 |
|
}, |
|
{ |
|
"acc": 0.90505047, |
|
"epoch": 6.315789473684211, |
|
"grad_norm": 11.079018592834473, |
|
"learning_rate": 9.840528441410261e-06, |
|
"loss": 0.28295364, |
|
"memory(GiB)": 14.42, |
|
"step": 2000, |
|
"train_speed(iter/s)": 1.009018 |
|
}, |
|
{ |
|
"epoch": 6.315789473684211, |
|
"eval_acc": 0.5149603288404551, |
|
"eval_loss": 6.357347011566162, |
|
"eval_runtime": 49.4166, |
|
"eval_samples_per_second": 38.449, |
|
"eval_steps_per_second": 6.415, |
|
"step": 2000 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 15800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 2000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.8379717835318886e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|