Disease_Detection / trainer_state.json
dz-osamu's picture
Upload folder using huggingface_hub
9669fd7 verified
{
"best_metric": 4.79392624,
"best_model_checkpoint": "/mnt/bn/haiyang-dataset-lq/medical/outputde2d/qwen2-vl-2b-instruct/v1-20241108-205643/checkpoint-500",
"epoch": 49.31506849315068,
"eval_steps": 500,
"global_step": 1800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"acc": 0.85866278,
"epoch": 0.0273972602739726,
"grad_norm": 11.529897689819336,
"learning_rate": 0.0,
"loss": 0.41227522,
"memory(GiB)": 12.7,
"step": 1,
"train_speed(iter/s)": 0.042692
},
{
"acc": 0.82054573,
"epoch": 0.136986301369863,
"grad_norm": 13.506338119506836,
"learning_rate": 3.576679971701948e-06,
"loss": 0.50167066,
"memory(GiB)": 14.16,
"step": 5,
"train_speed(iter/s)": 0.146289
},
{
"acc": 0.87029715,
"epoch": 0.273972602739726,
"grad_norm": 11.584628105163574,
"learning_rate": 5.117072191244584e-06,
"loss": 0.41271429,
"memory(GiB)": 14.16,
"step": 10,
"train_speed(iter/s)": 0.206768
},
{
"acc": 0.86857662,
"epoch": 0.410958904109589,
"grad_norm": 17.546506881713867,
"learning_rate": 6.018143876079656e-06,
"loss": 0.39275663,
"memory(GiB)": 14.16,
"step": 15,
"train_speed(iter/s)": 0.242483
},
{
"acc": 0.86033154,
"epoch": 0.547945205479452,
"grad_norm": 19.42036247253418,
"learning_rate": 6.65746441078722e-06,
"loss": 0.42753201,
"memory(GiB)": 14.16,
"step": 20,
"train_speed(iter/s)": 0.263319
},
{
"acc": 0.8581007,
"epoch": 0.684931506849315,
"grad_norm": 17.666065216064453,
"learning_rate": 7.153359943403896e-06,
"loss": 0.43485794,
"memory(GiB)": 14.16,
"step": 25,
"train_speed(iter/s)": 0.277966
},
{
"acc": 0.85695076,
"epoch": 0.821917808219178,
"grad_norm": 12.121685028076172,
"learning_rate": 7.558536095622292e-06,
"loss": 0.42965946,
"memory(GiB)": 14.16,
"step": 30,
"train_speed(iter/s)": 0.289626
},
{
"acc": 0.85145502,
"epoch": 0.958904109589041,
"grad_norm": 12.511621475219727,
"learning_rate": 7.901107651134205e-06,
"loss": 0.45605674,
"memory(GiB)": 14.16,
"step": 35,
"train_speed(iter/s)": 0.297359
},
{
"acc": 0.88358593,
"epoch": 1.095890410958904,
"grad_norm": 10.614742279052734,
"learning_rate": 8.197856630329855e-06,
"loss": 0.36642389,
"memory(GiB)": 14.16,
"step": 40,
"train_speed(iter/s)": 0.304027
},
{
"acc": 0.87005787,
"epoch": 1.2328767123287672,
"grad_norm": 12.311365127563477,
"learning_rate": 8.459607780457364e-06,
"loss": 0.43741484,
"memory(GiB)": 14.16,
"step": 45,
"train_speed(iter/s)": 0.309632
},
{
"acc": 0.87604589,
"epoch": 1.36986301369863,
"grad_norm": 13.369311332702637,
"learning_rate": 8.693752162946532e-06,
"loss": 0.39061749,
"memory(GiB)": 14.16,
"step": 50,
"train_speed(iter/s)": 0.313752
},
{
"acc": 0.89183826,
"epoch": 1.5068493150684932,
"grad_norm": 13.052772521972656,
"learning_rate": 8.905561521090629e-06,
"loss": 0.34727774,
"memory(GiB)": 14.16,
"step": 55,
"train_speed(iter/s)": 0.317026
},
{
"acc": 0.88800755,
"epoch": 1.643835616438356,
"grad_norm": 14.654471397399902,
"learning_rate": 9.098928315164927e-06,
"loss": 0.34038644,
"memory(GiB)": 14.16,
"step": 60,
"train_speed(iter/s)": 0.320617
},
{
"acc": 0.88333483,
"epoch": 1.7808219178082192,
"grad_norm": 12.527990341186523,
"learning_rate": 9.27680852241303e-06,
"loss": 0.34145203,
"memory(GiB)": 14.16,
"step": 65,
"train_speed(iter/s)": 0.323067
},
{
"acc": 0.88713379,
"epoch": 1.9178082191780823,
"grad_norm": 13.173103332519531,
"learning_rate": 9.441499870676842e-06,
"loss": 0.3459826,
"memory(GiB)": 14.16,
"step": 70,
"train_speed(iter/s)": 0.326063
},
{
"acc": 0.90485744,
"epoch": 2.0547945205479454,
"grad_norm": 11.286486625671387,
"learning_rate": 9.594823847781604e-06,
"loss": 0.29746895,
"memory(GiB)": 14.16,
"step": 75,
"train_speed(iter/s)": 0.328132
},
{
"acc": 0.91220999,
"epoch": 2.191780821917808,
"grad_norm": 10.608201026916504,
"learning_rate": 9.73824884987249e-06,
"loss": 0.27589982,
"memory(GiB)": 14.16,
"step": 80,
"train_speed(iter/s)": 0.329693
},
{
"acc": 0.92394562,
"epoch": 2.328767123287671,
"grad_norm": 13.439018249511719,
"learning_rate": 9.872975930033608e-06,
"loss": 0.26322646,
"memory(GiB)": 14.16,
"step": 85,
"train_speed(iter/s)": 0.331593
},
{
"acc": 0.91119957,
"epoch": 2.4657534246575343,
"grad_norm": 12.91903018951416,
"learning_rate": 1e-05,
"loss": 0.26378374,
"memory(GiB)": 14.16,
"step": 90,
"train_speed(iter/s)": 0.333498
},
{
"acc": 0.9276722,
"epoch": 2.602739726027397,
"grad_norm": 10.661258697509766,
"learning_rate": 9.999789068686803e-06,
"loss": 0.23127136,
"memory(GiB)": 14.16,
"step": 95,
"train_speed(iter/s)": 0.335181
},
{
"acc": 0.94177589,
"epoch": 2.73972602739726,
"grad_norm": 8.819624900817871,
"learning_rate": 9.999156292545797e-06,
"loss": 0.21489761,
"memory(GiB)": 14.16,
"step": 100,
"train_speed(iter/s)": 0.336761
},
{
"acc": 0.93883839,
"epoch": 2.8767123287671232,
"grad_norm": 9.24251937866211,
"learning_rate": 9.998101724971245e-06,
"loss": 0.20122993,
"memory(GiB)": 14.16,
"step": 105,
"train_speed(iter/s)": 0.337843
},
{
"acc": 0.93721886,
"epoch": 3.0136986301369864,
"grad_norm": 6.485929012298584,
"learning_rate": 9.996625454948572e-06,
"loss": 0.19496574,
"memory(GiB)": 14.16,
"step": 110,
"train_speed(iter/s)": 0.338939
},
{
"acc": 0.94114161,
"epoch": 3.1506849315068495,
"grad_norm": 9.10759449005127,
"learning_rate": 9.99472760704687e-06,
"loss": 0.20500426,
"memory(GiB)": 14.16,
"step": 115,
"train_speed(iter/s)": 0.340322
},
{
"acc": 0.96779289,
"epoch": 3.287671232876712,
"grad_norm": 9.064125061035156,
"learning_rate": 9.992408341408366e-06,
"loss": 0.11549917,
"memory(GiB)": 14.16,
"step": 120,
"train_speed(iter/s)": 0.341348
},
{
"acc": 0.93218994,
"epoch": 3.4246575342465753,
"grad_norm": 10.008238792419434,
"learning_rate": 9.989667853734933e-06,
"loss": 0.21996279,
"memory(GiB)": 14.16,
"step": 125,
"train_speed(iter/s)": 0.342439
},
{
"acc": 0.93686333,
"epoch": 3.5616438356164384,
"grad_norm": 11.974565505981445,
"learning_rate": 9.98650637527156e-06,
"loss": 0.19973722,
"memory(GiB)": 14.16,
"step": 130,
"train_speed(iter/s)": 0.343123
},
{
"acc": 0.95756645,
"epoch": 3.6986301369863015,
"grad_norm": 9.8711576461792,
"learning_rate": 9.982924172786847e-06,
"loss": 0.15214539,
"memory(GiB)": 14.16,
"step": 135,
"train_speed(iter/s)": 0.34449
},
{
"acc": 0.95660496,
"epoch": 3.8356164383561646,
"grad_norm": 6.757988452911377,
"learning_rate": 9.97892154855049e-06,
"loss": 0.15905871,
"memory(GiB)": 14.16,
"step": 140,
"train_speed(iter/s)": 0.345239
},
{
"acc": 0.95482464,
"epoch": 3.9726027397260273,
"grad_norm": 8.047441482543945,
"learning_rate": 9.974498840307775e-06,
"loss": 0.16302727,
"memory(GiB)": 14.16,
"step": 145,
"train_speed(iter/s)": 0.345602
},
{
"acc": 0.94146061,
"epoch": 4.109589041095891,
"grad_norm": 7.961703777313232,
"learning_rate": 9.96965642125109e-06,
"loss": 0.19785479,
"memory(GiB)": 14.16,
"step": 150,
"train_speed(iter/s)": 0.346007
},
{
"acc": 0.96698723,
"epoch": 4.2465753424657535,
"grad_norm": 6.472661972045898,
"learning_rate": 9.964394699988415e-06,
"loss": 0.11739849,
"memory(GiB)": 14.16,
"step": 155,
"train_speed(iter/s)": 0.346863
},
{
"acc": 0.9542901,
"epoch": 4.383561643835616,
"grad_norm": 8.756787300109863,
"learning_rate": 9.958714120508861e-06,
"loss": 0.13702551,
"memory(GiB)": 14.16,
"step": 160,
"train_speed(iter/s)": 0.348349
},
{
"acc": 0.95916128,
"epoch": 4.52054794520548,
"grad_norm": 9.755017280578613,
"learning_rate": 9.952615162145197e-06,
"loss": 0.13223737,
"memory(GiB)": 14.16,
"step": 165,
"train_speed(iter/s)": 0.349345
},
{
"acc": 0.96206884,
"epoch": 4.657534246575342,
"grad_norm": 8.553181648254395,
"learning_rate": 9.946098339533407e-06,
"loss": 0.11991118,
"memory(GiB)": 14.16,
"step": 170,
"train_speed(iter/s)": 0.349712
},
{
"acc": 0.96347275,
"epoch": 4.794520547945205,
"grad_norm": 7.194893836975098,
"learning_rate": 9.93916420256926e-06,
"loss": 0.10365121,
"memory(GiB)": 14.16,
"step": 175,
"train_speed(iter/s)": 0.350314
},
{
"acc": 0.97044868,
"epoch": 4.931506849315069,
"grad_norm": 6.540927410125732,
"learning_rate": 9.93181333636191e-06,
"loss": 0.10110762,
"memory(GiB)": 14.16,
"step": 180,
"train_speed(iter/s)": 0.350746
},
{
"acc": 0.97869854,
"epoch": 5.068493150684931,
"grad_norm": 6.64502477645874,
"learning_rate": 9.924046361184535e-06,
"loss": 0.06834425,
"memory(GiB)": 14.16,
"step": 185,
"train_speed(iter/s)": 0.351047
},
{
"acc": 0.97149448,
"epoch": 5.205479452054795,
"grad_norm": 7.438776016235352,
"learning_rate": 9.91586393242198e-06,
"loss": 0.09642395,
"memory(GiB)": 14.16,
"step": 190,
"train_speed(iter/s)": 0.351567
},
{
"acc": 0.96277952,
"epoch": 5.342465753424658,
"grad_norm": 9.334355354309082,
"learning_rate": 9.907266740515464e-06,
"loss": 0.10700824,
"memory(GiB)": 14.16,
"step": 195,
"train_speed(iter/s)": 0.352169
},
{
"acc": 0.97186604,
"epoch": 5.47945205479452,
"grad_norm": 5.772711753845215,
"learning_rate": 9.898255510904326e-06,
"loss": 0.07952163,
"memory(GiB)": 14.16,
"step": 200,
"train_speed(iter/s)": 0.352683
},
{
"acc": 0.98101072,
"epoch": 5.616438356164384,
"grad_norm": 9.092942237854004,
"learning_rate": 9.888831003964803e-06,
"loss": 0.06738672,
"memory(GiB)": 14.16,
"step": 205,
"train_speed(iter/s)": 0.353043
},
{
"acc": 0.97831497,
"epoch": 5.7534246575342465,
"grad_norm": 8.003717422485352,
"learning_rate": 9.878994014945866e-06,
"loss": 0.06806564,
"memory(GiB)": 14.16,
"step": 210,
"train_speed(iter/s)": 0.354182
},
{
"acc": 0.97665091,
"epoch": 5.890410958904109,
"grad_norm": 6.545485496520996,
"learning_rate": 9.868745373902128e-06,
"loss": 0.07062781,
"memory(GiB)": 14.16,
"step": 215,
"train_speed(iter/s)": 0.354891
},
{
"acc": 0.97873678,
"epoch": 6.027397260273973,
"grad_norm": 4.454226493835449,
"learning_rate": 9.85808594562379e-06,
"loss": 0.07400095,
"memory(GiB)": 14.16,
"step": 220,
"train_speed(iter/s)": 0.355094
},
{
"acc": 0.97500896,
"epoch": 6.164383561643835,
"grad_norm": 9.327370643615723,
"learning_rate": 9.847016629563683e-06,
"loss": 0.07909623,
"memory(GiB)": 14.16,
"step": 225,
"train_speed(iter/s)": 0.355416
},
{
"acc": 0.97549095,
"epoch": 6.301369863013699,
"grad_norm": 7.767273426055908,
"learning_rate": 9.835538359761359e-06,
"loss": 0.08394684,
"memory(GiB)": 14.16,
"step": 230,
"train_speed(iter/s)": 0.35587
},
{
"acc": 0.98198967,
"epoch": 6.438356164383562,
"grad_norm": 8.520513534545898,
"learning_rate": 9.823652104764282e-06,
"loss": 0.06493338,
"memory(GiB)": 14.16,
"step": 235,
"train_speed(iter/s)": 0.356338
},
{
"acc": 0.98189783,
"epoch": 6.575342465753424,
"grad_norm": 6.741430282592773,
"learning_rate": 9.811358867546099e-06,
"loss": 0.06953114,
"memory(GiB)": 14.16,
"step": 240,
"train_speed(iter/s)": 0.356559
},
{
"acc": 0.9792799,
"epoch": 6.712328767123288,
"grad_norm": 6.579135894775391,
"learning_rate": 9.798659685422008e-06,
"loss": 0.07183629,
"memory(GiB)": 14.16,
"step": 245,
"train_speed(iter/s)": 0.357198
},
{
"acc": 0.97903948,
"epoch": 6.8493150684931505,
"grad_norm": 7.918185234069824,
"learning_rate": 9.785555629961232e-06,
"loss": 0.06570032,
"memory(GiB)": 14.16,
"step": 250,
"train_speed(iter/s)": 0.35739
},
{
"acc": 0.98690357,
"epoch": 6.986301369863014,
"grad_norm": 4.936428546905518,
"learning_rate": 9.772047806896599e-06,
"loss": 0.04573858,
"memory(GiB)": 14.16,
"step": 255,
"train_speed(iter/s)": 0.358183
},
{
"acc": 0.98013973,
"epoch": 7.123287671232877,
"grad_norm": 6.603614330291748,
"learning_rate": 9.758137356031226e-06,
"loss": 0.06317404,
"memory(GiB)": 14.16,
"step": 260,
"train_speed(iter/s)": 0.358435
},
{
"acc": 0.98552742,
"epoch": 7.260273972602739,
"grad_norm": 5.6785173416137695,
"learning_rate": 9.74382545114236e-06,
"loss": 0.05590855,
"memory(GiB)": 14.16,
"step": 265,
"train_speed(iter/s)": 0.359116
},
{
"acc": 0.98451328,
"epoch": 7.397260273972603,
"grad_norm": 6.470608711242676,
"learning_rate": 9.729113299882324e-06,
"loss": 0.05722108,
"memory(GiB)": 14.16,
"step": 270,
"train_speed(iter/s)": 0.359102
},
{
"acc": 0.98782816,
"epoch": 7.534246575342466,
"grad_norm": 4.879244804382324,
"learning_rate": 9.714002143676614e-06,
"loss": 0.0392652,
"memory(GiB)": 14.16,
"step": 275,
"train_speed(iter/s)": 0.359249
},
{
"acc": 0.98015614,
"epoch": 7.671232876712329,
"grad_norm": 5.897606372833252,
"learning_rate": 9.69849325761915e-06,
"loss": 0.0653078,
"memory(GiB)": 14.16,
"step": 280,
"train_speed(iter/s)": 0.359463
},
{
"acc": 0.98269339,
"epoch": 7.808219178082192,
"grad_norm": 8.748714447021484,
"learning_rate": 9.682587950364676e-06,
"loss": 0.04879735,
"memory(GiB)": 14.16,
"step": 285,
"train_speed(iter/s)": 0.359431
},
{
"acc": 0.99092007,
"epoch": 7.945205479452055,
"grad_norm": 4.962334156036377,
"learning_rate": 9.666287564018344e-06,
"loss": 0.03704912,
"memory(GiB)": 14.16,
"step": 290,
"train_speed(iter/s)": 0.359385
},
{
"acc": 0.98640242,
"epoch": 8.082191780821917,
"grad_norm": 7.194764137268066,
"learning_rate": 9.649593474022452e-06,
"loss": 0.05298281,
"memory(GiB)": 14.16,
"step": 295,
"train_speed(iter/s)": 0.359641
},
{
"acc": 0.98602715,
"epoch": 8.219178082191782,
"grad_norm": 7.44851541519165,
"learning_rate": 9.632507089040402e-06,
"loss": 0.04129619,
"memory(GiB)": 14.16,
"step": 300,
"train_speed(iter/s)": 0.359864
},
{
"acc": 0.98549156,
"epoch": 8.356164383561644,
"grad_norm": 8.171492576599121,
"learning_rate": 9.615029850837819e-06,
"loss": 0.04942346,
"memory(GiB)": 14.16,
"step": 305,
"train_speed(iter/s)": 0.359882
},
{
"acc": 0.98449697,
"epoch": 8.493150684931507,
"grad_norm": 6.328600883483887,
"learning_rate": 9.597163234160894e-06,
"loss": 0.05851363,
"memory(GiB)": 14.16,
"step": 310,
"train_speed(iter/s)": 0.359848
},
{
"acc": 0.99007683,
"epoch": 8.63013698630137,
"grad_norm": 5.6946258544921875,
"learning_rate": 9.57890874661196e-06,
"loss": 0.03352974,
"memory(GiB)": 14.16,
"step": 315,
"train_speed(iter/s)": 0.360126
},
{
"acc": 0.98694916,
"epoch": 8.767123287671232,
"grad_norm": 4.585356712341309,
"learning_rate": 9.56026792852226e-06,
"loss": 0.04656056,
"memory(GiB)": 14.16,
"step": 320,
"train_speed(iter/s)": 0.360741
},
{
"acc": 0.98873882,
"epoch": 8.904109589041095,
"grad_norm": 7.50302791595459,
"learning_rate": 9.541242352821985e-06,
"loss": 0.03722157,
"memory(GiB)": 14.16,
"step": 325,
"train_speed(iter/s)": 0.360963
},
{
"acc": 0.98872223,
"epoch": 9.04109589041096,
"grad_norm": 8.641664505004883,
"learning_rate": 9.52183362490754e-06,
"loss": 0.04286454,
"memory(GiB)": 14.16,
"step": 330,
"train_speed(iter/s)": 0.361166
},
{
"acc": 0.99097099,
"epoch": 9.178082191780822,
"grad_norm": 5.386726379394531,
"learning_rate": 9.502043382506082e-06,
"loss": 0.02755214,
"memory(GiB)": 14.16,
"step": 335,
"train_speed(iter/s)": 0.361519
},
{
"acc": 0.99000244,
"epoch": 9.315068493150685,
"grad_norm": 4.545804977416992,
"learning_rate": 9.481873295537333e-06,
"loss": 0.04025009,
"memory(GiB)": 14.16,
"step": 340,
"train_speed(iter/s)": 0.361469
},
{
"acc": 0.99092007,
"epoch": 9.452054794520548,
"grad_norm": 8.062037467956543,
"learning_rate": 9.461325065972662e-06,
"loss": 0.04117663,
"memory(GiB)": 14.16,
"step": 345,
"train_speed(iter/s)": 0.361763
},
{
"acc": 0.99032946,
"epoch": 9.58904109589041,
"grad_norm": 5.639761924743652,
"learning_rate": 9.440400427691476e-06,
"loss": 0.02993804,
"memory(GiB)": 14.16,
"step": 350,
"train_speed(iter/s)": 0.361739
},
{
"acc": 0.98722763,
"epoch": 9.726027397260275,
"grad_norm": 5.573471546173096,
"learning_rate": 9.419101146334908e-06,
"loss": 0.04273846,
"memory(GiB)": 14.16,
"step": 355,
"train_speed(iter/s)": 0.361815
},
{
"acc": 0.98906002,
"epoch": 9.863013698630137,
"grad_norm": 5.205529689788818,
"learning_rate": 9.397429019156841e-06,
"loss": 0.04300301,
"memory(GiB)": 14.16,
"step": 360,
"train_speed(iter/s)": 0.361905
},
{
"acc": 0.9917551,
"epoch": 10.0,
"grad_norm": 5.506292343139648,
"learning_rate": 9.375385874872248e-06,
"loss": 0.03177897,
"memory(GiB)": 14.16,
"step": 365,
"train_speed(iter/s)": 0.361986
},
{
"acc": 0.99265499,
"epoch": 10.136986301369863,
"grad_norm": 5.0279035568237305,
"learning_rate": 9.352973573502874e-06,
"loss": 0.03047763,
"memory(GiB)": 14.16,
"step": 370,
"train_speed(iter/s)": 0.3619
},
{
"acc": 0.99043932,
"epoch": 10.273972602739725,
"grad_norm": 7.282947540283203,
"learning_rate": 9.330194006220301e-06,
"loss": 0.03883767,
"memory(GiB)": 14.16,
"step": 375,
"train_speed(iter/s)": 0.3619
},
{
"acc": 0.99266891,
"epoch": 10.41095890410959,
"grad_norm": 6.475697040557861,
"learning_rate": 9.307049095186364e-06,
"loss": 0.03223814,
"memory(GiB)": 14.16,
"step": 380,
"train_speed(iter/s)": 0.361879
},
{
"acc": 0.98734608,
"epoch": 10.547945205479452,
"grad_norm": 2.9214179515838623,
"learning_rate": 9.28354079339095e-06,
"loss": 0.04384069,
"memory(GiB)": 14.16,
"step": 385,
"train_speed(iter/s)": 0.361963
},
{
"acc": 0.99313297,
"epoch": 10.684931506849315,
"grad_norm": 4.704584121704102,
"learning_rate": 9.259671084487218e-06,
"loss": 0.02514983,
"memory(GiB)": 14.16,
"step": 390,
"train_speed(iter/s)": 0.361864
},
{
"acc": 0.990868,
"epoch": 10.821917808219178,
"grad_norm": 4.704314231872559,
"learning_rate": 9.235441982624191e-06,
"loss": 0.02952582,
"memory(GiB)": 14.16,
"step": 395,
"train_speed(iter/s)": 0.36222
},
{
"acc": 0.99545174,
"epoch": 10.95890410958904,
"grad_norm": 4.499762058258057,
"learning_rate": 9.210855532276836e-06,
"loss": 0.01564558,
"memory(GiB)": 14.16,
"step": 400,
"train_speed(iter/s)": 0.362296
},
{
"acc": 0.9944725,
"epoch": 11.095890410958905,
"grad_norm": 7.498542785644531,
"learning_rate": 9.185913808073513e-06,
"loss": 0.02198397,
"memory(GiB)": 14.16,
"step": 405,
"train_speed(iter/s)": 0.362254
},
{
"acc": 0.98989115,
"epoch": 11.232876712328768,
"grad_norm": 3.8143303394317627,
"learning_rate": 9.16061891462094e-06,
"loss": 0.0327835,
"memory(GiB)": 14.16,
"step": 410,
"train_speed(iter/s)": 0.362508
},
{
"acc": 0.99730492,
"epoch": 11.36986301369863,
"grad_norm": 3.9523301124572754,
"learning_rate": 9.134972986326595e-06,
"loss": 0.01258684,
"memory(GiB)": 14.16,
"step": 415,
"train_speed(iter/s)": 0.362542
},
{
"acc": 0.99241066,
"epoch": 11.506849315068493,
"grad_norm": 6.334254741668701,
"learning_rate": 9.108978187218613e-06,
"loss": 0.03454852,
"memory(GiB)": 14.16,
"step": 420,
"train_speed(iter/s)": 0.362651
},
{
"acc": 0.99217281,
"epoch": 11.643835616438356,
"grad_norm": 6.370650291442871,
"learning_rate": 9.08263671076319e-06,
"loss": 0.03252776,
"memory(GiB)": 14.16,
"step": 425,
"train_speed(iter/s)": 0.362697
},
{
"acc": 0.98822365,
"epoch": 11.780821917808218,
"grad_norm": 3.232943534851074,
"learning_rate": 9.05595077967948e-06,
"loss": 0.04269191,
"memory(GiB)": 14.16,
"step": 430,
"train_speed(iter/s)": 0.362683
},
{
"acc": 0.99225941,
"epoch": 11.917808219178083,
"grad_norm": 4.822254180908203,
"learning_rate": 9.028922645752062e-06,
"loss": 0.02760777,
"memory(GiB)": 14.16,
"step": 435,
"train_speed(iter/s)": 0.362655
},
{
"acc": 0.9954505,
"epoch": 12.054794520547945,
"grad_norm": 3.2365639209747314,
"learning_rate": 9.00155458964091e-06,
"loss": 0.01916433,
"memory(GiB)": 14.16,
"step": 440,
"train_speed(iter/s)": 0.3626
},
{
"acc": 0.99313316,
"epoch": 12.191780821917808,
"grad_norm": 3.7720203399658203,
"learning_rate": 8.973848920688967e-06,
"loss": 0.03937365,
"memory(GiB)": 14.16,
"step": 445,
"train_speed(iter/s)": 0.362571
},
{
"acc": 0.99251375,
"epoch": 12.32876712328767,
"grad_norm": 4.069283485412598,
"learning_rate": 8.94580797672727e-06,
"loss": 0.02898619,
"memory(GiB)": 14.16,
"step": 450,
"train_speed(iter/s)": 0.362736
},
{
"acc": 0.99321842,
"epoch": 12.465753424657533,
"grad_norm": 0.9725887775421143,
"learning_rate": 8.917434123877686e-06,
"loss": 0.02265764,
"memory(GiB)": 14.16,
"step": 455,
"train_speed(iter/s)": 0.362774
},
{
"acc": 0.99323349,
"epoch": 12.602739726027398,
"grad_norm": 4.508816719055176,
"learning_rate": 8.888729756353248e-06,
"loss": 0.02885826,
"memory(GiB)": 14.16,
"step": 460,
"train_speed(iter/s)": 0.362813
},
{
"acc": 0.99727192,
"epoch": 12.73972602739726,
"grad_norm": 2.479684352874756,
"learning_rate": 8.859697296256147e-06,
"loss": 0.01712638,
"memory(GiB)": 14.16,
"step": 465,
"train_speed(iter/s)": 0.362768
},
{
"acc": 0.99502192,
"epoch": 12.876712328767123,
"grad_norm": 1.5512564182281494,
"learning_rate": 8.83033919337333e-06,
"loss": 0.022619,
"memory(GiB)": 14.16,
"step": 470,
"train_speed(iter/s)": 0.362919
},
{
"acc": 0.99404383,
"epoch": 13.013698630136986,
"grad_norm": 5.0392680168151855,
"learning_rate": 8.800657924969805e-06,
"loss": 0.0215001,
"memory(GiB)": 14.16,
"step": 475,
"train_speed(iter/s)": 0.362773
},
{
"acc": 0.99045715,
"epoch": 13.150684931506849,
"grad_norm": 3.143148183822632,
"learning_rate": 8.770655995579593e-06,
"loss": 0.02810604,
"memory(GiB)": 14.16,
"step": 480,
"train_speed(iter/s)": 0.362874
},
{
"acc": 0.99417992,
"epoch": 13.287671232876713,
"grad_norm": 2.0431466102600098,
"learning_rate": 8.740335936794398e-06,
"loss": 0.02953114,
"memory(GiB)": 14.16,
"step": 485,
"train_speed(iter/s)": 0.362814
},
{
"acc": 0.99732151,
"epoch": 13.424657534246576,
"grad_norm": 2.4842429161071777,
"learning_rate": 8.709700307049991e-06,
"loss": 0.01085737,
"memory(GiB)": 14.16,
"step": 490,
"train_speed(iter/s)": 0.362739
},
{
"acc": 0.99217415,
"epoch": 13.561643835616438,
"grad_norm": 4.454080581665039,
"learning_rate": 8.678751691410323e-06,
"loss": 0.02852642,
"memory(GiB)": 14.16,
"step": 495,
"train_speed(iter/s)": 0.363042
},
{
"acc": 0.99452591,
"epoch": 13.698630136986301,
"grad_norm": 6.032941818237305,
"learning_rate": 8.647492701349395e-06,
"loss": 0.02294705,
"memory(GiB)": 14.16,
"step": 500,
"train_speed(iter/s)": 0.363179
},
{
"epoch": 13.698630136986301,
"eval_acc": 0.3818755593383692,
"eval_loss": 4.793926239013672,
"eval_runtime": 2033.163,
"eval_samples_per_second": 15.751,
"eval_steps_per_second": 1.969,
"step": 500
},
{
"acc": 0.99273891,
"epoch": 13.835616438356164,
"grad_norm": 7.570253849029541,
"learning_rate": 8.615925974530906e-06,
"loss": 0.03025962,
"memory(GiB)": 14.16,
"step": 505,
"train_speed(iter/s)": 0.146499
},
{
"acc": 0.99452457,
"epoch": 13.972602739726028,
"grad_norm": 0.6901392936706543,
"learning_rate": 8.584054174585673e-06,
"loss": 0.01943414,
"memory(GiB)": 14.16,
"step": 510,
"train_speed(iter/s)": 0.147597
},
{
"acc": 0.99586115,
"epoch": 14.10958904109589,
"grad_norm": 2.8410799503326416,
"learning_rate": 8.551879990886881e-06,
"loss": 0.02195611,
"memory(GiB)": 14.16,
"step": 515,
"train_speed(iter/s)": 0.148679
},
{
"acc": 0.99596558,
"epoch": 14.246575342465754,
"grad_norm": 1.6700148582458496,
"learning_rate": 8.519406138323145e-06,
"loss": 0.01128972,
"memory(GiB)": 14.16,
"step": 520,
"train_speed(iter/s)": 0.149765
},
{
"acc": 0.99503975,
"epoch": 14.383561643835616,
"grad_norm": 1.0917117595672607,
"learning_rate": 8.486635357069431e-06,
"loss": 0.01859367,
"memory(GiB)": 14.16,
"step": 525,
"train_speed(iter/s)": 0.15087
},
{
"acc": 0.99261799,
"epoch": 14.520547945205479,
"grad_norm": 7.631021022796631,
"learning_rate": 8.45357041235583e-06,
"loss": 0.02078509,
"memory(GiB)": 14.16,
"step": 530,
"train_speed(iter/s)": 0.151949
},
{
"acc": 0.99308357,
"epoch": 14.657534246575342,
"grad_norm": 3.847642421722412,
"learning_rate": 8.42021409423423e-06,
"loss": 0.02047177,
"memory(GiB)": 14.16,
"step": 535,
"train_speed(iter/s)": 0.153023
},
{
"acc": 0.99270458,
"epoch": 14.794520547945206,
"grad_norm": 6.042537689208984,
"learning_rate": 8.386569217342893e-06,
"loss": 0.0270274,
"memory(GiB)": 14.16,
"step": 540,
"train_speed(iter/s)": 0.154086
},
{
"acc": 0.99546833,
"epoch": 14.931506849315069,
"grad_norm": 4.633887767791748,
"learning_rate": 8.352638620668941e-06,
"loss": 0.01502355,
"memory(GiB)": 14.16,
"step": 545,
"train_speed(iter/s)": 0.155151
},
{
"acc": 0.99634466,
"epoch": 15.068493150684931,
"grad_norm": 1.901209831237793,
"learning_rate": 8.318425167308806e-06,
"loss": 0.01356835,
"memory(GiB)": 14.16,
"step": 550,
"train_speed(iter/s)": 0.156214
},
{
"acc": 0.99639549,
"epoch": 15.205479452054794,
"grad_norm": 4.843277931213379,
"learning_rate": 8.28393174422665e-06,
"loss": 0.01601259,
"memory(GiB)": 14.16,
"step": 555,
"train_speed(iter/s)": 0.157262
},
{
"acc": 0.99320316,
"epoch": 15.342465753424657,
"grad_norm": 5.583487033843994,
"learning_rate": 8.249161262010735e-06,
"loss": 0.01526148,
"memory(GiB)": 14.16,
"step": 560,
"train_speed(iter/s)": 0.158308
},
{
"acc": 0.99721832,
"epoch": 15.479452054794521,
"grad_norm": 5.734185218811035,
"learning_rate": 8.214116654627853e-06,
"loss": 0.01092491,
"memory(GiB)": 14.16,
"step": 565,
"train_speed(iter/s)": 0.159373
},
{
"acc": 0.99818001,
"epoch": 15.616438356164384,
"grad_norm": 2.6671762466430664,
"learning_rate": 8.178800879175737e-06,
"loss": 0.00814181,
"memory(GiB)": 14.16,
"step": 570,
"train_speed(iter/s)": 0.160399
},
{
"acc": 0.99492016,
"epoch": 15.753424657534246,
"grad_norm": 4.252832889556885,
"learning_rate": 8.143216915633535e-06,
"loss": 0.01607218,
"memory(GiB)": 14.16,
"step": 575,
"train_speed(iter/s)": 0.161443
},
{
"acc": 0.9963459,
"epoch": 15.89041095890411,
"grad_norm": 2.7702836990356445,
"learning_rate": 8.107367766610379e-06,
"loss": 0.01704216,
"memory(GiB)": 14.16,
"step": 580,
"train_speed(iter/s)": 0.162459
},
{
"acc": 0.99641209,
"epoch": 16.027397260273972,
"grad_norm": 3.121049404144287,
"learning_rate": 8.071256457091995e-06,
"loss": 0.01695579,
"memory(GiB)": 14.16,
"step": 585,
"train_speed(iter/s)": 0.163471
},
{
"acc": 0.99682541,
"epoch": 16.164383561643834,
"grad_norm": 3.980106830596924,
"learning_rate": 8.03488603418547e-06,
"loss": 0.01948266,
"memory(GiB)": 14.16,
"step": 590,
"train_speed(iter/s)": 0.164479
},
{
"acc": 0.99080048,
"epoch": 16.301369863013697,
"grad_norm": 4.650881290435791,
"learning_rate": 7.99825956686213e-06,
"loss": 0.02414289,
"memory(GiB)": 14.16,
"step": 595,
"train_speed(iter/s)": 0.16549
},
{
"acc": 0.99316874,
"epoch": 16.438356164383563,
"grad_norm": 3.7769477367401123,
"learning_rate": 7.96138014569857e-06,
"loss": 0.02379684,
"memory(GiB)": 14.16,
"step": 600,
"train_speed(iter/s)": 0.166493
},
{
"acc": 0.99821434,
"epoch": 16.575342465753426,
"grad_norm": 2.486539363861084,
"learning_rate": 7.924250882615874e-06,
"loss": 0.01166953,
"memory(GiB)": 14.16,
"step": 605,
"train_speed(iter/s)": 0.167483
},
{
"acc": 0.99491873,
"epoch": 16.71232876712329,
"grad_norm": 0.6995792984962463,
"learning_rate": 7.886874910617037e-06,
"loss": 0.01726856,
"memory(GiB)": 14.16,
"step": 610,
"train_speed(iter/s)": 0.168479
},
{
"acc": 0.99727192,
"epoch": 16.84931506849315,
"grad_norm": 1.6550129652023315,
"learning_rate": 7.849255383522576e-06,
"loss": 0.0158612,
"memory(GiB)": 14.16,
"step": 615,
"train_speed(iter/s)": 0.169527
},
{
"acc": 0.99721966,
"epoch": 16.986301369863014,
"grad_norm": 2.894073724746704,
"learning_rate": 7.811395475704436e-06,
"loss": 0.01161546,
"memory(GiB)": 14.16,
"step": 620,
"train_speed(iter/s)": 0.170515
},
{
"acc": 0.99818001,
"epoch": 17.123287671232877,
"grad_norm": 2.339505910873413,
"learning_rate": 7.773298381818106e-06,
"loss": 0.00709306,
"memory(GiB)": 14.16,
"step": 625,
"train_speed(iter/s)": 0.171471
},
{
"acc": 0.997717,
"epoch": 17.26027397260274,
"grad_norm": 2.1085383892059326,
"learning_rate": 7.734967316533076e-06,
"loss": 0.00879358,
"memory(GiB)": 14.16,
"step": 630,
"train_speed(iter/s)": 0.172477
},
{
"acc": 0.99593258,
"epoch": 17.397260273972602,
"grad_norm": 3.778745174407959,
"learning_rate": 7.696405514261554e-06,
"loss": 0.01262949,
"memory(GiB)": 14.16,
"step": 635,
"train_speed(iter/s)": 0.173456
},
{
"acc": 0.99641209,
"epoch": 17.534246575342465,
"grad_norm": 4.980679512023926,
"learning_rate": 7.657616228885571e-06,
"loss": 0.00957234,
"memory(GiB)": 14.16,
"step": 640,
"train_speed(iter/s)": 0.174442
},
{
"acc": 0.99673891,
"epoch": 17.671232876712327,
"grad_norm": 1.6658488512039185,
"learning_rate": 7.618602733482395e-06,
"loss": 0.01483861,
"memory(GiB)": 14.16,
"step": 645,
"train_speed(iter/s)": 0.1754
},
{
"acc": 0.995401,
"epoch": 17.80821917808219,
"grad_norm": 7.899285793304443,
"learning_rate": 7.579368320048354e-06,
"loss": 0.02291541,
"memory(GiB)": 14.16,
"step": 650,
"train_speed(iter/s)": 0.176359
},
{
"acc": 0.99588165,
"epoch": 17.945205479452056,
"grad_norm": 4.884225368499756,
"learning_rate": 7.539916299221047e-06,
"loss": 0.0132565,
"memory(GiB)": 14.16,
"step": 655,
"train_speed(iter/s)": 0.177313
},
{
"acc": 0.99720192,
"epoch": 18.08219178082192,
"grad_norm": 1.3362199068069458,
"learning_rate": 7.50025e-06,
"loss": 0.01240759,
"memory(GiB)": 14.16,
"step": 660,
"train_speed(iter/s)": 0.178257
},
{
"acc": 0.99860992,
"epoch": 18.21917808219178,
"grad_norm": 0.9003859758377075,
"learning_rate": 7.4603727694657576e-06,
"loss": 0.00468392,
"memory(GiB)": 14.16,
"step": 665,
"train_speed(iter/s)": 0.179216
},
{
"acc": 0.99587898,
"epoch": 18.356164383561644,
"grad_norm": 2.7398738861083984,
"learning_rate": 7.420287972497446e-06,
"loss": 0.01100588,
"memory(GiB)": 14.16,
"step": 670,
"train_speed(iter/s)": 0.180158
},
{
"acc": 0.99493923,
"epoch": 18.493150684931507,
"grad_norm": 5.460540294647217,
"learning_rate": 7.3799989914888506e-06,
"loss": 0.01662439,
"memory(GiB)": 14.16,
"step": 675,
"train_speed(iter/s)": 0.181127
},
{
"acc": 0.99722099,
"epoch": 18.63013698630137,
"grad_norm": 0.33460837602615356,
"learning_rate": 7.3395092260630015e-06,
"loss": 0.00906119,
"memory(GiB)": 14.16,
"step": 680,
"train_speed(iter/s)": 0.182049
},
{
"acc": 0.99589815,
"epoch": 18.767123287671232,
"grad_norm": 4.61140251159668,
"learning_rate": 7.298822092785316e-06,
"loss": 0.0160338,
"memory(GiB)": 14.16,
"step": 685,
"train_speed(iter/s)": 0.182979
},
{
"acc": 0.99541874,
"epoch": 18.904109589041095,
"grad_norm": 1.5101581811904907,
"learning_rate": 7.257941024875293e-06,
"loss": 0.01577311,
"memory(GiB)": 14.16,
"step": 690,
"train_speed(iter/s)": 0.183925
},
{
"acc": 0.9977005,
"epoch": 19.041095890410958,
"grad_norm": 2.02103853225708,
"learning_rate": 7.216869471916828e-06,
"loss": 0.00827418,
"memory(GiB)": 14.16,
"step": 695,
"train_speed(iter/s)": 0.184826
},
{
"acc": 0.99538565,
"epoch": 19.17808219178082,
"grad_norm": 4.640865325927734,
"learning_rate": 7.175610899567126e-06,
"loss": 0.02137535,
"memory(GiB)": 14.16,
"step": 700,
"train_speed(iter/s)": 0.185756
},
{
"acc": 0.99816341,
"epoch": 19.315068493150687,
"grad_norm": 2.2678844928741455,
"learning_rate": 7.1341687892642705e-06,
"loss": 0.01489109,
"memory(GiB)": 14.16,
"step": 705,
"train_speed(iter/s)": 0.186685
},
{
"acc": 0.997717,
"epoch": 19.45205479452055,
"grad_norm": 8.91321086883545,
"learning_rate": 7.092546637933454e-06,
"loss": 0.00950522,
"memory(GiB)": 14.16,
"step": 710,
"train_speed(iter/s)": 0.187598
},
{
"acc": 0.99584599,
"epoch": 19.589041095890412,
"grad_norm": 3.019415855407715,
"learning_rate": 7.0507479576919026e-06,
"loss": 0.01135417,
"memory(GiB)": 14.16,
"step": 715,
"train_speed(iter/s)": 0.188517
},
{
"acc": 0.99723749,
"epoch": 19.726027397260275,
"grad_norm": 3.8561668395996094,
"learning_rate": 7.0087762755525214e-06,
"loss": 0.00678846,
"memory(GiB)": 14.16,
"step": 720,
"train_speed(iter/s)": 0.18942
},
{
"acc": 0.99725533,
"epoch": 19.863013698630137,
"grad_norm": 0.6471136212348938,
"learning_rate": 6.966635133126286e-06,
"loss": 0.01252564,
"memory(GiB)": 14.16,
"step": 725,
"train_speed(iter/s)": 0.1903
},
{
"acc": 0.99634466,
"epoch": 20.0,
"grad_norm": 3.774871587753296,
"learning_rate": 6.924328086323392e-06,
"loss": 0.01890204,
"memory(GiB)": 14.16,
"step": 730,
"train_speed(iter/s)": 0.191223
},
{
"acc": 0.99721966,
"epoch": 20.136986301369863,
"grad_norm": 4.085058689117432,
"learning_rate": 6.881858705053205e-06,
"loss": 0.01011662,
"memory(GiB)": 14.16,
"step": 735,
"train_speed(iter/s)": 0.192097
},
{
"acc": 0.99905624,
"epoch": 20.273972602739725,
"grad_norm": 2.026254892349243,
"learning_rate": 6.8392305729230305e-06,
"loss": 0.00847432,
"memory(GiB)": 14.16,
"step": 740,
"train_speed(iter/s)": 0.192981
},
{
"acc": 0.99864426,
"epoch": 20.410958904109588,
"grad_norm": 1.7818002700805664,
"learning_rate": 6.796447286935725e-06,
"loss": 0.00707859,
"memory(GiB)": 14.16,
"step": 745,
"train_speed(iter/s)": 0.193875
},
{
"acc": 0.99816341,
"epoch": 20.54794520547945,
"grad_norm": 0.09219258278608322,
"learning_rate": 6.7535124571861766e-06,
"loss": 0.01978692,
"memory(GiB)": 14.16,
"step": 750,
"train_speed(iter/s)": 0.194746
},
{
"acc": 0.99819775,
"epoch": 20.684931506849313,
"grad_norm": 3.1013734340667725,
"learning_rate": 6.710429706556683e-06,
"loss": 0.00450487,
"memory(GiB)": 14.16,
"step": 755,
"train_speed(iter/s)": 0.195624
},
{
"acc": 0.99859333,
"epoch": 20.82191780821918,
"grad_norm": 0.29254209995269775,
"learning_rate": 6.667202670411245e-06,
"loss": 0.00461008,
"memory(GiB)": 14.16,
"step": 760,
"train_speed(iter/s)": 0.196517
},
{
"acc": 0.99910717,
"epoch": 20.958904109589042,
"grad_norm": 0.2512110471725464,
"learning_rate": 6.623834996288815e-06,
"loss": 0.00767698,
"memory(GiB)": 14.16,
"step": 765,
"train_speed(iter/s)": 0.197396
},
{
"acc": 0.99591599,
"epoch": 21.095890410958905,
"grad_norm": 0.5388877987861633,
"learning_rate": 6.580330343595521e-06,
"loss": 0.01597615,
"memory(GiB)": 14.16,
"step": 770,
"train_speed(iter/s)": 0.198263
},
{
"acc": 0.99862766,
"epoch": 21.232876712328768,
"grad_norm": 0.15328700840473175,
"learning_rate": 6.536692383295863e-06,
"loss": 0.00608862,
"memory(GiB)": 14.16,
"step": 775,
"train_speed(iter/s)": 0.19913
},
{
"acc": 0.99775133,
"epoch": 21.36986301369863,
"grad_norm": 0.17136460542678833,
"learning_rate": 6.492924797602972e-06,
"loss": 0.00846671,
"memory(GiB)": 14.16,
"step": 780,
"train_speed(iter/s)": 0.199987
},
{
"acc": 0.99768267,
"epoch": 21.506849315068493,
"grad_norm": 3.5933849811553955,
"learning_rate": 6.449031279667896e-06,
"loss": 0.0071015,
"memory(GiB)": 14.16,
"step": 785,
"train_speed(iter/s)": 0.200857
},
{
"acc": 0.99627323,
"epoch": 21.643835616438356,
"grad_norm": 2.7347967624664307,
"learning_rate": 6.4050155332679606e-06,
"loss": 0.01377204,
"memory(GiB)": 14.16,
"step": 790,
"train_speed(iter/s)": 0.201739
},
{
"acc": 0.99720316,
"epoch": 21.78082191780822,
"grad_norm": 3.391113758087158,
"learning_rate": 6.360881272494254e-06,
"loss": 0.00560406,
"memory(GiB)": 14.16,
"step": 795,
"train_speed(iter/s)": 0.202584
},
{
"acc": 0.99770041,
"epoch": 21.91780821917808,
"grad_norm": 0.9516264796257019,
"learning_rate": 6.316632221438214e-06,
"loss": 0.01059882,
"memory(GiB)": 14.16,
"step": 800,
"train_speed(iter/s)": 0.20342
},
{
"acc": 0.99910717,
"epoch": 22.054794520547944,
"grad_norm": 4.009815216064453,
"learning_rate": 6.2722721138774e-06,
"loss": 0.00493859,
"memory(GiB)": 14.16,
"step": 805,
"train_speed(iter/s)": 0.20423
},
{
"acc": 0.99905624,
"epoch": 22.19178082191781,
"grad_norm": 0.8211438059806824,
"learning_rate": 6.2278046929604265e-06,
"loss": 0.00547095,
"memory(GiB)": 14.16,
"step": 810,
"train_speed(iter/s)": 0.205109
},
{
"acc": 0.99680634,
"epoch": 22.328767123287673,
"grad_norm": 1.1279343366622925,
"learning_rate": 6.183233710891103e-06,
"loss": 0.01568028,
"memory(GiB)": 14.16,
"step": 815,
"train_speed(iter/s)": 0.205958
},
{
"acc": 0.99821434,
"epoch": 22.465753424657535,
"grad_norm": 2.2662060260772705,
"learning_rate": 6.1385629286118375e-06,
"loss": 0.00696406,
"memory(GiB)": 14.16,
"step": 820,
"train_speed(iter/s)": 0.206791
},
{
"acc": 0.99821434,
"epoch": 22.602739726027398,
"grad_norm": 3.2888071537017822,
"learning_rate": 6.093796115486277e-06,
"loss": 0.00824727,
"memory(GiB)": 14.16,
"step": 825,
"train_speed(iter/s)": 0.207611
},
{
"acc": 0.99864426,
"epoch": 22.73972602739726,
"grad_norm": 0.9464216232299805,
"learning_rate": 6.048937048981235e-06,
"loss": 0.00788838,
"memory(GiB)": 14.16,
"step": 830,
"train_speed(iter/s)": 0.20843
},
{
"acc": 0.99864426,
"epoch": 22.876712328767123,
"grad_norm": 0.23246127367019653,
"learning_rate": 6.003989514347962e-06,
"loss": 0.00401598,
"memory(GiB)": 14.16,
"step": 835,
"train_speed(iter/s)": 0.209242
},
{
"acc": 0.99866076,
"epoch": 23.013698630136986,
"grad_norm": 3.2754745483398438,
"learning_rate": 5.9589573043027314e-06,
"loss": 0.00324695,
"memory(GiB)": 14.16,
"step": 840,
"train_speed(iter/s)": 0.210024
},
{
"acc": 0.9980547,
"epoch": 23.15068493150685,
"grad_norm": 4.7171711921691895,
"learning_rate": 5.913844218706809e-06,
"loss": 0.0035405,
"memory(GiB)": 14.16,
"step": 845,
"train_speed(iter/s)": 0.210863
},
{
"acc": 0.99637899,
"epoch": 23.28767123287671,
"grad_norm": 0.22946955263614655,
"learning_rate": 5.8686540642458204e-06,
"loss": 0.01147373,
"memory(GiB)": 14.16,
"step": 850,
"train_speed(iter/s)": 0.211701
},
{
"acc": 0.99811125,
"epoch": 23.424657534246574,
"grad_norm": 5.581859588623047,
"learning_rate": 5.82339065410853e-06,
"loss": 0.00868064,
"memory(GiB)": 14.16,
"step": 855,
"train_speed(iter/s)": 0.212537
},
{
"acc": 0.99769344,
"epoch": 23.561643835616437,
"grad_norm": 5.6449360847473145,
"learning_rate": 5.7780578076650925e-06,
"loss": 0.01117077,
"memory(GiB)": 14.16,
"step": 860,
"train_speed(iter/s)": 0.213401
},
{
"acc": 0.99955359,
"epoch": 23.698630136986303,
"grad_norm": 0.058708298951387405,
"learning_rate": 5.732659350144769e-06,
"loss": 0.00182705,
"memory(GiB)": 14.16,
"step": 865,
"train_speed(iter/s)": 0.214229
},
{
"acc": 0.99955359,
"epoch": 23.835616438356166,
"grad_norm": 1.460488200187683,
"learning_rate": 5.687199112313132e-06,
"loss": 0.00358091,
"memory(GiB)": 14.16,
"step": 870,
"train_speed(iter/s)": 0.215057
},
{
"acc": 0.99818001,
"epoch": 23.972602739726028,
"grad_norm": 0.8150052428245544,
"learning_rate": 5.64168093014885e-06,
"loss": 0.00942515,
"memory(GiB)": 14.16,
"step": 875,
"train_speed(iter/s)": 0.215894
},
{
"acc": 0.99910717,
"epoch": 24.10958904109589,
"grad_norm": 1.0939289331436157,
"learning_rate": 5.596108644519984e-06,
"loss": 0.00471724,
"memory(GiB)": 14.16,
"step": 880,
"train_speed(iter/s)": 0.216682
},
{
"acc": 0.99808903,
"epoch": 24.246575342465754,
"grad_norm": 6.786465644836426,
"learning_rate": 5.5504861008599e-06,
"loss": 0.00497846,
"memory(GiB)": 14.16,
"step": 885,
"train_speed(iter/s)": 0.217499
},
{
"acc": 0.99864416,
"epoch": 24.383561643835616,
"grad_norm": 5.07835054397583,
"learning_rate": 5.504817148842783e-06,
"loss": 0.00418225,
"memory(GiB)": 14.16,
"step": 890,
"train_speed(iter/s)": 0.218324
},
{
"acc": 0.99821434,
"epoch": 24.52054794520548,
"grad_norm": 2.3909006118774414,
"learning_rate": 5.4591056420587975e-06,
"loss": 0.00267169,
"memory(GiB)": 14.16,
"step": 895,
"train_speed(iter/s)": 0.219135
},
{
"acc": 0.99864416,
"epoch": 24.65753424657534,
"grad_norm": 0.555738627910614,
"learning_rate": 5.413355437688927e-06,
"loss": 0.00462395,
"memory(GiB)": 14.16,
"step": 900,
"train_speed(iter/s)": 0.219963
},
{
"acc": 0.99594774,
"epoch": 24.794520547945204,
"grad_norm": 5.093243598937988,
"learning_rate": 5.367570396179488e-06,
"loss": 0.01022252,
"memory(GiB)": 14.16,
"step": 905,
"train_speed(iter/s)": 0.220772
},
{
"acc": 0.99728842,
"epoch": 24.931506849315067,
"grad_norm": 3.7607083320617676,
"learning_rate": 5.321754380916395e-06,
"loss": 0.01267306,
"memory(GiB)": 14.16,
"step": 910,
"train_speed(iter/s)": 0.221621
},
{
"acc": 0.99598217,
"epoch": 25.068493150684933,
"grad_norm": 2.7842702865600586,
"learning_rate": 5.275911257899149e-06,
"loss": 0.01249768,
"memory(GiB)": 14.16,
"step": 915,
"train_speed(iter/s)": 0.222392
},
{
"acc": 0.99814568,
"epoch": 25.205479452054796,
"grad_norm": 2.1749532222747803,
"learning_rate": 5.23004489541464e-06,
"loss": 0.01107962,
"memory(GiB)": 14.16,
"step": 920,
"train_speed(iter/s)": 0.223218
},
{
"acc": 0.99907284,
"epoch": 25.34246575342466,
"grad_norm": 0.12532441318035126,
"learning_rate": 5.184159163710717e-06,
"loss": 0.00567983,
"memory(GiB)": 14.16,
"step": 925,
"train_speed(iter/s)": 0.22402
},
{
"acc": 0.99862642,
"epoch": 25.47945205479452,
"grad_norm": 3.7313835620880127,
"learning_rate": 5.1382579346696275e-06,
"loss": 0.00543302,
"memory(GiB)": 14.16,
"step": 930,
"train_speed(iter/s)": 0.22481
},
{
"acc": 0.99594784,
"epoch": 25.616438356164384,
"grad_norm": 3.0569019317626953,
"learning_rate": 5.092345081481297e-06,
"loss": 0.01230588,
"memory(GiB)": 14.16,
"step": 935,
"train_speed(iter/s)": 0.225594
},
{
"acc": 0.99909058,
"epoch": 25.753424657534246,
"grad_norm": 0.2874479293823242,
"learning_rate": 5.0464244783165105e-06,
"loss": 0.0029504,
"memory(GiB)": 14.16,
"step": 940,
"train_speed(iter/s)": 0.226373
},
{
"acc": 0.99818115,
"epoch": 25.89041095890411,
"grad_norm": 6.2819695472717285,
"learning_rate": 5.000500000000001e-06,
"loss": 0.00704549,
"memory(GiB)": 14.16,
"step": 945,
"train_speed(iter/s)": 0.227172
},
{
"acc": 0.99818001,
"epoch": 26.027397260273972,
"grad_norm": 2.6562278270721436,
"learning_rate": 4.954575521683491e-06,
"loss": 0.00467317,
"memory(GiB)": 14.16,
"step": 950,
"train_speed(iter/s)": 0.227913
},
{
"acc": 0.997717,
"epoch": 26.164383561643834,
"grad_norm": 0.46010449528694153,
"learning_rate": 4.908654918518704e-06,
"loss": 0.0066583,
"memory(GiB)": 14.16,
"step": 955,
"train_speed(iter/s)": 0.228686
},
{
"acc": 1.0,
"epoch": 26.301369863013697,
"grad_norm": 1.1016509532928467,
"learning_rate": 4.862742065330375e-06,
"loss": 0.00110117,
"memory(GiB)": 14.16,
"step": 960,
"train_speed(iter/s)": 0.229538
},
{
"acc": 0.99866076,
"epoch": 26.438356164383563,
"grad_norm": 3.4421184062957764,
"learning_rate": 4.816840836289285e-06,
"loss": 0.00389256,
"memory(GiB)": 14.16,
"step": 965,
"train_speed(iter/s)": 0.230473
},
{
"acc": 1.0,
"epoch": 26.575342465753426,
"grad_norm": 1.7454206943511963,
"learning_rate": 4.770955104585361e-06,
"loss": 0.00087426,
"memory(GiB)": 14.16,
"step": 970,
"train_speed(iter/s)": 0.231175
},
{
"acc": 0.99866076,
"epoch": 26.71232876712329,
"grad_norm": 0.08459863811731339,
"learning_rate": 4.725088742100851e-06,
"loss": 0.00253912,
"memory(GiB)": 14.16,
"step": 975,
"train_speed(iter/s)": 0.231908
},
{
"acc": 0.99821434,
"epoch": 26.84931506849315,
"grad_norm": 0.7988649010658264,
"learning_rate": 4.679245619083607e-06,
"loss": 0.00461807,
"memory(GiB)": 14.16,
"step": 980,
"train_speed(iter/s)": 0.232806
},
{
"acc": 0.99866076,
"epoch": 26.986301369863014,
"grad_norm": 0.03472264111042023,
"learning_rate": 4.633429603820513e-06,
"loss": 0.0050515,
"memory(GiB)": 14.16,
"step": 985,
"train_speed(iter/s)": 0.233323
},
{
"acc": 1.0,
"epoch": 27.123287671232877,
"grad_norm": 1.552517056465149,
"learning_rate": 4.587644562311076e-06,
"loss": 0.00116102,
"memory(GiB)": 14.16,
"step": 990,
"train_speed(iter/s)": 0.234015
},
{
"acc": 0.99866076,
"epoch": 27.26027397260274,
"grad_norm": 2.796733856201172,
"learning_rate": 4.541894357941205e-06,
"loss": 0.0039554,
"memory(GiB)": 14.16,
"step": 995,
"train_speed(iter/s)": 0.234715
},
{
"acc": 0.99910717,
"epoch": 27.397260273972602,
"grad_norm": 0.08924784511327744,
"learning_rate": 4.4961828511572195e-06,
"loss": 0.00281882,
"memory(GiB)": 14.16,
"step": 1000,
"train_speed(iter/s)": 0.235411
},
{
"epoch": 27.397260273972602,
"eval_acc": 0.376108506949877,
"eval_loss": 5.226269721984863,
"eval_runtime": 1966.6531,
"eval_samples_per_second": 16.284,
"eval_steps_per_second": 2.035,
"step": 1000
},
{
"acc": 0.99955359,
"epoch": 27.534246575342465,
"grad_norm": 0.20645824074745178,
"learning_rate": 4.450513899140101e-06,
"loss": 0.00193416,
"memory(GiB)": 14.16,
"step": 1005,
"train_speed(iter/s)": 0.160727
},
{
"acc": 0.99910717,
"epoch": 27.671232876712327,
"grad_norm": 2.83465838432312,
"learning_rate": 4.404891355480016e-06,
"loss": 0.00531424,
"memory(GiB)": 14.16,
"step": 1010,
"train_speed(iter/s)": 0.161302
},
{
"acc": 0.99910717,
"epoch": 27.80821917808219,
"grad_norm": 1.058475375175476,
"learning_rate": 4.359319069851151e-06,
"loss": 0.00214943,
"memory(GiB)": 14.16,
"step": 1015,
"train_speed(iter/s)": 0.161884
},
{
"acc": 1.0,
"epoch": 27.945205479452056,
"grad_norm": 0.5197652578353882,
"learning_rate": 4.313800887686869e-06,
"loss": 0.00063238,
"memory(GiB)": 14.16,
"step": 1020,
"train_speed(iter/s)": 0.162463
},
{
"acc": 0.99910717,
"epoch": 28.08219178082192,
"grad_norm": 0.02172599360346794,
"learning_rate": 4.268340649855233e-06,
"loss": 0.00572151,
"memory(GiB)": 14.16,
"step": 1025,
"train_speed(iter/s)": 0.163028
},
{
"acc": 0.99910717,
"epoch": 28.21917808219178,
"grad_norm": 1.721336007118225,
"learning_rate": 4.222942192334907e-06,
"loss": 0.00307167,
"memory(GiB)": 14.16,
"step": 1030,
"train_speed(iter/s)": 0.163606
},
{
"acc": 0.99866076,
"epoch": 28.356164383561644,
"grad_norm": 0.07104966044425964,
"learning_rate": 4.1776093458914725e-06,
"loss": 0.0070457,
"memory(GiB)": 14.16,
"step": 1035,
"train_speed(iter/s)": 0.164181
},
{
"acc": 0.99866076,
"epoch": 28.493150684931507,
"grad_norm": 4.222721099853516,
"learning_rate": 4.1323459357541826e-06,
"loss": 0.00498358,
"memory(GiB)": 14.16,
"step": 1040,
"train_speed(iter/s)": 0.164784
},
{
"acc": 1.0,
"epoch": 28.63013698630137,
"grad_norm": 0.08688988536596298,
"learning_rate": 4.087155781293192e-06,
"loss": 0.0007615,
"memory(GiB)": 14.16,
"step": 1045,
"train_speed(iter/s)": 0.165367
},
{
"acc": 0.99955359,
"epoch": 28.767123287671232,
"grad_norm": 0.03878637030720711,
"learning_rate": 4.042042695697272e-06,
"loss": 0.00484578,
"memory(GiB)": 14.16,
"step": 1050,
"train_speed(iter/s)": 0.165931
},
{
"acc": 0.99955359,
"epoch": 28.904109589041095,
"grad_norm": 0.5024237036705017,
"learning_rate": 3.997010485652039e-06,
"loss": 0.00233584,
"memory(GiB)": 14.16,
"step": 1055,
"train_speed(iter/s)": 0.166493
},
{
"acc": 0.99910717,
"epoch": 29.041095890410958,
"grad_norm": 0.9032835960388184,
"learning_rate": 3.952062951018766e-06,
"loss": 0.00431595,
"memory(GiB)": 14.16,
"step": 1060,
"train_speed(iter/s)": 0.167071
},
{
"acc": 1.0,
"epoch": 29.17808219178082,
"grad_norm": 0.07504996657371521,
"learning_rate": 3.907203884513724e-06,
"loss": 0.00013832,
"memory(GiB)": 14.16,
"step": 1065,
"train_speed(iter/s)": 0.167643
},
{
"acc": 1.0,
"epoch": 29.315068493150687,
"grad_norm": 0.041768305003643036,
"learning_rate": 3.862437071388162e-06,
"loss": 0.00039022,
"memory(GiB)": 14.16,
"step": 1070,
"train_speed(iter/s)": 0.168201
},
{
"acc": 0.99955359,
"epoch": 29.45205479452055,
"grad_norm": 0.09327519685029984,
"learning_rate": 3.817766289108899e-06,
"loss": 0.00143108,
"memory(GiB)": 14.16,
"step": 1075,
"train_speed(iter/s)": 0.168777
},
{
"acc": 0.99955359,
"epoch": 29.589041095890412,
"grad_norm": 0.03159390017390251,
"learning_rate": 3.773195307039575e-06,
"loss": 0.00081171,
"memory(GiB)": 14.16,
"step": 1080,
"train_speed(iter/s)": 0.169341
},
{
"acc": 0.99910717,
"epoch": 29.726027397260275,
"grad_norm": 2.1395320892333984,
"learning_rate": 3.728727886122599e-06,
"loss": 0.00190442,
"memory(GiB)": 14.16,
"step": 1085,
"train_speed(iter/s)": 0.169923
},
{
"acc": 0.99866076,
"epoch": 29.863013698630137,
"grad_norm": 5.531327724456787,
"learning_rate": 3.6843677785617874e-06,
"loss": 0.00634567,
"memory(GiB)": 14.16,
"step": 1090,
"train_speed(iter/s)": 0.170491
},
{
"acc": 0.99910717,
"epoch": 30.0,
"grad_norm": 0.03689517825841904,
"learning_rate": 3.640118727505748e-06,
"loss": 0.00395082,
"memory(GiB)": 14.16,
"step": 1095,
"train_speed(iter/s)": 0.171066
},
{
"acc": 0.99955359,
"epoch": 30.136986301369863,
"grad_norm": 0.017297176644206047,
"learning_rate": 3.5959844667320403e-06,
"loss": 0.00074339,
"memory(GiB)": 14.16,
"step": 1100,
"train_speed(iter/s)": 0.171615
},
{
"acc": 0.99955359,
"epoch": 30.273972602739725,
"grad_norm": 0.017922429367899895,
"learning_rate": 3.5519687203321056e-06,
"loss": 0.00269048,
"memory(GiB)": 14.16,
"step": 1105,
"train_speed(iter/s)": 0.172174
},
{
"acc": 0.99910717,
"epoch": 30.410958904109588,
"grad_norm": 0.04719838500022888,
"learning_rate": 3.5080752023970284e-06,
"loss": 0.00416398,
"memory(GiB)": 14.16,
"step": 1110,
"train_speed(iter/s)": 0.172722
},
{
"acc": 0.99910717,
"epoch": 30.54794520547945,
"grad_norm": 0.02601473033428192,
"learning_rate": 3.4643076167041395e-06,
"loss": 0.0034888,
"memory(GiB)": 14.16,
"step": 1115,
"train_speed(iter/s)": 0.173288
},
{
"acc": 0.99910717,
"epoch": 30.684931506849313,
"grad_norm": 0.03908325359225273,
"learning_rate": 3.4206696564044813e-06,
"loss": 0.00179875,
"memory(GiB)": 14.16,
"step": 1120,
"train_speed(iter/s)": 0.173848
},
{
"acc": 0.99819775,
"epoch": 30.82191780821918,
"grad_norm": 2.540851354598999,
"learning_rate": 3.377165003711185e-06,
"loss": 0.00962915,
"memory(GiB)": 14.16,
"step": 1125,
"train_speed(iter/s)": 0.174398
},
{
"acc": 0.99910717,
"epoch": 30.958904109589042,
"grad_norm": 0.023008601740002632,
"learning_rate": 3.3337973295887587e-06,
"loss": 0.00751298,
"memory(GiB)": 14.16,
"step": 1130,
"train_speed(iter/s)": 0.174948
},
{
"acc": 0.99955359,
"epoch": 31.095890410958905,
"grad_norm": 0.0744655579328537,
"learning_rate": 3.2905702934433197e-06,
"loss": 0.00133921,
"memory(GiB)": 14.16,
"step": 1135,
"train_speed(iter/s)": 0.175478
},
{
"acc": 1.0,
"epoch": 31.232876712328768,
"grad_norm": 0.04030351713299751,
"learning_rate": 3.247487542813825e-06,
"loss": 0.0002436,
"memory(GiB)": 14.16,
"step": 1140,
"train_speed(iter/s)": 0.176017
},
{
"acc": 1.0,
"epoch": 31.36986301369863,
"grad_norm": 0.21374386548995972,
"learning_rate": 3.204552713064278e-06,
"loss": 0.00017974,
"memory(GiB)": 14.16,
"step": 1145,
"train_speed(iter/s)": 0.176558
},
{
"acc": 1.0,
"epoch": 31.506849315068493,
"grad_norm": 0.012107456102967262,
"learning_rate": 3.1617694270769713e-06,
"loss": 0.00015874,
"memory(GiB)": 14.16,
"step": 1150,
"train_speed(iter/s)": 0.177106
},
{
"acc": 1.0,
"epoch": 31.643835616438356,
"grad_norm": 0.12086984515190125,
"learning_rate": 3.119141294946797e-06,
"loss": 0.00084028,
"memory(GiB)": 14.16,
"step": 1155,
"train_speed(iter/s)": 0.177658
},
{
"acc": 0.99910717,
"epoch": 31.78082191780822,
"grad_norm": 3.515671968460083,
"learning_rate": 3.0766719136766093e-06,
"loss": 0.00295761,
"memory(GiB)": 14.16,
"step": 1160,
"train_speed(iter/s)": 0.178197
},
{
"acc": 0.99910717,
"epoch": 31.91780821917808,
"grad_norm": 0.011739728040993214,
"learning_rate": 3.034364866873715e-06,
"loss": 0.00487542,
"memory(GiB)": 14.16,
"step": 1165,
"train_speed(iter/s)": 0.178737
},
{
"acc": 1.0,
"epoch": 32.054794520547944,
"grad_norm": 0.6842294335365295,
"learning_rate": 2.9922237244474807e-06,
"loss": 0.00015365,
"memory(GiB)": 14.16,
"step": 1170,
"train_speed(iter/s)": 0.179255
},
{
"acc": 1.0,
"epoch": 32.19178082191781,
"grad_norm": 0.22703398764133453,
"learning_rate": 2.950252042308099e-06,
"loss": 0.00036469,
"memory(GiB)": 14.16,
"step": 1175,
"train_speed(iter/s)": 0.179789
},
{
"acc": 1.0,
"epoch": 32.32876712328767,
"grad_norm": 0.48301902413368225,
"learning_rate": 2.9084533620665478e-06,
"loss": 0.00040778,
"memory(GiB)": 14.16,
"step": 1180,
"train_speed(iter/s)": 0.18032
},
{
"acc": 0.99910717,
"epoch": 32.465753424657535,
"grad_norm": 3.3718252182006836,
"learning_rate": 2.86683121073573e-06,
"loss": 0.00395589,
"memory(GiB)": 14.16,
"step": 1185,
"train_speed(iter/s)": 0.180852
},
{
"acc": 1.0,
"epoch": 32.602739726027394,
"grad_norm": 0.021095439791679382,
"learning_rate": 2.825389100432876e-06,
"loss": 0.0002179,
"memory(GiB)": 14.16,
"step": 1190,
"train_speed(iter/s)": 0.181384
},
{
"acc": 1.0,
"epoch": 32.73972602739726,
"grad_norm": 0.03395378589630127,
"learning_rate": 2.7841305280831743e-06,
"loss": 0.0001625,
"memory(GiB)": 14.16,
"step": 1195,
"train_speed(iter/s)": 0.181912
},
{
"acc": 1.0,
"epoch": 32.87671232876713,
"grad_norm": 0.015184338204562664,
"learning_rate": 2.743058975124708e-06,
"loss": 0.00051196,
"memory(GiB)": 14.16,
"step": 1200,
"train_speed(iter/s)": 0.182449
},
{
"acc": 1.0,
"epoch": 33.013698630136986,
"grad_norm": 0.03929471969604492,
"learning_rate": 2.7021779072146866e-06,
"loss": 0.0004342,
"memory(GiB)": 14.16,
"step": 1205,
"train_speed(iter/s)": 0.182964
},
{
"acc": 1.0,
"epoch": 33.15068493150685,
"grad_norm": 0.014112686738371849,
"learning_rate": 2.661490773937e-06,
"loss": 6.217e-05,
"memory(GiB)": 14.16,
"step": 1210,
"train_speed(iter/s)": 0.183491
},
{
"acc": 0.99910717,
"epoch": 33.28767123287671,
"grad_norm": 0.07489810883998871,
"learning_rate": 2.6210010085111507e-06,
"loss": 0.00106858,
"memory(GiB)": 14.16,
"step": 1215,
"train_speed(iter/s)": 0.184013
},
{
"acc": 1.0,
"epoch": 33.42465753424658,
"grad_norm": 0.013716904446482658,
"learning_rate": 2.580712027502557e-06,
"loss": 0.00010475,
"memory(GiB)": 14.16,
"step": 1220,
"train_speed(iter/s)": 0.184539
},
{
"acc": 1.0,
"epoch": 33.56164383561644,
"grad_norm": 0.03437013924121857,
"learning_rate": 2.5406272305342438e-06,
"loss": 8.954e-05,
"memory(GiB)": 14.16,
"step": 1225,
"train_speed(iter/s)": 0.185072
},
{
"acc": 0.99955359,
"epoch": 33.6986301369863,
"grad_norm": 0.0419132262468338,
"learning_rate": 2.500749999999999e-06,
"loss": 0.00213626,
"memory(GiB)": 14.16,
"step": 1230,
"train_speed(iter/s)": 0.185598
},
{
"acc": 0.99955359,
"epoch": 33.83561643835616,
"grad_norm": 0.00895242765545845,
"learning_rate": 2.461083700778954e-06,
"loss": 0.00524443,
"memory(GiB)": 14.16,
"step": 1235,
"train_speed(iter/s)": 0.186129
},
{
"acc": 1.0,
"epoch": 33.97260273972603,
"grad_norm": 0.0046184309758245945,
"learning_rate": 2.4216316799516488e-06,
"loss": 9.459e-05,
"memory(GiB)": 14.16,
"step": 1240,
"train_speed(iter/s)": 0.186651
},
{
"acc": 0.99955359,
"epoch": 34.10958904109589,
"grad_norm": 0.00644602719694376,
"learning_rate": 2.3823972665176048e-06,
"loss": 0.00276474,
"memory(GiB)": 14.16,
"step": 1245,
"train_speed(iter/s)": 0.187167
},
{
"acc": 1.0,
"epoch": 34.24657534246575,
"grad_norm": 0.04513326287269592,
"learning_rate": 2.34338377111443e-06,
"loss": 8.529e-05,
"memory(GiB)": 14.16,
"step": 1250,
"train_speed(iter/s)": 0.187696
},
{
"acc": 0.99955359,
"epoch": 34.38356164383562,
"grad_norm": 0.00604225741699338,
"learning_rate": 2.304594485738447e-06,
"loss": 0.00061723,
"memory(GiB)": 14.16,
"step": 1255,
"train_speed(iter/s)": 0.188214
},
{
"acc": 1.0,
"epoch": 34.52054794520548,
"grad_norm": 0.007053479552268982,
"learning_rate": 2.266032683466928e-06,
"loss": 0.00040206,
"memory(GiB)": 14.16,
"step": 1260,
"train_speed(iter/s)": 0.188743
},
{
"acc": 1.0,
"epoch": 34.657534246575345,
"grad_norm": 0.012396584264934063,
"learning_rate": 2.227701618181895e-06,
"loss": 4.734e-05,
"memory(GiB)": 14.16,
"step": 1265,
"train_speed(iter/s)": 0.189268
},
{
"acc": 1.0,
"epoch": 34.794520547945204,
"grad_norm": 0.2615416944026947,
"learning_rate": 2.189604524295565e-06,
"loss": 0.00017459,
"memory(GiB)": 14.16,
"step": 1270,
"train_speed(iter/s)": 0.18978
},
{
"acc": 1.0,
"epoch": 34.93150684931507,
"grad_norm": 0.9177928566932678,
"learning_rate": 2.1517446164774243e-06,
"loss": 0.00016437,
"memory(GiB)": 14.16,
"step": 1275,
"train_speed(iter/s)": 0.190291
},
{
"acc": 1.0,
"epoch": 35.06849315068493,
"grad_norm": 0.005634276662021875,
"learning_rate": 2.114125089382966e-06,
"loss": 6.107e-05,
"memory(GiB)": 14.16,
"step": 1280,
"train_speed(iter/s)": 0.190787
},
{
"acc": 1.0,
"epoch": 35.205479452054796,
"grad_norm": 0.005844338797032833,
"learning_rate": 2.0767491173841266e-06,
"loss": 0.00024583,
"memory(GiB)": 14.16,
"step": 1285,
"train_speed(iter/s)": 0.191287
},
{
"acc": 0.99955359,
"epoch": 35.342465753424655,
"grad_norm": 0.008257429115474224,
"learning_rate": 2.039619854301433e-06,
"loss": 0.00127686,
"memory(GiB)": 14.16,
"step": 1290,
"train_speed(iter/s)": 0.191799
},
{
"acc": 0.99955359,
"epoch": 35.47945205479452,
"grad_norm": 0.14188018441200256,
"learning_rate": 2.0027404331378715e-06,
"loss": 0.00085992,
"memory(GiB)": 14.16,
"step": 1295,
"train_speed(iter/s)": 0.192303
},
{
"acc": 1.0,
"epoch": 35.61643835616438,
"grad_norm": 0.004589778371155262,
"learning_rate": 1.9661139658145304e-06,
"loss": 5.643e-05,
"memory(GiB)": 14.16,
"step": 1300,
"train_speed(iter/s)": 0.192822
},
{
"acc": 1.0,
"epoch": 35.75342465753425,
"grad_norm": 0.005317226517945528,
"learning_rate": 1.929743542908006e-06,
"loss": 3.488e-05,
"memory(GiB)": 14.16,
"step": 1305,
"train_speed(iter/s)": 0.193346
},
{
"acc": 1.0,
"epoch": 35.89041095890411,
"grad_norm": 0.17880931496620178,
"learning_rate": 1.8936322333896213e-06,
"loss": 0.00010323,
"memory(GiB)": 14.16,
"step": 1310,
"train_speed(iter/s)": 0.193861
},
{
"acc": 1.0,
"epoch": 36.02739726027397,
"grad_norm": 0.026366814970970154,
"learning_rate": 1.857783084366465e-06,
"loss": 0.00022611,
"memory(GiB)": 14.16,
"step": 1315,
"train_speed(iter/s)": 0.194343
},
{
"acc": 1.0,
"epoch": 36.16438356164384,
"grad_norm": 0.011201135814189911,
"learning_rate": 1.8221991208242658e-06,
"loss": 3.839e-05,
"memory(GiB)": 14.16,
"step": 1320,
"train_speed(iter/s)": 0.194845
},
{
"acc": 1.0,
"epoch": 36.3013698630137,
"grad_norm": 0.008422702550888062,
"learning_rate": 1.7868833453721465e-06,
"loss": 8.309e-05,
"memory(GiB)": 14.16,
"step": 1325,
"train_speed(iter/s)": 0.195352
},
{
"acc": 1.0,
"epoch": 36.43835616438356,
"grad_norm": 0.027778884395956993,
"learning_rate": 1.7518387379892654e-06,
"loss": 4.668e-05,
"memory(GiB)": 14.16,
"step": 1330,
"train_speed(iter/s)": 0.195857
},
{
"acc": 1.0,
"epoch": 36.57534246575342,
"grad_norm": 0.010711952112615108,
"learning_rate": 1.717068255773352e-06,
"loss": 3.179e-05,
"memory(GiB)": 14.16,
"step": 1335,
"train_speed(iter/s)": 0.196353
},
{
"acc": 1.0,
"epoch": 36.71232876712329,
"grad_norm": 0.004275246057659388,
"learning_rate": 1.6825748326911945e-06,
"loss": 0.00023135,
"memory(GiB)": 14.16,
"step": 1340,
"train_speed(iter/s)": 0.196844
},
{
"acc": 1.0,
"epoch": 36.84931506849315,
"grad_norm": 0.003764290129765868,
"learning_rate": 1.6483613793310607e-06,
"loss": 4.05e-05,
"memory(GiB)": 14.16,
"step": 1345,
"train_speed(iter/s)": 0.197337
},
{
"acc": 1.0,
"epoch": 36.986301369863014,
"grad_norm": 0.025480693206191063,
"learning_rate": 1.6144307826571068e-06,
"loss": 3.893e-05,
"memory(GiB)": 14.16,
"step": 1350,
"train_speed(iter/s)": 0.19784
},
{
"acc": 0.99955359,
"epoch": 37.12328767123287,
"grad_norm": 0.002638956531882286,
"learning_rate": 1.580785905765769e-06,
"loss": 0.00189444,
"memory(GiB)": 14.16,
"step": 1355,
"train_speed(iter/s)": 0.198304
},
{
"acc": 1.0,
"epoch": 37.26027397260274,
"grad_norm": 2.218669891357422,
"learning_rate": 1.5474295876441716e-06,
"loss": 0.00040235,
"memory(GiB)": 14.16,
"step": 1360,
"train_speed(iter/s)": 0.198788
},
{
"acc": 1.0,
"epoch": 37.397260273972606,
"grad_norm": 0.004326330963522196,
"learning_rate": 1.51436464293057e-06,
"loss": 2.629e-05,
"memory(GiB)": 14.16,
"step": 1365,
"train_speed(iter/s)": 0.199269
},
{
"acc": 1.0,
"epoch": 37.534246575342465,
"grad_norm": 0.00336137181147933,
"learning_rate": 1.4815938616768564e-06,
"loss": 2.298e-05,
"memory(GiB)": 14.16,
"step": 1370,
"train_speed(iter/s)": 0.199749
},
{
"acc": 1.0,
"epoch": 37.67123287671233,
"grad_norm": 0.0043685161508619785,
"learning_rate": 1.4491200091131203e-06,
"loss": 2.556e-05,
"memory(GiB)": 14.16,
"step": 1375,
"train_speed(iter/s)": 0.200239
},
{
"acc": 1.0,
"epoch": 37.80821917808219,
"grad_norm": 0.003170077223330736,
"learning_rate": 1.4169458254143287e-06,
"loss": 2.185e-05,
"memory(GiB)": 14.16,
"step": 1380,
"train_speed(iter/s)": 0.200734
},
{
"acc": 1.0,
"epoch": 37.945205479452056,
"grad_norm": 0.0025261647533625364,
"learning_rate": 1.3850740254690947e-06,
"loss": 0.00010961,
"memory(GiB)": 14.16,
"step": 1385,
"train_speed(iter/s)": 0.201222
},
{
"acc": 1.0,
"epoch": 38.082191780821915,
"grad_norm": 0.002983207581564784,
"learning_rate": 1.3535072986506058e-06,
"loss": 2.55e-05,
"memory(GiB)": 14.16,
"step": 1390,
"train_speed(iter/s)": 0.201683
},
{
"acc": 1.0,
"epoch": 38.21917808219178,
"grad_norm": 0.7226250171661377,
"learning_rate": 1.3222483085896786e-06,
"loss": 0.00014088,
"memory(GiB)": 14.16,
"step": 1395,
"train_speed(iter/s)": 0.202156
},
{
"acc": 1.0,
"epoch": 38.35616438356164,
"grad_norm": 0.0026145747397094965,
"learning_rate": 1.2912996929500105e-06,
"loss": 1.867e-05,
"memory(GiB)": 14.16,
"step": 1400,
"train_speed(iter/s)": 0.202643
},
{
"acc": 1.0,
"epoch": 38.49315068493151,
"grad_norm": 0.002422385849058628,
"learning_rate": 1.2606640632056035e-06,
"loss": 2.782e-05,
"memory(GiB)": 14.16,
"step": 1405,
"train_speed(iter/s)": 0.203134
},
{
"acc": 1.0,
"epoch": 38.63013698630137,
"grad_norm": 0.005694146268069744,
"learning_rate": 1.230344004420408e-06,
"loss": 2.287e-05,
"memory(GiB)": 14.16,
"step": 1410,
"train_speed(iter/s)": 0.20361
},
{
"acc": 1.0,
"epoch": 38.76712328767123,
"grad_norm": 0.0027258628979325294,
"learning_rate": 1.2003420750301944e-06,
"loss": 0.00018693,
"memory(GiB)": 14.16,
"step": 1415,
"train_speed(iter/s)": 0.204094
},
{
"acc": 1.0,
"epoch": 38.9041095890411,
"grad_norm": 0.0033724328968673944,
"learning_rate": 1.1706608066266701e-06,
"loss": 2.27e-05,
"memory(GiB)": 14.16,
"step": 1420,
"train_speed(iter/s)": 0.204571
},
{
"acc": 1.0,
"epoch": 39.04109589041096,
"grad_norm": 0.0026123709976673126,
"learning_rate": 1.141302703743854e-06,
"loss": 1.855e-05,
"memory(GiB)": 14.16,
"step": 1425,
"train_speed(iter/s)": 0.205022
},
{
"acc": 1.0,
"epoch": 39.178082191780824,
"grad_norm": 0.0019495452288538218,
"learning_rate": 1.1122702436467527e-06,
"loss": 1.743e-05,
"memory(GiB)": 14.16,
"step": 1430,
"train_speed(iter/s)": 0.205495
},
{
"acc": 1.0,
"epoch": 39.31506849315068,
"grad_norm": 0.004156290087848902,
"learning_rate": 1.083565876122317e-06,
"loss": 1.877e-05,
"memory(GiB)": 14.16,
"step": 1435,
"train_speed(iter/s)": 0.205967
},
{
"acc": 1.0,
"epoch": 39.45205479452055,
"grad_norm": 0.002886646194383502,
"learning_rate": 1.0551920232727294e-06,
"loss": 1.751e-05,
"memory(GiB)": 14.16,
"step": 1440,
"train_speed(iter/s)": 0.206455
},
{
"acc": 1.0,
"epoch": 39.58904109589041,
"grad_norm": 0.004523637238889933,
"learning_rate": 1.0271510793110337e-06,
"loss": 1.72e-05,
"memory(GiB)": 14.16,
"step": 1445,
"train_speed(iter/s)": 0.206938
},
{
"acc": 1.0,
"epoch": 39.726027397260275,
"grad_norm": 0.002039379673078656,
"learning_rate": 9.994454103590919e-07,
"loss": 0.00028988,
"memory(GiB)": 14.16,
"step": 1450,
"train_speed(iter/s)": 0.207413
},
{
"acc": 1.0,
"epoch": 39.863013698630134,
"grad_norm": 0.0019272951176390052,
"learning_rate": 9.720773542479399e-07,
"loss": 1.637e-05,
"memory(GiB)": 14.16,
"step": 1455,
"train_speed(iter/s)": 0.207887
},
{
"acc": 1.0,
"epoch": 40.0,
"grad_norm": 0.0028609074652194977,
"learning_rate": 9.450492203205211e-07,
"loss": 1.569e-05,
"memory(GiB)": 14.16,
"step": 1460,
"train_speed(iter/s)": 0.208361
},
{
"acc": 1.0,
"epoch": 40.136986301369866,
"grad_norm": 0.0020151259377598763,
"learning_rate": 9.183632892368126e-07,
"loss": 1.691e-05,
"memory(GiB)": 14.16,
"step": 1465,
"train_speed(iter/s)": 0.208817
},
{
"acc": 1.0,
"epoch": 40.273972602739725,
"grad_norm": 0.0018815897637978196,
"learning_rate": 8.920218127813862e-07,
"loss": 1.482e-05,
"memory(GiB)": 14.16,
"step": 1470,
"train_speed(iter/s)": 0.209272
},
{
"acc": 1.0,
"epoch": 40.41095890410959,
"grad_norm": 0.0017830530414357781,
"learning_rate": 8.660270136734065e-07,
"loss": 1.544e-05,
"memory(GiB)": 14.16,
"step": 1475,
"train_speed(iter/s)": 0.209727
},
{
"acc": 1.0,
"epoch": 40.54794520547945,
"grad_norm": 0.0013965211110189557,
"learning_rate": 8.403810853790619e-07,
"loss": 1.878e-05,
"memory(GiB)": 14.16,
"step": 1480,
"train_speed(iter/s)": 0.210196
},
{
"acc": 1.0,
"epoch": 40.68493150684932,
"grad_norm": 0.0022451053373515606,
"learning_rate": 8.150861919264897e-07,
"loss": 1.5e-05,
"memory(GiB)": 14.16,
"step": 1485,
"train_speed(iter/s)": 0.210655
},
{
"acc": 1.0,
"epoch": 40.821917808219176,
"grad_norm": 0.0060085877776145935,
"learning_rate": 7.901444677231659e-07,
"loss": 1.531e-05,
"memory(GiB)": 14.16,
"step": 1490,
"train_speed(iter/s)": 0.211128
},
{
"acc": 1.0,
"epoch": 40.95890410958904,
"grad_norm": 0.0016883641947060823,
"learning_rate": 7.65558017375808e-07,
"loss": 1.456e-05,
"memory(GiB)": 14.16,
"step": 1495,
"train_speed(iter/s)": 0.211591
},
{
"acc": 1.0,
"epoch": 41.0958904109589,
"grad_norm": 0.007958967238664627,
"learning_rate": 7.413289155127845e-07,
"loss": 1.438e-05,
"memory(GiB)": 14.16,
"step": 1500,
"train_speed(iter/s)": 0.212023
},
{
"epoch": 41.0958904109589,
"eval_acc": 0.3744250158022868,
"eval_loss": 5.587606430053711,
"eval_runtime": 1965.1675,
"eval_samples_per_second": 16.296,
"eval_steps_per_second": 2.037,
"step": 1500
},
{
"acc": 1.0,
"epoch": 41.23287671232877,
"grad_norm": 0.0023259874433279037,
"learning_rate": 7.174592066090488e-07,
"loss": 1.527e-05,
"memory(GiB)": 14.16,
"step": 1505,
"train_speed(iter/s)": 0.165697
},
{
"acc": 1.0,
"epoch": 41.36986301369863,
"grad_norm": 0.0012143112253397703,
"learning_rate": 6.939509048136372e-07,
"loss": 1.22e-05,
"memory(GiB)": 14.16,
"step": 1510,
"train_speed(iter/s)": 0.166098
},
{
"acc": 1.0,
"epoch": 41.50684931506849,
"grad_norm": 0.0015943313483148813,
"learning_rate": 6.708059937796999e-07,
"loss": 1.281e-05,
"memory(GiB)": 14.16,
"step": 1515,
"train_speed(iter/s)": 0.166498
},
{
"acc": 1.0,
"epoch": 41.64383561643836,
"grad_norm": 0.0013340068981051445,
"learning_rate": 6.480264264971263e-07,
"loss": 1.167e-05,
"memory(GiB)": 14.16,
"step": 1520,
"train_speed(iter/s)": 0.166899
},
{
"acc": 1.0,
"epoch": 41.78082191780822,
"grad_norm": 0.0011501106200739741,
"learning_rate": 6.256141251277526e-07,
"loss": 1.187e-05,
"memory(GiB)": 14.16,
"step": 1525,
"train_speed(iter/s)": 0.167303
},
{
"acc": 1.0,
"epoch": 41.917808219178085,
"grad_norm": 0.0013626530999317765,
"learning_rate": 6.035709808431585e-07,
"loss": 1.142e-05,
"memory(GiB)": 14.16,
"step": 1530,
"train_speed(iter/s)": 0.167706
},
{
"acc": 1.0,
"epoch": 42.054794520547944,
"grad_norm": 0.0012834669323638082,
"learning_rate": 5.818988536650921e-07,
"loss": 1.071e-05,
"memory(GiB)": 14.16,
"step": 1535,
"train_speed(iter/s)": 0.16809
},
{
"acc": 1.0,
"epoch": 42.19178082191781,
"grad_norm": 0.0013576337369158864,
"learning_rate": 5.605995723085264e-07,
"loss": 1.142e-05,
"memory(GiB)": 14.16,
"step": 1540,
"train_speed(iter/s)": 0.168482
},
{
"acc": 1.0,
"epoch": 42.32876712328767,
"grad_norm": 0.0012725357664749026,
"learning_rate": 5.396749340273402e-07,
"loss": 1.051e-05,
"memory(GiB)": 14.16,
"step": 1545,
"train_speed(iter/s)": 0.168875
},
{
"acc": 1.0,
"epoch": 42.465753424657535,
"grad_norm": 0.0012627997202798724,
"learning_rate": 5.191267044626674e-07,
"loss": 2.435e-05,
"memory(GiB)": 14.16,
"step": 1550,
"train_speed(iter/s)": 0.169269
},
{
"acc": 1.0,
"epoch": 42.602739726027394,
"grad_norm": 0.051344264298677444,
"learning_rate": 4.989566174939183e-07,
"loss": 2.21e-05,
"memory(GiB)": 14.16,
"step": 1555,
"train_speed(iter/s)": 0.169675
},
{
"acc": 1.0,
"epoch": 42.73972602739726,
"grad_norm": 0.0012037245323881507,
"learning_rate": 4.791663750924617e-07,
"loss": 8.75e-06,
"memory(GiB)": 14.16,
"step": 1560,
"train_speed(iter/s)": 0.170075
},
{
"acc": 1.0,
"epoch": 42.87671232876713,
"grad_norm": 0.0008853294420987368,
"learning_rate": 4.5975764717801586e-07,
"loss": 9.42e-06,
"memory(GiB)": 14.16,
"step": 1565,
"train_speed(iter/s)": 0.170474
},
{
"acc": 1.0,
"epoch": 43.013698630136986,
"grad_norm": 0.0008409248548559844,
"learning_rate": 4.407320714777398e-07,
"loss": 8.56e-06,
"memory(GiB)": 14.16,
"step": 1570,
"train_speed(iter/s)": 0.170865
},
{
"acc": 1.0,
"epoch": 43.15068493150685,
"grad_norm": 0.000995440874248743,
"learning_rate": 4.2209125338804007e-07,
"loss": 9.18e-06,
"memory(GiB)": 14.16,
"step": 1575,
"train_speed(iter/s)": 0.171254
},
{
"acc": 1.0,
"epoch": 43.28767123287671,
"grad_norm": 0.0008801660733297467,
"learning_rate": 4.0383676583910706e-07,
"loss": 7.03e-06,
"memory(GiB)": 14.16,
"step": 1580,
"train_speed(iter/s)": 0.17164
},
{
"acc": 1.0,
"epoch": 43.42465753424658,
"grad_norm": 0.0007337812567129731,
"learning_rate": 3.859701491621833e-07,
"loss": 7.99e-06,
"memory(GiB)": 14.16,
"step": 1585,
"train_speed(iter/s)": 0.172037
},
{
"acc": 1.0,
"epoch": 43.56164383561644,
"grad_norm": 0.0009611019631847739,
"learning_rate": 3.6849291095959866e-07,
"loss": 8.49e-06,
"memory(GiB)": 14.16,
"step": 1590,
"train_speed(iter/s)": 0.172427
},
{
"acc": 1.0,
"epoch": 43.6986301369863,
"grad_norm": 0.0009541260078549385,
"learning_rate": 3.5140652597754917e-07,
"loss": 7.95e-06,
"memory(GiB)": 14.16,
"step": 1595,
"train_speed(iter/s)": 0.172819
},
{
"acc": 1.0,
"epoch": 43.83561643835616,
"grad_norm": 0.0007106245611794293,
"learning_rate": 3.3471243598165825e-07,
"loss": 7.9e-06,
"memory(GiB)": 14.16,
"step": 1600,
"train_speed(iter/s)": 0.173214
},
{
"acc": 1.0,
"epoch": 43.97260273972603,
"grad_norm": 0.0014047367731109262,
"learning_rate": 3.184120496353248e-07,
"loss": 7.76e-06,
"memory(GiB)": 14.16,
"step": 1605,
"train_speed(iter/s)": 0.173593
},
{
"acc": 1.0,
"epoch": 44.10958904109589,
"grad_norm": 0.0007362644537352026,
"learning_rate": 3.025067423808514e-07,
"loss": 7.02e-06,
"memory(GiB)": 14.16,
"step": 1610,
"train_speed(iter/s)": 0.17397
},
{
"acc": 1.0,
"epoch": 44.24657534246575,
"grad_norm": 0.0006499322480522096,
"learning_rate": 2.8699785632338603e-07,
"loss": 6.68e-06,
"memory(GiB)": 14.16,
"step": 1615,
"train_speed(iter/s)": 0.174356
},
{
"acc": 1.0,
"epoch": 44.38356164383562,
"grad_norm": 0.0008433948969468474,
"learning_rate": 2.7188670011767715e-07,
"loss": 6.54e-06,
"memory(GiB)": 14.16,
"step": 1620,
"train_speed(iter/s)": 0.174739
},
{
"acc": 1.0,
"epoch": 44.52054794520548,
"grad_norm": 0.0009852251969277859,
"learning_rate": 2.571745488576417e-07,
"loss": 6.99e-06,
"memory(GiB)": 14.16,
"step": 1625,
"train_speed(iter/s)": 0.175119
},
{
"acc": 1.0,
"epoch": 44.657534246575345,
"grad_norm": 0.0006441141595132649,
"learning_rate": 2.42862643968775e-07,
"loss": 6.3e-06,
"memory(GiB)": 14.16,
"step": 1630,
"train_speed(iter/s)": 0.175506
},
{
"acc": 1.0,
"epoch": 44.794520547945204,
"grad_norm": 0.0006608326220884919,
"learning_rate": 2.289521931034023e-07,
"loss": 5.96e-06,
"memory(GiB)": 14.16,
"step": 1635,
"train_speed(iter/s)": 0.175891
},
{
"acc": 1.0,
"epoch": 44.93150684931507,
"grad_norm": 0.0005597140407189727,
"learning_rate": 2.1544437003876737e-07,
"loss": 5.43e-06,
"memory(GiB)": 14.16,
"step": 1640,
"train_speed(iter/s)": 0.176268
},
{
"acc": 1.0,
"epoch": 45.06849315068493,
"grad_norm": 0.000521883659530431,
"learning_rate": 2.023403145779931e-07,
"loss": 5.55e-06,
"memory(GiB)": 14.16,
"step": 1645,
"train_speed(iter/s)": 0.176636
},
{
"acc": 1.0,
"epoch": 45.205479452054796,
"grad_norm": 0.000552397221326828,
"learning_rate": 1.8964113245390256e-07,
"loss": 5.25e-06,
"memory(GiB)": 14.16,
"step": 1650,
"train_speed(iter/s)": 0.17702
},
{
"acc": 1.0,
"epoch": 45.342465753424655,
"grad_norm": 0.0008391111623495817,
"learning_rate": 1.7734789523571958e-07,
"loss": 5.77e-06,
"memory(GiB)": 14.16,
"step": 1655,
"train_speed(iter/s)": 0.177399
},
{
"acc": 1.0,
"epoch": 45.47945205479452,
"grad_norm": 0.0007058508927002549,
"learning_rate": 1.654616402386414e-07,
"loss": 5.41e-06,
"memory(GiB)": 14.16,
"step": 1660,
"train_speed(iter/s)": 0.17778
},
{
"acc": 1.0,
"epoch": 45.61643835616438,
"grad_norm": 0.0005362857482396066,
"learning_rate": 1.5398337043631723e-07,
"loss": 5.57e-06,
"memory(GiB)": 14.16,
"step": 1665,
"train_speed(iter/s)": 0.178163
},
{
"acc": 1.0,
"epoch": 45.75342465753425,
"grad_norm": 0.0008399708895012736,
"learning_rate": 1.429140543762108e-07,
"loss": 5.51e-06,
"memory(GiB)": 14.16,
"step": 1670,
"train_speed(iter/s)": 0.178535
},
{
"acc": 1.0,
"epoch": 45.89041095890411,
"grad_norm": 0.0004938300116918981,
"learning_rate": 1.3225462609787475e-07,
"loss": 4.95e-06,
"memory(GiB)": 14.16,
"step": 1675,
"train_speed(iter/s)": 0.178916
},
{
"acc": 1.0,
"epoch": 46.02739726027397,
"grad_norm": 0.00045353075256571174,
"learning_rate": 1.220059850541356e-07,
"loss": 4.52e-06,
"memory(GiB)": 14.16,
"step": 1680,
"train_speed(iter/s)": 0.179293
},
{
"acc": 1.0,
"epoch": 46.16438356164384,
"grad_norm": 0.000485074648167938,
"learning_rate": 1.1216899603519877e-07,
"loss": 4.22e-06,
"memory(GiB)": 14.16,
"step": 1685,
"train_speed(iter/s)": 0.179668
},
{
"acc": 1.0,
"epoch": 46.3013698630137,
"grad_norm": 0.0004754703550133854,
"learning_rate": 1.0274448909567412e-07,
"loss": 4.68e-06,
"memory(GiB)": 14.16,
"step": 1690,
"train_speed(iter/s)": 0.180037
},
{
"acc": 1.0,
"epoch": 46.43835616438356,
"grad_norm": 0.0005320140044204891,
"learning_rate": 9.373325948453684e-08,
"loss": 4.76e-06,
"memory(GiB)": 14.16,
"step": 1695,
"train_speed(iter/s)": 0.180414
},
{
"acc": 1.0,
"epoch": 46.57534246575342,
"grad_norm": 0.0006507379002869129,
"learning_rate": 8.513606757802232e-08,
"loss": 4.92e-06,
"memory(GiB)": 14.16,
"step": 1700,
"train_speed(iter/s)": 0.180781
},
{
"acc": 1.0,
"epoch": 46.71232876712329,
"grad_norm": 0.00036800041561946273,
"learning_rate": 7.695363881546601e-08,
"loss": 4.32e-06,
"memory(GiB)": 14.16,
"step": 1705,
"train_speed(iter/s)": 0.181149
},
{
"acc": 1.0,
"epoch": 46.84931506849315,
"grad_norm": 0.0005077983951196074,
"learning_rate": 6.918666363808976e-08,
"loss": 4.51e-06,
"memory(GiB)": 14.16,
"step": 1710,
"train_speed(iter/s)": 0.181526
},
{
"acc": 1.0,
"epoch": 46.986301369863014,
"grad_norm": 0.00036885106237605214,
"learning_rate": 6.183579743074136e-08,
"loss": 3.97e-06,
"memory(GiB)": 14.16,
"step": 1715,
"train_speed(iter/s)": 0.181896
},
{
"acc": 1.0,
"epoch": 47.12328767123287,
"grad_norm": 0.0005106101161800325,
"learning_rate": 5.49016604665933e-08,
"loss": 4.26e-06,
"memory(GiB)": 14.16,
"step": 1720,
"train_speed(iter/s)": 0.182255
},
{
"acc": 1.0,
"epoch": 47.26027397260274,
"grad_norm": 0.0004045426903758198,
"learning_rate": 4.838483785480255e-08,
"loss": 4.1e-06,
"memory(GiB)": 14.16,
"step": 1725,
"train_speed(iter/s)": 0.182623
},
{
"acc": 1.0,
"epoch": 47.397260273972606,
"grad_norm": 0.00039062247378751636,
"learning_rate": 4.2285879491139524e-08,
"loss": 4.1e-06,
"memory(GiB)": 14.16,
"step": 1730,
"train_speed(iter/s)": 0.182986
},
{
"acc": 1.0,
"epoch": 47.534246575342465,
"grad_norm": 0.0004385727515909821,
"learning_rate": 3.660530001158591e-08,
"loss": 4.19e-06,
"memory(GiB)": 14.16,
"step": 1735,
"train_speed(iter/s)": 0.183351
},
{
"acc": 1.0,
"epoch": 47.67123287671233,
"grad_norm": 0.0004706868203356862,
"learning_rate": 3.1343578748911556e-08,
"loss": 3.9e-06,
"memory(GiB)": 14.16,
"step": 1740,
"train_speed(iter/s)": 0.183717
},
{
"acc": 1.0,
"epoch": 47.80821917808219,
"grad_norm": 0.0005658628651872277,
"learning_rate": 2.6501159692225276e-08,
"loss": 3.95e-06,
"memory(GiB)": 14.16,
"step": 1745,
"train_speed(iter/s)": 0.184078
},
{
"acc": 1.0,
"epoch": 47.945205479452056,
"grad_norm": 0.00047224326408468187,
"learning_rate": 2.2078451449511957e-08,
"loss": 4.29e-06,
"memory(GiB)": 14.16,
"step": 1750,
"train_speed(iter/s)": 0.184474
},
{
"acc": 1.0,
"epoch": 48.082191780821915,
"grad_norm": 0.0004509200807660818,
"learning_rate": 1.80758272131541e-08,
"loss": 4.3e-06,
"memory(GiB)": 14.16,
"step": 1755,
"train_speed(iter/s)": 0.184853
},
{
"acc": 1.0,
"epoch": 48.21917808219178,
"grad_norm": 0.00045020331162959337,
"learning_rate": 1.4493624728440738e-08,
"loss": 4.35e-06,
"memory(GiB)": 14.16,
"step": 1760,
"train_speed(iter/s)": 0.185233
},
{
"acc": 1.0,
"epoch": 48.35616438356164,
"grad_norm": 0.00043748278403654695,
"learning_rate": 1.1332146265068076e-08,
"loss": 4.28e-06,
"memory(GiB)": 14.16,
"step": 1765,
"train_speed(iter/s)": 0.18561
},
{
"acc": 1.0,
"epoch": 48.49315068493151,
"grad_norm": 0.0003865604812745005,
"learning_rate": 8.591658591635788e-09,
"loss": 3.95e-06,
"memory(GiB)": 14.16,
"step": 1770,
"train_speed(iter/s)": 0.185983
},
{
"acc": 1.0,
"epoch": 48.63013698630137,
"grad_norm": 0.0005739238113164902,
"learning_rate": 6.272392953132284e-09,
"loss": 4.09e-06,
"memory(GiB)": 14.16,
"step": 1775,
"train_speed(iter/s)": 0.186366
},
{
"acc": 1.0,
"epoch": 48.76712328767123,
"grad_norm": 0.0004063249798491597,
"learning_rate": 4.3745450514278e-09,
"loss": 3.76e-06,
"memory(GiB)": 14.16,
"step": 1780,
"train_speed(iter/s)": 0.186745
},
{
"acc": 1.0,
"epoch": 48.9041095890411,
"grad_norm": 0.0003818414988927543,
"learning_rate": 2.8982750287553984e-09,
"loss": 3.65e-06,
"memory(GiB)": 14.16,
"step": 1785,
"train_speed(iter/s)": 0.187115
},
{
"acc": 1.0,
"epoch": 49.04109589041096,
"grad_norm": 0.0005809293361380696,
"learning_rate": 1.843707454203115e-09,
"loss": 4.11e-06,
"memory(GiB)": 14.16,
"step": 1790,
"train_speed(iter/s)": 0.187485
},
{
"acc": 1.0,
"epoch": 49.178082191780824,
"grad_norm": 0.00041514140320941806,
"learning_rate": 1.210931313197315e-09,
"loss": 3.93e-06,
"memory(GiB)": 14.16,
"step": 1795,
"train_speed(iter/s)": 0.187855
},
{
"acc": 1.0,
"epoch": 49.31506849315068,
"grad_norm": 0.0005256779259070754,
"learning_rate": 1e-09,
"loss": 3.98e-06,
"memory(GiB)": 14.16,
"step": 1800,
"train_speed(iter/s)": 0.188225
},
{
"epoch": 49.31506849315068,
"eval_acc": 0.3744860345334727,
"eval_loss": 5.818352222442627,
"eval_runtime": 1948.2317,
"eval_samples_per_second": 16.437,
"eval_steps_per_second": 2.055,
"step": 1800
}
],
"logging_steps": 5,
"max_steps": 1800,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.503384077997179e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}