|
{ |
|
"best_metric": 4.79392624, |
|
"best_model_checkpoint": "/mnt/bn/haiyang-dataset-lq/medical/outputde2d/qwen2-vl-2b-instruct/v1-20241108-205643/checkpoint-500", |
|
"epoch": 49.31506849315068, |
|
"eval_steps": 500, |
|
"global_step": 1800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"acc": 0.85866278, |
|
"epoch": 0.0273972602739726, |
|
"grad_norm": 11.529897689819336, |
|
"learning_rate": 0.0, |
|
"loss": 0.41227522, |
|
"memory(GiB)": 12.7, |
|
"step": 1, |
|
"train_speed(iter/s)": 0.042692 |
|
}, |
|
{ |
|
"acc": 0.82054573, |
|
"epoch": 0.136986301369863, |
|
"grad_norm": 13.506338119506836, |
|
"learning_rate": 3.576679971701948e-06, |
|
"loss": 0.50167066, |
|
"memory(GiB)": 14.16, |
|
"step": 5, |
|
"train_speed(iter/s)": 0.146289 |
|
}, |
|
{ |
|
"acc": 0.87029715, |
|
"epoch": 0.273972602739726, |
|
"grad_norm": 11.584628105163574, |
|
"learning_rate": 5.117072191244584e-06, |
|
"loss": 0.41271429, |
|
"memory(GiB)": 14.16, |
|
"step": 10, |
|
"train_speed(iter/s)": 0.206768 |
|
}, |
|
{ |
|
"acc": 0.86857662, |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 17.546506881713867, |
|
"learning_rate": 6.018143876079656e-06, |
|
"loss": 0.39275663, |
|
"memory(GiB)": 14.16, |
|
"step": 15, |
|
"train_speed(iter/s)": 0.242483 |
|
}, |
|
{ |
|
"acc": 0.86033154, |
|
"epoch": 0.547945205479452, |
|
"grad_norm": 19.42036247253418, |
|
"learning_rate": 6.65746441078722e-06, |
|
"loss": 0.42753201, |
|
"memory(GiB)": 14.16, |
|
"step": 20, |
|
"train_speed(iter/s)": 0.263319 |
|
}, |
|
{ |
|
"acc": 0.8581007, |
|
"epoch": 0.684931506849315, |
|
"grad_norm": 17.666065216064453, |
|
"learning_rate": 7.153359943403896e-06, |
|
"loss": 0.43485794, |
|
"memory(GiB)": 14.16, |
|
"step": 25, |
|
"train_speed(iter/s)": 0.277966 |
|
}, |
|
{ |
|
"acc": 0.85695076, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 12.121685028076172, |
|
"learning_rate": 7.558536095622292e-06, |
|
"loss": 0.42965946, |
|
"memory(GiB)": 14.16, |
|
"step": 30, |
|
"train_speed(iter/s)": 0.289626 |
|
}, |
|
{ |
|
"acc": 0.85145502, |
|
"epoch": 0.958904109589041, |
|
"grad_norm": 12.511621475219727, |
|
"learning_rate": 7.901107651134205e-06, |
|
"loss": 0.45605674, |
|
"memory(GiB)": 14.16, |
|
"step": 35, |
|
"train_speed(iter/s)": 0.297359 |
|
}, |
|
{ |
|
"acc": 0.88358593, |
|
"epoch": 1.095890410958904, |
|
"grad_norm": 10.614742279052734, |
|
"learning_rate": 8.197856630329855e-06, |
|
"loss": 0.36642389, |
|
"memory(GiB)": 14.16, |
|
"step": 40, |
|
"train_speed(iter/s)": 0.304027 |
|
}, |
|
{ |
|
"acc": 0.87005787, |
|
"epoch": 1.2328767123287672, |
|
"grad_norm": 12.311365127563477, |
|
"learning_rate": 8.459607780457364e-06, |
|
"loss": 0.43741484, |
|
"memory(GiB)": 14.16, |
|
"step": 45, |
|
"train_speed(iter/s)": 0.309632 |
|
}, |
|
{ |
|
"acc": 0.87604589, |
|
"epoch": 1.36986301369863, |
|
"grad_norm": 13.369311332702637, |
|
"learning_rate": 8.693752162946532e-06, |
|
"loss": 0.39061749, |
|
"memory(GiB)": 14.16, |
|
"step": 50, |
|
"train_speed(iter/s)": 0.313752 |
|
}, |
|
{ |
|
"acc": 0.89183826, |
|
"epoch": 1.5068493150684932, |
|
"grad_norm": 13.052772521972656, |
|
"learning_rate": 8.905561521090629e-06, |
|
"loss": 0.34727774, |
|
"memory(GiB)": 14.16, |
|
"step": 55, |
|
"train_speed(iter/s)": 0.317026 |
|
}, |
|
{ |
|
"acc": 0.88800755, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 14.654471397399902, |
|
"learning_rate": 9.098928315164927e-06, |
|
"loss": 0.34038644, |
|
"memory(GiB)": 14.16, |
|
"step": 60, |
|
"train_speed(iter/s)": 0.320617 |
|
}, |
|
{ |
|
"acc": 0.88333483, |
|
"epoch": 1.7808219178082192, |
|
"grad_norm": 12.527990341186523, |
|
"learning_rate": 9.27680852241303e-06, |
|
"loss": 0.34145203, |
|
"memory(GiB)": 14.16, |
|
"step": 65, |
|
"train_speed(iter/s)": 0.323067 |
|
}, |
|
{ |
|
"acc": 0.88713379, |
|
"epoch": 1.9178082191780823, |
|
"grad_norm": 13.173103332519531, |
|
"learning_rate": 9.441499870676842e-06, |
|
"loss": 0.3459826, |
|
"memory(GiB)": 14.16, |
|
"step": 70, |
|
"train_speed(iter/s)": 0.326063 |
|
}, |
|
{ |
|
"acc": 0.90485744, |
|
"epoch": 2.0547945205479454, |
|
"grad_norm": 11.286486625671387, |
|
"learning_rate": 9.594823847781604e-06, |
|
"loss": 0.29746895, |
|
"memory(GiB)": 14.16, |
|
"step": 75, |
|
"train_speed(iter/s)": 0.328132 |
|
}, |
|
{ |
|
"acc": 0.91220999, |
|
"epoch": 2.191780821917808, |
|
"grad_norm": 10.608201026916504, |
|
"learning_rate": 9.73824884987249e-06, |
|
"loss": 0.27589982, |
|
"memory(GiB)": 14.16, |
|
"step": 80, |
|
"train_speed(iter/s)": 0.329693 |
|
}, |
|
{ |
|
"acc": 0.92394562, |
|
"epoch": 2.328767123287671, |
|
"grad_norm": 13.439018249511719, |
|
"learning_rate": 9.872975930033608e-06, |
|
"loss": 0.26322646, |
|
"memory(GiB)": 14.16, |
|
"step": 85, |
|
"train_speed(iter/s)": 0.331593 |
|
}, |
|
{ |
|
"acc": 0.91119957, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 12.91903018951416, |
|
"learning_rate": 1e-05, |
|
"loss": 0.26378374, |
|
"memory(GiB)": 14.16, |
|
"step": 90, |
|
"train_speed(iter/s)": 0.333498 |
|
}, |
|
{ |
|
"acc": 0.9276722, |
|
"epoch": 2.602739726027397, |
|
"grad_norm": 10.661258697509766, |
|
"learning_rate": 9.999789068686803e-06, |
|
"loss": 0.23127136, |
|
"memory(GiB)": 14.16, |
|
"step": 95, |
|
"train_speed(iter/s)": 0.335181 |
|
}, |
|
{ |
|
"acc": 0.94177589, |
|
"epoch": 2.73972602739726, |
|
"grad_norm": 8.819624900817871, |
|
"learning_rate": 9.999156292545797e-06, |
|
"loss": 0.21489761, |
|
"memory(GiB)": 14.16, |
|
"step": 100, |
|
"train_speed(iter/s)": 0.336761 |
|
}, |
|
{ |
|
"acc": 0.93883839, |
|
"epoch": 2.8767123287671232, |
|
"grad_norm": 9.24251937866211, |
|
"learning_rate": 9.998101724971245e-06, |
|
"loss": 0.20122993, |
|
"memory(GiB)": 14.16, |
|
"step": 105, |
|
"train_speed(iter/s)": 0.337843 |
|
}, |
|
{ |
|
"acc": 0.93721886, |
|
"epoch": 3.0136986301369864, |
|
"grad_norm": 6.485929012298584, |
|
"learning_rate": 9.996625454948572e-06, |
|
"loss": 0.19496574, |
|
"memory(GiB)": 14.16, |
|
"step": 110, |
|
"train_speed(iter/s)": 0.338939 |
|
}, |
|
{ |
|
"acc": 0.94114161, |
|
"epoch": 3.1506849315068495, |
|
"grad_norm": 9.10759449005127, |
|
"learning_rate": 9.99472760704687e-06, |
|
"loss": 0.20500426, |
|
"memory(GiB)": 14.16, |
|
"step": 115, |
|
"train_speed(iter/s)": 0.340322 |
|
}, |
|
{ |
|
"acc": 0.96779289, |
|
"epoch": 3.287671232876712, |
|
"grad_norm": 9.064125061035156, |
|
"learning_rate": 9.992408341408366e-06, |
|
"loss": 0.11549917, |
|
"memory(GiB)": 14.16, |
|
"step": 120, |
|
"train_speed(iter/s)": 0.341348 |
|
}, |
|
{ |
|
"acc": 0.93218994, |
|
"epoch": 3.4246575342465753, |
|
"grad_norm": 10.008238792419434, |
|
"learning_rate": 9.989667853734933e-06, |
|
"loss": 0.21996279, |
|
"memory(GiB)": 14.16, |
|
"step": 125, |
|
"train_speed(iter/s)": 0.342439 |
|
}, |
|
{ |
|
"acc": 0.93686333, |
|
"epoch": 3.5616438356164384, |
|
"grad_norm": 11.974565505981445, |
|
"learning_rate": 9.98650637527156e-06, |
|
"loss": 0.19973722, |
|
"memory(GiB)": 14.16, |
|
"step": 130, |
|
"train_speed(iter/s)": 0.343123 |
|
}, |
|
{ |
|
"acc": 0.95756645, |
|
"epoch": 3.6986301369863015, |
|
"grad_norm": 9.8711576461792, |
|
"learning_rate": 9.982924172786847e-06, |
|
"loss": 0.15214539, |
|
"memory(GiB)": 14.16, |
|
"step": 135, |
|
"train_speed(iter/s)": 0.34449 |
|
}, |
|
{ |
|
"acc": 0.95660496, |
|
"epoch": 3.8356164383561646, |
|
"grad_norm": 6.757988452911377, |
|
"learning_rate": 9.97892154855049e-06, |
|
"loss": 0.15905871, |
|
"memory(GiB)": 14.16, |
|
"step": 140, |
|
"train_speed(iter/s)": 0.345239 |
|
}, |
|
{ |
|
"acc": 0.95482464, |
|
"epoch": 3.9726027397260273, |
|
"grad_norm": 8.047441482543945, |
|
"learning_rate": 9.974498840307775e-06, |
|
"loss": 0.16302727, |
|
"memory(GiB)": 14.16, |
|
"step": 145, |
|
"train_speed(iter/s)": 0.345602 |
|
}, |
|
{ |
|
"acc": 0.94146061, |
|
"epoch": 4.109589041095891, |
|
"grad_norm": 7.961703777313232, |
|
"learning_rate": 9.96965642125109e-06, |
|
"loss": 0.19785479, |
|
"memory(GiB)": 14.16, |
|
"step": 150, |
|
"train_speed(iter/s)": 0.346007 |
|
}, |
|
{ |
|
"acc": 0.96698723, |
|
"epoch": 4.2465753424657535, |
|
"grad_norm": 6.472661972045898, |
|
"learning_rate": 9.964394699988415e-06, |
|
"loss": 0.11739849, |
|
"memory(GiB)": 14.16, |
|
"step": 155, |
|
"train_speed(iter/s)": 0.346863 |
|
}, |
|
{ |
|
"acc": 0.9542901, |
|
"epoch": 4.383561643835616, |
|
"grad_norm": 8.756787300109863, |
|
"learning_rate": 9.958714120508861e-06, |
|
"loss": 0.13702551, |
|
"memory(GiB)": 14.16, |
|
"step": 160, |
|
"train_speed(iter/s)": 0.348349 |
|
}, |
|
{ |
|
"acc": 0.95916128, |
|
"epoch": 4.52054794520548, |
|
"grad_norm": 9.755017280578613, |
|
"learning_rate": 9.952615162145197e-06, |
|
"loss": 0.13223737, |
|
"memory(GiB)": 14.16, |
|
"step": 165, |
|
"train_speed(iter/s)": 0.349345 |
|
}, |
|
{ |
|
"acc": 0.96206884, |
|
"epoch": 4.657534246575342, |
|
"grad_norm": 8.553181648254395, |
|
"learning_rate": 9.946098339533407e-06, |
|
"loss": 0.11991118, |
|
"memory(GiB)": 14.16, |
|
"step": 170, |
|
"train_speed(iter/s)": 0.349712 |
|
}, |
|
{ |
|
"acc": 0.96347275, |
|
"epoch": 4.794520547945205, |
|
"grad_norm": 7.194893836975098, |
|
"learning_rate": 9.93916420256926e-06, |
|
"loss": 0.10365121, |
|
"memory(GiB)": 14.16, |
|
"step": 175, |
|
"train_speed(iter/s)": 0.350314 |
|
}, |
|
{ |
|
"acc": 0.97044868, |
|
"epoch": 4.931506849315069, |
|
"grad_norm": 6.540927410125732, |
|
"learning_rate": 9.93181333636191e-06, |
|
"loss": 0.10110762, |
|
"memory(GiB)": 14.16, |
|
"step": 180, |
|
"train_speed(iter/s)": 0.350746 |
|
}, |
|
{ |
|
"acc": 0.97869854, |
|
"epoch": 5.068493150684931, |
|
"grad_norm": 6.64502477645874, |
|
"learning_rate": 9.924046361184535e-06, |
|
"loss": 0.06834425, |
|
"memory(GiB)": 14.16, |
|
"step": 185, |
|
"train_speed(iter/s)": 0.351047 |
|
}, |
|
{ |
|
"acc": 0.97149448, |
|
"epoch": 5.205479452054795, |
|
"grad_norm": 7.438776016235352, |
|
"learning_rate": 9.91586393242198e-06, |
|
"loss": 0.09642395, |
|
"memory(GiB)": 14.16, |
|
"step": 190, |
|
"train_speed(iter/s)": 0.351567 |
|
}, |
|
{ |
|
"acc": 0.96277952, |
|
"epoch": 5.342465753424658, |
|
"grad_norm": 9.334355354309082, |
|
"learning_rate": 9.907266740515464e-06, |
|
"loss": 0.10700824, |
|
"memory(GiB)": 14.16, |
|
"step": 195, |
|
"train_speed(iter/s)": 0.352169 |
|
}, |
|
{ |
|
"acc": 0.97186604, |
|
"epoch": 5.47945205479452, |
|
"grad_norm": 5.772711753845215, |
|
"learning_rate": 9.898255510904326e-06, |
|
"loss": 0.07952163, |
|
"memory(GiB)": 14.16, |
|
"step": 200, |
|
"train_speed(iter/s)": 0.352683 |
|
}, |
|
{ |
|
"acc": 0.98101072, |
|
"epoch": 5.616438356164384, |
|
"grad_norm": 9.092942237854004, |
|
"learning_rate": 9.888831003964803e-06, |
|
"loss": 0.06738672, |
|
"memory(GiB)": 14.16, |
|
"step": 205, |
|
"train_speed(iter/s)": 0.353043 |
|
}, |
|
{ |
|
"acc": 0.97831497, |
|
"epoch": 5.7534246575342465, |
|
"grad_norm": 8.003717422485352, |
|
"learning_rate": 9.878994014945866e-06, |
|
"loss": 0.06806564, |
|
"memory(GiB)": 14.16, |
|
"step": 210, |
|
"train_speed(iter/s)": 0.354182 |
|
}, |
|
{ |
|
"acc": 0.97665091, |
|
"epoch": 5.890410958904109, |
|
"grad_norm": 6.545485496520996, |
|
"learning_rate": 9.868745373902128e-06, |
|
"loss": 0.07062781, |
|
"memory(GiB)": 14.16, |
|
"step": 215, |
|
"train_speed(iter/s)": 0.354891 |
|
}, |
|
{ |
|
"acc": 0.97873678, |
|
"epoch": 6.027397260273973, |
|
"grad_norm": 4.454226493835449, |
|
"learning_rate": 9.85808594562379e-06, |
|
"loss": 0.07400095, |
|
"memory(GiB)": 14.16, |
|
"step": 220, |
|
"train_speed(iter/s)": 0.355094 |
|
}, |
|
{ |
|
"acc": 0.97500896, |
|
"epoch": 6.164383561643835, |
|
"grad_norm": 9.327370643615723, |
|
"learning_rate": 9.847016629563683e-06, |
|
"loss": 0.07909623, |
|
"memory(GiB)": 14.16, |
|
"step": 225, |
|
"train_speed(iter/s)": 0.355416 |
|
}, |
|
{ |
|
"acc": 0.97549095, |
|
"epoch": 6.301369863013699, |
|
"grad_norm": 7.767273426055908, |
|
"learning_rate": 9.835538359761359e-06, |
|
"loss": 0.08394684, |
|
"memory(GiB)": 14.16, |
|
"step": 230, |
|
"train_speed(iter/s)": 0.35587 |
|
}, |
|
{ |
|
"acc": 0.98198967, |
|
"epoch": 6.438356164383562, |
|
"grad_norm": 8.520513534545898, |
|
"learning_rate": 9.823652104764282e-06, |
|
"loss": 0.06493338, |
|
"memory(GiB)": 14.16, |
|
"step": 235, |
|
"train_speed(iter/s)": 0.356338 |
|
}, |
|
{ |
|
"acc": 0.98189783, |
|
"epoch": 6.575342465753424, |
|
"grad_norm": 6.741430282592773, |
|
"learning_rate": 9.811358867546099e-06, |
|
"loss": 0.06953114, |
|
"memory(GiB)": 14.16, |
|
"step": 240, |
|
"train_speed(iter/s)": 0.356559 |
|
}, |
|
{ |
|
"acc": 0.9792799, |
|
"epoch": 6.712328767123288, |
|
"grad_norm": 6.579135894775391, |
|
"learning_rate": 9.798659685422008e-06, |
|
"loss": 0.07183629, |
|
"memory(GiB)": 14.16, |
|
"step": 245, |
|
"train_speed(iter/s)": 0.357198 |
|
}, |
|
{ |
|
"acc": 0.97903948, |
|
"epoch": 6.8493150684931505, |
|
"grad_norm": 7.918185234069824, |
|
"learning_rate": 9.785555629961232e-06, |
|
"loss": 0.06570032, |
|
"memory(GiB)": 14.16, |
|
"step": 250, |
|
"train_speed(iter/s)": 0.35739 |
|
}, |
|
{ |
|
"acc": 0.98690357, |
|
"epoch": 6.986301369863014, |
|
"grad_norm": 4.936428546905518, |
|
"learning_rate": 9.772047806896599e-06, |
|
"loss": 0.04573858, |
|
"memory(GiB)": 14.16, |
|
"step": 255, |
|
"train_speed(iter/s)": 0.358183 |
|
}, |
|
{ |
|
"acc": 0.98013973, |
|
"epoch": 7.123287671232877, |
|
"grad_norm": 6.603614330291748, |
|
"learning_rate": 9.758137356031226e-06, |
|
"loss": 0.06317404, |
|
"memory(GiB)": 14.16, |
|
"step": 260, |
|
"train_speed(iter/s)": 0.358435 |
|
}, |
|
{ |
|
"acc": 0.98552742, |
|
"epoch": 7.260273972602739, |
|
"grad_norm": 5.6785173416137695, |
|
"learning_rate": 9.74382545114236e-06, |
|
"loss": 0.05590855, |
|
"memory(GiB)": 14.16, |
|
"step": 265, |
|
"train_speed(iter/s)": 0.359116 |
|
}, |
|
{ |
|
"acc": 0.98451328, |
|
"epoch": 7.397260273972603, |
|
"grad_norm": 6.470608711242676, |
|
"learning_rate": 9.729113299882324e-06, |
|
"loss": 0.05722108, |
|
"memory(GiB)": 14.16, |
|
"step": 270, |
|
"train_speed(iter/s)": 0.359102 |
|
}, |
|
{ |
|
"acc": 0.98782816, |
|
"epoch": 7.534246575342466, |
|
"grad_norm": 4.879244804382324, |
|
"learning_rate": 9.714002143676614e-06, |
|
"loss": 0.0392652, |
|
"memory(GiB)": 14.16, |
|
"step": 275, |
|
"train_speed(iter/s)": 0.359249 |
|
}, |
|
{ |
|
"acc": 0.98015614, |
|
"epoch": 7.671232876712329, |
|
"grad_norm": 5.897606372833252, |
|
"learning_rate": 9.69849325761915e-06, |
|
"loss": 0.0653078, |
|
"memory(GiB)": 14.16, |
|
"step": 280, |
|
"train_speed(iter/s)": 0.359463 |
|
}, |
|
{ |
|
"acc": 0.98269339, |
|
"epoch": 7.808219178082192, |
|
"grad_norm": 8.748714447021484, |
|
"learning_rate": 9.682587950364676e-06, |
|
"loss": 0.04879735, |
|
"memory(GiB)": 14.16, |
|
"step": 285, |
|
"train_speed(iter/s)": 0.359431 |
|
}, |
|
{ |
|
"acc": 0.99092007, |
|
"epoch": 7.945205479452055, |
|
"grad_norm": 4.962334156036377, |
|
"learning_rate": 9.666287564018344e-06, |
|
"loss": 0.03704912, |
|
"memory(GiB)": 14.16, |
|
"step": 290, |
|
"train_speed(iter/s)": 0.359385 |
|
}, |
|
{ |
|
"acc": 0.98640242, |
|
"epoch": 8.082191780821917, |
|
"grad_norm": 7.194764137268066, |
|
"learning_rate": 9.649593474022452e-06, |
|
"loss": 0.05298281, |
|
"memory(GiB)": 14.16, |
|
"step": 295, |
|
"train_speed(iter/s)": 0.359641 |
|
}, |
|
{ |
|
"acc": 0.98602715, |
|
"epoch": 8.219178082191782, |
|
"grad_norm": 7.44851541519165, |
|
"learning_rate": 9.632507089040402e-06, |
|
"loss": 0.04129619, |
|
"memory(GiB)": 14.16, |
|
"step": 300, |
|
"train_speed(iter/s)": 0.359864 |
|
}, |
|
{ |
|
"acc": 0.98549156, |
|
"epoch": 8.356164383561644, |
|
"grad_norm": 8.171492576599121, |
|
"learning_rate": 9.615029850837819e-06, |
|
"loss": 0.04942346, |
|
"memory(GiB)": 14.16, |
|
"step": 305, |
|
"train_speed(iter/s)": 0.359882 |
|
}, |
|
{ |
|
"acc": 0.98449697, |
|
"epoch": 8.493150684931507, |
|
"grad_norm": 6.328600883483887, |
|
"learning_rate": 9.597163234160894e-06, |
|
"loss": 0.05851363, |
|
"memory(GiB)": 14.16, |
|
"step": 310, |
|
"train_speed(iter/s)": 0.359848 |
|
}, |
|
{ |
|
"acc": 0.99007683, |
|
"epoch": 8.63013698630137, |
|
"grad_norm": 5.6946258544921875, |
|
"learning_rate": 9.57890874661196e-06, |
|
"loss": 0.03352974, |
|
"memory(GiB)": 14.16, |
|
"step": 315, |
|
"train_speed(iter/s)": 0.360126 |
|
}, |
|
{ |
|
"acc": 0.98694916, |
|
"epoch": 8.767123287671232, |
|
"grad_norm": 4.585356712341309, |
|
"learning_rate": 9.56026792852226e-06, |
|
"loss": 0.04656056, |
|
"memory(GiB)": 14.16, |
|
"step": 320, |
|
"train_speed(iter/s)": 0.360741 |
|
}, |
|
{ |
|
"acc": 0.98873882, |
|
"epoch": 8.904109589041095, |
|
"grad_norm": 7.50302791595459, |
|
"learning_rate": 9.541242352821985e-06, |
|
"loss": 0.03722157, |
|
"memory(GiB)": 14.16, |
|
"step": 325, |
|
"train_speed(iter/s)": 0.360963 |
|
}, |
|
{ |
|
"acc": 0.98872223, |
|
"epoch": 9.04109589041096, |
|
"grad_norm": 8.641664505004883, |
|
"learning_rate": 9.52183362490754e-06, |
|
"loss": 0.04286454, |
|
"memory(GiB)": 14.16, |
|
"step": 330, |
|
"train_speed(iter/s)": 0.361166 |
|
}, |
|
{ |
|
"acc": 0.99097099, |
|
"epoch": 9.178082191780822, |
|
"grad_norm": 5.386726379394531, |
|
"learning_rate": 9.502043382506082e-06, |
|
"loss": 0.02755214, |
|
"memory(GiB)": 14.16, |
|
"step": 335, |
|
"train_speed(iter/s)": 0.361519 |
|
}, |
|
{ |
|
"acc": 0.99000244, |
|
"epoch": 9.315068493150685, |
|
"grad_norm": 4.545804977416992, |
|
"learning_rate": 9.481873295537333e-06, |
|
"loss": 0.04025009, |
|
"memory(GiB)": 14.16, |
|
"step": 340, |
|
"train_speed(iter/s)": 0.361469 |
|
}, |
|
{ |
|
"acc": 0.99092007, |
|
"epoch": 9.452054794520548, |
|
"grad_norm": 8.062037467956543, |
|
"learning_rate": 9.461325065972662e-06, |
|
"loss": 0.04117663, |
|
"memory(GiB)": 14.16, |
|
"step": 345, |
|
"train_speed(iter/s)": 0.361763 |
|
}, |
|
{ |
|
"acc": 0.99032946, |
|
"epoch": 9.58904109589041, |
|
"grad_norm": 5.639761924743652, |
|
"learning_rate": 9.440400427691476e-06, |
|
"loss": 0.02993804, |
|
"memory(GiB)": 14.16, |
|
"step": 350, |
|
"train_speed(iter/s)": 0.361739 |
|
}, |
|
{ |
|
"acc": 0.98722763, |
|
"epoch": 9.726027397260275, |
|
"grad_norm": 5.573471546173096, |
|
"learning_rate": 9.419101146334908e-06, |
|
"loss": 0.04273846, |
|
"memory(GiB)": 14.16, |
|
"step": 355, |
|
"train_speed(iter/s)": 0.361815 |
|
}, |
|
{ |
|
"acc": 0.98906002, |
|
"epoch": 9.863013698630137, |
|
"grad_norm": 5.205529689788818, |
|
"learning_rate": 9.397429019156841e-06, |
|
"loss": 0.04300301, |
|
"memory(GiB)": 14.16, |
|
"step": 360, |
|
"train_speed(iter/s)": 0.361905 |
|
}, |
|
{ |
|
"acc": 0.9917551, |
|
"epoch": 10.0, |
|
"grad_norm": 5.506292343139648, |
|
"learning_rate": 9.375385874872248e-06, |
|
"loss": 0.03177897, |
|
"memory(GiB)": 14.16, |
|
"step": 365, |
|
"train_speed(iter/s)": 0.361986 |
|
}, |
|
{ |
|
"acc": 0.99265499, |
|
"epoch": 10.136986301369863, |
|
"grad_norm": 5.0279035568237305, |
|
"learning_rate": 9.352973573502874e-06, |
|
"loss": 0.03047763, |
|
"memory(GiB)": 14.16, |
|
"step": 370, |
|
"train_speed(iter/s)": 0.3619 |
|
}, |
|
{ |
|
"acc": 0.99043932, |
|
"epoch": 10.273972602739725, |
|
"grad_norm": 7.282947540283203, |
|
"learning_rate": 9.330194006220301e-06, |
|
"loss": 0.03883767, |
|
"memory(GiB)": 14.16, |
|
"step": 375, |
|
"train_speed(iter/s)": 0.3619 |
|
}, |
|
{ |
|
"acc": 0.99266891, |
|
"epoch": 10.41095890410959, |
|
"grad_norm": 6.475697040557861, |
|
"learning_rate": 9.307049095186364e-06, |
|
"loss": 0.03223814, |
|
"memory(GiB)": 14.16, |
|
"step": 380, |
|
"train_speed(iter/s)": 0.361879 |
|
}, |
|
{ |
|
"acc": 0.98734608, |
|
"epoch": 10.547945205479452, |
|
"grad_norm": 2.9214179515838623, |
|
"learning_rate": 9.28354079339095e-06, |
|
"loss": 0.04384069, |
|
"memory(GiB)": 14.16, |
|
"step": 385, |
|
"train_speed(iter/s)": 0.361963 |
|
}, |
|
{ |
|
"acc": 0.99313297, |
|
"epoch": 10.684931506849315, |
|
"grad_norm": 4.704584121704102, |
|
"learning_rate": 9.259671084487218e-06, |
|
"loss": 0.02514983, |
|
"memory(GiB)": 14.16, |
|
"step": 390, |
|
"train_speed(iter/s)": 0.361864 |
|
}, |
|
{ |
|
"acc": 0.990868, |
|
"epoch": 10.821917808219178, |
|
"grad_norm": 4.704314231872559, |
|
"learning_rate": 9.235441982624191e-06, |
|
"loss": 0.02952582, |
|
"memory(GiB)": 14.16, |
|
"step": 395, |
|
"train_speed(iter/s)": 0.36222 |
|
}, |
|
{ |
|
"acc": 0.99545174, |
|
"epoch": 10.95890410958904, |
|
"grad_norm": 4.499762058258057, |
|
"learning_rate": 9.210855532276836e-06, |
|
"loss": 0.01564558, |
|
"memory(GiB)": 14.16, |
|
"step": 400, |
|
"train_speed(iter/s)": 0.362296 |
|
}, |
|
{ |
|
"acc": 0.9944725, |
|
"epoch": 11.095890410958905, |
|
"grad_norm": 7.498542785644531, |
|
"learning_rate": 9.185913808073513e-06, |
|
"loss": 0.02198397, |
|
"memory(GiB)": 14.16, |
|
"step": 405, |
|
"train_speed(iter/s)": 0.362254 |
|
}, |
|
{ |
|
"acc": 0.98989115, |
|
"epoch": 11.232876712328768, |
|
"grad_norm": 3.8143303394317627, |
|
"learning_rate": 9.16061891462094e-06, |
|
"loss": 0.0327835, |
|
"memory(GiB)": 14.16, |
|
"step": 410, |
|
"train_speed(iter/s)": 0.362508 |
|
}, |
|
{ |
|
"acc": 0.99730492, |
|
"epoch": 11.36986301369863, |
|
"grad_norm": 3.9523301124572754, |
|
"learning_rate": 9.134972986326595e-06, |
|
"loss": 0.01258684, |
|
"memory(GiB)": 14.16, |
|
"step": 415, |
|
"train_speed(iter/s)": 0.362542 |
|
}, |
|
{ |
|
"acc": 0.99241066, |
|
"epoch": 11.506849315068493, |
|
"grad_norm": 6.334254741668701, |
|
"learning_rate": 9.108978187218613e-06, |
|
"loss": 0.03454852, |
|
"memory(GiB)": 14.16, |
|
"step": 420, |
|
"train_speed(iter/s)": 0.362651 |
|
}, |
|
{ |
|
"acc": 0.99217281, |
|
"epoch": 11.643835616438356, |
|
"grad_norm": 6.370650291442871, |
|
"learning_rate": 9.08263671076319e-06, |
|
"loss": 0.03252776, |
|
"memory(GiB)": 14.16, |
|
"step": 425, |
|
"train_speed(iter/s)": 0.362697 |
|
}, |
|
{ |
|
"acc": 0.98822365, |
|
"epoch": 11.780821917808218, |
|
"grad_norm": 3.232943534851074, |
|
"learning_rate": 9.05595077967948e-06, |
|
"loss": 0.04269191, |
|
"memory(GiB)": 14.16, |
|
"step": 430, |
|
"train_speed(iter/s)": 0.362683 |
|
}, |
|
{ |
|
"acc": 0.99225941, |
|
"epoch": 11.917808219178083, |
|
"grad_norm": 4.822254180908203, |
|
"learning_rate": 9.028922645752062e-06, |
|
"loss": 0.02760777, |
|
"memory(GiB)": 14.16, |
|
"step": 435, |
|
"train_speed(iter/s)": 0.362655 |
|
}, |
|
{ |
|
"acc": 0.9954505, |
|
"epoch": 12.054794520547945, |
|
"grad_norm": 3.2365639209747314, |
|
"learning_rate": 9.00155458964091e-06, |
|
"loss": 0.01916433, |
|
"memory(GiB)": 14.16, |
|
"step": 440, |
|
"train_speed(iter/s)": 0.3626 |
|
}, |
|
{ |
|
"acc": 0.99313316, |
|
"epoch": 12.191780821917808, |
|
"grad_norm": 3.7720203399658203, |
|
"learning_rate": 8.973848920688967e-06, |
|
"loss": 0.03937365, |
|
"memory(GiB)": 14.16, |
|
"step": 445, |
|
"train_speed(iter/s)": 0.362571 |
|
}, |
|
{ |
|
"acc": 0.99251375, |
|
"epoch": 12.32876712328767, |
|
"grad_norm": 4.069283485412598, |
|
"learning_rate": 8.94580797672727e-06, |
|
"loss": 0.02898619, |
|
"memory(GiB)": 14.16, |
|
"step": 450, |
|
"train_speed(iter/s)": 0.362736 |
|
}, |
|
{ |
|
"acc": 0.99321842, |
|
"epoch": 12.465753424657533, |
|
"grad_norm": 0.9725887775421143, |
|
"learning_rate": 8.917434123877686e-06, |
|
"loss": 0.02265764, |
|
"memory(GiB)": 14.16, |
|
"step": 455, |
|
"train_speed(iter/s)": 0.362774 |
|
}, |
|
{ |
|
"acc": 0.99323349, |
|
"epoch": 12.602739726027398, |
|
"grad_norm": 4.508816719055176, |
|
"learning_rate": 8.888729756353248e-06, |
|
"loss": 0.02885826, |
|
"memory(GiB)": 14.16, |
|
"step": 460, |
|
"train_speed(iter/s)": 0.362813 |
|
}, |
|
{ |
|
"acc": 0.99727192, |
|
"epoch": 12.73972602739726, |
|
"grad_norm": 2.479684352874756, |
|
"learning_rate": 8.859697296256147e-06, |
|
"loss": 0.01712638, |
|
"memory(GiB)": 14.16, |
|
"step": 465, |
|
"train_speed(iter/s)": 0.362768 |
|
}, |
|
{ |
|
"acc": 0.99502192, |
|
"epoch": 12.876712328767123, |
|
"grad_norm": 1.5512564182281494, |
|
"learning_rate": 8.83033919337333e-06, |
|
"loss": 0.022619, |
|
"memory(GiB)": 14.16, |
|
"step": 470, |
|
"train_speed(iter/s)": 0.362919 |
|
}, |
|
{ |
|
"acc": 0.99404383, |
|
"epoch": 13.013698630136986, |
|
"grad_norm": 5.0392680168151855, |
|
"learning_rate": 8.800657924969805e-06, |
|
"loss": 0.0215001, |
|
"memory(GiB)": 14.16, |
|
"step": 475, |
|
"train_speed(iter/s)": 0.362773 |
|
}, |
|
{ |
|
"acc": 0.99045715, |
|
"epoch": 13.150684931506849, |
|
"grad_norm": 3.143148183822632, |
|
"learning_rate": 8.770655995579593e-06, |
|
"loss": 0.02810604, |
|
"memory(GiB)": 14.16, |
|
"step": 480, |
|
"train_speed(iter/s)": 0.362874 |
|
}, |
|
{ |
|
"acc": 0.99417992, |
|
"epoch": 13.287671232876713, |
|
"grad_norm": 2.0431466102600098, |
|
"learning_rate": 8.740335936794398e-06, |
|
"loss": 0.02953114, |
|
"memory(GiB)": 14.16, |
|
"step": 485, |
|
"train_speed(iter/s)": 0.362814 |
|
}, |
|
{ |
|
"acc": 0.99732151, |
|
"epoch": 13.424657534246576, |
|
"grad_norm": 2.4842429161071777, |
|
"learning_rate": 8.709700307049991e-06, |
|
"loss": 0.01085737, |
|
"memory(GiB)": 14.16, |
|
"step": 490, |
|
"train_speed(iter/s)": 0.362739 |
|
}, |
|
{ |
|
"acc": 0.99217415, |
|
"epoch": 13.561643835616438, |
|
"grad_norm": 4.454080581665039, |
|
"learning_rate": 8.678751691410323e-06, |
|
"loss": 0.02852642, |
|
"memory(GiB)": 14.16, |
|
"step": 495, |
|
"train_speed(iter/s)": 0.363042 |
|
}, |
|
{ |
|
"acc": 0.99452591, |
|
"epoch": 13.698630136986301, |
|
"grad_norm": 6.032941818237305, |
|
"learning_rate": 8.647492701349395e-06, |
|
"loss": 0.02294705, |
|
"memory(GiB)": 14.16, |
|
"step": 500, |
|
"train_speed(iter/s)": 0.363179 |
|
}, |
|
{ |
|
"epoch": 13.698630136986301, |
|
"eval_acc": 0.3818755593383692, |
|
"eval_loss": 4.793926239013672, |
|
"eval_runtime": 2033.163, |
|
"eval_samples_per_second": 15.751, |
|
"eval_steps_per_second": 1.969, |
|
"step": 500 |
|
}, |
|
{ |
|
"acc": 0.99273891, |
|
"epoch": 13.835616438356164, |
|
"grad_norm": 7.570253849029541, |
|
"learning_rate": 8.615925974530906e-06, |
|
"loss": 0.03025962, |
|
"memory(GiB)": 14.16, |
|
"step": 505, |
|
"train_speed(iter/s)": 0.146499 |
|
}, |
|
{ |
|
"acc": 0.99452457, |
|
"epoch": 13.972602739726028, |
|
"grad_norm": 0.6901392936706543, |
|
"learning_rate": 8.584054174585673e-06, |
|
"loss": 0.01943414, |
|
"memory(GiB)": 14.16, |
|
"step": 510, |
|
"train_speed(iter/s)": 0.147597 |
|
}, |
|
{ |
|
"acc": 0.99586115, |
|
"epoch": 14.10958904109589, |
|
"grad_norm": 2.8410799503326416, |
|
"learning_rate": 8.551879990886881e-06, |
|
"loss": 0.02195611, |
|
"memory(GiB)": 14.16, |
|
"step": 515, |
|
"train_speed(iter/s)": 0.148679 |
|
}, |
|
{ |
|
"acc": 0.99596558, |
|
"epoch": 14.246575342465754, |
|
"grad_norm": 1.6700148582458496, |
|
"learning_rate": 8.519406138323145e-06, |
|
"loss": 0.01128972, |
|
"memory(GiB)": 14.16, |
|
"step": 520, |
|
"train_speed(iter/s)": 0.149765 |
|
}, |
|
{ |
|
"acc": 0.99503975, |
|
"epoch": 14.383561643835616, |
|
"grad_norm": 1.0917117595672607, |
|
"learning_rate": 8.486635357069431e-06, |
|
"loss": 0.01859367, |
|
"memory(GiB)": 14.16, |
|
"step": 525, |
|
"train_speed(iter/s)": 0.15087 |
|
}, |
|
{ |
|
"acc": 0.99261799, |
|
"epoch": 14.520547945205479, |
|
"grad_norm": 7.631021022796631, |
|
"learning_rate": 8.45357041235583e-06, |
|
"loss": 0.02078509, |
|
"memory(GiB)": 14.16, |
|
"step": 530, |
|
"train_speed(iter/s)": 0.151949 |
|
}, |
|
{ |
|
"acc": 0.99308357, |
|
"epoch": 14.657534246575342, |
|
"grad_norm": 3.847642421722412, |
|
"learning_rate": 8.42021409423423e-06, |
|
"loss": 0.02047177, |
|
"memory(GiB)": 14.16, |
|
"step": 535, |
|
"train_speed(iter/s)": 0.153023 |
|
}, |
|
{ |
|
"acc": 0.99270458, |
|
"epoch": 14.794520547945206, |
|
"grad_norm": 6.042537689208984, |
|
"learning_rate": 8.386569217342893e-06, |
|
"loss": 0.0270274, |
|
"memory(GiB)": 14.16, |
|
"step": 540, |
|
"train_speed(iter/s)": 0.154086 |
|
}, |
|
{ |
|
"acc": 0.99546833, |
|
"epoch": 14.931506849315069, |
|
"grad_norm": 4.633887767791748, |
|
"learning_rate": 8.352638620668941e-06, |
|
"loss": 0.01502355, |
|
"memory(GiB)": 14.16, |
|
"step": 545, |
|
"train_speed(iter/s)": 0.155151 |
|
}, |
|
{ |
|
"acc": 0.99634466, |
|
"epoch": 15.068493150684931, |
|
"grad_norm": 1.901209831237793, |
|
"learning_rate": 8.318425167308806e-06, |
|
"loss": 0.01356835, |
|
"memory(GiB)": 14.16, |
|
"step": 550, |
|
"train_speed(iter/s)": 0.156214 |
|
}, |
|
{ |
|
"acc": 0.99639549, |
|
"epoch": 15.205479452054794, |
|
"grad_norm": 4.843277931213379, |
|
"learning_rate": 8.28393174422665e-06, |
|
"loss": 0.01601259, |
|
"memory(GiB)": 14.16, |
|
"step": 555, |
|
"train_speed(iter/s)": 0.157262 |
|
}, |
|
{ |
|
"acc": 0.99320316, |
|
"epoch": 15.342465753424657, |
|
"grad_norm": 5.583487033843994, |
|
"learning_rate": 8.249161262010735e-06, |
|
"loss": 0.01526148, |
|
"memory(GiB)": 14.16, |
|
"step": 560, |
|
"train_speed(iter/s)": 0.158308 |
|
}, |
|
{ |
|
"acc": 0.99721832, |
|
"epoch": 15.479452054794521, |
|
"grad_norm": 5.734185218811035, |
|
"learning_rate": 8.214116654627853e-06, |
|
"loss": 0.01092491, |
|
"memory(GiB)": 14.16, |
|
"step": 565, |
|
"train_speed(iter/s)": 0.159373 |
|
}, |
|
{ |
|
"acc": 0.99818001, |
|
"epoch": 15.616438356164384, |
|
"grad_norm": 2.6671762466430664, |
|
"learning_rate": 8.178800879175737e-06, |
|
"loss": 0.00814181, |
|
"memory(GiB)": 14.16, |
|
"step": 570, |
|
"train_speed(iter/s)": 0.160399 |
|
}, |
|
{ |
|
"acc": 0.99492016, |
|
"epoch": 15.753424657534246, |
|
"grad_norm": 4.252832889556885, |
|
"learning_rate": 8.143216915633535e-06, |
|
"loss": 0.01607218, |
|
"memory(GiB)": 14.16, |
|
"step": 575, |
|
"train_speed(iter/s)": 0.161443 |
|
}, |
|
{ |
|
"acc": 0.9963459, |
|
"epoch": 15.89041095890411, |
|
"grad_norm": 2.7702836990356445, |
|
"learning_rate": 8.107367766610379e-06, |
|
"loss": 0.01704216, |
|
"memory(GiB)": 14.16, |
|
"step": 580, |
|
"train_speed(iter/s)": 0.162459 |
|
}, |
|
{ |
|
"acc": 0.99641209, |
|
"epoch": 16.027397260273972, |
|
"grad_norm": 3.121049404144287, |
|
"learning_rate": 8.071256457091995e-06, |
|
"loss": 0.01695579, |
|
"memory(GiB)": 14.16, |
|
"step": 585, |
|
"train_speed(iter/s)": 0.163471 |
|
}, |
|
{ |
|
"acc": 0.99682541, |
|
"epoch": 16.164383561643834, |
|
"grad_norm": 3.980106830596924, |
|
"learning_rate": 8.03488603418547e-06, |
|
"loss": 0.01948266, |
|
"memory(GiB)": 14.16, |
|
"step": 590, |
|
"train_speed(iter/s)": 0.164479 |
|
}, |
|
{ |
|
"acc": 0.99080048, |
|
"epoch": 16.301369863013697, |
|
"grad_norm": 4.650881290435791, |
|
"learning_rate": 7.99825956686213e-06, |
|
"loss": 0.02414289, |
|
"memory(GiB)": 14.16, |
|
"step": 595, |
|
"train_speed(iter/s)": 0.16549 |
|
}, |
|
{ |
|
"acc": 0.99316874, |
|
"epoch": 16.438356164383563, |
|
"grad_norm": 3.7769477367401123, |
|
"learning_rate": 7.96138014569857e-06, |
|
"loss": 0.02379684, |
|
"memory(GiB)": 14.16, |
|
"step": 600, |
|
"train_speed(iter/s)": 0.166493 |
|
}, |
|
{ |
|
"acc": 0.99821434, |
|
"epoch": 16.575342465753426, |
|
"grad_norm": 2.486539363861084, |
|
"learning_rate": 7.924250882615874e-06, |
|
"loss": 0.01166953, |
|
"memory(GiB)": 14.16, |
|
"step": 605, |
|
"train_speed(iter/s)": 0.167483 |
|
}, |
|
{ |
|
"acc": 0.99491873, |
|
"epoch": 16.71232876712329, |
|
"grad_norm": 0.6995792984962463, |
|
"learning_rate": 7.886874910617037e-06, |
|
"loss": 0.01726856, |
|
"memory(GiB)": 14.16, |
|
"step": 610, |
|
"train_speed(iter/s)": 0.168479 |
|
}, |
|
{ |
|
"acc": 0.99727192, |
|
"epoch": 16.84931506849315, |
|
"grad_norm": 1.6550129652023315, |
|
"learning_rate": 7.849255383522576e-06, |
|
"loss": 0.0158612, |
|
"memory(GiB)": 14.16, |
|
"step": 615, |
|
"train_speed(iter/s)": 0.169527 |
|
}, |
|
{ |
|
"acc": 0.99721966, |
|
"epoch": 16.986301369863014, |
|
"grad_norm": 2.894073724746704, |
|
"learning_rate": 7.811395475704436e-06, |
|
"loss": 0.01161546, |
|
"memory(GiB)": 14.16, |
|
"step": 620, |
|
"train_speed(iter/s)": 0.170515 |
|
}, |
|
{ |
|
"acc": 0.99818001, |
|
"epoch": 17.123287671232877, |
|
"grad_norm": 2.339505910873413, |
|
"learning_rate": 7.773298381818106e-06, |
|
"loss": 0.00709306, |
|
"memory(GiB)": 14.16, |
|
"step": 625, |
|
"train_speed(iter/s)": 0.171471 |
|
}, |
|
{ |
|
"acc": 0.997717, |
|
"epoch": 17.26027397260274, |
|
"grad_norm": 2.1085383892059326, |
|
"learning_rate": 7.734967316533076e-06, |
|
"loss": 0.00879358, |
|
"memory(GiB)": 14.16, |
|
"step": 630, |
|
"train_speed(iter/s)": 0.172477 |
|
}, |
|
{ |
|
"acc": 0.99593258, |
|
"epoch": 17.397260273972602, |
|
"grad_norm": 3.778745174407959, |
|
"learning_rate": 7.696405514261554e-06, |
|
"loss": 0.01262949, |
|
"memory(GiB)": 14.16, |
|
"step": 635, |
|
"train_speed(iter/s)": 0.173456 |
|
}, |
|
{ |
|
"acc": 0.99641209, |
|
"epoch": 17.534246575342465, |
|
"grad_norm": 4.980679512023926, |
|
"learning_rate": 7.657616228885571e-06, |
|
"loss": 0.00957234, |
|
"memory(GiB)": 14.16, |
|
"step": 640, |
|
"train_speed(iter/s)": 0.174442 |
|
}, |
|
{ |
|
"acc": 0.99673891, |
|
"epoch": 17.671232876712327, |
|
"grad_norm": 1.6658488512039185, |
|
"learning_rate": 7.618602733482395e-06, |
|
"loss": 0.01483861, |
|
"memory(GiB)": 14.16, |
|
"step": 645, |
|
"train_speed(iter/s)": 0.1754 |
|
}, |
|
{ |
|
"acc": 0.995401, |
|
"epoch": 17.80821917808219, |
|
"grad_norm": 7.899285793304443, |
|
"learning_rate": 7.579368320048354e-06, |
|
"loss": 0.02291541, |
|
"memory(GiB)": 14.16, |
|
"step": 650, |
|
"train_speed(iter/s)": 0.176359 |
|
}, |
|
{ |
|
"acc": 0.99588165, |
|
"epoch": 17.945205479452056, |
|
"grad_norm": 4.884225368499756, |
|
"learning_rate": 7.539916299221047e-06, |
|
"loss": 0.0132565, |
|
"memory(GiB)": 14.16, |
|
"step": 655, |
|
"train_speed(iter/s)": 0.177313 |
|
}, |
|
{ |
|
"acc": 0.99720192, |
|
"epoch": 18.08219178082192, |
|
"grad_norm": 1.3362199068069458, |
|
"learning_rate": 7.50025e-06, |
|
"loss": 0.01240759, |
|
"memory(GiB)": 14.16, |
|
"step": 660, |
|
"train_speed(iter/s)": 0.178257 |
|
}, |
|
{ |
|
"acc": 0.99860992, |
|
"epoch": 18.21917808219178, |
|
"grad_norm": 0.9003859758377075, |
|
"learning_rate": 7.4603727694657576e-06, |
|
"loss": 0.00468392, |
|
"memory(GiB)": 14.16, |
|
"step": 665, |
|
"train_speed(iter/s)": 0.179216 |
|
}, |
|
{ |
|
"acc": 0.99587898, |
|
"epoch": 18.356164383561644, |
|
"grad_norm": 2.7398738861083984, |
|
"learning_rate": 7.420287972497446e-06, |
|
"loss": 0.01100588, |
|
"memory(GiB)": 14.16, |
|
"step": 670, |
|
"train_speed(iter/s)": 0.180158 |
|
}, |
|
{ |
|
"acc": 0.99493923, |
|
"epoch": 18.493150684931507, |
|
"grad_norm": 5.460540294647217, |
|
"learning_rate": 7.3799989914888506e-06, |
|
"loss": 0.01662439, |
|
"memory(GiB)": 14.16, |
|
"step": 675, |
|
"train_speed(iter/s)": 0.181127 |
|
}, |
|
{ |
|
"acc": 0.99722099, |
|
"epoch": 18.63013698630137, |
|
"grad_norm": 0.33460837602615356, |
|
"learning_rate": 7.3395092260630015e-06, |
|
"loss": 0.00906119, |
|
"memory(GiB)": 14.16, |
|
"step": 680, |
|
"train_speed(iter/s)": 0.182049 |
|
}, |
|
{ |
|
"acc": 0.99589815, |
|
"epoch": 18.767123287671232, |
|
"grad_norm": 4.61140251159668, |
|
"learning_rate": 7.298822092785316e-06, |
|
"loss": 0.0160338, |
|
"memory(GiB)": 14.16, |
|
"step": 685, |
|
"train_speed(iter/s)": 0.182979 |
|
}, |
|
{ |
|
"acc": 0.99541874, |
|
"epoch": 18.904109589041095, |
|
"grad_norm": 1.5101581811904907, |
|
"learning_rate": 7.257941024875293e-06, |
|
"loss": 0.01577311, |
|
"memory(GiB)": 14.16, |
|
"step": 690, |
|
"train_speed(iter/s)": 0.183925 |
|
}, |
|
{ |
|
"acc": 0.9977005, |
|
"epoch": 19.041095890410958, |
|
"grad_norm": 2.02103853225708, |
|
"learning_rate": 7.216869471916828e-06, |
|
"loss": 0.00827418, |
|
"memory(GiB)": 14.16, |
|
"step": 695, |
|
"train_speed(iter/s)": 0.184826 |
|
}, |
|
{ |
|
"acc": 0.99538565, |
|
"epoch": 19.17808219178082, |
|
"grad_norm": 4.640865325927734, |
|
"learning_rate": 7.175610899567126e-06, |
|
"loss": 0.02137535, |
|
"memory(GiB)": 14.16, |
|
"step": 700, |
|
"train_speed(iter/s)": 0.185756 |
|
}, |
|
{ |
|
"acc": 0.99816341, |
|
"epoch": 19.315068493150687, |
|
"grad_norm": 2.2678844928741455, |
|
"learning_rate": 7.1341687892642705e-06, |
|
"loss": 0.01489109, |
|
"memory(GiB)": 14.16, |
|
"step": 705, |
|
"train_speed(iter/s)": 0.186685 |
|
}, |
|
{ |
|
"acc": 0.997717, |
|
"epoch": 19.45205479452055, |
|
"grad_norm": 8.91321086883545, |
|
"learning_rate": 7.092546637933454e-06, |
|
"loss": 0.00950522, |
|
"memory(GiB)": 14.16, |
|
"step": 710, |
|
"train_speed(iter/s)": 0.187598 |
|
}, |
|
{ |
|
"acc": 0.99584599, |
|
"epoch": 19.589041095890412, |
|
"grad_norm": 3.019415855407715, |
|
"learning_rate": 7.0507479576919026e-06, |
|
"loss": 0.01135417, |
|
"memory(GiB)": 14.16, |
|
"step": 715, |
|
"train_speed(iter/s)": 0.188517 |
|
}, |
|
{ |
|
"acc": 0.99723749, |
|
"epoch": 19.726027397260275, |
|
"grad_norm": 3.8561668395996094, |
|
"learning_rate": 7.0087762755525214e-06, |
|
"loss": 0.00678846, |
|
"memory(GiB)": 14.16, |
|
"step": 720, |
|
"train_speed(iter/s)": 0.18942 |
|
}, |
|
{ |
|
"acc": 0.99725533, |
|
"epoch": 19.863013698630137, |
|
"grad_norm": 0.6471136212348938, |
|
"learning_rate": 6.966635133126286e-06, |
|
"loss": 0.01252564, |
|
"memory(GiB)": 14.16, |
|
"step": 725, |
|
"train_speed(iter/s)": 0.1903 |
|
}, |
|
{ |
|
"acc": 0.99634466, |
|
"epoch": 20.0, |
|
"grad_norm": 3.774871587753296, |
|
"learning_rate": 6.924328086323392e-06, |
|
"loss": 0.01890204, |
|
"memory(GiB)": 14.16, |
|
"step": 730, |
|
"train_speed(iter/s)": 0.191223 |
|
}, |
|
{ |
|
"acc": 0.99721966, |
|
"epoch": 20.136986301369863, |
|
"grad_norm": 4.085058689117432, |
|
"learning_rate": 6.881858705053205e-06, |
|
"loss": 0.01011662, |
|
"memory(GiB)": 14.16, |
|
"step": 735, |
|
"train_speed(iter/s)": 0.192097 |
|
}, |
|
{ |
|
"acc": 0.99905624, |
|
"epoch": 20.273972602739725, |
|
"grad_norm": 2.026254892349243, |
|
"learning_rate": 6.8392305729230305e-06, |
|
"loss": 0.00847432, |
|
"memory(GiB)": 14.16, |
|
"step": 740, |
|
"train_speed(iter/s)": 0.192981 |
|
}, |
|
{ |
|
"acc": 0.99864426, |
|
"epoch": 20.410958904109588, |
|
"grad_norm": 1.7818002700805664, |
|
"learning_rate": 6.796447286935725e-06, |
|
"loss": 0.00707859, |
|
"memory(GiB)": 14.16, |
|
"step": 745, |
|
"train_speed(iter/s)": 0.193875 |
|
}, |
|
{ |
|
"acc": 0.99816341, |
|
"epoch": 20.54794520547945, |
|
"grad_norm": 0.09219258278608322, |
|
"learning_rate": 6.7535124571861766e-06, |
|
"loss": 0.01978692, |
|
"memory(GiB)": 14.16, |
|
"step": 750, |
|
"train_speed(iter/s)": 0.194746 |
|
}, |
|
{ |
|
"acc": 0.99819775, |
|
"epoch": 20.684931506849313, |
|
"grad_norm": 3.1013734340667725, |
|
"learning_rate": 6.710429706556683e-06, |
|
"loss": 0.00450487, |
|
"memory(GiB)": 14.16, |
|
"step": 755, |
|
"train_speed(iter/s)": 0.195624 |
|
}, |
|
{ |
|
"acc": 0.99859333, |
|
"epoch": 20.82191780821918, |
|
"grad_norm": 0.29254209995269775, |
|
"learning_rate": 6.667202670411245e-06, |
|
"loss": 0.00461008, |
|
"memory(GiB)": 14.16, |
|
"step": 760, |
|
"train_speed(iter/s)": 0.196517 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 20.958904109589042, |
|
"grad_norm": 0.2512110471725464, |
|
"learning_rate": 6.623834996288815e-06, |
|
"loss": 0.00767698, |
|
"memory(GiB)": 14.16, |
|
"step": 765, |
|
"train_speed(iter/s)": 0.197396 |
|
}, |
|
{ |
|
"acc": 0.99591599, |
|
"epoch": 21.095890410958905, |
|
"grad_norm": 0.5388877987861633, |
|
"learning_rate": 6.580330343595521e-06, |
|
"loss": 0.01597615, |
|
"memory(GiB)": 14.16, |
|
"step": 770, |
|
"train_speed(iter/s)": 0.198263 |
|
}, |
|
{ |
|
"acc": 0.99862766, |
|
"epoch": 21.232876712328768, |
|
"grad_norm": 0.15328700840473175, |
|
"learning_rate": 6.536692383295863e-06, |
|
"loss": 0.00608862, |
|
"memory(GiB)": 14.16, |
|
"step": 775, |
|
"train_speed(iter/s)": 0.19913 |
|
}, |
|
{ |
|
"acc": 0.99775133, |
|
"epoch": 21.36986301369863, |
|
"grad_norm": 0.17136460542678833, |
|
"learning_rate": 6.492924797602972e-06, |
|
"loss": 0.00846671, |
|
"memory(GiB)": 14.16, |
|
"step": 780, |
|
"train_speed(iter/s)": 0.199987 |
|
}, |
|
{ |
|
"acc": 0.99768267, |
|
"epoch": 21.506849315068493, |
|
"grad_norm": 3.5933849811553955, |
|
"learning_rate": 6.449031279667896e-06, |
|
"loss": 0.0071015, |
|
"memory(GiB)": 14.16, |
|
"step": 785, |
|
"train_speed(iter/s)": 0.200857 |
|
}, |
|
{ |
|
"acc": 0.99627323, |
|
"epoch": 21.643835616438356, |
|
"grad_norm": 2.7347967624664307, |
|
"learning_rate": 6.4050155332679606e-06, |
|
"loss": 0.01377204, |
|
"memory(GiB)": 14.16, |
|
"step": 790, |
|
"train_speed(iter/s)": 0.201739 |
|
}, |
|
{ |
|
"acc": 0.99720316, |
|
"epoch": 21.78082191780822, |
|
"grad_norm": 3.391113758087158, |
|
"learning_rate": 6.360881272494254e-06, |
|
"loss": 0.00560406, |
|
"memory(GiB)": 14.16, |
|
"step": 795, |
|
"train_speed(iter/s)": 0.202584 |
|
}, |
|
{ |
|
"acc": 0.99770041, |
|
"epoch": 21.91780821917808, |
|
"grad_norm": 0.9516264796257019, |
|
"learning_rate": 6.316632221438214e-06, |
|
"loss": 0.01059882, |
|
"memory(GiB)": 14.16, |
|
"step": 800, |
|
"train_speed(iter/s)": 0.20342 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 22.054794520547944, |
|
"grad_norm": 4.009815216064453, |
|
"learning_rate": 6.2722721138774e-06, |
|
"loss": 0.00493859, |
|
"memory(GiB)": 14.16, |
|
"step": 805, |
|
"train_speed(iter/s)": 0.20423 |
|
}, |
|
{ |
|
"acc": 0.99905624, |
|
"epoch": 22.19178082191781, |
|
"grad_norm": 0.8211438059806824, |
|
"learning_rate": 6.2278046929604265e-06, |
|
"loss": 0.00547095, |
|
"memory(GiB)": 14.16, |
|
"step": 810, |
|
"train_speed(iter/s)": 0.205109 |
|
}, |
|
{ |
|
"acc": 0.99680634, |
|
"epoch": 22.328767123287673, |
|
"grad_norm": 1.1279343366622925, |
|
"learning_rate": 6.183233710891103e-06, |
|
"loss": 0.01568028, |
|
"memory(GiB)": 14.16, |
|
"step": 815, |
|
"train_speed(iter/s)": 0.205958 |
|
}, |
|
{ |
|
"acc": 0.99821434, |
|
"epoch": 22.465753424657535, |
|
"grad_norm": 2.2662060260772705, |
|
"learning_rate": 6.1385629286118375e-06, |
|
"loss": 0.00696406, |
|
"memory(GiB)": 14.16, |
|
"step": 820, |
|
"train_speed(iter/s)": 0.206791 |
|
}, |
|
{ |
|
"acc": 0.99821434, |
|
"epoch": 22.602739726027398, |
|
"grad_norm": 3.2888071537017822, |
|
"learning_rate": 6.093796115486277e-06, |
|
"loss": 0.00824727, |
|
"memory(GiB)": 14.16, |
|
"step": 825, |
|
"train_speed(iter/s)": 0.207611 |
|
}, |
|
{ |
|
"acc": 0.99864426, |
|
"epoch": 22.73972602739726, |
|
"grad_norm": 0.9464216232299805, |
|
"learning_rate": 6.048937048981235e-06, |
|
"loss": 0.00788838, |
|
"memory(GiB)": 14.16, |
|
"step": 830, |
|
"train_speed(iter/s)": 0.20843 |
|
}, |
|
{ |
|
"acc": 0.99864426, |
|
"epoch": 22.876712328767123, |
|
"grad_norm": 0.23246127367019653, |
|
"learning_rate": 6.003989514347962e-06, |
|
"loss": 0.00401598, |
|
"memory(GiB)": 14.16, |
|
"step": 835, |
|
"train_speed(iter/s)": 0.209242 |
|
}, |
|
{ |
|
"acc": 0.99866076, |
|
"epoch": 23.013698630136986, |
|
"grad_norm": 3.2754745483398438, |
|
"learning_rate": 5.9589573043027314e-06, |
|
"loss": 0.00324695, |
|
"memory(GiB)": 14.16, |
|
"step": 840, |
|
"train_speed(iter/s)": 0.210024 |
|
}, |
|
{ |
|
"acc": 0.9980547, |
|
"epoch": 23.15068493150685, |
|
"grad_norm": 4.7171711921691895, |
|
"learning_rate": 5.913844218706809e-06, |
|
"loss": 0.0035405, |
|
"memory(GiB)": 14.16, |
|
"step": 845, |
|
"train_speed(iter/s)": 0.210863 |
|
}, |
|
{ |
|
"acc": 0.99637899, |
|
"epoch": 23.28767123287671, |
|
"grad_norm": 0.22946955263614655, |
|
"learning_rate": 5.8686540642458204e-06, |
|
"loss": 0.01147373, |
|
"memory(GiB)": 14.16, |
|
"step": 850, |
|
"train_speed(iter/s)": 0.211701 |
|
}, |
|
{ |
|
"acc": 0.99811125, |
|
"epoch": 23.424657534246574, |
|
"grad_norm": 5.581859588623047, |
|
"learning_rate": 5.82339065410853e-06, |
|
"loss": 0.00868064, |
|
"memory(GiB)": 14.16, |
|
"step": 855, |
|
"train_speed(iter/s)": 0.212537 |
|
}, |
|
{ |
|
"acc": 0.99769344, |
|
"epoch": 23.561643835616437, |
|
"grad_norm": 5.6449360847473145, |
|
"learning_rate": 5.7780578076650925e-06, |
|
"loss": 0.01117077, |
|
"memory(GiB)": 14.16, |
|
"step": 860, |
|
"train_speed(iter/s)": 0.213401 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 23.698630136986303, |
|
"grad_norm": 0.058708298951387405, |
|
"learning_rate": 5.732659350144769e-06, |
|
"loss": 0.00182705, |
|
"memory(GiB)": 14.16, |
|
"step": 865, |
|
"train_speed(iter/s)": 0.214229 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 23.835616438356166, |
|
"grad_norm": 1.460488200187683, |
|
"learning_rate": 5.687199112313132e-06, |
|
"loss": 0.00358091, |
|
"memory(GiB)": 14.16, |
|
"step": 870, |
|
"train_speed(iter/s)": 0.215057 |
|
}, |
|
{ |
|
"acc": 0.99818001, |
|
"epoch": 23.972602739726028, |
|
"grad_norm": 0.8150052428245544, |
|
"learning_rate": 5.64168093014885e-06, |
|
"loss": 0.00942515, |
|
"memory(GiB)": 14.16, |
|
"step": 875, |
|
"train_speed(iter/s)": 0.215894 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 24.10958904109589, |
|
"grad_norm": 1.0939289331436157, |
|
"learning_rate": 5.596108644519984e-06, |
|
"loss": 0.00471724, |
|
"memory(GiB)": 14.16, |
|
"step": 880, |
|
"train_speed(iter/s)": 0.216682 |
|
}, |
|
{ |
|
"acc": 0.99808903, |
|
"epoch": 24.246575342465754, |
|
"grad_norm": 6.786465644836426, |
|
"learning_rate": 5.5504861008599e-06, |
|
"loss": 0.00497846, |
|
"memory(GiB)": 14.16, |
|
"step": 885, |
|
"train_speed(iter/s)": 0.217499 |
|
}, |
|
{ |
|
"acc": 0.99864416, |
|
"epoch": 24.383561643835616, |
|
"grad_norm": 5.07835054397583, |
|
"learning_rate": 5.504817148842783e-06, |
|
"loss": 0.00418225, |
|
"memory(GiB)": 14.16, |
|
"step": 890, |
|
"train_speed(iter/s)": 0.218324 |
|
}, |
|
{ |
|
"acc": 0.99821434, |
|
"epoch": 24.52054794520548, |
|
"grad_norm": 2.3909006118774414, |
|
"learning_rate": 5.4591056420587975e-06, |
|
"loss": 0.00267169, |
|
"memory(GiB)": 14.16, |
|
"step": 895, |
|
"train_speed(iter/s)": 0.219135 |
|
}, |
|
{ |
|
"acc": 0.99864416, |
|
"epoch": 24.65753424657534, |
|
"grad_norm": 0.555738627910614, |
|
"learning_rate": 5.413355437688927e-06, |
|
"loss": 0.00462395, |
|
"memory(GiB)": 14.16, |
|
"step": 900, |
|
"train_speed(iter/s)": 0.219963 |
|
}, |
|
{ |
|
"acc": 0.99594774, |
|
"epoch": 24.794520547945204, |
|
"grad_norm": 5.093243598937988, |
|
"learning_rate": 5.367570396179488e-06, |
|
"loss": 0.01022252, |
|
"memory(GiB)": 14.16, |
|
"step": 905, |
|
"train_speed(iter/s)": 0.220772 |
|
}, |
|
{ |
|
"acc": 0.99728842, |
|
"epoch": 24.931506849315067, |
|
"grad_norm": 3.7607083320617676, |
|
"learning_rate": 5.321754380916395e-06, |
|
"loss": 0.01267306, |
|
"memory(GiB)": 14.16, |
|
"step": 910, |
|
"train_speed(iter/s)": 0.221621 |
|
}, |
|
{ |
|
"acc": 0.99598217, |
|
"epoch": 25.068493150684933, |
|
"grad_norm": 2.7842702865600586, |
|
"learning_rate": 5.275911257899149e-06, |
|
"loss": 0.01249768, |
|
"memory(GiB)": 14.16, |
|
"step": 915, |
|
"train_speed(iter/s)": 0.222392 |
|
}, |
|
{ |
|
"acc": 0.99814568, |
|
"epoch": 25.205479452054796, |
|
"grad_norm": 2.1749532222747803, |
|
"learning_rate": 5.23004489541464e-06, |
|
"loss": 0.01107962, |
|
"memory(GiB)": 14.16, |
|
"step": 920, |
|
"train_speed(iter/s)": 0.223218 |
|
}, |
|
{ |
|
"acc": 0.99907284, |
|
"epoch": 25.34246575342466, |
|
"grad_norm": 0.12532441318035126, |
|
"learning_rate": 5.184159163710717e-06, |
|
"loss": 0.00567983, |
|
"memory(GiB)": 14.16, |
|
"step": 925, |
|
"train_speed(iter/s)": 0.22402 |
|
}, |
|
{ |
|
"acc": 0.99862642, |
|
"epoch": 25.47945205479452, |
|
"grad_norm": 3.7313835620880127, |
|
"learning_rate": 5.1382579346696275e-06, |
|
"loss": 0.00543302, |
|
"memory(GiB)": 14.16, |
|
"step": 930, |
|
"train_speed(iter/s)": 0.22481 |
|
}, |
|
{ |
|
"acc": 0.99594784, |
|
"epoch": 25.616438356164384, |
|
"grad_norm": 3.0569019317626953, |
|
"learning_rate": 5.092345081481297e-06, |
|
"loss": 0.01230588, |
|
"memory(GiB)": 14.16, |
|
"step": 935, |
|
"train_speed(iter/s)": 0.225594 |
|
}, |
|
{ |
|
"acc": 0.99909058, |
|
"epoch": 25.753424657534246, |
|
"grad_norm": 0.2874479293823242, |
|
"learning_rate": 5.0464244783165105e-06, |
|
"loss": 0.0029504, |
|
"memory(GiB)": 14.16, |
|
"step": 940, |
|
"train_speed(iter/s)": 0.226373 |
|
}, |
|
{ |
|
"acc": 0.99818115, |
|
"epoch": 25.89041095890411, |
|
"grad_norm": 6.2819695472717285, |
|
"learning_rate": 5.000500000000001e-06, |
|
"loss": 0.00704549, |
|
"memory(GiB)": 14.16, |
|
"step": 945, |
|
"train_speed(iter/s)": 0.227172 |
|
}, |
|
{ |
|
"acc": 0.99818001, |
|
"epoch": 26.027397260273972, |
|
"grad_norm": 2.6562278270721436, |
|
"learning_rate": 4.954575521683491e-06, |
|
"loss": 0.00467317, |
|
"memory(GiB)": 14.16, |
|
"step": 950, |
|
"train_speed(iter/s)": 0.227913 |
|
}, |
|
{ |
|
"acc": 0.997717, |
|
"epoch": 26.164383561643834, |
|
"grad_norm": 0.46010449528694153, |
|
"learning_rate": 4.908654918518704e-06, |
|
"loss": 0.0066583, |
|
"memory(GiB)": 14.16, |
|
"step": 955, |
|
"train_speed(iter/s)": 0.228686 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 26.301369863013697, |
|
"grad_norm": 1.1016509532928467, |
|
"learning_rate": 4.862742065330375e-06, |
|
"loss": 0.00110117, |
|
"memory(GiB)": 14.16, |
|
"step": 960, |
|
"train_speed(iter/s)": 0.229538 |
|
}, |
|
{ |
|
"acc": 0.99866076, |
|
"epoch": 26.438356164383563, |
|
"grad_norm": 3.4421184062957764, |
|
"learning_rate": 4.816840836289285e-06, |
|
"loss": 0.00389256, |
|
"memory(GiB)": 14.16, |
|
"step": 965, |
|
"train_speed(iter/s)": 0.230473 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 26.575342465753426, |
|
"grad_norm": 1.7454206943511963, |
|
"learning_rate": 4.770955104585361e-06, |
|
"loss": 0.00087426, |
|
"memory(GiB)": 14.16, |
|
"step": 970, |
|
"train_speed(iter/s)": 0.231175 |
|
}, |
|
{ |
|
"acc": 0.99866076, |
|
"epoch": 26.71232876712329, |
|
"grad_norm": 0.08459863811731339, |
|
"learning_rate": 4.725088742100851e-06, |
|
"loss": 0.00253912, |
|
"memory(GiB)": 14.16, |
|
"step": 975, |
|
"train_speed(iter/s)": 0.231908 |
|
}, |
|
{ |
|
"acc": 0.99821434, |
|
"epoch": 26.84931506849315, |
|
"grad_norm": 0.7988649010658264, |
|
"learning_rate": 4.679245619083607e-06, |
|
"loss": 0.00461807, |
|
"memory(GiB)": 14.16, |
|
"step": 980, |
|
"train_speed(iter/s)": 0.232806 |
|
}, |
|
{ |
|
"acc": 0.99866076, |
|
"epoch": 26.986301369863014, |
|
"grad_norm": 0.03472264111042023, |
|
"learning_rate": 4.633429603820513e-06, |
|
"loss": 0.0050515, |
|
"memory(GiB)": 14.16, |
|
"step": 985, |
|
"train_speed(iter/s)": 0.233323 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 27.123287671232877, |
|
"grad_norm": 1.552517056465149, |
|
"learning_rate": 4.587644562311076e-06, |
|
"loss": 0.00116102, |
|
"memory(GiB)": 14.16, |
|
"step": 990, |
|
"train_speed(iter/s)": 0.234015 |
|
}, |
|
{ |
|
"acc": 0.99866076, |
|
"epoch": 27.26027397260274, |
|
"grad_norm": 2.796733856201172, |
|
"learning_rate": 4.541894357941205e-06, |
|
"loss": 0.0039554, |
|
"memory(GiB)": 14.16, |
|
"step": 995, |
|
"train_speed(iter/s)": 0.234715 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 27.397260273972602, |
|
"grad_norm": 0.08924784511327744, |
|
"learning_rate": 4.4961828511572195e-06, |
|
"loss": 0.00281882, |
|
"memory(GiB)": 14.16, |
|
"step": 1000, |
|
"train_speed(iter/s)": 0.235411 |
|
}, |
|
{ |
|
"epoch": 27.397260273972602, |
|
"eval_acc": 0.376108506949877, |
|
"eval_loss": 5.226269721984863, |
|
"eval_runtime": 1966.6531, |
|
"eval_samples_per_second": 16.284, |
|
"eval_steps_per_second": 2.035, |
|
"step": 1000 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 27.534246575342465, |
|
"grad_norm": 0.20645824074745178, |
|
"learning_rate": 4.450513899140101e-06, |
|
"loss": 0.00193416, |
|
"memory(GiB)": 14.16, |
|
"step": 1005, |
|
"train_speed(iter/s)": 0.160727 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 27.671232876712327, |
|
"grad_norm": 2.83465838432312, |
|
"learning_rate": 4.404891355480016e-06, |
|
"loss": 0.00531424, |
|
"memory(GiB)": 14.16, |
|
"step": 1010, |
|
"train_speed(iter/s)": 0.161302 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 27.80821917808219, |
|
"grad_norm": 1.058475375175476, |
|
"learning_rate": 4.359319069851151e-06, |
|
"loss": 0.00214943, |
|
"memory(GiB)": 14.16, |
|
"step": 1015, |
|
"train_speed(iter/s)": 0.161884 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 27.945205479452056, |
|
"grad_norm": 0.5197652578353882, |
|
"learning_rate": 4.313800887686869e-06, |
|
"loss": 0.00063238, |
|
"memory(GiB)": 14.16, |
|
"step": 1020, |
|
"train_speed(iter/s)": 0.162463 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 28.08219178082192, |
|
"grad_norm": 0.02172599360346794, |
|
"learning_rate": 4.268340649855233e-06, |
|
"loss": 0.00572151, |
|
"memory(GiB)": 14.16, |
|
"step": 1025, |
|
"train_speed(iter/s)": 0.163028 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 28.21917808219178, |
|
"grad_norm": 1.721336007118225, |
|
"learning_rate": 4.222942192334907e-06, |
|
"loss": 0.00307167, |
|
"memory(GiB)": 14.16, |
|
"step": 1030, |
|
"train_speed(iter/s)": 0.163606 |
|
}, |
|
{ |
|
"acc": 0.99866076, |
|
"epoch": 28.356164383561644, |
|
"grad_norm": 0.07104966044425964, |
|
"learning_rate": 4.1776093458914725e-06, |
|
"loss": 0.0070457, |
|
"memory(GiB)": 14.16, |
|
"step": 1035, |
|
"train_speed(iter/s)": 0.164181 |
|
}, |
|
{ |
|
"acc": 0.99866076, |
|
"epoch": 28.493150684931507, |
|
"grad_norm": 4.222721099853516, |
|
"learning_rate": 4.1323459357541826e-06, |
|
"loss": 0.00498358, |
|
"memory(GiB)": 14.16, |
|
"step": 1040, |
|
"train_speed(iter/s)": 0.164784 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 28.63013698630137, |
|
"grad_norm": 0.08688988536596298, |
|
"learning_rate": 4.087155781293192e-06, |
|
"loss": 0.0007615, |
|
"memory(GiB)": 14.16, |
|
"step": 1045, |
|
"train_speed(iter/s)": 0.165367 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 28.767123287671232, |
|
"grad_norm": 0.03878637030720711, |
|
"learning_rate": 4.042042695697272e-06, |
|
"loss": 0.00484578, |
|
"memory(GiB)": 14.16, |
|
"step": 1050, |
|
"train_speed(iter/s)": 0.165931 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 28.904109589041095, |
|
"grad_norm": 0.5024237036705017, |
|
"learning_rate": 3.997010485652039e-06, |
|
"loss": 0.00233584, |
|
"memory(GiB)": 14.16, |
|
"step": 1055, |
|
"train_speed(iter/s)": 0.166493 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 29.041095890410958, |
|
"grad_norm": 0.9032835960388184, |
|
"learning_rate": 3.952062951018766e-06, |
|
"loss": 0.00431595, |
|
"memory(GiB)": 14.16, |
|
"step": 1060, |
|
"train_speed(iter/s)": 0.167071 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 29.17808219178082, |
|
"grad_norm": 0.07504996657371521, |
|
"learning_rate": 3.907203884513724e-06, |
|
"loss": 0.00013832, |
|
"memory(GiB)": 14.16, |
|
"step": 1065, |
|
"train_speed(iter/s)": 0.167643 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 29.315068493150687, |
|
"grad_norm": 0.041768305003643036, |
|
"learning_rate": 3.862437071388162e-06, |
|
"loss": 0.00039022, |
|
"memory(GiB)": 14.16, |
|
"step": 1070, |
|
"train_speed(iter/s)": 0.168201 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 29.45205479452055, |
|
"grad_norm": 0.09327519685029984, |
|
"learning_rate": 3.817766289108899e-06, |
|
"loss": 0.00143108, |
|
"memory(GiB)": 14.16, |
|
"step": 1075, |
|
"train_speed(iter/s)": 0.168777 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 29.589041095890412, |
|
"grad_norm": 0.03159390017390251, |
|
"learning_rate": 3.773195307039575e-06, |
|
"loss": 0.00081171, |
|
"memory(GiB)": 14.16, |
|
"step": 1080, |
|
"train_speed(iter/s)": 0.169341 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 29.726027397260275, |
|
"grad_norm": 2.1395320892333984, |
|
"learning_rate": 3.728727886122599e-06, |
|
"loss": 0.00190442, |
|
"memory(GiB)": 14.16, |
|
"step": 1085, |
|
"train_speed(iter/s)": 0.169923 |
|
}, |
|
{ |
|
"acc": 0.99866076, |
|
"epoch": 29.863013698630137, |
|
"grad_norm": 5.531327724456787, |
|
"learning_rate": 3.6843677785617874e-06, |
|
"loss": 0.00634567, |
|
"memory(GiB)": 14.16, |
|
"step": 1090, |
|
"train_speed(iter/s)": 0.170491 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 30.0, |
|
"grad_norm": 0.03689517825841904, |
|
"learning_rate": 3.640118727505748e-06, |
|
"loss": 0.00395082, |
|
"memory(GiB)": 14.16, |
|
"step": 1095, |
|
"train_speed(iter/s)": 0.171066 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 30.136986301369863, |
|
"grad_norm": 0.017297176644206047, |
|
"learning_rate": 3.5959844667320403e-06, |
|
"loss": 0.00074339, |
|
"memory(GiB)": 14.16, |
|
"step": 1100, |
|
"train_speed(iter/s)": 0.171615 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 30.273972602739725, |
|
"grad_norm": 0.017922429367899895, |
|
"learning_rate": 3.5519687203321056e-06, |
|
"loss": 0.00269048, |
|
"memory(GiB)": 14.16, |
|
"step": 1105, |
|
"train_speed(iter/s)": 0.172174 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 30.410958904109588, |
|
"grad_norm": 0.04719838500022888, |
|
"learning_rate": 3.5080752023970284e-06, |
|
"loss": 0.00416398, |
|
"memory(GiB)": 14.16, |
|
"step": 1110, |
|
"train_speed(iter/s)": 0.172722 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 30.54794520547945, |
|
"grad_norm": 0.02601473033428192, |
|
"learning_rate": 3.4643076167041395e-06, |
|
"loss": 0.0034888, |
|
"memory(GiB)": 14.16, |
|
"step": 1115, |
|
"train_speed(iter/s)": 0.173288 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 30.684931506849313, |
|
"grad_norm": 0.03908325359225273, |
|
"learning_rate": 3.4206696564044813e-06, |
|
"loss": 0.00179875, |
|
"memory(GiB)": 14.16, |
|
"step": 1120, |
|
"train_speed(iter/s)": 0.173848 |
|
}, |
|
{ |
|
"acc": 0.99819775, |
|
"epoch": 30.82191780821918, |
|
"grad_norm": 2.540851354598999, |
|
"learning_rate": 3.377165003711185e-06, |
|
"loss": 0.00962915, |
|
"memory(GiB)": 14.16, |
|
"step": 1125, |
|
"train_speed(iter/s)": 0.174398 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 30.958904109589042, |
|
"grad_norm": 0.023008601740002632, |
|
"learning_rate": 3.3337973295887587e-06, |
|
"loss": 0.00751298, |
|
"memory(GiB)": 14.16, |
|
"step": 1130, |
|
"train_speed(iter/s)": 0.174948 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 31.095890410958905, |
|
"grad_norm": 0.0744655579328537, |
|
"learning_rate": 3.2905702934433197e-06, |
|
"loss": 0.00133921, |
|
"memory(GiB)": 14.16, |
|
"step": 1135, |
|
"train_speed(iter/s)": 0.175478 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 31.232876712328768, |
|
"grad_norm": 0.04030351713299751, |
|
"learning_rate": 3.247487542813825e-06, |
|
"loss": 0.0002436, |
|
"memory(GiB)": 14.16, |
|
"step": 1140, |
|
"train_speed(iter/s)": 0.176017 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 31.36986301369863, |
|
"grad_norm": 0.21374386548995972, |
|
"learning_rate": 3.204552713064278e-06, |
|
"loss": 0.00017974, |
|
"memory(GiB)": 14.16, |
|
"step": 1145, |
|
"train_speed(iter/s)": 0.176558 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 31.506849315068493, |
|
"grad_norm": 0.012107456102967262, |
|
"learning_rate": 3.1617694270769713e-06, |
|
"loss": 0.00015874, |
|
"memory(GiB)": 14.16, |
|
"step": 1150, |
|
"train_speed(iter/s)": 0.177106 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 31.643835616438356, |
|
"grad_norm": 0.12086984515190125, |
|
"learning_rate": 3.119141294946797e-06, |
|
"loss": 0.00084028, |
|
"memory(GiB)": 14.16, |
|
"step": 1155, |
|
"train_speed(iter/s)": 0.177658 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 31.78082191780822, |
|
"grad_norm": 3.515671968460083, |
|
"learning_rate": 3.0766719136766093e-06, |
|
"loss": 0.00295761, |
|
"memory(GiB)": 14.16, |
|
"step": 1160, |
|
"train_speed(iter/s)": 0.178197 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 31.91780821917808, |
|
"grad_norm": 0.011739728040993214, |
|
"learning_rate": 3.034364866873715e-06, |
|
"loss": 0.00487542, |
|
"memory(GiB)": 14.16, |
|
"step": 1165, |
|
"train_speed(iter/s)": 0.178737 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 32.054794520547944, |
|
"grad_norm": 0.6842294335365295, |
|
"learning_rate": 2.9922237244474807e-06, |
|
"loss": 0.00015365, |
|
"memory(GiB)": 14.16, |
|
"step": 1170, |
|
"train_speed(iter/s)": 0.179255 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 32.19178082191781, |
|
"grad_norm": 0.22703398764133453, |
|
"learning_rate": 2.950252042308099e-06, |
|
"loss": 0.00036469, |
|
"memory(GiB)": 14.16, |
|
"step": 1175, |
|
"train_speed(iter/s)": 0.179789 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 32.32876712328767, |
|
"grad_norm": 0.48301902413368225, |
|
"learning_rate": 2.9084533620665478e-06, |
|
"loss": 0.00040778, |
|
"memory(GiB)": 14.16, |
|
"step": 1180, |
|
"train_speed(iter/s)": 0.18032 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 32.465753424657535, |
|
"grad_norm": 3.3718252182006836, |
|
"learning_rate": 2.86683121073573e-06, |
|
"loss": 0.00395589, |
|
"memory(GiB)": 14.16, |
|
"step": 1185, |
|
"train_speed(iter/s)": 0.180852 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 32.602739726027394, |
|
"grad_norm": 0.021095439791679382, |
|
"learning_rate": 2.825389100432876e-06, |
|
"loss": 0.0002179, |
|
"memory(GiB)": 14.16, |
|
"step": 1190, |
|
"train_speed(iter/s)": 0.181384 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 32.73972602739726, |
|
"grad_norm": 0.03395378589630127, |
|
"learning_rate": 2.7841305280831743e-06, |
|
"loss": 0.0001625, |
|
"memory(GiB)": 14.16, |
|
"step": 1195, |
|
"train_speed(iter/s)": 0.181912 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 32.87671232876713, |
|
"grad_norm": 0.015184338204562664, |
|
"learning_rate": 2.743058975124708e-06, |
|
"loss": 0.00051196, |
|
"memory(GiB)": 14.16, |
|
"step": 1200, |
|
"train_speed(iter/s)": 0.182449 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 33.013698630136986, |
|
"grad_norm": 0.03929471969604492, |
|
"learning_rate": 2.7021779072146866e-06, |
|
"loss": 0.0004342, |
|
"memory(GiB)": 14.16, |
|
"step": 1205, |
|
"train_speed(iter/s)": 0.182964 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 33.15068493150685, |
|
"grad_norm": 0.014112686738371849, |
|
"learning_rate": 2.661490773937e-06, |
|
"loss": 6.217e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1210, |
|
"train_speed(iter/s)": 0.183491 |
|
}, |
|
{ |
|
"acc": 0.99910717, |
|
"epoch": 33.28767123287671, |
|
"grad_norm": 0.07489810883998871, |
|
"learning_rate": 2.6210010085111507e-06, |
|
"loss": 0.00106858, |
|
"memory(GiB)": 14.16, |
|
"step": 1215, |
|
"train_speed(iter/s)": 0.184013 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 33.42465753424658, |
|
"grad_norm": 0.013716904446482658, |
|
"learning_rate": 2.580712027502557e-06, |
|
"loss": 0.00010475, |
|
"memory(GiB)": 14.16, |
|
"step": 1220, |
|
"train_speed(iter/s)": 0.184539 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 33.56164383561644, |
|
"grad_norm": 0.03437013924121857, |
|
"learning_rate": 2.5406272305342438e-06, |
|
"loss": 8.954e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1225, |
|
"train_speed(iter/s)": 0.185072 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 33.6986301369863, |
|
"grad_norm": 0.0419132262468338, |
|
"learning_rate": 2.500749999999999e-06, |
|
"loss": 0.00213626, |
|
"memory(GiB)": 14.16, |
|
"step": 1230, |
|
"train_speed(iter/s)": 0.185598 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 33.83561643835616, |
|
"grad_norm": 0.00895242765545845, |
|
"learning_rate": 2.461083700778954e-06, |
|
"loss": 0.00524443, |
|
"memory(GiB)": 14.16, |
|
"step": 1235, |
|
"train_speed(iter/s)": 0.186129 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 33.97260273972603, |
|
"grad_norm": 0.0046184309758245945, |
|
"learning_rate": 2.4216316799516488e-06, |
|
"loss": 9.459e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1240, |
|
"train_speed(iter/s)": 0.186651 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 34.10958904109589, |
|
"grad_norm": 0.00644602719694376, |
|
"learning_rate": 2.3823972665176048e-06, |
|
"loss": 0.00276474, |
|
"memory(GiB)": 14.16, |
|
"step": 1245, |
|
"train_speed(iter/s)": 0.187167 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 34.24657534246575, |
|
"grad_norm": 0.04513326287269592, |
|
"learning_rate": 2.34338377111443e-06, |
|
"loss": 8.529e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1250, |
|
"train_speed(iter/s)": 0.187696 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 34.38356164383562, |
|
"grad_norm": 0.00604225741699338, |
|
"learning_rate": 2.304594485738447e-06, |
|
"loss": 0.00061723, |
|
"memory(GiB)": 14.16, |
|
"step": 1255, |
|
"train_speed(iter/s)": 0.188214 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 34.52054794520548, |
|
"grad_norm": 0.007053479552268982, |
|
"learning_rate": 2.266032683466928e-06, |
|
"loss": 0.00040206, |
|
"memory(GiB)": 14.16, |
|
"step": 1260, |
|
"train_speed(iter/s)": 0.188743 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 34.657534246575345, |
|
"grad_norm": 0.012396584264934063, |
|
"learning_rate": 2.227701618181895e-06, |
|
"loss": 4.734e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1265, |
|
"train_speed(iter/s)": 0.189268 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 34.794520547945204, |
|
"grad_norm": 0.2615416944026947, |
|
"learning_rate": 2.189604524295565e-06, |
|
"loss": 0.00017459, |
|
"memory(GiB)": 14.16, |
|
"step": 1270, |
|
"train_speed(iter/s)": 0.18978 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 34.93150684931507, |
|
"grad_norm": 0.9177928566932678, |
|
"learning_rate": 2.1517446164774243e-06, |
|
"loss": 0.00016437, |
|
"memory(GiB)": 14.16, |
|
"step": 1275, |
|
"train_speed(iter/s)": 0.190291 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 35.06849315068493, |
|
"grad_norm": 0.005634276662021875, |
|
"learning_rate": 2.114125089382966e-06, |
|
"loss": 6.107e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1280, |
|
"train_speed(iter/s)": 0.190787 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 35.205479452054796, |
|
"grad_norm": 0.005844338797032833, |
|
"learning_rate": 2.0767491173841266e-06, |
|
"loss": 0.00024583, |
|
"memory(GiB)": 14.16, |
|
"step": 1285, |
|
"train_speed(iter/s)": 0.191287 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 35.342465753424655, |
|
"grad_norm": 0.008257429115474224, |
|
"learning_rate": 2.039619854301433e-06, |
|
"loss": 0.00127686, |
|
"memory(GiB)": 14.16, |
|
"step": 1290, |
|
"train_speed(iter/s)": 0.191799 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 35.47945205479452, |
|
"grad_norm": 0.14188018441200256, |
|
"learning_rate": 2.0027404331378715e-06, |
|
"loss": 0.00085992, |
|
"memory(GiB)": 14.16, |
|
"step": 1295, |
|
"train_speed(iter/s)": 0.192303 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 35.61643835616438, |
|
"grad_norm": 0.004589778371155262, |
|
"learning_rate": 1.9661139658145304e-06, |
|
"loss": 5.643e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1300, |
|
"train_speed(iter/s)": 0.192822 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 35.75342465753425, |
|
"grad_norm": 0.005317226517945528, |
|
"learning_rate": 1.929743542908006e-06, |
|
"loss": 3.488e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1305, |
|
"train_speed(iter/s)": 0.193346 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 35.89041095890411, |
|
"grad_norm": 0.17880931496620178, |
|
"learning_rate": 1.8936322333896213e-06, |
|
"loss": 0.00010323, |
|
"memory(GiB)": 14.16, |
|
"step": 1310, |
|
"train_speed(iter/s)": 0.193861 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 36.02739726027397, |
|
"grad_norm": 0.026366814970970154, |
|
"learning_rate": 1.857783084366465e-06, |
|
"loss": 0.00022611, |
|
"memory(GiB)": 14.16, |
|
"step": 1315, |
|
"train_speed(iter/s)": 0.194343 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 36.16438356164384, |
|
"grad_norm": 0.011201135814189911, |
|
"learning_rate": 1.8221991208242658e-06, |
|
"loss": 3.839e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1320, |
|
"train_speed(iter/s)": 0.194845 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 36.3013698630137, |
|
"grad_norm": 0.008422702550888062, |
|
"learning_rate": 1.7868833453721465e-06, |
|
"loss": 8.309e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1325, |
|
"train_speed(iter/s)": 0.195352 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 36.43835616438356, |
|
"grad_norm": 0.027778884395956993, |
|
"learning_rate": 1.7518387379892654e-06, |
|
"loss": 4.668e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1330, |
|
"train_speed(iter/s)": 0.195857 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 36.57534246575342, |
|
"grad_norm": 0.010711952112615108, |
|
"learning_rate": 1.717068255773352e-06, |
|
"loss": 3.179e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1335, |
|
"train_speed(iter/s)": 0.196353 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 36.71232876712329, |
|
"grad_norm": 0.004275246057659388, |
|
"learning_rate": 1.6825748326911945e-06, |
|
"loss": 0.00023135, |
|
"memory(GiB)": 14.16, |
|
"step": 1340, |
|
"train_speed(iter/s)": 0.196844 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 36.84931506849315, |
|
"grad_norm": 0.003764290129765868, |
|
"learning_rate": 1.6483613793310607e-06, |
|
"loss": 4.05e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1345, |
|
"train_speed(iter/s)": 0.197337 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 36.986301369863014, |
|
"grad_norm": 0.025480693206191063, |
|
"learning_rate": 1.6144307826571068e-06, |
|
"loss": 3.893e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1350, |
|
"train_speed(iter/s)": 0.19784 |
|
}, |
|
{ |
|
"acc": 0.99955359, |
|
"epoch": 37.12328767123287, |
|
"grad_norm": 0.002638956531882286, |
|
"learning_rate": 1.580785905765769e-06, |
|
"loss": 0.00189444, |
|
"memory(GiB)": 14.16, |
|
"step": 1355, |
|
"train_speed(iter/s)": 0.198304 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 37.26027397260274, |
|
"grad_norm": 2.218669891357422, |
|
"learning_rate": 1.5474295876441716e-06, |
|
"loss": 0.00040235, |
|
"memory(GiB)": 14.16, |
|
"step": 1360, |
|
"train_speed(iter/s)": 0.198788 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 37.397260273972606, |
|
"grad_norm": 0.004326330963522196, |
|
"learning_rate": 1.51436464293057e-06, |
|
"loss": 2.629e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1365, |
|
"train_speed(iter/s)": 0.199269 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 37.534246575342465, |
|
"grad_norm": 0.00336137181147933, |
|
"learning_rate": 1.4815938616768564e-06, |
|
"loss": 2.298e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1370, |
|
"train_speed(iter/s)": 0.199749 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 37.67123287671233, |
|
"grad_norm": 0.0043685161508619785, |
|
"learning_rate": 1.4491200091131203e-06, |
|
"loss": 2.556e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1375, |
|
"train_speed(iter/s)": 0.200239 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 37.80821917808219, |
|
"grad_norm": 0.003170077223330736, |
|
"learning_rate": 1.4169458254143287e-06, |
|
"loss": 2.185e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1380, |
|
"train_speed(iter/s)": 0.200734 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 37.945205479452056, |
|
"grad_norm": 0.0025261647533625364, |
|
"learning_rate": 1.3850740254690947e-06, |
|
"loss": 0.00010961, |
|
"memory(GiB)": 14.16, |
|
"step": 1385, |
|
"train_speed(iter/s)": 0.201222 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 38.082191780821915, |
|
"grad_norm": 0.002983207581564784, |
|
"learning_rate": 1.3535072986506058e-06, |
|
"loss": 2.55e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1390, |
|
"train_speed(iter/s)": 0.201683 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 38.21917808219178, |
|
"grad_norm": 0.7226250171661377, |
|
"learning_rate": 1.3222483085896786e-06, |
|
"loss": 0.00014088, |
|
"memory(GiB)": 14.16, |
|
"step": 1395, |
|
"train_speed(iter/s)": 0.202156 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 38.35616438356164, |
|
"grad_norm": 0.0026145747397094965, |
|
"learning_rate": 1.2912996929500105e-06, |
|
"loss": 1.867e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1400, |
|
"train_speed(iter/s)": 0.202643 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 38.49315068493151, |
|
"grad_norm": 0.002422385849058628, |
|
"learning_rate": 1.2606640632056035e-06, |
|
"loss": 2.782e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1405, |
|
"train_speed(iter/s)": 0.203134 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 38.63013698630137, |
|
"grad_norm": 0.005694146268069744, |
|
"learning_rate": 1.230344004420408e-06, |
|
"loss": 2.287e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1410, |
|
"train_speed(iter/s)": 0.20361 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 38.76712328767123, |
|
"grad_norm": 0.0027258628979325294, |
|
"learning_rate": 1.2003420750301944e-06, |
|
"loss": 0.00018693, |
|
"memory(GiB)": 14.16, |
|
"step": 1415, |
|
"train_speed(iter/s)": 0.204094 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 38.9041095890411, |
|
"grad_norm": 0.0033724328968673944, |
|
"learning_rate": 1.1706608066266701e-06, |
|
"loss": 2.27e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1420, |
|
"train_speed(iter/s)": 0.204571 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 39.04109589041096, |
|
"grad_norm": 0.0026123709976673126, |
|
"learning_rate": 1.141302703743854e-06, |
|
"loss": 1.855e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1425, |
|
"train_speed(iter/s)": 0.205022 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 39.178082191780824, |
|
"grad_norm": 0.0019495452288538218, |
|
"learning_rate": 1.1122702436467527e-06, |
|
"loss": 1.743e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1430, |
|
"train_speed(iter/s)": 0.205495 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 39.31506849315068, |
|
"grad_norm": 0.004156290087848902, |
|
"learning_rate": 1.083565876122317e-06, |
|
"loss": 1.877e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1435, |
|
"train_speed(iter/s)": 0.205967 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 39.45205479452055, |
|
"grad_norm": 0.002886646194383502, |
|
"learning_rate": 1.0551920232727294e-06, |
|
"loss": 1.751e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1440, |
|
"train_speed(iter/s)": 0.206455 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 39.58904109589041, |
|
"grad_norm": 0.004523637238889933, |
|
"learning_rate": 1.0271510793110337e-06, |
|
"loss": 1.72e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1445, |
|
"train_speed(iter/s)": 0.206938 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 39.726027397260275, |
|
"grad_norm": 0.002039379673078656, |
|
"learning_rate": 9.994454103590919e-07, |
|
"loss": 0.00028988, |
|
"memory(GiB)": 14.16, |
|
"step": 1450, |
|
"train_speed(iter/s)": 0.207413 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 39.863013698630134, |
|
"grad_norm": 0.0019272951176390052, |
|
"learning_rate": 9.720773542479399e-07, |
|
"loss": 1.637e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1455, |
|
"train_speed(iter/s)": 0.207887 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 40.0, |
|
"grad_norm": 0.0028609074652194977, |
|
"learning_rate": 9.450492203205211e-07, |
|
"loss": 1.569e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1460, |
|
"train_speed(iter/s)": 0.208361 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 40.136986301369866, |
|
"grad_norm": 0.0020151259377598763, |
|
"learning_rate": 9.183632892368126e-07, |
|
"loss": 1.691e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1465, |
|
"train_speed(iter/s)": 0.208817 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 40.273972602739725, |
|
"grad_norm": 0.0018815897637978196, |
|
"learning_rate": 8.920218127813862e-07, |
|
"loss": 1.482e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1470, |
|
"train_speed(iter/s)": 0.209272 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 40.41095890410959, |
|
"grad_norm": 0.0017830530414357781, |
|
"learning_rate": 8.660270136734065e-07, |
|
"loss": 1.544e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1475, |
|
"train_speed(iter/s)": 0.209727 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 40.54794520547945, |
|
"grad_norm": 0.0013965211110189557, |
|
"learning_rate": 8.403810853790619e-07, |
|
"loss": 1.878e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1480, |
|
"train_speed(iter/s)": 0.210196 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 40.68493150684932, |
|
"grad_norm": 0.0022451053373515606, |
|
"learning_rate": 8.150861919264897e-07, |
|
"loss": 1.5e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1485, |
|
"train_speed(iter/s)": 0.210655 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 40.821917808219176, |
|
"grad_norm": 0.0060085877776145935, |
|
"learning_rate": 7.901444677231659e-07, |
|
"loss": 1.531e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1490, |
|
"train_speed(iter/s)": 0.211128 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 40.95890410958904, |
|
"grad_norm": 0.0016883641947060823, |
|
"learning_rate": 7.65558017375808e-07, |
|
"loss": 1.456e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1495, |
|
"train_speed(iter/s)": 0.211591 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 41.0958904109589, |
|
"grad_norm": 0.007958967238664627, |
|
"learning_rate": 7.413289155127845e-07, |
|
"loss": 1.438e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1500, |
|
"train_speed(iter/s)": 0.212023 |
|
}, |
|
{ |
|
"epoch": 41.0958904109589, |
|
"eval_acc": 0.3744250158022868, |
|
"eval_loss": 5.587606430053711, |
|
"eval_runtime": 1965.1675, |
|
"eval_samples_per_second": 16.296, |
|
"eval_steps_per_second": 2.037, |
|
"step": 1500 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 41.23287671232877, |
|
"grad_norm": 0.0023259874433279037, |
|
"learning_rate": 7.174592066090488e-07, |
|
"loss": 1.527e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1505, |
|
"train_speed(iter/s)": 0.165697 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 41.36986301369863, |
|
"grad_norm": 0.0012143112253397703, |
|
"learning_rate": 6.939509048136372e-07, |
|
"loss": 1.22e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1510, |
|
"train_speed(iter/s)": 0.166098 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 41.50684931506849, |
|
"grad_norm": 0.0015943313483148813, |
|
"learning_rate": 6.708059937796999e-07, |
|
"loss": 1.281e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1515, |
|
"train_speed(iter/s)": 0.166498 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 41.64383561643836, |
|
"grad_norm": 0.0013340068981051445, |
|
"learning_rate": 6.480264264971263e-07, |
|
"loss": 1.167e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1520, |
|
"train_speed(iter/s)": 0.166899 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 41.78082191780822, |
|
"grad_norm": 0.0011501106200739741, |
|
"learning_rate": 6.256141251277526e-07, |
|
"loss": 1.187e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1525, |
|
"train_speed(iter/s)": 0.167303 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 41.917808219178085, |
|
"grad_norm": 0.0013626530999317765, |
|
"learning_rate": 6.035709808431585e-07, |
|
"loss": 1.142e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1530, |
|
"train_speed(iter/s)": 0.167706 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 42.054794520547944, |
|
"grad_norm": 0.0012834669323638082, |
|
"learning_rate": 5.818988536650921e-07, |
|
"loss": 1.071e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1535, |
|
"train_speed(iter/s)": 0.16809 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 42.19178082191781, |
|
"grad_norm": 0.0013576337369158864, |
|
"learning_rate": 5.605995723085264e-07, |
|
"loss": 1.142e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1540, |
|
"train_speed(iter/s)": 0.168482 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 42.32876712328767, |
|
"grad_norm": 0.0012725357664749026, |
|
"learning_rate": 5.396749340273402e-07, |
|
"loss": 1.051e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1545, |
|
"train_speed(iter/s)": 0.168875 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 42.465753424657535, |
|
"grad_norm": 0.0012627997202798724, |
|
"learning_rate": 5.191267044626674e-07, |
|
"loss": 2.435e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1550, |
|
"train_speed(iter/s)": 0.169269 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 42.602739726027394, |
|
"grad_norm": 0.051344264298677444, |
|
"learning_rate": 4.989566174939183e-07, |
|
"loss": 2.21e-05, |
|
"memory(GiB)": 14.16, |
|
"step": 1555, |
|
"train_speed(iter/s)": 0.169675 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 42.73972602739726, |
|
"grad_norm": 0.0012037245323881507, |
|
"learning_rate": 4.791663750924617e-07, |
|
"loss": 8.75e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1560, |
|
"train_speed(iter/s)": 0.170075 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 42.87671232876713, |
|
"grad_norm": 0.0008853294420987368, |
|
"learning_rate": 4.5975764717801586e-07, |
|
"loss": 9.42e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1565, |
|
"train_speed(iter/s)": 0.170474 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 43.013698630136986, |
|
"grad_norm": 0.0008409248548559844, |
|
"learning_rate": 4.407320714777398e-07, |
|
"loss": 8.56e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1570, |
|
"train_speed(iter/s)": 0.170865 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 43.15068493150685, |
|
"grad_norm": 0.000995440874248743, |
|
"learning_rate": 4.2209125338804007e-07, |
|
"loss": 9.18e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1575, |
|
"train_speed(iter/s)": 0.171254 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 43.28767123287671, |
|
"grad_norm": 0.0008801660733297467, |
|
"learning_rate": 4.0383676583910706e-07, |
|
"loss": 7.03e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1580, |
|
"train_speed(iter/s)": 0.17164 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 43.42465753424658, |
|
"grad_norm": 0.0007337812567129731, |
|
"learning_rate": 3.859701491621833e-07, |
|
"loss": 7.99e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1585, |
|
"train_speed(iter/s)": 0.172037 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 43.56164383561644, |
|
"grad_norm": 0.0009611019631847739, |
|
"learning_rate": 3.6849291095959866e-07, |
|
"loss": 8.49e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1590, |
|
"train_speed(iter/s)": 0.172427 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 43.6986301369863, |
|
"grad_norm": 0.0009541260078549385, |
|
"learning_rate": 3.5140652597754917e-07, |
|
"loss": 7.95e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1595, |
|
"train_speed(iter/s)": 0.172819 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 43.83561643835616, |
|
"grad_norm": 0.0007106245611794293, |
|
"learning_rate": 3.3471243598165825e-07, |
|
"loss": 7.9e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1600, |
|
"train_speed(iter/s)": 0.173214 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 43.97260273972603, |
|
"grad_norm": 0.0014047367731109262, |
|
"learning_rate": 3.184120496353248e-07, |
|
"loss": 7.76e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1605, |
|
"train_speed(iter/s)": 0.173593 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 44.10958904109589, |
|
"grad_norm": 0.0007362644537352026, |
|
"learning_rate": 3.025067423808514e-07, |
|
"loss": 7.02e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1610, |
|
"train_speed(iter/s)": 0.17397 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 44.24657534246575, |
|
"grad_norm": 0.0006499322480522096, |
|
"learning_rate": 2.8699785632338603e-07, |
|
"loss": 6.68e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1615, |
|
"train_speed(iter/s)": 0.174356 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 44.38356164383562, |
|
"grad_norm": 0.0008433948969468474, |
|
"learning_rate": 2.7188670011767715e-07, |
|
"loss": 6.54e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1620, |
|
"train_speed(iter/s)": 0.174739 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 44.52054794520548, |
|
"grad_norm": 0.0009852251969277859, |
|
"learning_rate": 2.571745488576417e-07, |
|
"loss": 6.99e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1625, |
|
"train_speed(iter/s)": 0.175119 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 44.657534246575345, |
|
"grad_norm": 0.0006441141595132649, |
|
"learning_rate": 2.42862643968775e-07, |
|
"loss": 6.3e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1630, |
|
"train_speed(iter/s)": 0.175506 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 44.794520547945204, |
|
"grad_norm": 0.0006608326220884919, |
|
"learning_rate": 2.289521931034023e-07, |
|
"loss": 5.96e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1635, |
|
"train_speed(iter/s)": 0.175891 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 44.93150684931507, |
|
"grad_norm": 0.0005597140407189727, |
|
"learning_rate": 2.1544437003876737e-07, |
|
"loss": 5.43e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1640, |
|
"train_speed(iter/s)": 0.176268 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 45.06849315068493, |
|
"grad_norm": 0.000521883659530431, |
|
"learning_rate": 2.023403145779931e-07, |
|
"loss": 5.55e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1645, |
|
"train_speed(iter/s)": 0.176636 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 45.205479452054796, |
|
"grad_norm": 0.000552397221326828, |
|
"learning_rate": 1.8964113245390256e-07, |
|
"loss": 5.25e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1650, |
|
"train_speed(iter/s)": 0.17702 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 45.342465753424655, |
|
"grad_norm": 0.0008391111623495817, |
|
"learning_rate": 1.7734789523571958e-07, |
|
"loss": 5.77e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1655, |
|
"train_speed(iter/s)": 0.177399 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 45.47945205479452, |
|
"grad_norm": 0.0007058508927002549, |
|
"learning_rate": 1.654616402386414e-07, |
|
"loss": 5.41e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1660, |
|
"train_speed(iter/s)": 0.17778 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 45.61643835616438, |
|
"grad_norm": 0.0005362857482396066, |
|
"learning_rate": 1.5398337043631723e-07, |
|
"loss": 5.57e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1665, |
|
"train_speed(iter/s)": 0.178163 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 45.75342465753425, |
|
"grad_norm": 0.0008399708895012736, |
|
"learning_rate": 1.429140543762108e-07, |
|
"loss": 5.51e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1670, |
|
"train_speed(iter/s)": 0.178535 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 45.89041095890411, |
|
"grad_norm": 0.0004938300116918981, |
|
"learning_rate": 1.3225462609787475e-07, |
|
"loss": 4.95e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1675, |
|
"train_speed(iter/s)": 0.178916 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 46.02739726027397, |
|
"grad_norm": 0.00045353075256571174, |
|
"learning_rate": 1.220059850541356e-07, |
|
"loss": 4.52e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1680, |
|
"train_speed(iter/s)": 0.179293 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 46.16438356164384, |
|
"grad_norm": 0.000485074648167938, |
|
"learning_rate": 1.1216899603519877e-07, |
|
"loss": 4.22e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1685, |
|
"train_speed(iter/s)": 0.179668 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 46.3013698630137, |
|
"grad_norm": 0.0004754703550133854, |
|
"learning_rate": 1.0274448909567412e-07, |
|
"loss": 4.68e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1690, |
|
"train_speed(iter/s)": 0.180037 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 46.43835616438356, |
|
"grad_norm": 0.0005320140044204891, |
|
"learning_rate": 9.373325948453684e-08, |
|
"loss": 4.76e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1695, |
|
"train_speed(iter/s)": 0.180414 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 46.57534246575342, |
|
"grad_norm": 0.0006507379002869129, |
|
"learning_rate": 8.513606757802232e-08, |
|
"loss": 4.92e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1700, |
|
"train_speed(iter/s)": 0.180781 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 46.71232876712329, |
|
"grad_norm": 0.00036800041561946273, |
|
"learning_rate": 7.695363881546601e-08, |
|
"loss": 4.32e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1705, |
|
"train_speed(iter/s)": 0.181149 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 46.84931506849315, |
|
"grad_norm": 0.0005077983951196074, |
|
"learning_rate": 6.918666363808976e-08, |
|
"loss": 4.51e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1710, |
|
"train_speed(iter/s)": 0.181526 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 46.986301369863014, |
|
"grad_norm": 0.00036885106237605214, |
|
"learning_rate": 6.183579743074136e-08, |
|
"loss": 3.97e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1715, |
|
"train_speed(iter/s)": 0.181896 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 47.12328767123287, |
|
"grad_norm": 0.0005106101161800325, |
|
"learning_rate": 5.49016604665933e-08, |
|
"loss": 4.26e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1720, |
|
"train_speed(iter/s)": 0.182255 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 47.26027397260274, |
|
"grad_norm": 0.0004045426903758198, |
|
"learning_rate": 4.838483785480255e-08, |
|
"loss": 4.1e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1725, |
|
"train_speed(iter/s)": 0.182623 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 47.397260273972606, |
|
"grad_norm": 0.00039062247378751636, |
|
"learning_rate": 4.2285879491139524e-08, |
|
"loss": 4.1e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1730, |
|
"train_speed(iter/s)": 0.182986 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 47.534246575342465, |
|
"grad_norm": 0.0004385727515909821, |
|
"learning_rate": 3.660530001158591e-08, |
|
"loss": 4.19e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1735, |
|
"train_speed(iter/s)": 0.183351 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 47.67123287671233, |
|
"grad_norm": 0.0004706868203356862, |
|
"learning_rate": 3.1343578748911556e-08, |
|
"loss": 3.9e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1740, |
|
"train_speed(iter/s)": 0.183717 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 47.80821917808219, |
|
"grad_norm": 0.0005658628651872277, |
|
"learning_rate": 2.6501159692225276e-08, |
|
"loss": 3.95e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1745, |
|
"train_speed(iter/s)": 0.184078 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 47.945205479452056, |
|
"grad_norm": 0.00047224326408468187, |
|
"learning_rate": 2.2078451449511957e-08, |
|
"loss": 4.29e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1750, |
|
"train_speed(iter/s)": 0.184474 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 48.082191780821915, |
|
"grad_norm": 0.0004509200807660818, |
|
"learning_rate": 1.80758272131541e-08, |
|
"loss": 4.3e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1755, |
|
"train_speed(iter/s)": 0.184853 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 48.21917808219178, |
|
"grad_norm": 0.00045020331162959337, |
|
"learning_rate": 1.4493624728440738e-08, |
|
"loss": 4.35e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1760, |
|
"train_speed(iter/s)": 0.185233 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 48.35616438356164, |
|
"grad_norm": 0.00043748278403654695, |
|
"learning_rate": 1.1332146265068076e-08, |
|
"loss": 4.28e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1765, |
|
"train_speed(iter/s)": 0.18561 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 48.49315068493151, |
|
"grad_norm": 0.0003865604812745005, |
|
"learning_rate": 8.591658591635788e-09, |
|
"loss": 3.95e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1770, |
|
"train_speed(iter/s)": 0.185983 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 48.63013698630137, |
|
"grad_norm": 0.0005739238113164902, |
|
"learning_rate": 6.272392953132284e-09, |
|
"loss": 4.09e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1775, |
|
"train_speed(iter/s)": 0.186366 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 48.76712328767123, |
|
"grad_norm": 0.0004063249798491597, |
|
"learning_rate": 4.3745450514278e-09, |
|
"loss": 3.76e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1780, |
|
"train_speed(iter/s)": 0.186745 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 48.9041095890411, |
|
"grad_norm": 0.0003818414988927543, |
|
"learning_rate": 2.8982750287553984e-09, |
|
"loss": 3.65e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1785, |
|
"train_speed(iter/s)": 0.187115 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 49.04109589041096, |
|
"grad_norm": 0.0005809293361380696, |
|
"learning_rate": 1.843707454203115e-09, |
|
"loss": 4.11e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1790, |
|
"train_speed(iter/s)": 0.187485 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 49.178082191780824, |
|
"grad_norm": 0.00041514140320941806, |
|
"learning_rate": 1.210931313197315e-09, |
|
"loss": 3.93e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1795, |
|
"train_speed(iter/s)": 0.187855 |
|
}, |
|
{ |
|
"acc": 1.0, |
|
"epoch": 49.31506849315068, |
|
"grad_norm": 0.0005256779259070754, |
|
"learning_rate": 1e-09, |
|
"loss": 3.98e-06, |
|
"memory(GiB)": 14.16, |
|
"step": 1800, |
|
"train_speed(iter/s)": 0.188225 |
|
}, |
|
{ |
|
"epoch": 49.31506849315068, |
|
"eval_acc": 0.3744860345334727, |
|
"eval_loss": 5.818352222442627, |
|
"eval_runtime": 1948.2317, |
|
"eval_samples_per_second": 16.437, |
|
"eval_steps_per_second": 2.055, |
|
"step": 1800 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.503384077997179e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|