Training in progress, step 6000, checkpoint

Browse files

Files changed (2) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/trainer_state.json +704 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e263088b00686cadca49f80a54bc46c546951592cb1ea291618c32b90cb072a4
 size 685354800

 version https://git-lfs.github.com/spec/v1
+oid sha256:3fab20f32621c18f7e567e201b068fd4e502966a6cf5803e5436f2004a34e1fe
 size 685354800

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.847986852917009,
   "eval_steps": 500,
-  "global_step": 5900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -41308,6 +41308,706 @@
       "learning_rate": 0.0005,
       "loss": 1.3641,
       "step": 5900
     }
   ],
   "logging_steps": 1,
@@ -41322,12 +42022,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.2272956063875072e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.930156121610517,
   "eval_steps": 500,
+  "global_step": 6000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0005,
       "loss": 1.3641,
       "step": 5900
+    },
+    {
+      "epoch": 4.848808545603944,
+      "grad_norm": 0.5559499859809875,
+      "learning_rate": 0.0005,
+      "loss": 1.3687,
+      "step": 5901
+    },
+    {
+      "epoch": 4.849630238290879,
+      "grad_norm": 0.535542905330658,
+      "learning_rate": 0.0005,
+      "loss": 1.2836,
+      "step": 5902
+    },
+    {
+      "epoch": 4.8504519309778145,
+      "grad_norm": 0.5417159199714661,
+      "learning_rate": 0.0005,
+      "loss": 1.3408,
+      "step": 5903
+    },
+    {
+      "epoch": 4.851273623664749,
+      "grad_norm": 0.543630838394165,
+      "learning_rate": 0.0005,
+      "loss": 1.3996,
+      "step": 5904
+    },
+    {
+      "epoch": 4.852095316351685,
+      "grad_norm": 0.5195862054824829,
+      "learning_rate": 0.0005,
+      "loss": 1.3169,
+      "step": 5905
+    },
+    {
+      "epoch": 4.852917009038619,
+      "grad_norm": 0.5713696479797363,
+      "learning_rate": 0.0005,
+      "loss": 1.4069,
+      "step": 5906
+    },
+    {
+      "epoch": 4.853738701725555,
+      "grad_norm": 0.5813581943511963,
+      "learning_rate": 0.0005,
+      "loss": 1.3037,
+      "step": 5907
+    },
+    {
+      "epoch": 4.8545603944124895,
+      "grad_norm": 0.5637884140014648,
+      "learning_rate": 0.0005,
+      "loss": 1.3991,
+      "step": 5908
+    },
+    {
+      "epoch": 4.855382087099425,
+      "grad_norm": 0.5755516290664673,
+      "learning_rate": 0.0005,
+      "loss": 1.4464,
+      "step": 5909
+    },
+    {
+      "epoch": 4.85620377978636,
+      "grad_norm": 0.5279106497764587,
+      "learning_rate": 0.0005,
+      "loss": 1.3729,
+      "step": 5910
+    },
+    {
+      "epoch": 4.857025472473295,
+      "grad_norm": 0.5568866729736328,
+      "learning_rate": 0.0005,
+      "loss": 1.3846,
+      "step": 5911
+    },
+    {
+      "epoch": 4.85784716516023,
+      "grad_norm": 0.5258705615997314,
+      "learning_rate": 0.0005,
+      "loss": 1.3301,
+      "step": 5912
+    },
+    {
+      "epoch": 4.8586688578471655,
+      "grad_norm": 0.5256040692329407,
+      "learning_rate": 0.0005,
+      "loss": 1.3543,
+      "step": 5913
+    },
+    {
+      "epoch": 4.8594905505341,
+      "grad_norm": 0.5508636236190796,
+      "learning_rate": 0.0005,
+      "loss": 1.3168,
+      "step": 5914
+    },
+    {
+      "epoch": 4.860312243221035,
+      "grad_norm": 0.5376938581466675,
+      "learning_rate": 0.0005,
+      "loss": 1.3618,
+      "step": 5915
+    },
+    {
+      "epoch": 4.86113393590797,
+      "grad_norm": 0.513633668422699,
+      "learning_rate": 0.0005,
+      "loss": 1.279,
+      "step": 5916
+    },
+    {
+      "epoch": 4.861955628594906,
+      "grad_norm": 0.5526177287101746,
+      "learning_rate": 0.0005,
+      "loss": 1.2778,
+      "step": 5917
+    },
+    {
+      "epoch": 4.862777321281841,
+      "grad_norm": 0.5580543279647827,
+      "learning_rate": 0.0005,
+      "loss": 1.3931,
+      "step": 5918
+    },
+    {
+      "epoch": 4.863599013968775,
+      "grad_norm": 0.5640444159507751,
+      "learning_rate": 0.0005,
+      "loss": 1.4172,
+      "step": 5919
+    },
+    {
+      "epoch": 4.864420706655711,
+      "grad_norm": 0.5366266369819641,
+      "learning_rate": 0.0005,
+      "loss": 1.2472,
+      "step": 5920
+    },
+    {
+      "epoch": 4.8652423993426455,
+      "grad_norm": 0.5291760563850403,
+      "learning_rate": 0.0005,
+      "loss": 1.2402,
+      "step": 5921
+    },
+    {
+      "epoch": 4.866064092029581,
+      "grad_norm": 0.5365450978279114,
+      "learning_rate": 0.0005,
+      "loss": 1.2807,
+      "step": 5922
+    },
+    {
+      "epoch": 4.866885784716516,
+      "grad_norm": 0.5284454822540283,
+      "learning_rate": 0.0005,
+      "loss": 1.4549,
+      "step": 5923
+    },
+    {
+      "epoch": 4.867707477403451,
+      "grad_norm": 0.541029155254364,
+      "learning_rate": 0.0005,
+      "loss": 1.3154,
+      "step": 5924
+    },
+    {
+      "epoch": 4.868529170090386,
+      "grad_norm": 0.5600323677062988,
+      "learning_rate": 0.0005,
+      "loss": 1.3268,
+      "step": 5925
+    },
+    {
+      "epoch": 4.869350862777321,
+      "grad_norm": 0.5529478788375854,
+      "learning_rate": 0.0005,
+      "loss": 1.3629,
+      "step": 5926
+    },
+    {
+      "epoch": 4.870172555464256,
+      "grad_norm": 0.5471394658088684,
+      "learning_rate": 0.0005,
+      "loss": 1.2708,
+      "step": 5927
+    },
+    {
+      "epoch": 4.870994248151192,
+      "grad_norm": 0.5359401702880859,
+      "learning_rate": 0.0005,
+      "loss": 1.3363,
+      "step": 5928
+    },
+    {
+      "epoch": 4.871815940838126,
+      "grad_norm": 0.5148870348930359,
+      "learning_rate": 0.0005,
+      "loss": 1.2534,
+      "step": 5929
+    },
+    {
+      "epoch": 4.872637633525062,
+      "grad_norm": 0.5293238759040833,
+      "learning_rate": 0.0005,
+      "loss": 1.3152,
+      "step": 5930
+    },
+    {
+      "epoch": 4.8734593262119965,
+      "grad_norm": 0.5174310207366943,
+      "learning_rate": 0.0005,
+      "loss": 1.3534,
+      "step": 5931
+    },
+    {
+      "epoch": 4.874281018898932,
+      "grad_norm": 0.5341978073120117,
+      "learning_rate": 0.0005,
+      "loss": 1.3888,
+      "step": 5932
+    },
+    {
+      "epoch": 4.875102711585867,
+      "grad_norm": 0.5729701519012451,
+      "learning_rate": 0.0005,
+      "loss": 1.4173,
+      "step": 5933
+    },
+    {
+      "epoch": 4.875924404272802,
+      "grad_norm": 0.6612470746040344,
+      "learning_rate": 0.0005,
+      "loss": 1.3927,
+      "step": 5934
+    },
+    {
+      "epoch": 4.876746096959737,
+      "grad_norm": 0.5212023258209229,
+      "learning_rate": 0.0005,
+      "loss": 1.3259,
+      "step": 5935
+    },
+    {
+      "epoch": 4.8775677896466725,
+      "grad_norm": 0.5570096969604492,
+      "learning_rate": 0.0005,
+      "loss": 1.2884,
+      "step": 5936
+    },
+    {
+      "epoch": 4.878389482333607,
+      "grad_norm": 0.5613424181938171,
+      "learning_rate": 0.0005,
+      "loss": 1.2463,
+      "step": 5937
+    },
+    {
+      "epoch": 4.879211175020543,
+      "grad_norm": 0.5613168478012085,
+      "learning_rate": 0.0005,
+      "loss": 1.3115,
+      "step": 5938
+    },
+    {
+      "epoch": 4.880032867707477,
+      "grad_norm": 0.535275936126709,
+      "learning_rate": 0.0005,
+      "loss": 1.3002,
+      "step": 5939
+    },
+    {
+      "epoch": 4.880854560394413,
+      "grad_norm": 0.5672900676727295,
+      "learning_rate": 0.0005,
+      "loss": 1.3669,
+      "step": 5940
+    },
+    {
+      "epoch": 4.881676253081348,
+      "grad_norm": 0.5469388961791992,
+      "learning_rate": 0.0005,
+      "loss": 1.3652,
+      "step": 5941
+    },
+    {
+      "epoch": 4.882497945768282,
+      "grad_norm": 0.529625415802002,
+      "learning_rate": 0.0005,
+      "loss": 1.4422,
+      "step": 5942
+    },
+    {
+      "epoch": 4.883319638455218,
+      "grad_norm": 0.5889802575111389,
+      "learning_rate": 0.0005,
+      "loss": 1.3788,
+      "step": 5943
+    },
+    {
+      "epoch": 4.884141331142153,
+      "grad_norm": 0.5382450819015503,
+      "learning_rate": 0.0005,
+      "loss": 1.3318,
+      "step": 5944
+    },
+    {
+      "epoch": 4.884963023829088,
+      "grad_norm": 0.5242584347724915,
+      "learning_rate": 0.0005,
+      "loss": 1.268,
+      "step": 5945
+    },
+    {
+      "epoch": 4.885784716516023,
+      "grad_norm": 0.5483070611953735,
+      "learning_rate": 0.0005,
+      "loss": 1.4016,
+      "step": 5946
+    },
+    {
+      "epoch": 4.886606409202958,
+      "grad_norm": 0.5779204368591309,
+      "learning_rate": 0.0005,
+      "loss": 1.327,
+      "step": 5947
+    },
+    {
+      "epoch": 4.887428101889893,
+      "grad_norm": 0.5551186800003052,
+      "learning_rate": 0.0005,
+      "loss": 1.4228,
+      "step": 5948
+    },
+    {
+      "epoch": 4.888249794576828,
+      "grad_norm": 0.5995270609855652,
+      "learning_rate": 0.0005,
+      "loss": 1.3831,
+      "step": 5949
+    },
+    {
+      "epoch": 4.889071487263763,
+      "grad_norm": 0.5424328446388245,
+      "learning_rate": 0.0005,
+      "loss": 1.3815,
+      "step": 5950
+    },
+    {
+      "epoch": 4.889893179950699,
+      "grad_norm": 0.5349864959716797,
+      "learning_rate": 0.0005,
+      "loss": 1.2749,
+      "step": 5951
+    },
+    {
+      "epoch": 4.890714872637633,
+      "grad_norm": 0.5398481488227844,
+      "learning_rate": 0.0005,
+      "loss": 1.3635,
+      "step": 5952
+    },
+    {
+      "epoch": 4.891536565324569,
+      "grad_norm": 0.5872131586074829,
+      "learning_rate": 0.0005,
+      "loss": 1.3778,
+      "step": 5953
+    },
+    {
+      "epoch": 4.8923582580115035,
+      "grad_norm": 0.5435046553611755,
+      "learning_rate": 0.0005,
+      "loss": 1.3834,
+      "step": 5954
+    },
+    {
+      "epoch": 4.893179950698439,
+      "grad_norm": 0.551789402961731,
+      "learning_rate": 0.0005,
+      "loss": 1.3315,
+      "step": 5955
+    },
+    {
+      "epoch": 4.894001643385374,
+      "grad_norm": 0.5277841687202454,
+      "learning_rate": 0.0005,
+      "loss": 1.4072,
+      "step": 5956
+    },
+    {
+      "epoch": 4.894823336072309,
+      "grad_norm": 0.5183998346328735,
+      "learning_rate": 0.0005,
+      "loss": 1.2465,
+      "step": 5957
+    },
+    {
+      "epoch": 4.895645028759244,
+      "grad_norm": 0.561173677444458,
+      "learning_rate": 0.0005,
+      "loss": 1.4095,
+      "step": 5958
+    },
+    {
+      "epoch": 4.8964667214461794,
+      "grad_norm": 0.5499217510223389,
+      "learning_rate": 0.0005,
+      "loss": 1.312,
+      "step": 5959
+    },
+    {
+      "epoch": 4.897288414133114,
+      "grad_norm": 0.534344494342804,
+      "learning_rate": 0.0005,
+      "loss": 1.326,
+      "step": 5960
+    },
+    {
+      "epoch": 4.89811010682005,
+      "grad_norm": 0.5524152517318726,
+      "learning_rate": 0.0005,
+      "loss": 1.3709,
+      "step": 5961
+    },
+    {
+      "epoch": 4.898931799506984,
+      "grad_norm": 0.5235154032707214,
+      "learning_rate": 0.0005,
+      "loss": 1.212,
+      "step": 5962
+    },
+    {
+      "epoch": 4.89975349219392,
+      "grad_norm": 0.5188632607460022,
+      "learning_rate": 0.0005,
+      "loss": 1.2143,
+      "step": 5963
+    },
+    {
+      "epoch": 4.9005751848808545,
+      "grad_norm": 0.5360555648803711,
+      "learning_rate": 0.0005,
+      "loss": 1.4402,
+      "step": 5964
+    },
+    {
+      "epoch": 4.901396877567789,
+      "grad_norm": 0.5215834379196167,
+      "learning_rate": 0.0005,
+      "loss": 1.2509,
+      "step": 5965
+    },
+    {
+      "epoch": 4.902218570254725,
+      "grad_norm": 0.526918888092041,
+      "learning_rate": 0.0005,
+      "loss": 1.2802,
+      "step": 5966
+    },
+    {
+      "epoch": 4.90304026294166,
+      "grad_norm": 0.5387428402900696,
+      "learning_rate": 0.0005,
+      "loss": 1.294,
+      "step": 5967
+    },
+    {
+      "epoch": 4.903861955628595,
+      "grad_norm": 0.5555245280265808,
+      "learning_rate": 0.0005,
+      "loss": 1.4213,
+      "step": 5968
+    },
+    {
+      "epoch": 4.90468364831553,
+      "grad_norm": 0.5404963493347168,
+      "learning_rate": 0.0005,
+      "loss": 1.227,
+      "step": 5969
+    },
+    {
+      "epoch": 4.905505341002465,
+      "grad_norm": 0.5623646974563599,
+      "learning_rate": 0.0005,
+      "loss": 1.4319,
+      "step": 5970
+    },
+    {
+      "epoch": 4.9063270336894,
+      "grad_norm": 0.5389553904533386,
+      "learning_rate": 0.0005,
+      "loss": 1.297,
+      "step": 5971
+    },
+    {
+      "epoch": 4.907148726376335,
+      "grad_norm": 0.5477744936943054,
+      "learning_rate": 0.0005,
+      "loss": 1.3659,
+      "step": 5972
+    },
+    {
+      "epoch": 4.90797041906327,
+      "grad_norm": 0.5301917791366577,
+      "learning_rate": 0.0005,
+      "loss": 1.2658,
+      "step": 5973
+    },
+    {
+      "epoch": 4.908792111750206,
+      "grad_norm": 0.5291617512702942,
+      "learning_rate": 0.0005,
+      "loss": 1.3592,
+      "step": 5974
+    },
+    {
+      "epoch": 4.90961380443714,
+      "grad_norm": 0.5419930815696716,
+      "learning_rate": 0.0005,
+      "loss": 1.3125,
+      "step": 5975
+    },
+    {
+      "epoch": 4.910435497124076,
+      "grad_norm": 0.5221468210220337,
+      "learning_rate": 0.0005,
+      "loss": 1.3643,
+      "step": 5976
+    },
+    {
+      "epoch": 4.9112571898110104,
+      "grad_norm": 0.5470991134643555,
+      "learning_rate": 0.0005,
+      "loss": 1.3461,
+      "step": 5977
+    },
+    {
+      "epoch": 4.912078882497946,
+      "grad_norm": 0.5354620814323425,
+      "learning_rate": 0.0005,
+      "loss": 1.2681,
+      "step": 5978
+    },
+    {
+      "epoch": 4.912900575184881,
+      "grad_norm": 0.5851417779922485,
+      "learning_rate": 0.0005,
+      "loss": 1.3237,
+      "step": 5979
+    },
+    {
+      "epoch": 4.913722267871816,
+      "grad_norm": 0.5492926239967346,
+      "learning_rate": 0.0005,
+      "loss": 1.417,
+      "step": 5980
+    },
+    {
+      "epoch": 4.914543960558751,
+      "grad_norm": 0.5446394681930542,
+      "learning_rate": 0.0005,
+      "loss": 1.275,
+      "step": 5981
+    },
+    {
+      "epoch": 4.915365653245686,
+      "grad_norm": 0.5484851002693176,
+      "learning_rate": 0.0005,
+      "loss": 1.3842,
+      "step": 5982
+    },
+    {
+      "epoch": 4.916187345932621,
+      "grad_norm": 0.5432127714157104,
+      "learning_rate": 0.0005,
+      "loss": 1.4247,
+      "step": 5983
+    },
+    {
+      "epoch": 4.917009038619557,
+      "grad_norm": 0.5324352979660034,
+      "learning_rate": 0.0005,
+      "loss": 1.3269,
+      "step": 5984
+    },
+    {
+      "epoch": 4.917830731306491,
+      "grad_norm": 0.5508584976196289,
+      "learning_rate": 0.0005,
+      "loss": 1.1933,
+      "step": 5985
+    },
+    {
+      "epoch": 4.918652423993427,
+      "grad_norm": 0.5484975576400757,
+      "learning_rate": 0.0005,
+      "loss": 1.2294,
+      "step": 5986
+    },
+    {
+      "epoch": 4.9194741166803615,
+      "grad_norm": 0.5371730923652649,
+      "learning_rate": 0.0005,
+      "loss": 1.293,
+      "step": 5987
+    },
+    {
+      "epoch": 4.920295809367296,
+      "grad_norm": 0.5321599841117859,
+      "learning_rate": 0.0005,
+      "loss": 1.3553,
+      "step": 5988
+    },
+    {
+      "epoch": 4.921117502054232,
+      "grad_norm": 0.5347539186477661,
+      "learning_rate": 0.0005,
+      "loss": 1.3904,
+      "step": 5989
+    },
+    {
+      "epoch": 4.921939194741167,
+      "grad_norm": 0.5540315508842468,
+      "learning_rate": 0.0005,
+      "loss": 1.3358,
+      "step": 5990
+    },
+    {
+      "epoch": 4.922760887428102,
+      "grad_norm": 0.543171226978302,
+      "learning_rate": 0.0005,
+      "loss": 1.3914,
+      "step": 5991
+    },
+    {
+      "epoch": 4.923582580115037,
+      "grad_norm": 0.5221793055534363,
+      "learning_rate": 0.0005,
+      "loss": 1.3852,
+      "step": 5992
+    },
+    {
+      "epoch": 4.924404272801972,
+      "grad_norm": 0.5648449659347534,
+      "learning_rate": 0.0005,
+      "loss": 1.3695,
+      "step": 5993
+    },
+    {
+      "epoch": 4.925225965488907,
+      "grad_norm": 0.5360020995140076,
+      "learning_rate": 0.0005,
+      "loss": 1.296,
+      "step": 5994
+    },
+    {
+      "epoch": 4.926047658175842,
+      "grad_norm": 0.5223022699356079,
+      "learning_rate": 0.0005,
+      "loss": 1.2654,
+      "step": 5995
+    },
+    {
+      "epoch": 4.926869350862777,
+      "grad_norm": 0.5244916677474976,
+      "learning_rate": 0.0005,
+      "loss": 1.2287,
+      "step": 5996
+    },
+    {
+      "epoch": 4.9276910435497125,
+      "grad_norm": 0.5709188580513,
+      "learning_rate": 0.0005,
+      "loss": 1.414,
+      "step": 5997
+    },
+    {
+      "epoch": 4.928512736236647,
+      "grad_norm": 0.5473321080207825,
+      "learning_rate": 0.0005,
+      "loss": 1.3689,
+      "step": 5998
+    },
+    {
+      "epoch": 4.929334428923583,
+      "grad_norm": 0.5459017157554626,
+      "learning_rate": 0.0005,
+      "loss": 1.2876,
+      "step": 5999
+    },
+    {
+      "epoch": 4.930156121610517,
+      "grad_norm": 0.5736708641052246,
+      "learning_rate": 0.0005,
+      "loss": 1.3623,
+      "step": 6000
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.2481056037404672e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null