Training in progress, step 1000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +712 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1cab7e9a8a987404549f49afdd2936be09b05aa346de6bd994dcd584e6c7c99
 size 35237104

 version https://git-lfs.github.com/spec/v1
+oid sha256:6595bf55f02395c5cbd4666c9cb95f98302cd9ef023ebeb628cdc4cf4bf4caee
 size 35237104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3af5b950a75d8130ae2831e845bdb2d9c0f2568b854448eb1a41e6ca5caf699
 size 18810356

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4e6e0de1bcf5bda93ea3307cefce39a83cb8a6899981f858868c87622dba1fd
 size 18810356

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31fd0ef2088134d693702f76cf93ec3d3456380164b3e8cc27330c341fd530f6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7b6fd1c9514da6d7da36a4b4159526160fde1963c15f9d5ba39d98a761f42c0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0428512ada8c2471b2f37ecbdd4efa5f13e3ba0e777fddbfec0396eebc36c01a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4077036d99500a708f700f75da24d51b5300e184ad35fda49dc5a4df5596cca2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.32617559119325906,
   "eval_steps": 250,
-  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6339,6 +6339,714 @@
       "learning_rate": 2.7091379149682685e-06,
       "loss": 2.5194,
       "step": 900
     }
   ],
   "logging_steps": 1,
@@ -6353,12 +7061,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.4885887074304e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.3624173235480656,
   "eval_steps": 250,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.7091379149682685e-06,
       "loss": 2.5194,
       "step": 900
+    },
+    {
+      "epoch": 0.3265380085168071,
+      "grad_norm": 6.8190178871154785,
+      "learning_rate": 2.6557085182532582e-06,
+      "loss": 2.4827,
+      "step": 901
+    },
+    {
+      "epoch": 0.32690042584035517,
+      "grad_norm": 8.026456832885742,
+      "learning_rate": 2.602796871124663e-06,
+      "loss": 3.6636,
+      "step": 902
+    },
+    {
+      "epoch": 0.32726284316390325,
+      "grad_norm": 6.956340789794922,
+      "learning_rate": 2.5504035522157854e-06,
+      "loss": 2.8476,
+      "step": 903
+    },
+    {
+      "epoch": 0.3276252604874513,
+      "grad_norm": 5.77375602722168,
+      "learning_rate": 2.4985291344915674e-06,
+      "loss": 2.6426,
+      "step": 904
+    },
+    {
+      "epoch": 0.32798767781099936,
+      "grad_norm": 6.200889587402344,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 3.4329,
+      "step": 905
+    },
+    {
+      "epoch": 0.32835009513454744,
+      "grad_norm": 6.780580043792725,
+      "learning_rate": 2.3963392660775575e-06,
+      "loss": 3.2439,
+      "step": 906
+    },
+    {
+      "epoch": 0.3287125124580955,
+      "grad_norm": 6.5239644050598145,
+      "learning_rate": 2.3460249329197824e-06,
+      "loss": 2.5931,
+      "step": 907
+    },
+    {
+      "epoch": 0.32907492978164354,
+      "grad_norm": 7.130517482757568,
+      "learning_rate": 2.296231735998511e-06,
+      "loss": 2.6862,
+      "step": 908
+    },
+    {
+      "epoch": 0.3294373471051916,
+      "grad_norm": 6.246675968170166,
+      "learning_rate": 2.2469602198441573e-06,
+      "loss": 2.8948,
+      "step": 909
+    },
+    {
+      "epoch": 0.3297997644287397,
+      "grad_norm": 7.630551338195801,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 2.615,
+      "step": 910
+    },
+    {
+      "epoch": 0.3301621817522878,
+      "grad_norm": 4.334846019744873,
+      "learning_rate": 2.149984379426906e-06,
+      "loss": 2.2025,
+      "step": 911
+    },
+    {
+      "epoch": 0.3305245990758358,
+      "grad_norm": 6.460603713989258,
+      "learning_rate": 2.102281115676258e-06,
+      "loss": 2.6511,
+      "step": 912
+    },
+    {
+      "epoch": 0.3308870163993839,
+      "grad_norm": 7.5470991134643555,
+      "learning_rate": 2.0551016537054493e-06,
+      "loss": 2.6114,
+      "step": 913
+    },
+    {
+      "epoch": 0.331249433722932,
+      "grad_norm": 6.667960166931152,
+      "learning_rate": 2.008446509461498e-06,
+      "loss": 2.8898,
+      "step": 914
+    },
+    {
+      "epoch": 0.33161185104648,
+      "grad_norm": 7.1564836502075195,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 2.7416,
+      "step": 915
+    },
+    {
+      "epoch": 0.3319742683700281,
+      "grad_norm": 6.149023056030273,
+      "learning_rate": 1.91671120926748e-06,
+      "loss": 2.5522,
+      "step": 916
+    },
+    {
+      "epoch": 0.33233668569357616,
+      "grad_norm": 8.589316368103027,
+      "learning_rate": 1.8716320565199618e-06,
+      "loss": 1.8536,
+      "step": 917
+    },
+    {
+      "epoch": 0.33269910301712424,
+      "grad_norm": 5.793923377990723,
+      "learning_rate": 1.8270792278934302e-06,
+      "loss": 2.9145,
+      "step": 918
+    },
+    {
+      "epoch": 0.33306152034067227,
+      "grad_norm": 6.867883682250977,
+      "learning_rate": 1.7830532106104747e-06,
+      "loss": 3.1564,
+      "step": 919
+    },
+    {
+      "epoch": 0.33342393766422035,
+      "grad_norm": 5.381682872772217,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 2.5914,
+      "step": 920
+    },
+    {
+      "epoch": 0.33378635498776843,
+      "grad_norm": 6.665421009063721,
+      "learning_rate": 1.696583530154794e-06,
+      "loss": 2.2271,
+      "step": 921
+    },
+    {
+      "epoch": 0.33414877231131646,
+      "grad_norm": 9.370551109313965,
+      "learning_rate": 1.6541408126006463e-06,
+      "loss": 2.8091,
+      "step": 922
+    },
+    {
+      "epoch": 0.33451118963486454,
+      "grad_norm": 7.959995746612549,
+      "learning_rate": 1.6122267976168781e-06,
+      "loss": 3.6784,
+      "step": 923
+    },
+    {
+      "epoch": 0.3348736069584126,
+      "grad_norm": 7.0754475593566895,
+      "learning_rate": 1.5708419435684462e-06,
+      "loss": 2.9994,
+      "step": 924
+    },
+    {
+      "epoch": 0.3352360242819607,
+      "grad_norm": 6.9350361824035645,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 1.8455,
+      "step": 925
+    },
+    {
+      "epoch": 0.3355984416055087,
+      "grad_norm": 6.33629846572876,
+      "learning_rate": 1.4896615227983468e-06,
+      "loss": 2.2597,
+      "step": 926
+    },
+    {
+      "epoch": 0.3359608589290568,
+      "grad_norm": 6.027073383331299,
+      "learning_rate": 1.4498668438527597e-06,
+      "loss": 2.548,
+      "step": 927
+    },
+    {
+      "epoch": 0.3363232762526049,
+      "grad_norm": 6.1353044509887695,
+      "learning_rate": 1.4106031013849496e-06,
+      "loss": 2.6825,
+      "step": 928
+    },
+    {
+      "epoch": 0.33668569357615297,
+      "grad_norm": 4.776915550231934,
+      "learning_rate": 1.3718707247769135e-06,
+      "loss": 3.3943,
+      "step": 929
+    },
+    {
+      "epoch": 0.337048110899701,
+      "grad_norm": 7.03324031829834,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 2.7431,
+      "step": 930
+    },
+    {
+      "epoch": 0.3374105282232491,
+      "grad_norm": 7.59218692779541,
+      "learning_rate": 1.2960017576088446e-06,
+      "loss": 2.9679,
+      "step": 931
+    },
+    {
+      "epoch": 0.33777294554679715,
+      "grad_norm": 6.613968372344971,
+      "learning_rate": 1.2588659967397e-06,
+      "loss": 2.2011,
+      "step": 932
+    },
+    {
+      "epoch": 0.3381353628703452,
+      "grad_norm": 5.920099258422852,
+      "learning_rate": 1.222263261102985e-06,
+      "loss": 2.3399,
+      "step": 933
+    },
+    {
+      "epoch": 0.33849778019389326,
+      "grad_norm": 9.415234565734863,
+      "learning_rate": 1.1861939509803687e-06,
+      "loss": 2.8695,
+      "step": 934
+    },
+    {
+      "epoch": 0.33886019751744134,
+      "grad_norm": 5.54183292388916,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 2.8506,
+      "step": 935
+    },
+    {
+      "epoch": 0.3392226148409894,
+      "grad_norm": 6.957115650177002,
+      "learning_rate": 1.1156571792324211e-06,
+      "loss": 2.3371,
+      "step": 936
+    },
+    {
+      "epoch": 0.33958503216453745,
+      "grad_norm": 4.749189376831055,
+      "learning_rate": 1.0811904889859336e-06,
+      "loss": 2.3668,
+      "step": 937
+    },
+    {
+      "epoch": 0.33994744948808553,
+      "grad_norm": 7.631181716918945,
+      "learning_rate": 1.0472587670027678e-06,
+      "loss": 3.3614,
+      "step": 938
+    },
+    {
+      "epoch": 0.3403098668116336,
+      "grad_norm": 5.728475570678711,
+      "learning_rate": 1.0138623843548078e-06,
+      "loss": 2.0496,
+      "step": 939
+    },
+    {
+      "epoch": 0.34067228413518164,
+      "grad_norm": 6.60684871673584,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 2.7532,
+      "step": 940
+    },
+    {
+      "epoch": 0.3410347014587297,
+      "grad_norm": 6.326074123382568,
+      "learning_rate": 9.486770920760668e-07,
+      "loss": 2.5077,
+      "step": 941
+    },
+    {
+      "epoch": 0.3413971187822778,
+      "grad_norm": 7.019238471984863,
+      "learning_rate": 9.168888953011989e-07,
+      "loss": 2.0309,
+      "step": 942
+    },
+    {
+      "epoch": 0.3417595361058259,
+      "grad_norm": 8.484062194824219,
+      "learning_rate": 8.856374635655695e-07,
+      "loss": 3.3332,
+      "step": 943
+    },
+    {
+      "epoch": 0.3421219534293739,
+      "grad_norm": 7.0083842277526855,
+      "learning_rate": 8.549231386298151e-07,
+      "loss": 2.9812,
+      "step": 944
+    },
+    {
+      "epoch": 0.342484370752922,
+      "grad_norm": 6.267253875732422,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 3.0255,
+      "step": 945
+    },
+    {
+      "epoch": 0.34284678807647007,
+      "grad_norm": 8.533939361572266,
+      "learning_rate": 7.951071468283167e-07,
+      "loss": 3.2914,
+      "step": 946
+    },
+    {
+      "epoch": 0.34320920540001815,
+      "grad_norm": 6.0240373611450195,
+      "learning_rate": 7.66006134100672e-07,
+      "loss": 3.0139,
+      "step": 947
+    },
+    {
+      "epoch": 0.3435716227235662,
+      "grad_norm": 6.486016750335693,
+      "learning_rate": 7.374435364419674e-07,
+      "loss": 2.9023,
+      "step": 948
+    },
+    {
+      "epoch": 0.34393404004711425,
+      "grad_norm": 8.553296089172363,
+      "learning_rate": 7.094196662081831e-07,
+      "loss": 2.9581,
+      "step": 949
+    },
+    {
+      "epoch": 0.34429645737066233,
+      "grad_norm": 6.792461395263672,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 3.0948,
+      "step": 950
+    },
+    {
+      "epoch": 0.34465887469421036,
+      "grad_norm": 6.591213703155518,
+      "learning_rate": 6.549893279788277e-07,
+      "loss": 2.7468,
+      "step": 951
+    },
+    {
+      "epoch": 0.34502129201775844,
+      "grad_norm": 7.372961044311523,
+      "learning_rate": 6.285834552247128e-07,
+      "loss": 2.6526,
+      "step": 952
+    },
+    {
+      "epoch": 0.3453837093413065,
+      "grad_norm": 5.669467926025391,
+      "learning_rate": 6.027175003719354e-07,
+      "loss": 2.7276,
+      "step": 953
+    },
+    {
+      "epoch": 0.3457461266648546,
+      "grad_norm": 6.340987682342529,
+      "learning_rate": 5.773917462864264e-07,
+      "loss": 2.5295,
+      "step": 954
+    },
+    {
+      "epoch": 0.34610854398840263,
+      "grad_norm": 6.276302814483643,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 3.6131,
+      "step": 955
+    },
+    {
+      "epoch": 0.3464709613119507,
+      "grad_norm": 5.535942077636719,
+      "learning_rate": 5.283619423401998e-07,
+      "loss": 2.4283,
+      "step": 956
+    },
+    {
+      "epoch": 0.3468333786354988,
+      "grad_norm": 7.801699638366699,
+      "learning_rate": 5.046584286615697e-07,
+      "loss": 2.8357,
+      "step": 957
+    },
+    {
+      "epoch": 0.3471957959590468,
+      "grad_norm": 8.31689167022705,
+      "learning_rate": 4.814961881085045e-07,
+      "loss": 3.3685,
+      "step": 958
+    },
+    {
+      "epoch": 0.3475582132825949,
+      "grad_norm": 6.9537577629089355,
+      "learning_rate": 4.5887547397955864e-07,
+      "loss": 3.3186,
+      "step": 959
+    },
+    {
+      "epoch": 0.347920630606143,
+      "grad_norm": 6.808418273925781,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 2.6928,
+      "step": 960
+    },
+    {
+      "epoch": 0.34828304792969106,
+      "grad_norm": 5.8220086097717285,
+      "learning_rate": 4.1525960857530243e-07,
+      "loss": 2.7677,
+      "step": 961
+    },
+    {
+      "epoch": 0.3486454652532391,
+      "grad_norm": 5.832417011260986,
+      "learning_rate": 3.9426493427611177e-07,
+      "loss": 2.819,
+      "step": 962
+    },
+    {
+      "epoch": 0.34900788257678717,
+      "grad_norm": 7.183162689208984,
+      "learning_rate": 3.738127403480507e-07,
+      "loss": 2.7156,
+      "step": 963
+    },
+    {
+      "epoch": 0.34937029990033525,
+      "grad_norm": 7.9874677658081055,
+      "learning_rate": 3.5390325045304706e-07,
+      "loss": 3.0892,
+      "step": 964
+    },
+    {
+      "epoch": 0.3497327172238833,
+      "grad_norm": 7.457160949707031,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 3.4502,
+      "step": 965
+    },
+    {
+      "epoch": 0.35009513454743135,
+      "grad_norm": 7.874043941497803,
+      "learning_rate": 3.157132477328628e-07,
+      "loss": 2.7757,
+      "step": 966
+    },
+    {
+      "epoch": 0.35045755187097943,
+      "grad_norm": 7.999273777008057,
+      "learning_rate": 2.9743315254743833e-07,
+      "loss": 3.0866,
+      "step": 967
+    },
+    {
+      "epoch": 0.3508199691945275,
+      "grad_norm": 8.767258644104004,
+      "learning_rate": 2.796965966699927e-07,
+      "loss": 2.5532,
+      "step": 968
+    },
+    {
+      "epoch": 0.35118238651807554,
+      "grad_norm": 6.00533390045166,
+      "learning_rate": 2.625037740646763e-07,
+      "loss": 2.9059,
+      "step": 969
+    },
+    {
+      "epoch": 0.3515448038416236,
+      "grad_norm": 5.833471298217773,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 3.3388,
+      "step": 970
+    },
+    {
+      "epoch": 0.3519072211651717,
+      "grad_norm": 6.837587833404541,
+      "learning_rate": 2.2975007479397738e-07,
+      "loss": 2.9694,
+      "step": 971
+    },
+    {
+      "epoch": 0.3522696384887198,
+      "grad_norm": 7.076719760894775,
+      "learning_rate": 2.1418955631781202e-07,
+      "loss": 2.2279,
+      "step": 972
+    },
+    {
+      "epoch": 0.3526320558122678,
+      "grad_norm": 5.842435359954834,
+      "learning_rate": 1.9917348748826335e-07,
+      "loss": 2.7922,
+      "step": 973
+    },
+    {
+      "epoch": 0.3529944731358159,
+      "grad_norm": 8.36257266998291,
+      "learning_rate": 1.847020325186577e-07,
+      "loss": 3.7132,
+      "step": 974
+    },
+    {
+      "epoch": 0.35335689045936397,
+      "grad_norm": 7.264481067657471,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 2.8945,
+      "step": 975
+    },
+    {
+      "epoch": 0.353719307782912,
+      "grad_norm": 5.050163269042969,
+      "learning_rate": 1.5739359123178587e-07,
+      "loss": 2.8021,
+      "step": 976
+    },
+    {
+      "epoch": 0.3540817251064601,
+      "grad_norm": 6.886010646820068,
+      "learning_rate": 1.4455690355525964e-07,
+      "loss": 3.1517,
+      "step": 977
+    },
+    {
+      "epoch": 0.35444414243000816,
+      "grad_norm": 5.721440315246582,
+      "learning_rate": 1.3226542701689215e-07,
+      "loss": 2.6086,
+      "step": 978
+    },
+    {
+      "epoch": 0.35480655975355624,
+      "grad_norm": 5.630771160125732,
+      "learning_rate": 1.2051929603428825e-07,
+      "loss": 2.9555,
+      "step": 979
+    },
+    {
+      "epoch": 0.35516897707710426,
+      "grad_norm": 6.419443130493164,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 2.4218,
+      "step": 980
+    },
+    {
+      "epoch": 0.35553139440065235,
+      "grad_norm": 8.198994636535645,
+      "learning_rate": 9.866357858642205e-08,
+      "loss": 2.5117,
+      "step": 981
+    },
+    {
+      "epoch": 0.3558938117242004,
+      "grad_norm": 6.984527587890625,
+      "learning_rate": 8.855423113177664e-08,
+      "loss": 2.5872,
+      "step": 982
+    },
+    {
+      "epoch": 0.3562562290477485,
+      "grad_norm": 6.198144435882568,
+      "learning_rate": 7.899070725153613e-08,
+      "loss": 2.5865,
+      "step": 983
+    },
+    {
+      "epoch": 0.35661864637129653,
+      "grad_norm": 5.657758712768555,
+      "learning_rate": 6.997311153086883e-08,
+      "loss": 2.7767,
+      "step": 984
+    },
+    {
+      "epoch": 0.3569810636948446,
+      "grad_norm": 8.617463111877441,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 2.5342,
+      "step": 985
+    },
+    {
+      "epoch": 0.3573434810183927,
+      "grad_norm": 6.265313625335693,
+      "learning_rate": 5.3576093056922906e-08,
+      "loss": 1.9735,
+      "step": 986
+    },
+    {
+      "epoch": 0.3577058983419407,
+      "grad_norm": 4.9574875831604,
+      "learning_rate": 4.619684961881254e-08,
+      "loss": 1.8526,
+      "step": 987
+    },
+    {
+      "epoch": 0.3580683156654888,
+      "grad_norm": 6.38330602645874,
+      "learning_rate": 3.936389296864129e-08,
+      "loss": 3.3058,
+      "step": 988
+    },
+    {
+      "epoch": 0.3584307329890369,
+      "grad_norm": 6.539586067199707,
+      "learning_rate": 3.3077297830541584e-08,
+      "loss": 3.1111,
+      "step": 989
+    },
+    {
+      "epoch": 0.35879315031258496,
+      "grad_norm": 6.351942539215088,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 2.7353,
+      "step": 990
+    },
+    {
+      "epoch": 0.359155567636133,
+      "grad_norm": 8.30229377746582,
+      "learning_rate": 2.214346111164556e-08,
+      "loss": 2.9746,
+      "step": 991
+    },
+    {
+      "epoch": 0.35951798495968107,
+      "grad_norm": 6.088867664337158,
+      "learning_rate": 1.749633910153592e-08,
+      "loss": 2.5657,
+      "step": 992
+    },
+    {
+      "epoch": 0.35988040228322915,
+      "grad_norm": 5.72982931137085,
+      "learning_rate": 1.3395817743561134e-08,
+      "loss": 2.762,
+      "step": 993
+    },
+    {
+      "epoch": 0.36024281960677723,
+      "grad_norm": 7.2047576904296875,
+      "learning_rate": 9.841941880361916e-09,
+      "loss": 2.6464,
+      "step": 994
+    },
+    {
+      "epoch": 0.36060523693032526,
+      "grad_norm": 4.944203853607178,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 2.7518,
+      "step": 995
+    },
+    {
+      "epoch": 0.36096765425387334,
+      "grad_norm": 6.469346523284912,
+      "learning_rate": 4.3742761183018784e-09,
+      "loss": 3.3189,
+      "step": 996
+    },
+    {
+      "epoch": 0.3613300715774214,
+      "grad_norm": 5.523458480834961,
+      "learning_rate": 2.4605460129556445e-09,
+      "loss": 2.5312,
+      "step": 997
+    },
+    {
+      "epoch": 0.36169248890096944,
+      "grad_norm": 6.366145133972168,
+      "learning_rate": 1.0935809887702154e-09,
+      "loss": 2.7652,
+      "step": 998
+    },
+    {
+      "epoch": 0.3620549062245175,
+      "grad_norm": 6.140265941619873,
+      "learning_rate": 2.7339599464326627e-10,
+      "loss": 2.759,
+      "step": 999
+    },
+    {
+      "epoch": 0.3624173235480656,
+      "grad_norm": 6.8456130027771,
+      "learning_rate": 0.0,
+      "loss": 2.8016,
+      "step": 1000
+    },
+    {
+      "epoch": 0.3624173235480656,
+      "eval_loss": 2.871779203414917,
+      "eval_runtime": 179.6734,
+      "eval_samples_per_second": 6.467,
+      "eval_steps_per_second": 3.234,
+      "step": 1000
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.209543008256e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null