Model save

Browse files

Files changed (4) hide show

adapter_config.json +5 -5
adapter_model.safetensors +1 -1
trainer_state.json +20 -20
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
     "q_proj",
-    "v_proj",
-    "down_proj",
     "up_proj",
-    "k_proj",
-    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
+    "gate_proj",
     "q_proj",
+    "o_proj",
     "up_proj",
+    "down_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12201d714570f575e7cfff93e7803c37cf660a5122b1572919394b6c9928bb2e
 size 1238963672

 version https://git-lfs.github.com/spec/v1
+oid sha256:bec726b3b7b435b9c1bf06170693b344212e088ccdf8d6cbcc53ed56d796087f
 size 1238963672

trainer_state.json CHANGED Viewed

@@ -10,68 +10,68 @@
   "log_history": [
     {
       "epoch": 0.48697345994643293,
-      "grad_norm": 0.47981783747673035,
       "learning_rate": 8.790243902439025e-05,
-      "loss": 0.9053,
       "step": 250
     },
     {
       "epoch": 0.9739469198928659,
-      "grad_norm": 0.40146833658218384,
       "learning_rate": 7.570731707317074e-05,
-      "loss": 0.4368,
       "step": 500
     },
     {
       "epoch": 1.4609203798392987,
-      "grad_norm": 0.3620162308216095,
       "learning_rate": 6.351219512195123e-05,
-      "loss": 0.3713,
       "step": 750
     },
     {
       "epoch": 1.9478938397857317,
-      "grad_norm": 0.3653506636619568,
       "learning_rate": 5.131707317073171e-05,
-      "loss": 0.3497,
       "step": 1000
     },
     {
       "epoch": 2.4348672997321645,
-      "grad_norm": 0.40096959471702576,
       "learning_rate": 3.9121951219512195e-05,
-      "loss": 0.3292,
       "step": 1250
     },
     {
       "epoch": 2.9218407596785974,
-      "grad_norm": 0.42327091097831726,
       "learning_rate": 2.6926829268292686e-05,
-      "loss": 0.3126,
       "step": 1500
     },
     {
       "epoch": 3.4088142196250306,
-      "grad_norm": 0.4511185586452484,
       "learning_rate": 1.4731707317073171e-05,
-      "loss": 0.292,
       "step": 1750
     },
     {
       "epoch": 3.8957876795714634,
-      "grad_norm": 0.4421517550945282,
       "learning_rate": 2.536585365853659e-06,
-      "loss": 0.2807,
       "step": 2000
     },
     {
       "epoch": 3.9970781592403215,
       "step": 2052,
       "total_flos": 5.526699321341952e+17,
-      "train_loss": 0.4062951015450104,
-      "train_runtime": 8047.7635,
-      "train_samples_per_second": 65.317,
-      "train_steps_per_second": 0.255
     }
   ],
   "logging_steps": 250,

   "log_history": [
     {
       "epoch": 0.48697345994643293,
+      "grad_norm": 0.5063969492912292,
       "learning_rate": 8.790243902439025e-05,
+      "loss": 0.9607,
       "step": 250
     },
     {
       "epoch": 0.9739469198928659,
+      "grad_norm": 0.506633460521698,
       "learning_rate": 7.570731707317074e-05,
+      "loss": 0.5091,
       "step": 500
     },
     {
       "epoch": 1.4609203798392987,
+      "grad_norm": 0.4411795437335968,
       "learning_rate": 6.351219512195123e-05,
+      "loss": 0.3939,
       "step": 750
     },
     {
       "epoch": 1.9478938397857317,
+      "grad_norm": 0.4095625579357147,
       "learning_rate": 5.131707317073171e-05,
+      "loss": 0.3454,
       "step": 1000
     },
     {
       "epoch": 2.4348672997321645,
+      "grad_norm": 0.40540868043899536,
       "learning_rate": 3.9121951219512195e-05,
+      "loss": 0.3137,
       "step": 1250
     },
     {
       "epoch": 2.9218407596785974,
+      "grad_norm": 0.3997802734375,
       "learning_rate": 2.6926829268292686e-05,
+      "loss": 0.2979,
       "step": 1500
     },
     {
       "epoch": 3.4088142196250306,
+      "grad_norm": 0.3940906822681427,
       "learning_rate": 1.4731707317073171e-05,
+      "loss": 0.282,
       "step": 1750
     },
     {
       "epoch": 3.8957876795714634,
+      "grad_norm": 0.3510627746582031,
       "learning_rate": 2.536585365853659e-06,
+      "loss": 0.2774,
       "step": 2000
     },
     {
       "epoch": 3.9970781592403215,
       "step": 2052,
       "total_flos": 5.526699321341952e+17,
+      "train_loss": 0.4186723729555602,
+      "train_runtime": 8128.165,
+      "train_samples_per_second": 64.67,
+      "train_steps_per_second": 0.252
     }
   ],
   "logging_steps": 250,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4f19646cb13b6dd42d486f44847bea74d210d17542ab229d4dfd2d11a4d3b86
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:e85c4a85d976010cf22cf427b65efcc07b6682880b6f9b35dbfe502c268b4bdb
 size 5176