Training in progress, step 253, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +33 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ffc1f62a5237766a3be226d8c10c8289df6c15ec9ecfbf7b3305adff6298b653
 size 34916720

 version https://git-lfs.github.com/spec/v1
+oid sha256:be521f6ed0a0630137abbb7c434c8998a2489c137d973627a1ea2cac6eef45d0
 size 34916720

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8941ace610526a117145195d4b2292dbbb1fd07169cb78fae1905f54faa18ce9
 size 18162996

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0cf0c2712a58e6176b31a0b1d98aff1d7435174304051ac1b51258d29ea99df
 size 18162996

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e988b69e8478bec55657cd735c074e337302fc9d7a1ac6bee3bb4df951f092b8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:118424a02948010fdd0a6232cc80e1e0b3d72274ff559563ab4afe69b94d3221
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:161a2804bfc1a2a5ca1771b0abf6ac2ecd7255f8733590d62ec6ca536d33c013
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7fde1c856bf4024c4f96980812d760c4bdafe979f275c738ce67cfa5a542c585
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 17.982456140350877,
   "eval_steps": 22,
-  "global_step": 252,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7404,11 +7404,40 @@
       "rewards/reward_func_sensitivity/mean": 1.0,
       "rewards/reward_func_sensitivity/std": 0.0,
       "step": 252
     }
   ],
   "logging_steps": 1,
   "max_steps": 253,
-  "num_input_tokens_seen": 6799373,
   "num_train_epochs": 19,
   "save_steps": 42,
   "stateful_callbacks": {
@@ -7418,7 +7447,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 18.07017543859649,
   "eval_steps": 22,
+  "global_step": 253,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "rewards/reward_func_sensitivity/mean": 1.0,
       "rewards/reward_func_sensitivity/std": 0.0,
       "step": 252
+    },
+    {
+      "clip_ratio/high_max": 0.0,
+      "clip_ratio/high_mean": 0.0,
+      "clip_ratio/low_mean": 0.0,
+      "clip_ratio/low_min": 0.0,
+      "clip_ratio/region_mean": 0.0,
+      "completions/clipped_ratio": 0.0,
+      "completions/max_length": 1.0,
+      "completions/max_terminated_length": 1.0,
+      "completions/mean_length": 1.0,
+      "completions/mean_terminated_length": 1.0,
+      "completions/min_length": 1.0,
+      "completions/min_terminated_length": 1.0,
+      "epoch": 18.07017543859649,
+      "grad_norm": 4.7402841119037475e-06,
+      "kl": 6.701283276081085,
+      "learning_rate": 2.108004964086474e-08,
+      "loss": 0.2681,
+      "num_tokens": 6824209.0,
+      "reward": 139.5692596435547,
+      "reward_std": 0.0,
+      "rewards/conciseness_reward_func/mean": 10.0,
+      "rewards/conciseness_reward_func/std": 0.0,
+      "rewards/reward_func_conciseness/mean": 10.0,
+      "rewards/reward_func_conciseness/std": 0.0,
+      "rewards/reward_func_sensitivity/mean": 1.0,
+      "rewards/reward_func_sensitivity/std": 0.0,
+      "step": 253
     }
   ],
   "logging_steps": 1,
   "max_steps": 253,
+  "num_input_tokens_seen": 6824209,
   "num_train_epochs": 19,
   "save_steps": 42,
   "stateful_callbacks": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }