apriasmoro commited on
Commit
de24fa6
·
verified ·
1 Parent(s): eca0558

Training in progress, step 253, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffc1f62a5237766a3be226d8c10c8289df6c15ec9ecfbf7b3305adff6298b653
3
  size 34916720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be521f6ed0a0630137abbb7c434c8998a2489c137d973627a1ea2cac6eef45d0
3
  size 34916720
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8941ace610526a117145195d4b2292dbbb1fd07169cb78fae1905f54faa18ce9
3
  size 18162996
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0cf0c2712a58e6176b31a0b1d98aff1d7435174304051ac1b51258d29ea99df
3
  size 18162996
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e988b69e8478bec55657cd735c074e337302fc9d7a1ac6bee3bb4df951f092b8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:118424a02948010fdd0a6232cc80e1e0b3d72274ff559563ab4afe69b94d3221
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:161a2804bfc1a2a5ca1771b0abf6ac2ecd7255f8733590d62ec6ca536d33c013
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fde1c856bf4024c4f96980812d760c4bdafe979f275c738ce67cfa5a542c585
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 17.982456140350877,
6
  "eval_steps": 22,
7
- "global_step": 252,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -7404,11 +7404,40 @@
7404
  "rewards/reward_func_sensitivity/mean": 1.0,
7405
  "rewards/reward_func_sensitivity/std": 0.0,
7406
  "step": 252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7407
  }
7408
  ],
7409
  "logging_steps": 1,
7410
  "max_steps": 253,
7411
- "num_input_tokens_seen": 6799373,
7412
  "num_train_epochs": 19,
7413
  "save_steps": 42,
7414
  "stateful_callbacks": {
@@ -7418,7 +7447,7 @@
7418
  "should_evaluate": false,
7419
  "should_log": false,
7420
  "should_save": true,
7421
- "should_training_stop": false
7422
  },
7423
  "attributes": {}
7424
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 18.07017543859649,
6
  "eval_steps": 22,
7
+ "global_step": 253,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
7404
  "rewards/reward_func_sensitivity/mean": 1.0,
7405
  "rewards/reward_func_sensitivity/std": 0.0,
7406
  "step": 252
7407
+ },
7408
+ {
7409
+ "clip_ratio/high_max": 0.0,
7410
+ "clip_ratio/high_mean": 0.0,
7411
+ "clip_ratio/low_mean": 0.0,
7412
+ "clip_ratio/low_min": 0.0,
7413
+ "clip_ratio/region_mean": 0.0,
7414
+ "completions/clipped_ratio": 0.0,
7415
+ "completions/max_length": 1.0,
7416
+ "completions/max_terminated_length": 1.0,
7417
+ "completions/mean_length": 1.0,
7418
+ "completions/mean_terminated_length": 1.0,
7419
+ "completions/min_length": 1.0,
7420
+ "completions/min_terminated_length": 1.0,
7421
+ "epoch": 18.07017543859649,
7422
+ "grad_norm": 4.7402841119037475e-06,
7423
+ "kl": 6.701283276081085,
7424
+ "learning_rate": 2.108004964086474e-08,
7425
+ "loss": 0.2681,
7426
+ "num_tokens": 6824209.0,
7427
+ "reward": 139.5692596435547,
7428
+ "reward_std": 0.0,
7429
+ "rewards/conciseness_reward_func/mean": 10.0,
7430
+ "rewards/conciseness_reward_func/std": 0.0,
7431
+ "rewards/reward_func_conciseness/mean": 10.0,
7432
+ "rewards/reward_func_conciseness/std": 0.0,
7433
+ "rewards/reward_func_sensitivity/mean": 1.0,
7434
+ "rewards/reward_func_sensitivity/std": 0.0,
7435
+ "step": 253
7436
  }
7437
  ],
7438
  "logging_steps": 1,
7439
  "max_steps": 253,
7440
+ "num_input_tokens_seen": 6824209,
7441
  "num_train_epochs": 19,
7442
  "save_steps": 42,
7443
  "stateful_callbacks": {
 
7447
  "should_evaluate": false,
7448
  "should_log": false,
7449
  "should_save": true,
7450
+ "should_training_stop": true
7451
  },
7452
  "attributes": {}
7453
  }