Training in progress, step 253, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 34916720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be521f6ed0a0630137abbb7c434c8998a2489c137d973627a1ea2cac6eef45d0
|
3 |
size 34916720
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 18162996
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0cf0c2712a58e6176b31a0b1d98aff1d7435174304051ac1b51258d29ea99df
|
3 |
size 18162996
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:118424a02948010fdd0a6232cc80e1e0b3d72274ff559563ab4afe69b94d3221
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fde1c856bf4024c4f96980812d760c4bdafe979f275c738ce67cfa5a542c585
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch":
|
6 |
"eval_steps": 22,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7404,11 +7404,40 @@
|
|
7404 |
"rewards/reward_func_sensitivity/mean": 1.0,
|
7405 |
"rewards/reward_func_sensitivity/std": 0.0,
|
7406 |
"step": 252
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7407 |
}
|
7408 |
],
|
7409 |
"logging_steps": 1,
|
7410 |
"max_steps": 253,
|
7411 |
-
"num_input_tokens_seen":
|
7412 |
"num_train_epochs": 19,
|
7413 |
"save_steps": 42,
|
7414 |
"stateful_callbacks": {
|
@@ -7418,7 +7447,7 @@
|
|
7418 |
"should_evaluate": false,
|
7419 |
"should_log": false,
|
7420 |
"should_save": true,
|
7421 |
-
"should_training_stop":
|
7422 |
},
|
7423 |
"attributes": {}
|
7424 |
}
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 18.07017543859649,
|
6 |
"eval_steps": 22,
|
7 |
+
"global_step": 253,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7404 |
"rewards/reward_func_sensitivity/mean": 1.0,
|
7405 |
"rewards/reward_func_sensitivity/std": 0.0,
|
7406 |
"step": 252
|
7407 |
+
},
|
7408 |
+
{
|
7409 |
+
"clip_ratio/high_max": 0.0,
|
7410 |
+
"clip_ratio/high_mean": 0.0,
|
7411 |
+
"clip_ratio/low_mean": 0.0,
|
7412 |
+
"clip_ratio/low_min": 0.0,
|
7413 |
+
"clip_ratio/region_mean": 0.0,
|
7414 |
+
"completions/clipped_ratio": 0.0,
|
7415 |
+
"completions/max_length": 1.0,
|
7416 |
+
"completions/max_terminated_length": 1.0,
|
7417 |
+
"completions/mean_length": 1.0,
|
7418 |
+
"completions/mean_terminated_length": 1.0,
|
7419 |
+
"completions/min_length": 1.0,
|
7420 |
+
"completions/min_terminated_length": 1.0,
|
7421 |
+
"epoch": 18.07017543859649,
|
7422 |
+
"grad_norm": 4.7402841119037475e-06,
|
7423 |
+
"kl": 6.701283276081085,
|
7424 |
+
"learning_rate": 2.108004964086474e-08,
|
7425 |
+
"loss": 0.2681,
|
7426 |
+
"num_tokens": 6824209.0,
|
7427 |
+
"reward": 139.5692596435547,
|
7428 |
+
"reward_std": 0.0,
|
7429 |
+
"rewards/conciseness_reward_func/mean": 10.0,
|
7430 |
+
"rewards/conciseness_reward_func/std": 0.0,
|
7431 |
+
"rewards/reward_func_conciseness/mean": 10.0,
|
7432 |
+
"rewards/reward_func_conciseness/std": 0.0,
|
7433 |
+
"rewards/reward_func_sensitivity/mean": 1.0,
|
7434 |
+
"rewards/reward_func_sensitivity/std": 0.0,
|
7435 |
+
"step": 253
|
7436 |
}
|
7437 |
],
|
7438 |
"logging_steps": 1,
|
7439 |
"max_steps": 253,
|
7440 |
+
"num_input_tokens_seen": 6824209,
|
7441 |
"num_train_epochs": 19,
|
7442 |
"save_steps": 42,
|
7443 |
"stateful_callbacks": {
|
|
|
7447 |
"should_evaluate": false,
|
7448 |
"should_log": false,
|
7449 |
"should_save": true,
|
7450 |
+
"should_training_stop": true
|
7451 |
},
|
7452 |
"attributes": {}
|
7453 |
}
|