diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/adapter_model.bin b/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7197b4d7af8b3feab7cd199abdc79c0176ddfad --- /dev/null +++ b/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:108a62208cac75b04191ec406c7c72ee9abb09b6895583d1e54fa9f117c33906 +size 14700057 diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.bin b/checkpoint-100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..704e70fe2763e2bf3a5e212a66d19efd7e76c2b6 --- /dev/null +++ b/checkpoint-100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:147fb19d469d5c6f53a7ba1bcb909a7ed2128c22f59a7e480bea913c1260074d +size 14700057 diff --git a/checkpoint-100/finetuning_args.json b/checkpoint-100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-100/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-100/reward/adapter_config.json b/checkpoint-100/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-100/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-100/reward/adapter_model.bin b/checkpoint-100/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-100/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-100/value_head.bin b/checkpoint-100/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..2baaf845af4f3a2a1042efdfa0aee99486395c93 --- /dev/null +++ b/checkpoint-100/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:395abf9eaacf5e0044ef7e7197a8bdbff58623f174fac4c27d1f82112707653d +size 17395 diff --git a/checkpoint-1000/README.md b/checkpoint-1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-1000/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-1000/adapter_config.json b/checkpoint-1000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1000/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1000/adapter_model.bin b/checkpoint-1000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..abb95514a59b360b8f338d523281634a8f59a75c --- /dev/null +++ b/checkpoint-1000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c073adeac9b9a45b79c54bcd1dc914711d21fe24cbb2409d2437d7a911924a +size 14700057 diff --git a/checkpoint-1000/finetuning_args.json b/checkpoint-1000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-1000/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-1000/reward/adapter_config.json b/checkpoint-1000/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1000/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1000/reward/adapter_model.bin b/checkpoint-1000/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-1000/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-1000/value_head.bin b/checkpoint-1000/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..8da12cd64f3ab136b9437af4053c76ae59f2d8ec --- /dev/null +++ b/checkpoint-1000/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:733ef79dccc23227056bf9906facb14cdc257c45a14edf48c6c015e3175bf15a +size 17395 diff --git a/checkpoint-1100/README.md b/checkpoint-1100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-1100/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-1100/adapter_config.json b/checkpoint-1100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1100/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1100/adapter_model.bin b/checkpoint-1100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a8fceab243217cae07f2fd68a99d9b628461877 --- /dev/null +++ b/checkpoint-1100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f113366635ea7d8603f3f700df4f8a93ce616fc172d603358cb047801276cb6b +size 14700057 diff --git a/checkpoint-1100/finetuning_args.json b/checkpoint-1100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-1100/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-1100/reward/adapter_config.json b/checkpoint-1100/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1100/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1100/reward/adapter_model.bin b/checkpoint-1100/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-1100/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-1100/value_head.bin b/checkpoint-1100/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..0bc717e9bae76b89911fc514bc07982722d0017f --- /dev/null +++ b/checkpoint-1100/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ced8059c47d165b4dd4ca5b7437c1e9a9811ab711f89cc7cfdae874717e0cb3c +size 17395 diff --git a/checkpoint-1200/README.md b/checkpoint-1200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-1200/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-1200/adapter_config.json b/checkpoint-1200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1200/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1200/adapter_model.bin b/checkpoint-1200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e2f78bcff6bd42278eb12115e118cf2e5032e9ac --- /dev/null +++ b/checkpoint-1200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d49c9db7b2dc6e86d3ba9228acbd0f316d36f7526220d99e03780281409c9a26 +size 14700057 diff --git a/checkpoint-1200/finetuning_args.json b/checkpoint-1200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-1200/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-1200/reward/adapter_config.json b/checkpoint-1200/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1200/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1200/reward/adapter_model.bin b/checkpoint-1200/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-1200/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-1200/value_head.bin b/checkpoint-1200/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..82c3911273c3804b16a3c96c12a51c40edb9a85d --- /dev/null +++ b/checkpoint-1200/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a56df50a35c1ac0f719d7edf4cd843f5ed7d578cdeab92b8738cb64154f7ed84 +size 17395 diff --git a/checkpoint-1300/README.md b/checkpoint-1300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-1300/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-1300/adapter_config.json b/checkpoint-1300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1300/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1300/adapter_model.bin b/checkpoint-1300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..21ad3dce843db295237cab58207b1129ee730c18 --- /dev/null +++ b/checkpoint-1300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb75e3a1cc017e3c6abd3e31c5a6a4de91ccdd6411a7b430d787239b7a334af +size 14700057 diff --git a/checkpoint-1300/finetuning_args.json b/checkpoint-1300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-1300/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-1300/reward/adapter_config.json b/checkpoint-1300/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1300/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1300/reward/adapter_model.bin b/checkpoint-1300/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-1300/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-1300/training_args.bin b/checkpoint-1300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-1300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-1300/value_head.bin b/checkpoint-1300/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..58e978abf65828f544a40990322d067eaf2165da --- /dev/null +++ b/checkpoint-1300/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11171e582456b91af5ee37b87fde770590492ed07e9a013769141f233cf37448 +size 17395 diff --git a/checkpoint-1400/README.md b/checkpoint-1400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-1400/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-1400/adapter_config.json b/checkpoint-1400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1400/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1400/adapter_model.bin b/checkpoint-1400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..483bac3c1d69868b1f9d4a6e54e8029af0306b6a --- /dev/null +++ b/checkpoint-1400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:635293e34c81a24270015784797554594f1ac2947dfbcd33612610d1ce794852 +size 14700057 diff --git a/checkpoint-1400/finetuning_args.json b/checkpoint-1400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-1400/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-1400/reward/adapter_config.json b/checkpoint-1400/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1400/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1400/reward/adapter_model.bin b/checkpoint-1400/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-1400/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-1400/training_args.bin b/checkpoint-1400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-1400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-1400/value_head.bin b/checkpoint-1400/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1777167d230b9e54e1af4fbb403cf31efa16ea5c --- /dev/null +++ b/checkpoint-1400/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce03e6d45da226d136e9f37dba6a24445dae8d8ea5e1cac08e27058653767fa +size 17395 diff --git a/checkpoint-1500/README.md b/checkpoint-1500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-1500/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-1500/adapter_config.json b/checkpoint-1500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1500/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1500/adapter_model.bin b/checkpoint-1500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a518406a13d988601aa8f95af2a353585374ee3f --- /dev/null +++ b/checkpoint-1500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97d97eafb2e65ca579146efb1a27f938b98f717aa202aec88299163ba65e7e37 +size 14700057 diff --git a/checkpoint-1500/finetuning_args.json b/checkpoint-1500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-1500/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-1500/reward/adapter_config.json b/checkpoint-1500/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1500/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1500/reward/adapter_model.bin b/checkpoint-1500/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-1500/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-1500/training_args.bin b/checkpoint-1500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-1500/value_head.bin b/checkpoint-1500/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..0f3b12fe4afbbe03bf335802fe887c7dc7342cf4 --- /dev/null +++ b/checkpoint-1500/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39b0e3f59f33f2a76105f3c5daada00c19ec25faae425990f183ab2c484ed3b7 +size 17395 diff --git a/checkpoint-1600/README.md b/checkpoint-1600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-1600/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-1600/adapter_config.json b/checkpoint-1600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1600/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1600/adapter_model.bin b/checkpoint-1600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..65caf3a81a9d40773822141cb3de205bb6403d5f --- /dev/null +++ b/checkpoint-1600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:becfc967bb66bb428bfa9586a408cf223b434979153d82c70931809e4b4df273 +size 14700057 diff --git a/checkpoint-1600/finetuning_args.json b/checkpoint-1600/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-1600/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-1600/reward/adapter_config.json b/checkpoint-1600/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1600/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1600/reward/adapter_model.bin b/checkpoint-1600/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-1600/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-1600/training_args.bin b/checkpoint-1600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-1600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-1600/value_head.bin b/checkpoint-1600/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..825688d2a375691daba79379dc82a8c46fbddbe7 --- /dev/null +++ b/checkpoint-1600/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3e22c0cf19b34ceff6e00d1e00bf4d3df07abfe0992ea0b5d1b04fd22042612 +size 17395 diff --git a/checkpoint-1700/README.md b/checkpoint-1700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-1700/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-1700/adapter_config.json b/checkpoint-1700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1700/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1700/adapter_model.bin b/checkpoint-1700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..9ced57d06ebc2091161af4b2cf62c0b7017aacaf --- /dev/null +++ b/checkpoint-1700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0152251307bb600cfc1f28c1b843370df193f2892ac3e7966911946bbfe22420 +size 14700057 diff --git a/checkpoint-1700/finetuning_args.json b/checkpoint-1700/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-1700/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-1700/reward/adapter_config.json b/checkpoint-1700/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1700/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1700/reward/adapter_model.bin b/checkpoint-1700/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-1700/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-1700/training_args.bin b/checkpoint-1700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-1700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-1700/value_head.bin b/checkpoint-1700/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..b36c8ac4b97ec9a8b208e6727202c5a5e76e3ea5 --- /dev/null +++ b/checkpoint-1700/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7833b16e7a6132a16f3a6bfe3264ad944d2410e09f69f0eb194e3401b943c3ed +size 17395 diff --git a/checkpoint-1800/README.md b/checkpoint-1800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-1800/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-1800/adapter_config.json b/checkpoint-1800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1800/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1800/adapter_model.bin b/checkpoint-1800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..69d36d5ee3f902b1c70722e47979402bb0741023 --- /dev/null +++ b/checkpoint-1800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:388bf93c6d236531db8d651971e70dc5856902aadb5c058beed9647421f7ed59 +size 14700057 diff --git a/checkpoint-1800/finetuning_args.json b/checkpoint-1800/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-1800/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-1800/reward/adapter_config.json b/checkpoint-1800/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1800/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1800/reward/adapter_model.bin b/checkpoint-1800/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-1800/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-1800/training_args.bin b/checkpoint-1800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-1800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-1800/value_head.bin b/checkpoint-1800/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..e3bc1960fa6424f51c8c560b99d7dba0f922742c --- /dev/null +++ b/checkpoint-1800/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd90333dd70490c2459176d5df041d47513a4ddd3461035b15e312bf25c49ed +size 17395 diff --git a/checkpoint-1900/README.md b/checkpoint-1900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-1900/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-1900/adapter_config.json b/checkpoint-1900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1900/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1900/adapter_model.bin b/checkpoint-1900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..376ef75f8eede4cde850a27121f43e823fba8036 --- /dev/null +++ b/checkpoint-1900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b72462498c3d0536b32003bf4a4ba403d94d7e7eea15853268008839d2ac96aa +size 14700057 diff --git a/checkpoint-1900/finetuning_args.json b/checkpoint-1900/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-1900/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-1900/reward/adapter_config.json b/checkpoint-1900/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-1900/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1900/reward/adapter_model.bin b/checkpoint-1900/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-1900/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-1900/training_args.bin b/checkpoint-1900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-1900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-1900/value_head.bin b/checkpoint-1900/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..47ca9903e916aba4177764c978358efc2b4d6409 --- /dev/null +++ b/checkpoint-1900/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69345f4dff8165e5b7ca6ad287d084e8057ff697a853fd7d8c36ecff17af3ff0 +size 17395 diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.bin b/checkpoint-200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..01791e7ce593dc51408eba09f6e0961f75436e74 --- /dev/null +++ b/checkpoint-200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e584a479b0733f0e9726f070c60c9e19d63e7ac75ce66059cfb7eda5890d9cb3 +size 14700057 diff --git a/checkpoint-200/finetuning_args.json b/checkpoint-200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-200/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-200/reward/adapter_config.json b/checkpoint-200/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-200/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-200/reward/adapter_model.bin b/checkpoint-200/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-200/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-200/value_head.bin b/checkpoint-200/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..4c79c4c2aa76bdae27a51e23f3258822d684b884 --- /dev/null +++ b/checkpoint-200/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2c777374cf850eeffae2d658218fc9dbf25129efcd2f7bab50c6f7f8b6b333 +size 17395 diff --git a/checkpoint-2000/README.md b/checkpoint-2000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-2000/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-2000/adapter_config.json b/checkpoint-2000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2000/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2000/adapter_model.bin b/checkpoint-2000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c46a2eb56a61b577849390db6f6cea8500826f45 --- /dev/null +++ b/checkpoint-2000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db036169eddee81a0b9f9bc719239dc5ed0773eac11912ddade536a4ff2f826e +size 14700057 diff --git a/checkpoint-2000/finetuning_args.json b/checkpoint-2000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-2000/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-2000/reward/adapter_config.json b/checkpoint-2000/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2000/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2000/reward/adapter_model.bin b/checkpoint-2000/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-2000/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-2000/training_args.bin b/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-2000/value_head.bin b/checkpoint-2000/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..4cdfd28b0265976e1a3b9fd2e81a1ed2fec4130c --- /dev/null +++ b/checkpoint-2000/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898e7abc32f92613de921811329ffdc7bfd553980700b30707cd737eda9bacba +size 17395 diff --git a/checkpoint-2100/README.md b/checkpoint-2100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-2100/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-2100/adapter_config.json b/checkpoint-2100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2100/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2100/adapter_model.bin b/checkpoint-2100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..857dd0bb9be7d72ab9b646fddb0e0ce542048053 --- /dev/null +++ b/checkpoint-2100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8a1f81a21dfe585a796486aef6c7010f03bd59d13ba0593d5a7a5e0915783ec +size 14700057 diff --git a/checkpoint-2100/finetuning_args.json b/checkpoint-2100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-2100/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-2100/reward/adapter_config.json b/checkpoint-2100/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2100/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2100/reward/adapter_model.bin b/checkpoint-2100/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-2100/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-2100/training_args.bin b/checkpoint-2100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-2100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-2100/value_head.bin b/checkpoint-2100/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..a353e342485edb9807eddccc5b5c148d6f868c68 --- /dev/null +++ b/checkpoint-2100/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f427edd8a599123720838826297ad445bbbbb13f4a85748c94e2d315cb3c24 +size 17395 diff --git a/checkpoint-2200/README.md b/checkpoint-2200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-2200/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-2200/adapter_config.json b/checkpoint-2200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2200/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2200/adapter_model.bin b/checkpoint-2200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c589b4c864071a484f82fbe7e0b339ebd45b7693 --- /dev/null +++ b/checkpoint-2200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4fba5786aaf8eade07e3db318344f46128927aec2b7f319367fa8557f962eb0 +size 14700057 diff --git a/checkpoint-2200/finetuning_args.json b/checkpoint-2200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-2200/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-2200/reward/adapter_config.json b/checkpoint-2200/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2200/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2200/reward/adapter_model.bin b/checkpoint-2200/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-2200/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-2200/training_args.bin b/checkpoint-2200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-2200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-2200/value_head.bin b/checkpoint-2200/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..90402c31e200e8489d8af8d4c52d46f224280b43 --- /dev/null +++ b/checkpoint-2200/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62f60108ff40d27b6a5d42296749dea3cd55a946dfc6d188169db9fc20910948 +size 17395 diff --git a/checkpoint-2300/README.md b/checkpoint-2300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-2300/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-2300/adapter_config.json b/checkpoint-2300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2300/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2300/adapter_model.bin b/checkpoint-2300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..698d26108d18fbd4323c01eda44a78428b20f424 --- /dev/null +++ b/checkpoint-2300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d45b1aa7daee097fa91291d02850fde6416808d85ffb957bada6c5dd5293edb5 +size 14700057 diff --git a/checkpoint-2300/finetuning_args.json b/checkpoint-2300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-2300/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-2300/reward/adapter_config.json b/checkpoint-2300/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2300/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2300/reward/adapter_model.bin b/checkpoint-2300/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-2300/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-2300/training_args.bin b/checkpoint-2300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-2300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-2300/value_head.bin b/checkpoint-2300/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..edf2c1c08d3d15497d6cb73db99ab6c6793fb902 --- /dev/null +++ b/checkpoint-2300/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79a7bf2c6ae2cf2bfa0bde0958735c8624a320184a6d0b8dccc43e69a12ad0aa +size 17395 diff --git a/checkpoint-2400/README.md b/checkpoint-2400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-2400/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-2400/adapter_config.json b/checkpoint-2400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2400/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2400/adapter_model.bin b/checkpoint-2400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8105fa16b155e3f0d16a81e569131400f2120898 --- /dev/null +++ b/checkpoint-2400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9fae35fd9faa5a012e9e4e147f806b15ef336fadbbada53deb7843a0fb8b7c0 +size 14700057 diff --git a/checkpoint-2400/finetuning_args.json b/checkpoint-2400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-2400/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-2400/reward/adapter_config.json b/checkpoint-2400/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2400/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2400/reward/adapter_model.bin b/checkpoint-2400/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-2400/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-2400/training_args.bin b/checkpoint-2400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-2400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-2400/value_head.bin b/checkpoint-2400/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..38a532b5f55a77bf4bfebd820b927d79575f3b3e --- /dev/null +++ b/checkpoint-2400/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81318e24b1eb8ab30ef33bc4b731e70a1760b3ccd926cd372f84e0bf6e854fbc +size 17395 diff --git a/checkpoint-2500/README.md b/checkpoint-2500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-2500/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-2500/adapter_config.json b/checkpoint-2500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2500/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2500/adapter_model.bin b/checkpoint-2500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e9abfcb2495d45d8a740f3ef9600455e2a8617e --- /dev/null +++ b/checkpoint-2500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:349e4d8ea4a316104d1cf744de09f6e1145d87aacdda3239482e1440094f311c +size 14700057 diff --git a/checkpoint-2500/finetuning_args.json b/checkpoint-2500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-2500/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-2500/reward/adapter_config.json b/checkpoint-2500/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2500/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2500/reward/adapter_model.bin b/checkpoint-2500/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-2500/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-2500/training_args.bin b/checkpoint-2500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-2500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-2500/value_head.bin b/checkpoint-2500/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..c4857c9a7228aa9ca35ab9a30d675c5ac4df3785 --- /dev/null +++ b/checkpoint-2500/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:975e4094e0254acbb8672c279c0ddf9c3cfea7e3f36f60c2f6870f3b4502ef6c +size 17395 diff --git a/checkpoint-2600/README.md b/checkpoint-2600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-2600/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-2600/adapter_config.json b/checkpoint-2600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2600/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2600/adapter_model.bin b/checkpoint-2600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a1714b1878df7b814ebaaea0e7112438a5029bf --- /dev/null +++ b/checkpoint-2600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c41e345a5da98054aa83a162246045bcc40319a5e579271f4ce23e4eac4b8d +size 14700057 diff --git a/checkpoint-2600/finetuning_args.json b/checkpoint-2600/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-2600/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-2600/reward/adapter_config.json b/checkpoint-2600/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2600/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2600/reward/adapter_model.bin b/checkpoint-2600/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-2600/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-2600/training_args.bin b/checkpoint-2600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-2600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-2600/value_head.bin b/checkpoint-2600/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..d39bbf4f1dbe707499e5d7e06b39d538adae1a90 --- /dev/null +++ b/checkpoint-2600/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e8146cdf19c078ba492543090f13d802d30f58b32d40f1d1ac5c2392bbb62d +size 17395 diff --git a/checkpoint-2700/README.md b/checkpoint-2700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-2700/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-2700/adapter_config.json b/checkpoint-2700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2700/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2700/adapter_model.bin b/checkpoint-2700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3fe6678bb0ed518632e569529f39d29a229951ac --- /dev/null +++ b/checkpoint-2700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8fcb0348570cfbb4f9d4aaee35be1683cd200ea2efab6afb04c5aaf3b3bcd2f +size 14700057 diff --git a/checkpoint-2700/finetuning_args.json b/checkpoint-2700/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-2700/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-2700/reward/adapter_config.json b/checkpoint-2700/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2700/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2700/reward/adapter_model.bin b/checkpoint-2700/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-2700/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-2700/training_args.bin b/checkpoint-2700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-2700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-2700/value_head.bin b/checkpoint-2700/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..2efde4b20dc3735bc872100132fa1e9652dbb858 --- /dev/null +++ b/checkpoint-2700/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ddaf2260dfce6de0f49be8bf833db234abbf0cb5567ee612480e1ad09e9ea25 +size 17395 diff --git a/checkpoint-2800/README.md b/checkpoint-2800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-2800/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-2800/adapter_config.json b/checkpoint-2800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2800/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2800/adapter_model.bin b/checkpoint-2800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..65c3fdbcb8219730b0a30beec49b73aa3bfce403 --- /dev/null +++ b/checkpoint-2800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88bafea9e9586691c5a32c66e9eaf8b71c8b85ce6193b603e72998e5462362d5 +size 14700057 diff --git a/checkpoint-2800/finetuning_args.json b/checkpoint-2800/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-2800/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-2800/reward/adapter_config.json b/checkpoint-2800/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2800/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2800/reward/adapter_model.bin b/checkpoint-2800/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-2800/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-2800/training_args.bin b/checkpoint-2800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-2800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-2800/value_head.bin b/checkpoint-2800/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..6cb0607c911a45cf0c3bb7f14280ca481558a7f9 --- /dev/null +++ b/checkpoint-2800/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f652ddf3574462541eb4b97be16bd452f16905c84463b2e1202e6fb503ab6f0f +size 17395 diff --git a/checkpoint-2900/README.md b/checkpoint-2900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-2900/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-2900/adapter_config.json b/checkpoint-2900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2900/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2900/adapter_model.bin b/checkpoint-2900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b04712f78f8ad4084140e804fb1cef117b797d0c --- /dev/null +++ b/checkpoint-2900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a07c2fa62cc37c17f5b519e64669f99395bb026d9b08748bd7a3ae41da90856 +size 14700057 diff --git a/checkpoint-2900/finetuning_args.json b/checkpoint-2900/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-2900/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-2900/reward/adapter_config.json b/checkpoint-2900/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-2900/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2900/reward/adapter_model.bin b/checkpoint-2900/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-2900/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-2900/training_args.bin b/checkpoint-2900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-2900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-2900/value_head.bin b/checkpoint-2900/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..de6b01d59a9a0d436c06ab4899c05dbf33f77b29 --- /dev/null +++ b/checkpoint-2900/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9a92fc0a9b5f9920ecf7dc5db629e40ebb670ca33ca9923bc6c974ad5024ff5 +size 17395 diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-300/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-300/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-300/adapter_model.bin b/checkpoint-300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b72b355b8ac888e8bf0d1b80037bb869fb8d9c0f --- /dev/null +++ b/checkpoint-300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f0202ce4479141d54c4a2b41df5ff344b7c5833175420a08d0ee811ca86b1e7 +size 14700057 diff --git a/checkpoint-300/finetuning_args.json b/checkpoint-300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-300/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-300/reward/adapter_config.json b/checkpoint-300/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-300/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-300/reward/adapter_model.bin b/checkpoint-300/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-300/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-300/value_head.bin b/checkpoint-300/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b1858d38595c59817cf2260efb5b68056a3e817 --- /dev/null +++ b/checkpoint-300/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f50c273bd2e207f770e80238b35a02f4366980ce06cc2638c03e1ad68616d750 +size 17395 diff --git a/checkpoint-3000/README.md b/checkpoint-3000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-3000/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-3000/adapter_config.json b/checkpoint-3000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3000/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3000/adapter_model.bin b/checkpoint-3000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c3f9614bf9e6320eb7ef1ab6bb801f53177b769 --- /dev/null +++ b/checkpoint-3000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ea13e44d2bc28eaaba026bf85ed88e968f7590aa66e58dcfd42a6b3456bc8a +size 14700057 diff --git a/checkpoint-3000/finetuning_args.json b/checkpoint-3000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-3000/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-3000/reward/adapter_config.json b/checkpoint-3000/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3000/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3000/reward/adapter_model.bin b/checkpoint-3000/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-3000/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-3000/training_args.bin b/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-3000/value_head.bin b/checkpoint-3000/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..8f1adf9e68407b107a448ffa71ab735d61327485 --- /dev/null +++ b/checkpoint-3000/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf466891d4c77428711049d08e17384b10c1a4f9b0abdc9a745b1cda7ca367f +size 17395 diff --git a/checkpoint-3100/README.md b/checkpoint-3100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-3100/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-3100/adapter_config.json b/checkpoint-3100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3100/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3100/adapter_model.bin b/checkpoint-3100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e06ab2450d80b7d7e93a171d54d3a2f4ca4a685 --- /dev/null +++ b/checkpoint-3100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610f1fd3f8c3febfa232dee0c1b229cfc71e2078a52cc2ddcacff9fdff4f8cfb +size 14700057 diff --git a/checkpoint-3100/finetuning_args.json b/checkpoint-3100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-3100/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-3100/reward/adapter_config.json b/checkpoint-3100/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3100/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3100/reward/adapter_model.bin b/checkpoint-3100/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-3100/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-3100/training_args.bin b/checkpoint-3100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-3100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-3100/value_head.bin b/checkpoint-3100/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..7a7fb5315526532aa9423d8ee3b0be75707988d1 --- /dev/null +++ b/checkpoint-3100/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba227026335f81667aff726531d093665507b9bf600522d9d4d42d348612a380 +size 17395 diff --git a/checkpoint-3200/README.md b/checkpoint-3200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-3200/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-3200/adapter_config.json b/checkpoint-3200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3200/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3200/adapter_model.bin b/checkpoint-3200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..be5551bae17f3f8a01c6bcc6c5b938612b391333 --- /dev/null +++ b/checkpoint-3200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cb89fa75d0861a7c482ebe0f77a4f771b1dddd683ad2ccc7d2a52ff692cdabb +size 14700057 diff --git a/checkpoint-3200/finetuning_args.json b/checkpoint-3200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-3200/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-3200/reward/adapter_config.json b/checkpoint-3200/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3200/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3200/reward/adapter_model.bin b/checkpoint-3200/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-3200/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-3200/training_args.bin b/checkpoint-3200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-3200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-3200/value_head.bin b/checkpoint-3200/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e25bf638cca347ba3920eb4bc6ac2f665b384e8 --- /dev/null +++ b/checkpoint-3200/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980b918e50426f2983d40a659d5ee4a6469854120a0e4b37b3d109ee41184fdb +size 17395 diff --git a/checkpoint-3300/README.md b/checkpoint-3300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-3300/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-3300/adapter_config.json b/checkpoint-3300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3300/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3300/adapter_model.bin b/checkpoint-3300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b9892747947fe78386ae6ca65c8150d9d691667 --- /dev/null +++ b/checkpoint-3300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7ca2cec9dbdd15acbf403f3c0f13c8414bcdce431088e01079589f14db98c5 +size 14700057 diff --git a/checkpoint-3300/finetuning_args.json b/checkpoint-3300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-3300/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-3300/reward/adapter_config.json b/checkpoint-3300/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3300/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3300/reward/adapter_model.bin b/checkpoint-3300/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-3300/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-3300/training_args.bin b/checkpoint-3300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-3300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-3300/value_head.bin b/checkpoint-3300/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..d7b184007a46c458040ac81bb364e8f805abd3bc --- /dev/null +++ b/checkpoint-3300/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba7154d47e151cf518ed0a573524f80770ad47ce805b1e27e41e54ce5a304e85 +size 17395 diff --git a/checkpoint-3400/README.md b/checkpoint-3400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-3400/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-3400/adapter_config.json b/checkpoint-3400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3400/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3400/adapter_model.bin b/checkpoint-3400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..412d666495944546383580a070a18bca9ffe1187 --- /dev/null +++ b/checkpoint-3400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27bb27d79ac5f91e9c2359c4caae6cf53e0063f6ae7b653f7a4c2d70a0de3c00 +size 14700057 diff --git a/checkpoint-3400/finetuning_args.json b/checkpoint-3400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-3400/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-3400/reward/adapter_config.json b/checkpoint-3400/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3400/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3400/reward/adapter_model.bin b/checkpoint-3400/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-3400/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-3400/training_args.bin b/checkpoint-3400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-3400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-3400/value_head.bin b/checkpoint-3400/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d7cacbed6660292cfd4f9906f1ec5067991cff2 --- /dev/null +++ b/checkpoint-3400/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d223d0fc57dd9ce42a3ad10308b59f737999892d296631506338439d20adfb7 +size 17395 diff --git a/checkpoint-3500/README.md b/checkpoint-3500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-3500/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-3500/adapter_config.json b/checkpoint-3500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3500/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3500/adapter_model.bin b/checkpoint-3500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..1a788d5197998fab6b40269844adf7afe04c1487 --- /dev/null +++ b/checkpoint-3500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18bc340db8bbed829fc12ae25ed092fe1003fc5db346505fb668909edf99abad +size 14700057 diff --git a/checkpoint-3500/finetuning_args.json b/checkpoint-3500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-3500/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-3500/reward/adapter_config.json b/checkpoint-3500/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3500/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3500/reward/adapter_model.bin b/checkpoint-3500/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-3500/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-3500/training_args.bin b/checkpoint-3500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-3500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-3500/value_head.bin b/checkpoint-3500/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..4dd579718b6ea7e0126c9b4e8a44edeed4ed2970 --- /dev/null +++ b/checkpoint-3500/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572a31d8cad6c9f37b6f28318260cdfd3d428abd916fb53e1f55bcd42ba61b8 +size 17395 diff --git a/checkpoint-3600/README.md b/checkpoint-3600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-3600/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-3600/adapter_config.json b/checkpoint-3600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3600/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3600/adapter_model.bin b/checkpoint-3600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe93d2cff584d9fb852dad6dd41f0b4405a6ab04 --- /dev/null +++ b/checkpoint-3600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2397fdbb2718d2dd1f324d23c2101e705862e3a291636234eed69dc77674df73 +size 14700057 diff --git a/checkpoint-3600/finetuning_args.json b/checkpoint-3600/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-3600/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-3600/reward/adapter_config.json b/checkpoint-3600/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3600/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3600/reward/adapter_model.bin b/checkpoint-3600/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-3600/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-3600/training_args.bin b/checkpoint-3600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-3600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-3600/value_head.bin b/checkpoint-3600/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..4684e2d7defd52a32935a2bcff4b70f37ad25833 --- /dev/null +++ b/checkpoint-3600/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed7f83a24c3eec19a4d2d3a94444d8dfc5eb27cb3e1c67075cd4be0a288ff158 +size 17395 diff --git a/checkpoint-3700/README.md b/checkpoint-3700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-3700/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-3700/adapter_config.json b/checkpoint-3700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3700/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3700/adapter_model.bin b/checkpoint-3700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..311bbec5b8d2bbdd56ba0ed135ce63a478693ffe --- /dev/null +++ b/checkpoint-3700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d605dac3a1e0f12345cfed35cbadf3c8f6f875e86f893768a03dd3b4179cc3f +size 14700057 diff --git a/checkpoint-3700/finetuning_args.json b/checkpoint-3700/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-3700/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-3700/reward/adapter_config.json b/checkpoint-3700/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3700/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3700/reward/adapter_model.bin b/checkpoint-3700/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-3700/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-3700/training_args.bin b/checkpoint-3700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-3700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-3700/value_head.bin b/checkpoint-3700/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..178d59aa05aa00b2278c6b62eeb30045161ee683 --- /dev/null +++ b/checkpoint-3700/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14a36a5cb1cc558e876a59f7d2906cd0b960d2bac887c6d4f31c7932b22cd56d +size 17395 diff --git a/checkpoint-3800/README.md b/checkpoint-3800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-3800/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-3800/adapter_config.json b/checkpoint-3800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3800/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3800/adapter_model.bin b/checkpoint-3800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcf7fb8a161165031b7f6fdd4840c6a1b2e480cd --- /dev/null +++ b/checkpoint-3800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e939721ffe4e4f70fb40cbb50f886c6315fdbdd702e418459fd65f1a481716c +size 14700057 diff --git a/checkpoint-3800/finetuning_args.json b/checkpoint-3800/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-3800/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-3800/reward/adapter_config.json b/checkpoint-3800/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3800/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3800/reward/adapter_model.bin b/checkpoint-3800/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-3800/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-3800/training_args.bin b/checkpoint-3800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-3800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-3800/value_head.bin b/checkpoint-3800/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..0bb84dd16bfcf874c8cfe0c0d516e49fa45d6484 --- /dev/null +++ b/checkpoint-3800/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65589052089e3b5c11bc5f0390044c29c4df82063bd3ae85f700d0138dc1b5fc +size 17395 diff --git a/checkpoint-3900/README.md b/checkpoint-3900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-3900/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-3900/adapter_config.json b/checkpoint-3900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3900/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3900/adapter_model.bin b/checkpoint-3900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2397625b14558da185a7e40cd6fc17997c0ac8ec --- /dev/null +++ b/checkpoint-3900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a6f04c16f0388b45c18c7d8b01157d74a614f10558fd9580bb28581ea6a1ad +size 14700057 diff --git a/checkpoint-3900/finetuning_args.json b/checkpoint-3900/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-3900/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-3900/reward/adapter_config.json b/checkpoint-3900/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-3900/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3900/reward/adapter_model.bin b/checkpoint-3900/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-3900/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-3900/training_args.bin b/checkpoint-3900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-3900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-3900/value_head.bin b/checkpoint-3900/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..268bb1b836348d15dd19bf5dcb9e20118b1ed945 --- /dev/null +++ b/checkpoint-3900/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:167ca3024863b708c6f06a1187dffe36b14d42361e68f6d116a7c328f617438e +size 17395 diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-400/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-400/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-400/adapter_model.bin b/checkpoint-400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..be6b9a23292edc9e6d499eba8bd6fcbc1d083338 --- /dev/null +++ b/checkpoint-400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abe7d7776b48cbe37be592a775dd8f63f505b15941ec9c7e3b6d52f010209cf +size 14700057 diff --git a/checkpoint-400/finetuning_args.json b/checkpoint-400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-400/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-400/reward/adapter_config.json b/checkpoint-400/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-400/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-400/reward/adapter_model.bin b/checkpoint-400/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-400/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-400/value_head.bin b/checkpoint-400/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..cfc4de9bc3d90331b114da47b80ba28264d3b36e --- /dev/null +++ b/checkpoint-400/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:220b0715ad838c56904a9a3d71bde585a2db91e722af8e4cbc4abcfbcd5b3173 +size 17395 diff --git a/checkpoint-4000/README.md b/checkpoint-4000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-4000/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-4000/adapter_config.json b/checkpoint-4000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4000/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4000/adapter_model.bin b/checkpoint-4000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a08ef01e662ba4eda98793eb968e95c1193108e9 --- /dev/null +++ b/checkpoint-4000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18eee644c0846ad0b702bd3c80022a742acf48da5c8bdca76c38a55adb3cd90b +size 14700057 diff --git a/checkpoint-4000/finetuning_args.json b/checkpoint-4000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-4000/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-4000/reward/adapter_config.json b/checkpoint-4000/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4000/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4000/reward/adapter_model.bin b/checkpoint-4000/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-4000/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-4000/training_args.bin b/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-4000/value_head.bin b/checkpoint-4000/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..6786ba2ca38d40c5054373184495a59331af80bd --- /dev/null +++ b/checkpoint-4000/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d26186b5d65d88d15a2dfe4dd6a1e4e605433eccf17f2aefa7cca230b1e8d162 +size 17395 diff --git a/checkpoint-4100/README.md b/checkpoint-4100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-4100/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-4100/adapter_config.json b/checkpoint-4100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4100/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4100/adapter_model.bin b/checkpoint-4100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..46d468a2d53552e9232a4190eac9e63170131f78 --- /dev/null +++ b/checkpoint-4100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36a50e08d10d1e90d753398f581c6f4b3bf2cef42c90b590979b4d6fb8b5bfa +size 14700057 diff --git a/checkpoint-4100/finetuning_args.json b/checkpoint-4100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-4100/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-4100/reward/adapter_config.json b/checkpoint-4100/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4100/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4100/reward/adapter_model.bin b/checkpoint-4100/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-4100/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-4100/training_args.bin b/checkpoint-4100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-4100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-4100/value_head.bin b/checkpoint-4100/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..e01448c6d595642e14f85d854b68a55d5f1d1aa3 --- /dev/null +++ b/checkpoint-4100/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b11759b70ce1dd6b815ee586afd5de77eba44358c61fa5c1e18b030720fc6a0f +size 17395 diff --git a/checkpoint-4200/README.md b/checkpoint-4200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-4200/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-4200/adapter_config.json b/checkpoint-4200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4200/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4200/adapter_model.bin b/checkpoint-4200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c85b4fde7b5d330963c01cc2033d9e506c7b99c3 --- /dev/null +++ b/checkpoint-4200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1365bbfa049dd9a27e93035edc1f8b54c20e6be45bef155cae945b84d8bca05 +size 14700057 diff --git a/checkpoint-4200/finetuning_args.json b/checkpoint-4200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-4200/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-4200/reward/adapter_config.json b/checkpoint-4200/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4200/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4200/reward/adapter_model.bin b/checkpoint-4200/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-4200/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-4200/training_args.bin b/checkpoint-4200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-4200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-4200/value_head.bin b/checkpoint-4200/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..3cd8b495abee9dd8aa3edcdf72bde5c5bf7f99e9 --- /dev/null +++ b/checkpoint-4200/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3846548214cf9abe02750caca75ee512529cd0c1e0ade681d5c236c8ba22e40d +size 17395 diff --git a/checkpoint-4300/README.md b/checkpoint-4300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-4300/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-4300/adapter_config.json b/checkpoint-4300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4300/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4300/adapter_model.bin b/checkpoint-4300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7bea1d22d0466c3e1f2c8413675046d3a56989f7 --- /dev/null +++ b/checkpoint-4300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c4842904e59f6659689751fe4ed00a1b3c63fecdd1ee5dce66d4c457d2b5f59 +size 14700057 diff --git a/checkpoint-4300/finetuning_args.json b/checkpoint-4300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-4300/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-4300/reward/adapter_config.json b/checkpoint-4300/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4300/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4300/reward/adapter_model.bin b/checkpoint-4300/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-4300/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-4300/training_args.bin b/checkpoint-4300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-4300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-4300/value_head.bin b/checkpoint-4300/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..e33a6cd194da28bf85cbad783adbb4cb65072ef2 --- /dev/null +++ b/checkpoint-4300/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908db711bd50f317316f9f813420750aa60288c9ae44c7b66c6174514f165cee +size 17395 diff --git a/checkpoint-4400/README.md b/checkpoint-4400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-4400/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-4400/adapter_config.json b/checkpoint-4400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4400/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4400/adapter_model.bin b/checkpoint-4400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d67cd44c0540d3e3b41bc88f1ec79df1db55555c --- /dev/null +++ b/checkpoint-4400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be10b7cff0e446a6bd18a9b33e37ac58e811770e1f3414bf8190622ef6f708f8 +size 14700057 diff --git a/checkpoint-4400/finetuning_args.json b/checkpoint-4400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-4400/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-4400/reward/adapter_config.json b/checkpoint-4400/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4400/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4400/reward/adapter_model.bin b/checkpoint-4400/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-4400/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-4400/training_args.bin b/checkpoint-4400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-4400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-4400/value_head.bin b/checkpoint-4400/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ac0f7bbd7ba4da0a050529a97d2faaec70e7062 --- /dev/null +++ b/checkpoint-4400/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fde41f450b65b58fa8280749284d2e93f333ad6885906ac2b61a078f835b8f9e +size 17395 diff --git a/checkpoint-4500/README.md b/checkpoint-4500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-4500/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-4500/adapter_config.json b/checkpoint-4500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4500/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4500/adapter_model.bin b/checkpoint-4500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..90d1fe7afaa016bbedd08aa0d1d669e11d583412 --- /dev/null +++ b/checkpoint-4500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de288b93dc356c0f5721a208b192d9c363632dd7f883f120132d375d721da6bc +size 14700057 diff --git a/checkpoint-4500/finetuning_args.json b/checkpoint-4500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-4500/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-4500/reward/adapter_config.json b/checkpoint-4500/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4500/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4500/reward/adapter_model.bin b/checkpoint-4500/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-4500/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-4500/training_args.bin b/checkpoint-4500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-4500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-4500/value_head.bin b/checkpoint-4500/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dae913d3a294837478a98fbaabd344641d91ee0 --- /dev/null +++ b/checkpoint-4500/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9259c5a0a89ea465462c59efd18e4520dc8bd80520f0adf6c774a28991a9467 +size 17395 diff --git a/checkpoint-4600/README.md b/checkpoint-4600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-4600/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-4600/adapter_config.json b/checkpoint-4600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4600/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4600/adapter_model.bin b/checkpoint-4600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3a37d2b50c096b917c0b0f21b9dcefaeeba9af74 --- /dev/null +++ b/checkpoint-4600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:063364ad41a9dbe709a0da6ddd4adc9fb3d664d34f29ec6540f03242211cbef7 +size 14700057 diff --git a/checkpoint-4600/finetuning_args.json b/checkpoint-4600/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-4600/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-4600/reward/adapter_config.json b/checkpoint-4600/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4600/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4600/reward/adapter_model.bin b/checkpoint-4600/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-4600/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-4600/training_args.bin b/checkpoint-4600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-4600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-4600/value_head.bin b/checkpoint-4600/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..dd572eba084b86304638ff2b5fefde89f2dfe9c7 --- /dev/null +++ b/checkpoint-4600/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d14954980b08d10455ba362f3e53ba49202642c69321f530bed349b7141b6b +size 17395 diff --git a/checkpoint-4700/README.md b/checkpoint-4700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-4700/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-4700/adapter_config.json b/checkpoint-4700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4700/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4700/adapter_model.bin b/checkpoint-4700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..05aec9a4761176a7c5f5da01e889c8da001e09c5 --- /dev/null +++ b/checkpoint-4700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0873a72a3a63a8728e4c72a7aa896fe114885aa07f6b7b4e2fdd378cb7066205 +size 14700057 diff --git a/checkpoint-4700/finetuning_args.json b/checkpoint-4700/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-4700/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-4700/reward/adapter_config.json b/checkpoint-4700/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4700/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4700/reward/adapter_model.bin b/checkpoint-4700/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-4700/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-4700/training_args.bin b/checkpoint-4700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-4700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-4700/value_head.bin b/checkpoint-4700/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..7666fb7a5071e4fd9cd7aef4e9352175ce672b25 --- /dev/null +++ b/checkpoint-4700/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89be1d4124c66c6a1e74a5f7a64892cf568b1e69e36d3ec13364f3f44bfb4c20 +size 17395 diff --git a/checkpoint-4800/README.md b/checkpoint-4800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-4800/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-4800/adapter_config.json b/checkpoint-4800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4800/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4800/adapter_model.bin b/checkpoint-4800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..02904aca277d9f9e8556d9eec7226eeabe45aa88 --- /dev/null +++ b/checkpoint-4800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610a34653a0160ca9e897e89a750d9b4b88f8a073f9b5b4f41635af91b86bf7c +size 14700057 diff --git a/checkpoint-4800/finetuning_args.json b/checkpoint-4800/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-4800/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-4800/reward/adapter_config.json b/checkpoint-4800/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4800/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4800/reward/adapter_model.bin b/checkpoint-4800/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-4800/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-4800/training_args.bin b/checkpoint-4800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-4800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-4800/value_head.bin b/checkpoint-4800/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..47dd23e31f4d61779fc07d86e8c753ddefee55f1 --- /dev/null +++ b/checkpoint-4800/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:190c546512ba887f9124ef3df5d9f022db699ce740658716ea79f26d5dd40620 +size 17395 diff --git a/checkpoint-4900/README.md b/checkpoint-4900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-4900/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-4900/adapter_config.json b/checkpoint-4900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4900/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4900/adapter_model.bin b/checkpoint-4900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..30622087b6db5da682034e254e0584d56df996c0 --- /dev/null +++ b/checkpoint-4900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98d42f4fc30c21a659712e5508cb5e268cd70e87e662b420f23d9c9cacd02747 +size 14700057 diff --git a/checkpoint-4900/finetuning_args.json b/checkpoint-4900/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-4900/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-4900/reward/adapter_config.json b/checkpoint-4900/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-4900/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4900/reward/adapter_model.bin b/checkpoint-4900/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-4900/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-4900/training_args.bin b/checkpoint-4900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-4900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-4900/value_head.bin b/checkpoint-4900/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..be26d460681d2a4d58c65b32a95967962cccfc41 --- /dev/null +++ b/checkpoint-4900/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:448af9744d82d55f40570dac440a862e5acaab872cf3caa37ed7913cd1ae7174 +size 17395 diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-500/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-500/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-500/adapter_model.bin b/checkpoint-500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a73bb0598729b9fc0c3cb999551599b5e4ed324f --- /dev/null +++ b/checkpoint-500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f3d4201ca904558094e3984613d59d7cc753d2fa36547f7551c4e5eed699d1e +size 14700057 diff --git a/checkpoint-500/finetuning_args.json b/checkpoint-500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-500/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-500/reward/adapter_config.json b/checkpoint-500/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-500/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-500/reward/adapter_model.bin b/checkpoint-500/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-500/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-500/value_head.bin b/checkpoint-500/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..3588dc24067ac54bc93b63fa23957f151569e203 --- /dev/null +++ b/checkpoint-500/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f8eba99c8333a95bf21d5fc661e1e192c068c03c61093cab2d41f1aa4a374d7 +size 17395 diff --git a/checkpoint-5000/README.md b/checkpoint-5000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-5000/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-5000/adapter_config.json b/checkpoint-5000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5000/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5000/adapter_model.bin b/checkpoint-5000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..92333d95a81f4ca7ee1228ab014aca3de1aa7040 --- /dev/null +++ b/checkpoint-5000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95947272e6c992eca1d6f761e5657a0c6bf4ae014fc63ddf5cd101129dbf9f0b +size 14700057 diff --git a/checkpoint-5000/finetuning_args.json b/checkpoint-5000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-5000/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-5000/reward/adapter_config.json b/checkpoint-5000/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5000/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5000/reward/adapter_model.bin b/checkpoint-5000/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-5000/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-5000/training_args.bin b/checkpoint-5000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-5000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-5000/value_head.bin b/checkpoint-5000/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..c4a5dc2a0df6da72204a2d70a534dd075c23e8bf --- /dev/null +++ b/checkpoint-5000/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48011af49287ba7621e4d89972a5e13b19df1d3f27267770e3ad2523d51250e8 +size 17395 diff --git a/checkpoint-5100/README.md b/checkpoint-5100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-5100/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-5100/adapter_config.json b/checkpoint-5100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5100/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5100/adapter_model.bin b/checkpoint-5100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e51d6b1eb6ff795b30b90478e0143c8939894c6b --- /dev/null +++ b/checkpoint-5100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:262e9e193dd3d8c3641c5d2f681cb85d0027525fdd8cbbbbc2ae528e257ba354 +size 14700057 diff --git a/checkpoint-5100/finetuning_args.json b/checkpoint-5100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-5100/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-5100/reward/adapter_config.json b/checkpoint-5100/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5100/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5100/reward/adapter_model.bin b/checkpoint-5100/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-5100/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-5100/training_args.bin b/checkpoint-5100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-5100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-5100/value_head.bin b/checkpoint-5100/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..eae5efcc1ba9fc6a2de1008627d85f4f32db5037 --- /dev/null +++ b/checkpoint-5100/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03c9d318dce6c0ab631d743c6cddbb990ee16787e29bcbed7c8521e3f1727f3d +size 17395 diff --git a/checkpoint-5200/README.md b/checkpoint-5200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-5200/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-5200/adapter_config.json b/checkpoint-5200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5200/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5200/adapter_model.bin b/checkpoint-5200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..0b02e553fef7394f7cfdc7892c281c3a88897dfa --- /dev/null +++ b/checkpoint-5200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f54417b431dcefb4298acd75d56402215c9c4fc6e1beaf52b90a55884a7025fb +size 14700057 diff --git a/checkpoint-5200/finetuning_args.json b/checkpoint-5200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-5200/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-5200/reward/adapter_config.json b/checkpoint-5200/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5200/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5200/reward/adapter_model.bin b/checkpoint-5200/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-5200/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-5200/training_args.bin b/checkpoint-5200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-5200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-5200/value_head.bin b/checkpoint-5200/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..73e3abb0ac3448b1dcf1a666fba07d14ad90119f --- /dev/null +++ b/checkpoint-5200/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cab193ab3edd1fb5572b2614f653502aabcd98ab9e3aa451e4184f8a619d317 +size 17395 diff --git a/checkpoint-5300/README.md b/checkpoint-5300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-5300/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-5300/adapter_config.json b/checkpoint-5300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5300/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5300/adapter_model.bin b/checkpoint-5300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..84d6c5ee9e1f9ee79d4a937c4bcae5aad5a77766 --- /dev/null +++ b/checkpoint-5300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb02c0a367d9fc022e1d4ae88d7c834c20dda8635f6e6f458a03129f3fc8ec53 +size 14700057 diff --git a/checkpoint-5300/finetuning_args.json b/checkpoint-5300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-5300/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-5300/reward/adapter_config.json b/checkpoint-5300/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5300/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5300/reward/adapter_model.bin b/checkpoint-5300/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-5300/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-5300/training_args.bin b/checkpoint-5300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-5300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-5300/value_head.bin b/checkpoint-5300/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9d9cc1d3d833df627c304b7797fc9a88f165d11 --- /dev/null +++ b/checkpoint-5300/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5194627185e295627c5a06f63debf135e4a9a8b801107ba4b260fdb19fdf9587 +size 17395 diff --git a/checkpoint-5400/README.md b/checkpoint-5400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-5400/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-5400/adapter_config.json b/checkpoint-5400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5400/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5400/adapter_model.bin b/checkpoint-5400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..218ff78cb43b774fdc0fd1500e6664dff9286df3 --- /dev/null +++ b/checkpoint-5400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a34027bdfd2429c9e884fda171dc5bed8f84a8b9318e6ee90797fba4a32489 +size 14700057 diff --git a/checkpoint-5400/finetuning_args.json b/checkpoint-5400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-5400/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-5400/reward/adapter_config.json b/checkpoint-5400/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5400/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5400/reward/adapter_model.bin b/checkpoint-5400/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-5400/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-5400/training_args.bin b/checkpoint-5400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-5400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-5400/value_head.bin b/checkpoint-5400/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..8aa5d152994dced0a3e918120074eaafe092627e --- /dev/null +++ b/checkpoint-5400/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cddf6e7f262d0be0744db3082267f02ac58d43edfac3e090ac2ea30e48acba4 +size 17395 diff --git a/checkpoint-5500/README.md b/checkpoint-5500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-5500/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-5500/adapter_config.json b/checkpoint-5500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5500/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5500/adapter_model.bin b/checkpoint-5500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..39bdb431bb76070c0452aa8b2fe7d843101bd392 --- /dev/null +++ b/checkpoint-5500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:324a2a485a4a4676c28f32ce8733aa7785b5473159e539dc8567734ed81c82b3 +size 14700057 diff --git a/checkpoint-5500/finetuning_args.json b/checkpoint-5500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-5500/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-5500/reward/adapter_config.json b/checkpoint-5500/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5500/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5500/reward/adapter_model.bin b/checkpoint-5500/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-5500/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-5500/training_args.bin b/checkpoint-5500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-5500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-5500/value_head.bin b/checkpoint-5500/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..88dbec5e47fe51c41808b5e3534ff471920c3cb4 --- /dev/null +++ b/checkpoint-5500/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09d590dd912ece49662f6398e930ebcb9146a5f81247282ee6eedcb0fdb18181 +size 17395 diff --git a/checkpoint-5600/README.md b/checkpoint-5600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-5600/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-5600/adapter_config.json b/checkpoint-5600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5600/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5600/adapter_model.bin b/checkpoint-5600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..44f65e0c1ff2e8221d761a31649b63502f816686 --- /dev/null +++ b/checkpoint-5600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3343a9b859e351f08c4193ce79038b8c11fca965317d1a3ff204fe8b755b3f86 +size 14700057 diff --git a/checkpoint-5600/finetuning_args.json b/checkpoint-5600/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-5600/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-5600/reward/adapter_config.json b/checkpoint-5600/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5600/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5600/reward/adapter_model.bin b/checkpoint-5600/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-5600/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-5600/training_args.bin b/checkpoint-5600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-5600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-5600/value_head.bin b/checkpoint-5600/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..06cc974e00fe090d8f33a09a54dc0b529e61d174 --- /dev/null +++ b/checkpoint-5600/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20d3e024b36efdb6183cf191de25dc85aa89da304baf0a1a0c38bb09a8a0153 +size 17395 diff --git a/checkpoint-5700/README.md b/checkpoint-5700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-5700/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-5700/adapter_config.json b/checkpoint-5700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5700/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5700/adapter_model.bin b/checkpoint-5700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6cbc86c3f27f17fc3d8b4e5bc9497b1c4c417d7d --- /dev/null +++ b/checkpoint-5700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e2147e6cdc614eacb2c0a457ebf55fe19b65234639d11c0d12fca8fb0438bdd +size 14700057 diff --git a/checkpoint-5700/finetuning_args.json b/checkpoint-5700/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-5700/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-5700/reward/adapter_config.json b/checkpoint-5700/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5700/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5700/reward/adapter_model.bin b/checkpoint-5700/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-5700/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-5700/training_args.bin b/checkpoint-5700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-5700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-5700/value_head.bin b/checkpoint-5700/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1492c605da64777eda97b7c52b3aa897692b9356 --- /dev/null +++ b/checkpoint-5700/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:184fd29808dc44c48d1b9a96c5b0e8b3b2077ec5b2bf016667d23943b418519f +size 17395 diff --git a/checkpoint-5800/README.md b/checkpoint-5800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-5800/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-5800/adapter_config.json b/checkpoint-5800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5800/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5800/adapter_model.bin b/checkpoint-5800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..9765158290528faf9301152be802f53dfbdd0835 --- /dev/null +++ b/checkpoint-5800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a233e3316af628e1b3659fadcde518df53856ed2bbb96d8ada0a690af5e623 +size 14700057 diff --git a/checkpoint-5800/finetuning_args.json b/checkpoint-5800/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-5800/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-5800/reward/adapter_config.json b/checkpoint-5800/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5800/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5800/reward/adapter_model.bin b/checkpoint-5800/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-5800/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-5800/training_args.bin b/checkpoint-5800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-5800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-5800/value_head.bin b/checkpoint-5800/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..cd43551f7d04e6515dbb2cf22dde979f2ab78fff --- /dev/null +++ b/checkpoint-5800/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2b857c19ce6c81732b1441edf3aaf53e514d1b80186108c29c0fc3269ee7ad1 +size 17395 diff --git a/checkpoint-5900/README.md b/checkpoint-5900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-5900/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-5900/adapter_config.json b/checkpoint-5900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5900/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5900/adapter_model.bin b/checkpoint-5900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..5af5782141b221268c9007da6f04955f467e9b3a --- /dev/null +++ b/checkpoint-5900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607eecf26a2da18c55f53bad4cc4a377b557e935c0803441d2a835d97fe2b07d +size 14700057 diff --git a/checkpoint-5900/finetuning_args.json b/checkpoint-5900/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-5900/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-5900/reward/adapter_config.json b/checkpoint-5900/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-5900/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5900/reward/adapter_model.bin b/checkpoint-5900/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-5900/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-5900/training_args.bin b/checkpoint-5900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-5900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-5900/value_head.bin b/checkpoint-5900/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..e6bd8d835eacfdd59ada5d8e2846ee6d24be03a0 --- /dev/null +++ b/checkpoint-5900/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2432a125e1b7909e0ce2d12d7381add557b70a78afc395969355f907fa862e3f +size 17395 diff --git a/checkpoint-600/README.md b/checkpoint-600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-600/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-600/adapter_config.json b/checkpoint-600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-600/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-600/adapter_model.bin b/checkpoint-600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b11a46e1912a50d61ed3d7a5900e424410a9deb --- /dev/null +++ b/checkpoint-600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:861d906f424b2ae2a6098993785ffff218d4f8bda61b8912ea6f0752424fba94 +size 14700057 diff --git a/checkpoint-600/finetuning_args.json b/checkpoint-600/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-600/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-600/reward/adapter_config.json b/checkpoint-600/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-600/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-600/reward/adapter_model.bin b/checkpoint-600/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-600/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-600/value_head.bin b/checkpoint-600/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e0491b7966f31694e38afe0a7c34fa7f49aa25b --- /dev/null +++ b/checkpoint-600/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e955dd37d1c1c6c7e9171f1e88950093d693cfe357cbbe2578c770378a1bec77 +size 17395 diff --git a/checkpoint-6000/README.md b/checkpoint-6000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-6000/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-6000/adapter_config.json b/checkpoint-6000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6000/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6000/adapter_model.bin b/checkpoint-6000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a3b12a670eb136f2566cbb47d57ffaa6fff26c4d --- /dev/null +++ b/checkpoint-6000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c77e03ed418d8ab2709ebf7fb3629530ccfe2c6655627142e8ddc68b1a7d6b71 +size 14700057 diff --git a/checkpoint-6000/finetuning_args.json b/checkpoint-6000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-6000/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-6000/reward/adapter_config.json b/checkpoint-6000/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6000/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6000/reward/adapter_model.bin b/checkpoint-6000/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-6000/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-6000/training_args.bin b/checkpoint-6000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-6000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-6000/value_head.bin b/checkpoint-6000/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1c048449976004170e4e6e365a90a16abf01baa --- /dev/null +++ b/checkpoint-6000/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de92c0d21d4a99b2a11a077bfa532b818762ec001cf7bd2059fdc51fbec5fad4 +size 17395 diff --git a/checkpoint-6100/README.md b/checkpoint-6100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-6100/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-6100/adapter_config.json b/checkpoint-6100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6100/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6100/adapter_model.bin b/checkpoint-6100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..07ef42f810c7bcb961ee72c9eae1dcb1aca7cd41 --- /dev/null +++ b/checkpoint-6100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f396ac0ee3c2cb42cf8599e5121d0d6651a8e4a028cba37986de44643acb2823 +size 14700057 diff --git a/checkpoint-6100/finetuning_args.json b/checkpoint-6100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-6100/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-6100/reward/adapter_config.json b/checkpoint-6100/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6100/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6100/reward/adapter_model.bin b/checkpoint-6100/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-6100/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-6100/training_args.bin b/checkpoint-6100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-6100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-6100/value_head.bin b/checkpoint-6100/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..916e0efdf52e7c8cf1fda8563e2c3a19caee5136 --- /dev/null +++ b/checkpoint-6100/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:183d99133430027176f78d068a9ad0d87bf57b684f335a9114954af9cfc8be61 +size 17395 diff --git a/checkpoint-6200/README.md b/checkpoint-6200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-6200/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-6200/adapter_config.json b/checkpoint-6200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6200/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6200/adapter_model.bin b/checkpoint-6200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf167c5f83ab606f79afaebb9daa4df0bd8b063e --- /dev/null +++ b/checkpoint-6200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86524cdea583b1ef024105ab10508debbe42a63ea9a233cffd18485ec1fc7141 +size 14700057 diff --git a/checkpoint-6200/finetuning_args.json b/checkpoint-6200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-6200/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-6200/reward/adapter_config.json b/checkpoint-6200/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6200/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6200/reward/adapter_model.bin b/checkpoint-6200/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-6200/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-6200/training_args.bin b/checkpoint-6200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-6200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-6200/value_head.bin b/checkpoint-6200/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..313445143eef8a8c54cd7813471f29faf737c2e0 --- /dev/null +++ b/checkpoint-6200/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0422a5f3e9df50a54813264963738cf9d44ab52b7d25db9919bce377f44f2d +size 17395 diff --git a/checkpoint-6300/README.md b/checkpoint-6300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-6300/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-6300/adapter_config.json b/checkpoint-6300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6300/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6300/adapter_model.bin b/checkpoint-6300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e2676d61afccc9fb813238bfe60e55c37fbe2509 --- /dev/null +++ b/checkpoint-6300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be2757102da5f7bd2bec99435120875f851afd5f39658acbf3c2a9ecc31e171e +size 14700057 diff --git a/checkpoint-6300/finetuning_args.json b/checkpoint-6300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-6300/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-6300/reward/adapter_config.json b/checkpoint-6300/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6300/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6300/reward/adapter_model.bin b/checkpoint-6300/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-6300/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-6300/training_args.bin b/checkpoint-6300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-6300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-6300/value_head.bin b/checkpoint-6300/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..be92d45d384f52cac6de15399178a306e2531e0a --- /dev/null +++ b/checkpoint-6300/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68a3302d7d711f7b6b8775e97ef856859d6ba0ee982b8c9c45f3c6550c1e10a6 +size 17395 diff --git a/checkpoint-6400/README.md b/checkpoint-6400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-6400/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-6400/adapter_config.json b/checkpoint-6400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6400/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6400/adapter_model.bin b/checkpoint-6400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a33174250593aca3ae63c4845084cc361736b920 --- /dev/null +++ b/checkpoint-6400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e2fe4779633b9f4ae43178e5b6ead451b710696750ca60a6a31c6428c604702 +size 14700057 diff --git a/checkpoint-6400/finetuning_args.json b/checkpoint-6400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-6400/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-6400/reward/adapter_config.json b/checkpoint-6400/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6400/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6400/reward/adapter_model.bin b/checkpoint-6400/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-6400/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-6400/training_args.bin b/checkpoint-6400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-6400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-6400/value_head.bin b/checkpoint-6400/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..832dae62bddef8b9e438f233cad9de91fcc38304 --- /dev/null +++ b/checkpoint-6400/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741cd4141481df1abc3db0b70b63e47342cf7174d1bacb0dbffd48ea40398a4b +size 17395 diff --git a/checkpoint-6500/README.md b/checkpoint-6500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-6500/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-6500/adapter_config.json b/checkpoint-6500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6500/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6500/adapter_model.bin b/checkpoint-6500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ec8e1e83e504728e4b37fe989a331bf2154f3129 --- /dev/null +++ b/checkpoint-6500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10d0ae8fea9b01bc015cd1e1304f099e3fc7e654d538d8176b56f814664cc9b3 +size 14700057 diff --git a/checkpoint-6500/finetuning_args.json b/checkpoint-6500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-6500/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-6500/reward/adapter_config.json b/checkpoint-6500/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6500/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6500/reward/adapter_model.bin b/checkpoint-6500/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-6500/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-6500/training_args.bin b/checkpoint-6500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-6500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-6500/value_head.bin b/checkpoint-6500/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..9630c704e7c465be14f19c7615019bb4d63122d6 --- /dev/null +++ b/checkpoint-6500/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72323e2b8d3db23bce0bba70ecf3bdc44c77f1293f00a836a779e12774fd96b3 +size 17395 diff --git a/checkpoint-6600/README.md b/checkpoint-6600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-6600/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-6600/adapter_config.json b/checkpoint-6600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6600/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6600/adapter_model.bin b/checkpoint-6600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c54ceb455229b8bd4198daa3f86ddbe6541aeea --- /dev/null +++ b/checkpoint-6600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f172554ba9c7699c2a53f33e5f9cf3c18d4bd5ff69d1e22b49a37cc523d119f +size 14700057 diff --git a/checkpoint-6600/finetuning_args.json b/checkpoint-6600/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-6600/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-6600/reward/adapter_config.json b/checkpoint-6600/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6600/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6600/reward/adapter_model.bin b/checkpoint-6600/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-6600/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-6600/training_args.bin b/checkpoint-6600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-6600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-6600/value_head.bin b/checkpoint-6600/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c899eb7a4075d4b3d70c8bbcadcbe406667248a --- /dev/null +++ b/checkpoint-6600/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55794d9a669283a79621fb2b4705b316fc1e55ee72829994fd38d40d04cc98fe +size 17395 diff --git a/checkpoint-6700/README.md b/checkpoint-6700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-6700/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-6700/adapter_config.json b/checkpoint-6700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6700/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6700/adapter_model.bin b/checkpoint-6700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..872dbab15e56b29ab62d3453d8dc4d467490e358 --- /dev/null +++ b/checkpoint-6700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d055a2a7ac8eb76e9357b0e9297f48a78109bc2c4afa7b9327ae0724ed35861 +size 14700057 diff --git a/checkpoint-6700/finetuning_args.json b/checkpoint-6700/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-6700/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-6700/reward/adapter_config.json b/checkpoint-6700/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6700/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6700/reward/adapter_model.bin b/checkpoint-6700/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-6700/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-6700/training_args.bin b/checkpoint-6700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-6700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-6700/value_head.bin b/checkpoint-6700/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..0602e989dd167121297957817595af216ef97bf5 --- /dev/null +++ b/checkpoint-6700/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec458de470ac303d265edb9897dd38a52716af3f6bec2a79abce392a60f0d5f +size 17395 diff --git a/checkpoint-6800/README.md b/checkpoint-6800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-6800/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-6800/adapter_config.json b/checkpoint-6800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6800/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6800/adapter_model.bin b/checkpoint-6800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..76fee9558a4891c6309e27fe06a9c96a1ec3b1a9 --- /dev/null +++ b/checkpoint-6800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42a10e0192ba5fc2d9e0d61726622cdc2e63b2435ea29d422da393f201be301 +size 14700057 diff --git a/checkpoint-6800/finetuning_args.json b/checkpoint-6800/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-6800/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-6800/reward/adapter_config.json b/checkpoint-6800/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6800/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6800/reward/adapter_model.bin b/checkpoint-6800/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-6800/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-6800/training_args.bin b/checkpoint-6800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-6800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-6800/value_head.bin b/checkpoint-6800/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..b138105dad2076180fe9a6c0852736642edeb130 --- /dev/null +++ b/checkpoint-6800/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a15f9fb17fee5ae55d8e89fcb07525f53dcfddeaa585afdaf2046a6ebc2c3b33 +size 17395 diff --git a/checkpoint-6900/README.md b/checkpoint-6900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-6900/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-6900/adapter_config.json b/checkpoint-6900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6900/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6900/adapter_model.bin b/checkpoint-6900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c603e70b9764a1e0f1fdf9ba9113e5f13b21272f --- /dev/null +++ b/checkpoint-6900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996cf5fddec3ce775827a4bf3bf98ad03aa70cbd632be222e8cff719d47c3057 +size 14700057 diff --git a/checkpoint-6900/finetuning_args.json b/checkpoint-6900/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-6900/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-6900/reward/adapter_config.json b/checkpoint-6900/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-6900/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6900/reward/adapter_model.bin b/checkpoint-6900/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-6900/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-6900/training_args.bin b/checkpoint-6900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-6900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-6900/value_head.bin b/checkpoint-6900/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..245bcd155e4ebc296bfeb5410b6b6587e9157241 --- /dev/null +++ b/checkpoint-6900/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6199f58e3875ce94077bb32e843a85c822e7a6e0dba53de6de73d7a627ce4457 +size 17395 diff --git a/checkpoint-700/README.md b/checkpoint-700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-700/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-700/adapter_config.json b/checkpoint-700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-700/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-700/adapter_model.bin b/checkpoint-700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fb43bf84e464ce5a8b9f2b63d8126ca6c7983418 --- /dev/null +++ b/checkpoint-700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6537ffadac1274cf124af03abfb46c288e6229b0e1271130754bfef886a8551 +size 14700057 diff --git a/checkpoint-700/finetuning_args.json b/checkpoint-700/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-700/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-700/reward/adapter_config.json b/checkpoint-700/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-700/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-700/reward/adapter_model.bin b/checkpoint-700/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-700/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-700/value_head.bin b/checkpoint-700/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..357b4d6392c9cc69592213c25a439c30280ef4cc --- /dev/null +++ b/checkpoint-700/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83919fd9fafb5b0607a770834707e0a9d0bc5bc4c5882e8aefc41906c7ec9dba +size 17395 diff --git a/checkpoint-7000/README.md b/checkpoint-7000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-7000/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-7000/adapter_config.json b/checkpoint-7000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-7000/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-7000/adapter_model.bin b/checkpoint-7000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..caac38db1acd25073755da7257f58890767e1c9b --- /dev/null +++ b/checkpoint-7000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9211243ace728d3f46a606416678d76600b3edb0e0f74f286b2baa688f282354 +size 14700057 diff --git a/checkpoint-7000/finetuning_args.json b/checkpoint-7000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-7000/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-7000/reward/adapter_config.json b/checkpoint-7000/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-7000/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-7000/reward/adapter_model.bin b/checkpoint-7000/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-7000/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-7000/training_args.bin b/checkpoint-7000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-7000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-7000/value_head.bin b/checkpoint-7000/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..13c6b093e0bd8581d278c770e568daa25abe43c2 --- /dev/null +++ b/checkpoint-7000/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc33d4de60f0750f542b45fcc73674912fdeb21ffe4fdd86316e5127691b755a +size 17395 diff --git a/checkpoint-800/README.md b/checkpoint-800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-800/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-800/adapter_config.json b/checkpoint-800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-800/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-800/adapter_model.bin b/checkpoint-800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f983c3848cb8cae36044a2846db85101acd01f0f --- /dev/null +++ b/checkpoint-800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d845515affed40fc9910e8c7e5942a3ff6b62be9a63378995c95e51855eef23 +size 14700057 diff --git a/checkpoint-800/finetuning_args.json b/checkpoint-800/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-800/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-800/reward/adapter_config.json b/checkpoint-800/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-800/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-800/reward/adapter_model.bin b/checkpoint-800/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-800/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-800/value_head.bin b/checkpoint-800/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..01932c582434b65678066f69510b6a2bbe906618 --- /dev/null +++ b/checkpoint-800/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5ec6574d863799e94de51c538f427d0e6b062a275ef633214db8bf121e7df0 +size 17395 diff --git a/checkpoint-900/README.md b/checkpoint-900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d290c902d52b2091d490255d35c9be86df84f437 --- /dev/null +++ b/checkpoint-900/README.md @@ -0,0 +1,3 @@ +--- +library_name: peft +--- diff --git a/checkpoint-900/adapter_config.json b/checkpoint-900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-900/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-900/adapter_model.bin b/checkpoint-900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..06c64ff0ae71237d1eb1a06caf125c25d4ea2ffd --- /dev/null +++ b/checkpoint-900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7cdd9f270d0484a178a14854aed0a6b3e3e71a827b67fe6faaf92ba12fdf94 +size 14700057 diff --git a/checkpoint-900/finetuning_args.json b/checkpoint-900/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/checkpoint-900/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/checkpoint-900/reward/adapter_config.json b/checkpoint-900/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/checkpoint-900/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-900/reward/adapter_model.bin b/checkpoint-900/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/checkpoint-900/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/checkpoint-900/value_head.bin b/checkpoint-900/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..1dcde151c3aaa2a71d9546aac6accf916ef48a48 --- /dev/null +++ b/checkpoint-900/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec87ef8d8da0cff3f8c045a45a5fc31317dcb559ae1406bd14e134859699f0d3 +size 17395 diff --git a/finetuning_args.json b/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..9cb756f5ac88d279d7ccc9081a3e0efaa5e27fe3 --- /dev/null +++ b/finetuning_args.json @@ -0,0 +1,13 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "query_key_value" + ], + "name_module_trainable": "mlp", + "num_layer_trainable": 3, + "pre_seq_len": 16, + "prefix_projection": false +} diff --git a/reward/adapter_config.json b/reward/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..20bd1fa301a8cd038d8833a5567b3c2415601bd8 --- /dev/null +++ b/reward/adapter_config.json @@ -0,0 +1,19 @@ +{ + "base_model_name_or_path": "THUDM/chatglm-6b", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "query_key_value" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/reward/adapter_model.bin b/reward/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8710093665dfdb9819e2f8817a1e25a4ccdd9935 --- /dev/null +++ b/reward/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406 +size 443 diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..29e1ee8bf5a578412195af2190e1e099723cdbb7 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,706 @@ +{"current_steps": 9, "total_steps": 7063, "loss": 0.2855, "reward": 0.7376, "learning_rate": 9.999612341935039e-06, "epoch": 0.0, "percentage": 0.13, "elapsed_time": "0:02:18", "remaining_time": "1 day, 6:10:00"} +{"current_steps": 19, "total_steps": 7063, "loss": 0.275, "reward": 0.722, "learning_rate": 9.99771375326247e-06, "epoch": 0.0, "percentage": 0.27, "elapsed_time": "0:04:36", "remaining_time": "1 day, 4:26:56"} +{"current_steps": 29, "total_steps": 7063, "loss": 0.2593, "reward": 1.2159, "learning_rate": 9.99423363154034e-06, "epoch": 0.0, "percentage": 0.41, "elapsed_time": "0:06:53", "remaining_time": "1 day, 3:51:29"} +{"current_steps": 39, "total_steps": 7063, "loss": 0.2415, "reward": 1.1798, "learning_rate": 9.98975021111248e-06, "epoch": 0.01, "percentage": 0.55, "elapsed_time": "0:09:09", "remaining_time": "1 day, 3:28:10"} +{"current_steps": 49, "total_steps": 7063, "loss": 0.2528, "reward": 1.3194, "learning_rate": 9.983268624014507e-06, "epoch": 0.01, "percentage": 0.69, "elapsed_time": "0:11:24", "remaining_time": "1 day, 3:13:34"} +{"current_steps": 59, "total_steps": 7063, "loss": 0.2665, "reward": 1.1497, "learning_rate": 9.975210075047007e-06, "epoch": 0.01, "percentage": 0.84, "elapsed_time": "0:13:27", "remaining_time": "1 day, 2:37:22"} +{"current_steps": 69, "total_steps": 7063, "loss": 0.2498, "reward": 0.8864, "learning_rate": 9.965577114348328e-06, "epoch": 0.01, "percentage": 0.98, "elapsed_time": "0:15:17", "remaining_time": "1 day, 1:50:08"} +{"current_steps": 79, "total_steps": 7063, "loss": 0.2489, "reward": 1.3557, "learning_rate": 9.954372790281476e-06, "epoch": 0.01, "percentage": 1.12, "elapsed_time": "0:17:12", "remaining_time": "1 day, 1:21:04"} +{"current_steps": 89, "total_steps": 7063, "loss": 0.2236, "reward": 0.8862, "learning_rate": 9.941600648469446e-06, "epoch": 0.01, "percentage": 1.26, "elapsed_time": "0:19:17", "remaining_time": "1 day, 1:11:44"} +{"current_steps": 99, "total_steps": 7063, "loss": 0.1953, "reward": 0.9721, "learning_rate": 9.92726473067321e-06, "epoch": 0.01, "percentage": 1.4, "elapsed_time": "0:21:16", "remaining_time": "1 day, 0:56:41"} +{"current_steps": 109, "total_steps": 7063, "loss": 0.2028, "reward": 1.3082, "learning_rate": 9.91136957351269e-06, "epoch": 0.02, "percentage": 1.54, "elapsed_time": "0:23:17", "remaining_time": "1 day, 0:46:08"} +{"current_steps": 119, "total_steps": 7063, "loss": 0.2024, "reward": 1.037, "learning_rate": 9.893920207031147e-06, "epoch": 0.02, "percentage": 1.68, "elapsed_time": "0:25:09", "remaining_time": "1 day, 0:28:10"} +{"current_steps": 129, "total_steps": 7063, "loss": 0.1851, "reward": 1.3262, "learning_rate": 9.874922153103414e-06, "epoch": 0.02, "percentage": 1.83, "elapsed_time": "0:27:11", "remaining_time": "1 day, 0:21:50"} +{"current_steps": 139, "total_steps": 7063, "loss": 0.1869, "reward": 1.4129, "learning_rate": 9.854381423688484e-06, "epoch": 0.02, "percentage": 1.97, "elapsed_time": "0:29:03", "remaining_time": "1 day, 0:07:10"} +{"current_steps": 149, "total_steps": 7063, "loss": 0.174, "reward": 1.7339, "learning_rate": 9.832304518927032e-06, "epoch": 0.02, "percentage": 2.11, "elapsed_time": "0:31:02", "remaining_time": "1 day, 0:00:14"} +{"current_steps": 159, "total_steps": 7063, "loss": 0.1867, "reward": 1.6142, "learning_rate": 9.808698425084422e-06, "epoch": 0.02, "percentage": 2.25, "elapsed_time": "0:32:48", "remaining_time": "23:44:24"} +{"current_steps": 169, "total_steps": 7063, "loss": 0.1922, "reward": 1.2096, "learning_rate": 9.783570612339908e-06, "epoch": 0.02, "percentage": 2.39, "elapsed_time": "0:34:44", "remaining_time": "23:37:32"} +{"current_steps": 179, "total_steps": 7063, "loss": 0.1731, "reward": 1.2908, "learning_rate": 9.756929032422675e-06, "epoch": 0.03, "percentage": 2.53, "elapsed_time": "0:36:45", "remaining_time": "23:33:31"} +{"current_steps": 189, "total_steps": 7063, "loss": 0.1698, "reward": 1.8801, "learning_rate": 9.728782116095511e-06, "epoch": 0.03, "percentage": 2.68, "elapsed_time": "0:38:48", "remaining_time": "23:31:19"} +{"current_steps": 199, "total_steps": 7063, "loss": 0.1775, "reward": 1.3951, "learning_rate": 9.69913877048688e-06, "epoch": 0.03, "percentage": 2.82, "elapsed_time": "0:40:42", "remaining_time": "23:24:07"} +{"current_steps": 209, "total_steps": 7063, "loss": 0.1776, "reward": 1.7341, "learning_rate": 9.668008376272242e-06, "epoch": 0.03, "percentage": 2.96, "elapsed_time": "0:42:43", "remaining_time": "23:21:24"} +{"current_steps": 219, "total_steps": 7063, "loss": 0.1701, "reward": 1.2821, "learning_rate": 9.635400784705537e-06, "epoch": 0.03, "percentage": 3.1, "elapsed_time": "0:44:47", "remaining_time": "23:19:37"} +{"current_steps": 229, "total_steps": 7063, "loss": 0.1645, "reward": 1.606, "learning_rate": 9.601326314501736e-06, "epoch": 0.03, "percentage": 3.24, "elapsed_time": "0:46:54", "remaining_time": "23:19:55"} +{"current_steps": 239, "total_steps": 7063, "loss": 0.1665, "reward": 1.6989, "learning_rate": 9.565795748571473e-06, "epoch": 0.03, "percentage": 3.38, "elapsed_time": "0:48:54", "remaining_time": "23:16:33"} +{"current_steps": 249, "total_steps": 7063, "loss": 0.162, "reward": 1.9307, "learning_rate": 9.52882033060878e-06, "epoch": 0.04, "percentage": 3.53, "elapsed_time": "0:50:56", "remaining_time": "23:14:00"} +{"current_steps": 259, "total_steps": 7063, "loss": 0.1672, "reward": 1.6651, "learning_rate": 9.490411761532994e-06, "epoch": 0.04, "percentage": 3.67, "elapsed_time": "0:52:53", "remaining_time": "23:09:35"} +{"current_steps": 269, "total_steps": 7063, "loss": 0.1824, "reward": 1.155, "learning_rate": 9.450582195786009e-06, "epoch": 0.04, "percentage": 3.81, "elapsed_time": "0:54:44", "remaining_time": "23:02:33"} +{"current_steps": 279, "total_steps": 7063, "loss": 0.1994, "reward": 1.5433, "learning_rate": 9.409344237485962e-06, "epoch": 0.04, "percentage": 3.95, "elapsed_time": "0:56:34", "remaining_time": "22:55:43"} +{"current_steps": 289, "total_steps": 7063, "loss": 0.1849, "reward": 1.6171, "learning_rate": 9.366710936438656e-06, "epoch": 0.04, "percentage": 4.09, "elapsed_time": "0:58:24", "remaining_time": "22:49:01"} +{"current_steps": 299, "total_steps": 7063, "loss": 0.1827, "reward": 1.7545, "learning_rate": 9.322695784007936e-06, "epoch": 0.04, "percentage": 4.23, "elapsed_time": "1:00:20", "remaining_time": "22:44:54"} +{"current_steps": 309, "total_steps": 7063, "loss": 0.2003, "reward": 1.5035, "learning_rate": 9.277312708846318e-06, "epoch": 0.04, "percentage": 4.37, "elapsed_time": "1:02:13", "remaining_time": "22:40:02"} +{"current_steps": 319, "total_steps": 7063, "loss": 0.1857, "reward": 1.62, "learning_rate": 9.230576072487254e-06, "epoch": 0.05, "percentage": 4.52, "elapsed_time": "1:04:06", "remaining_time": "22:35:26"} +{"current_steps": 329, "total_steps": 7063, "loss": 0.1695, "reward": 1.5519, "learning_rate": 9.182500664800408e-06, "epoch": 0.05, "percentage": 4.66, "elapsed_time": "1:06:00", "remaining_time": "22:31:08"} +{"current_steps": 339, "total_steps": 7063, "loss": 0.1876, "reward": 1.7547, "learning_rate": 9.133101699311382e-06, "epoch": 0.05, "percentage": 4.8, "elapsed_time": "1:07:54", "remaining_time": "22:27:03"} +{"current_steps": 349, "total_steps": 7063, "loss": 0.1843, "reward": 1.3446, "learning_rate": 9.082394808387372e-06, "epoch": 0.05, "percentage": 4.94, "elapsed_time": "1:09:51", "remaining_time": "22:23:59"} +{"current_steps": 359, "total_steps": 7063, "loss": 0.1749, "reward": 1.6229, "learning_rate": 9.030396038290285e-06, "epoch": 0.05, "percentage": 5.08, "elapsed_time": "1:11:41", "remaining_time": "22:18:51"} +{"current_steps": 369, "total_steps": 7063, "loss": 0.1981, "reward": 1.2249, "learning_rate": 8.977121844098891e-06, "epoch": 0.05, "percentage": 5.22, "elapsed_time": "1:13:32", "remaining_time": "22:14:08"} +{"current_steps": 379, "total_steps": 7063, "loss": 0.1955, "reward": 1.3132, "learning_rate": 8.922589084501567e-06, "epoch": 0.05, "percentage": 5.37, "elapsed_time": "1:15:25", "remaining_time": "22:10:15"} +{"current_steps": 389, "total_steps": 7063, "loss": 0.1977, "reward": 1.5104, "learning_rate": 8.866815016461373e-06, "epoch": 0.06, "percentage": 5.51, "elapsed_time": "1:17:19", "remaining_time": "22:06:33"} +{"current_steps": 399, "total_steps": 7063, "loss": 0.1719, "reward": 1.6746, "learning_rate": 8.809817289755034e-06, "epoch": 0.06, "percentage": 5.65, "elapsed_time": "1:19:15", "remaining_time": "22:03:43"} +{"current_steps": 409, "total_steps": 7063, "loss": 0.17, "reward": 1.6719, "learning_rate": 8.751613941387669e-06, "epoch": 0.06, "percentage": 5.79, "elapsed_time": "1:21:19", "remaining_time": "22:02:58"} +{"current_steps": 419, "total_steps": 7063, "loss": 0.1927, "reward": 1.6034, "learning_rate": 8.692223389884945e-06, "epoch": 0.06, "percentage": 5.93, "elapsed_time": "1:23:21", "remaining_time": "22:01:48"} +{"current_steps": 429, "total_steps": 7063, "loss": 0.1759, "reward": 1.746, "learning_rate": 8.63166442946451e-06, "epoch": 0.06, "percentage": 6.07, "elapsed_time": "1:25:29", "remaining_time": "22:01:54"} +{"current_steps": 439, "total_steps": 7063, "loss": 0.1667, "reward": 1.7022, "learning_rate": 8.569956224088549e-06, "epoch": 0.06, "percentage": 6.22, "elapsed_time": "1:27:33", "remaining_time": "22:01:08"} +{"current_steps": 449, "total_steps": 7063, "loss": 0.1903, "reward": 1.5584, "learning_rate": 8.507118301399305e-06, "epoch": 0.06, "percentage": 6.36, "elapsed_time": "1:29:34", "remaining_time": "21:59:35"} +{"current_steps": 459, "total_steps": 7063, "loss": 0.1737, "reward": 1.7671, "learning_rate": 8.443170546539546e-06, "epoch": 0.06, "percentage": 6.5, "elapsed_time": "1:31:34", "remaining_time": "21:57:28"} +{"current_steps": 469, "total_steps": 7063, "loss": 0.1814, "reward": 2.2036, "learning_rate": 8.378133195859885e-06, "epoch": 0.07, "percentage": 6.64, "elapsed_time": "1:33:28", "remaining_time": "21:54:18"} +{"current_steps": 479, "total_steps": 7063, "loss": 0.1912, "reward": 1.7684, "learning_rate": 8.31202683051495e-06, "epoch": 0.07, "percentage": 6.78, "elapsed_time": "1:35:20", "remaining_time": "21:50:28"} +{"current_steps": 489, "total_steps": 7063, "loss": 0.1997, "reward": 1.9846, "learning_rate": 8.24487236995046e-06, "epoch": 0.07, "percentage": 6.92, "elapsed_time": "1:37:21", "remaining_time": "21:48:45"} +{"current_steps": 499, "total_steps": 7063, "loss": 0.1778, "reward": 1.7488, "learning_rate": 8.176691065283236e-06, "epoch": 0.07, "percentage": 7.06, "elapsed_time": "1:39:29", "remaining_time": "21:48:43"} +{"current_steps": 509, "total_steps": 7063, "loss": 0.1843, "reward": 1.6116, "learning_rate": 8.107504492576258e-06, "epoch": 0.07, "percentage": 7.21, "elapsed_time": "1:41:32", "remaining_time": "21:47:22"} +{"current_steps": 519, "total_steps": 7063, "loss": 0.1809, "reward": 1.9922, "learning_rate": 8.03733454601089e-06, "epoch": 0.07, "percentage": 7.35, "elapsed_time": "1:43:34", "remaining_time": "21:45:53"} +{"current_steps": 529, "total_steps": 7063, "loss": 0.1781, "reward": 2.1922, "learning_rate": 7.96620343095844e-06, "epoch": 0.07, "percentage": 7.49, "elapsed_time": "1:45:35", "remaining_time": "21:44:14"} +{"current_steps": 539, "total_steps": 7063, "loss": 0.1959, "reward": 1.9254, "learning_rate": 7.894133656953241e-06, "epoch": 0.08, "percentage": 7.63, "elapsed_time": "1:47:41", "remaining_time": "21:43:29"} +{"current_steps": 549, "total_steps": 7063, "loss": 0.2071, "reward": 1.6299, "learning_rate": 7.821148030569475e-06, "epoch": 0.08, "percentage": 7.77, "elapsed_time": "1:49:50", "remaining_time": "21:43:11"} +{"current_steps": 559, "total_steps": 7063, "loss": 0.1883, "reward": 1.9824, "learning_rate": 7.747269648204006e-06, "epoch": 0.08, "percentage": 7.91, "elapsed_time": "1:51:54", "remaining_time": "21:42:03"} +{"current_steps": 569, "total_steps": 7063, "loss": 0.2009, "reward": 1.9649, "learning_rate": 7.672521888767501e-06, "epoch": 0.08, "percentage": 8.06, "elapsed_time": "1:54:01", "remaining_time": "21:41:27"} +{"current_steps": 579, "total_steps": 7063, "loss": 0.1975, "reward": 1.7986, "learning_rate": 7.596928406286133e-06, "epoch": 0.08, "percentage": 8.2, "elapsed_time": "1:56:07", "remaining_time": "21:40:27"} +{"current_steps": 589, "total_steps": 7063, "loss": 0.185, "reward": 2.3423, "learning_rate": 7.520513122416237e-06, "epoch": 0.08, "percentage": 8.34, "elapsed_time": "1:58:14", "remaining_time": "21:39:40"} +{"current_steps": 599, "total_steps": 7063, "loss": 0.1919, "reward": 1.9097, "learning_rate": 7.443300218874276e-06, "epoch": 0.08, "percentage": 8.48, "elapsed_time": "2:00:20", "remaining_time": "21:38:41"} +{"current_steps": 609, "total_steps": 7063, "loss": 0.1971, "reward": 1.6558, "learning_rate": 7.365314129784498e-06, "epoch": 0.09, "percentage": 8.62, "elapsed_time": "2:02:36", "remaining_time": "21:39:17"} +{"current_steps": 619, "total_steps": 7063, "loss": 0.1985, "reward": 1.7381, "learning_rate": 7.286579533946727e-06, "epoch": 0.09, "percentage": 8.76, "elapsed_time": "2:04:39", "remaining_time": "21:37:41"} +{"current_steps": 629, "total_steps": 7063, "loss": 0.2122, "reward": 1.6083, "learning_rate": 7.207121347026728e-06, "epoch": 0.09, "percentage": 8.91, "elapsed_time": "2:06:51", "remaining_time": "21:37:34"} +{"current_steps": 639, "total_steps": 7063, "loss": 0.1916, "reward": 2.0961, "learning_rate": 7.126964713671609e-06, "epoch": 0.09, "percentage": 9.05, "elapsed_time": "2:08:59", "remaining_time": "21:36:50"} +{"current_steps": 649, "total_steps": 7063, "loss": 0.1757, "reward": 1.895, "learning_rate": 7.046134999552765e-06, "epoch": 0.09, "percentage": 9.19, "elapsed_time": "2:11:08", "remaining_time": "21:36:00"} +{"current_steps": 659, "total_steps": 7063, "loss": 0.2049, "reward": 2.0765, "learning_rate": 6.964657783338879e-06, "epoch": 0.09, "percentage": 9.33, "elapsed_time": "2:13:22", "remaining_time": "21:36:09"} +{"current_steps": 669, "total_steps": 7063, "loss": 0.1991, "reward": 1.7684, "learning_rate": 6.882558848601516e-06, "epoch": 0.09, "percentage": 9.47, "elapsed_time": "2:15:37", "remaining_time": "21:36:12"} +{"current_steps": 679, "total_steps": 7063, "loss": 0.1916, "reward": 1.4878, "learning_rate": 6.799864175655886e-06, "epoch": 0.1, "percentage": 9.61, "elapsed_time": "2:17:52", "remaining_time": "21:36:20"} +{"current_steps": 689, "total_steps": 7063, "loss": 0.1762, "reward": 1.9724, "learning_rate": 6.716599933339328e-06, "epoch": 0.1, "percentage": 9.76, "elapsed_time": "2:20:09", "remaining_time": "21:36:38"} +{"current_steps": 699, "total_steps": 7063, "loss": 0.1778, "reward": 1.5174, "learning_rate": 6.632792470730155e-06, "epoch": 0.1, "percentage": 9.9, "elapsed_time": "2:22:23", "remaining_time": "21:36:27"} +{"current_steps": 709, "total_steps": 7063, "loss": 0.1683, "reward": 1.9022, "learning_rate": 6.5484683088094525e-06, "epoch": 0.1, "percentage": 10.04, "elapsed_time": "2:24:39", "remaining_time": "21:36:23"} +{"current_steps": 719, "total_steps": 7063, "loss": 0.1777, "reward": 1.8625, "learning_rate": 6.4636541320684755e-06, "epoch": 0.1, "percentage": 10.18, "elapsed_time": "2:26:56", "remaining_time": "21:36:32"} +{"current_steps": 729, "total_steps": 7063, "loss": 0.1821, "reward": 1.8226, "learning_rate": 6.378376780064313e-06, "epoch": 0.1, "percentage": 10.32, "elapsed_time": "2:29:10", "remaining_time": "21:36:07"} +{"current_steps": 739, "total_steps": 7063, "loss": 0.1752, "reward": 1.535, "learning_rate": 6.292663238926471e-06, "epoch": 0.1, "percentage": 10.46, "elapsed_time": "2:31:27", "remaining_time": "21:36:10"} +{"current_steps": 749, "total_steps": 7063, "loss": 0.1853, "reward": 1.6309, "learning_rate": 6.206540632817073e-06, "epoch": 0.11, "percentage": 10.6, "elapsed_time": "2:33:44", "remaining_time": "21:36:00"} +{"current_steps": 759, "total_steps": 7063, "loss": 0.1836, "reward": 1.4308, "learning_rate": 6.12003621534739e-06, "epoch": 0.11, "percentage": 10.75, "elapsed_time": "2:35:55", "remaining_time": "21:35:01"} +{"current_steps": 769, "total_steps": 7063, "loss": 0.1656, "reward": 1.6336, "learning_rate": 6.033177360953402e-06, "epoch": 0.11, "percentage": 10.89, "elapsed_time": "2:38:10", "remaining_time": "21:34:36"} +{"current_steps": 779, "total_steps": 7063, "loss": 0.1735, "reward": 1.3411, "learning_rate": 5.9459915562331075e-06, "epoch": 0.11, "percentage": 11.03, "elapsed_time": "2:40:29", "remaining_time": "21:34:36"} +{"current_steps": 789, "total_steps": 7063, "loss": 0.163, "reward": 1.3965, "learning_rate": 5.8585063912483694e-06, "epoch": 0.11, "percentage": 11.17, "elapsed_time": "2:42:46", "remaining_time": "21:34:24"} +{"current_steps": 799, "total_steps": 7063, "loss": 0.1873, "reward": 1.2479, "learning_rate": 5.770749550793997e-06, "epoch": 0.11, "percentage": 11.31, "elapsed_time": "2:45:06", "remaining_time": "21:34:27"} +{"current_steps": 809, "total_steps": 7063, "loss": 0.1717, "reward": 1.7367, "learning_rate": 5.682748805636855e-06, "epoch": 0.11, "percentage": 11.45, "elapsed_time": "2:47:23", "remaining_time": "21:34:01"} +{"current_steps": 819, "total_steps": 7063, "loss": 0.1735, "reward": 1.9298, "learning_rate": 5.594532003727772e-06, "epoch": 0.12, "percentage": 11.6, "elapsed_time": "2:49:40", "remaining_time": "21:33:38"} +{"current_steps": 829, "total_steps": 7063, "loss": 0.1944, "reward": 1.7795, "learning_rate": 5.506127061389015e-06, "epoch": 0.12, "percentage": 11.74, "elapsed_time": "2:51:56", "remaining_time": "21:32:56"} +{"current_steps": 839, "total_steps": 7063, "loss": 0.1597, "reward": 1.8013, "learning_rate": 5.417561954480141e-06, "epoch": 0.12, "percentage": 11.88, "elapsed_time": "2:54:15", "remaining_time": "21:32:45"} +{"current_steps": 849, "total_steps": 7063, "loss": 0.1848, "reward": 1.5395, "learning_rate": 5.32886470954499e-06, "epoch": 0.12, "percentage": 12.02, "elapsed_time": "2:56:37", "remaining_time": "21:32:47"} +{"current_steps": 859, "total_steps": 7063, "loss": 0.1773, "reward": 1.6115, "learning_rate": 5.240063394942656e-06, "epoch": 0.12, "percentage": 12.16, "elapsed_time": "2:58:58", "remaining_time": "21:32:35"} +{"current_steps": 869, "total_steps": 7063, "loss": 0.1817, "reward": 1.1288, "learning_rate": 5.151186111965214e-06, "epoch": 0.12, "percentage": 12.3, "elapsed_time": "3:01:16", "remaining_time": "21:32:02"} +{"current_steps": 879, "total_steps": 7063, "loss": 0.1786, "reward": 1.5076, "learning_rate": 5.06226098594503e-06, "epoch": 0.12, "percentage": 12.45, "elapsed_time": "3:03:35", "remaining_time": "21:31:35"} +{"current_steps": 889, "total_steps": 7063, "loss": 0.1925, "reward": 1.6208, "learning_rate": 4.973316157354464e-06, "epoch": 0.13, "percentage": 12.59, "elapsed_time": "3:05:53", "remaining_time": "21:31:01"} +{"current_steps": 899, "total_steps": 7063, "loss": 0.1887, "reward": 1.0692, "learning_rate": 4.8843797729007815e-06, "epoch": 0.13, "percentage": 12.73, "elapsed_time": "3:08:11", "remaining_time": "21:30:23"} +{"current_steps": 909, "total_steps": 7063, "loss": 0.1736, "reward": 1.3587, "learning_rate": 4.795479976619088e-06, "epoch": 0.13, "percentage": 12.87, "elapsed_time": "3:10:27", "remaining_time": "21:29:24"} +{"current_steps": 919, "total_steps": 7063, "loss": 0.1914, "reward": 1.6322, "learning_rate": 4.7066449009661146e-06, "epoch": 0.13, "percentage": 13.01, "elapsed_time": "3:12:38", "remaining_time": "21:27:52"} +{"current_steps": 929, "total_steps": 7063, "loss": 0.1851, "reward": 1.6663, "learning_rate": 4.617902657917662e-06, "epoch": 0.13, "percentage": 13.15, "elapsed_time": "3:14:57", "remaining_time": "21:27:17"} +{"current_steps": 939, "total_steps": 7063, "loss": 0.1947, "reward": 1.5436, "learning_rate": 4.5292813300725406e-06, "epoch": 0.13, "percentage": 13.29, "elapsed_time": "3:17:15", "remaining_time": "21:26:29"} +{"current_steps": 949, "total_steps": 7063, "loss": 0.1975, "reward": 1.591, "learning_rate": 4.440808961765778e-06, "epoch": 0.13, "percentage": 13.44, "elapsed_time": "3:19:32", "remaining_time": "21:25:30"} +{"current_steps": 959, "total_steps": 7063, "loss": 0.1912, "reward": 1.5074, "learning_rate": 4.352513550193965e-06, "epoch": 0.14, "percentage": 13.58, "elapsed_time": "3:21:55", "remaining_time": "21:25:16"} +{"current_steps": 969, "total_steps": 7063, "loss": 0.1783, "reward": 0.999, "learning_rate": 4.2644230365555e-06, "epoch": 0.14, "percentage": 13.72, "elapsed_time": "3:24:11", "remaining_time": "21:24:12"} +{"current_steps": 979, "total_steps": 7063, "loss": 0.205, "reward": 1.3848, "learning_rate": 4.176565297208565e-06, "epoch": 0.14, "percentage": 13.86, "elapsed_time": "3:26:29", "remaining_time": "21:23:12"} +{"current_steps": 989, "total_steps": 7063, "loss": 0.2145, "reward": 1.1693, "learning_rate": 4.088968134849611e-06, "epoch": 0.14, "percentage": 14.0, "elapsed_time": "3:28:47", "remaining_time": "21:22:18"} +{"current_steps": 999, "total_steps": 7063, "loss": 0.2206, "reward": 1.2571, "learning_rate": 4.001659269715164e-06, "epoch": 0.14, "percentage": 14.14, "elapsed_time": "3:30:57", "remaining_time": "21:20:31"} +{"current_steps": 1009, "total_steps": 7063, "loss": 0.2008, "reward": 1.6519, "learning_rate": 3.914666330809712e-06, "epoch": 0.14, "percentage": 14.29, "elapsed_time": "3:33:16", "remaining_time": "21:19:41"} +{"current_steps": 1019, "total_steps": 7063, "loss": 0.2291, "reward": 1.3704, "learning_rate": 3.828016847162479e-06, "epoch": 0.14, "percentage": 14.43, "elapsed_time": "3:35:34", "remaining_time": "21:18:38"} +{"current_steps": 1029, "total_steps": 7063, "loss": 0.1886, "reward": 1.7832, "learning_rate": 3.7417382391158208e-06, "epoch": 0.15, "percentage": 14.57, "elapsed_time": "3:37:53", "remaining_time": "21:17:44"} +{"current_steps": 1039, "total_steps": 7063, "loss": 0.2089, "reward": 1.6251, "learning_rate": 3.6558578096480236e-06, "epoch": 0.15, "percentage": 14.71, "elapsed_time": "3:40:10", "remaining_time": "21:16:32"} +{"current_steps": 1049, "total_steps": 7063, "loss": 0.1983, "reward": 1.2402, "learning_rate": 3.5704027357332476e-06, "epoch": 0.15, "percentage": 14.85, "elapsed_time": "3:42:19", "remaining_time": "21:14:38"} +{"current_steps": 1059, "total_steps": 7063, "loss": 0.2186, "reward": 1.3914, "learning_rate": 3.4854000597413275e-06, "epoch": 0.15, "percentage": 14.99, "elapsed_time": "3:44:41", "remaining_time": "21:13:52"} +{"current_steps": 1069, "total_steps": 7063, "loss": 0.2031, "reward": 1.6209, "learning_rate": 3.400876680880184e-06, "epoch": 0.15, "percentage": 15.14, "elapsed_time": "3:46:58", "remaining_time": "21:12:41"} +{"current_steps": 1079, "total_steps": 7063, "loss": 0.2185, "reward": 1.0933, "learning_rate": 3.316859346683536e-06, "epoch": 0.15, "percentage": 15.28, "elapsed_time": "3:49:17", "remaining_time": "21:11:40"} +{"current_steps": 1089, "total_steps": 7063, "loss": 0.2212, "reward": 1.3092, "learning_rate": 3.233374644546607e-06, "epoch": 0.15, "percentage": 15.42, "elapsed_time": "3:51:42", "remaining_time": "21:11:08"} +{"current_steps": 1099, "total_steps": 7063, "loss": 0.2131, "reward": 1.0979, "learning_rate": 3.1504489933125066e-06, "epoch": 0.16, "percentage": 15.56, "elapsed_time": "3:54:08", "remaining_time": "21:10:36"} +{"current_steps": 1109, "total_steps": 7063, "loss": 0.2027, "reward": 1.3274, "learning_rate": 3.068108634911958e-06, "epoch": 0.16, "percentage": 15.7, "elapsed_time": "3:56:24", "remaining_time": "21:09:13"} +{"current_steps": 1119, "total_steps": 7063, "loss": 0.2107, "reward": 1.7021, "learning_rate": 2.9863796260590017e-06, "epoch": 0.16, "percentage": 15.84, "elapsed_time": "3:58:47", "remaining_time": "21:08:27"} +{"current_steps": 1129, "total_steps": 7063, "loss": 0.2234, "reward": 1.5654, "learning_rate": 2.9052878300053122e-06, "epoch": 0.16, "percentage": 15.98, "elapsed_time": "4:01:09", "remaining_time": "21:07:31"} +{"current_steps": 1139, "total_steps": 7063, "loss": 0.2363, "reward": 1.3214, "learning_rate": 2.8248589083557478e-06, "epoch": 0.16, "percentage": 16.13, "elapsed_time": "4:03:28", "remaining_time": "21:06:17"} +{"current_steps": 1149, "total_steps": 7063, "loss": 0.2293, "reward": 1.3435, "learning_rate": 2.7451183129476843e-06, "epoch": 0.16, "percentage": 16.27, "elapsed_time": "4:05:48", "remaining_time": "21:05:11"} +{"current_steps": 1159, "total_steps": 7063, "loss": 0.245, "reward": 1.4686, "learning_rate": 2.666091277796769e-06, "epoch": 0.16, "percentage": 16.41, "elapsed_time": "4:08:11", "remaining_time": "21:04:18"} +{"current_steps": 1169, "total_steps": 7063, "loss": 0.2216, "reward": 1.38, "learning_rate": 2.5878028111115692e-06, "epoch": 0.17, "percentage": 16.55, "elapsed_time": "4:10:23", "remaining_time": "21:02:28"} +{"current_steps": 1179, "total_steps": 7063, "loss": 0.2204, "reward": 1.3663, "learning_rate": 2.510277687379693e-06, "epoch": 0.17, "percentage": 16.69, "elapsed_time": "4:12:39", "remaining_time": "21:00:56"} +{"current_steps": 1189, "total_steps": 7063, "loss": 0.2143, "reward": 1.5055, "learning_rate": 2.4335404395278793e-06, "epoch": 0.17, "percentage": 16.83, "elapsed_time": "4:14:59", "remaining_time": "20:59:41"} +{"current_steps": 1199, "total_steps": 7063, "loss": 0.2343, "reward": 1.2318, "learning_rate": 2.357615351158507e-06, "epoch": 0.17, "percentage": 16.98, "elapsed_time": "4:17:17", "remaining_time": "20:58:22"} +{"current_steps": 1209, "total_steps": 7063, "loss": 0.2296, "reward": 1.3596, "learning_rate": 2.282526448865034e-06, "epoch": 0.17, "percentage": 17.12, "elapsed_time": "4:19:42", "remaining_time": "20:57:31"} +{"current_steps": 1219, "total_steps": 7063, "loss": 0.2347, "reward": 1.8732, "learning_rate": 2.208297494628734e-06, "epoch": 0.17, "percentage": 17.26, "elapsed_time": "4:22:06", "remaining_time": "20:56:35"} +{"current_steps": 1229, "total_steps": 7063, "loss": 0.2326, "reward": 1.3454, "learning_rate": 2.1349519782991995e-06, "epoch": 0.17, "percentage": 17.4, "elapsed_time": "4:24:27", "remaining_time": "20:55:20"} +{"current_steps": 1239, "total_steps": 7063, "loss": 0.244, "reward": 1.8838, "learning_rate": 2.062513110160938e-06, "epoch": 0.18, "percentage": 17.54, "elapsed_time": "4:26:46", "remaining_time": "20:54:00"} +{"current_steps": 1249, "total_steps": 7063, "loss": 0.2634, "reward": 1.5223, "learning_rate": 1.9910038135884573e-06, "epoch": 0.18, "percentage": 17.68, "elapsed_time": "4:29:00", "remaining_time": "20:52:13"} +{"current_steps": 1259, "total_steps": 7063, "loss": 0.2283, "reward": 1.7335, "learning_rate": 1.920446717792122e-06, "epoch": 0.18, "percentage": 17.83, "elapsed_time": "4:31:20", "remaining_time": "20:50:54"} +{"current_steps": 1269, "total_steps": 7063, "loss": 0.2352, "reward": 1.1295, "learning_rate": 1.8508641506571213e-06, "epoch": 0.18, "percentage": 17.97, "elapsed_time": "4:33:30", "remaining_time": "20:48:46"} +{"current_steps": 1279, "total_steps": 7063, "loss": 0.2543, "reward": 1.2374, "learning_rate": 1.7822781316777738e-06, "epoch": 0.18, "percentage": 18.11, "elapsed_time": "4:35:45", "remaining_time": "20:47:04"} +{"current_steps": 1289, "total_steps": 7063, "loss": 0.2583, "reward": 1.191, "learning_rate": 1.7147103649894336e-06, "epoch": 0.18, "percentage": 18.25, "elapsed_time": "4:38:07", "remaining_time": "20:45:48"} +{"current_steps": 1299, "total_steps": 7063, "loss": 0.2475, "reward": 1.1935, "learning_rate": 1.6481822325001817e-06, "epoch": 0.18, "percentage": 18.39, "elapsed_time": "4:40:24", "remaining_time": "20:44:14"} +{"current_steps": 1309, "total_steps": 7063, "loss": 0.2583, "reward": 1.3764, "learning_rate": 1.5827147871245042e-06, "epoch": 0.19, "percentage": 18.53, "elapsed_time": "4:42:44", "remaining_time": "20:42:52"} +{"current_steps": 1319, "total_steps": 7063, "loss": 0.2756, "reward": 0.9899, "learning_rate": 1.5183287461210578e-06, "epoch": 0.19, "percentage": 18.67, "elapsed_time": "4:44:59", "remaining_time": "20:41:06"} +{"current_steps": 1329, "total_steps": 7063, "loss": 0.2693, "reward": 1.9678, "learning_rate": 1.4550444845366767e-06, "epoch": 0.19, "percentage": 18.82, "elapsed_time": "4:47:20", "remaining_time": "20:39:44"} +{"current_steps": 1339, "total_steps": 7063, "loss": 0.2551, "reward": 1.3028, "learning_rate": 1.3928820287586542e-06, "epoch": 0.19, "percentage": 18.96, "elapsed_time": "4:49:44", "remaining_time": "20:38:34"} +{"current_steps": 1349, "total_steps": 7063, "loss": 0.2811, "reward": 1.1528, "learning_rate": 1.3318610501773743e-06, "epoch": 0.19, "percentage": 19.1, "elapsed_time": "4:52:06", "remaining_time": "20:37:17"} +{"current_steps": 1359, "total_steps": 7063, "loss": 0.2728, "reward": 1.3348, "learning_rate": 1.2720008589612642e-06, "epoch": 0.19, "percentage": 19.24, "elapsed_time": "4:54:23", "remaining_time": "20:35:39"} +{"current_steps": 1369, "total_steps": 7063, "loss": 0.2606, "reward": 1.3202, "learning_rate": 1.213320397946079e-06, "epoch": 0.19, "percentage": 19.38, "elapsed_time": "4:56:44", "remaining_time": "20:34:13"} +{"current_steps": 1379, "total_steps": 7063, "loss": 0.2795, "reward": 1.5301, "learning_rate": 1.1558382366404014e-06, "epoch": 0.2, "percentage": 19.52, "elapsed_time": "4:59:05", "remaining_time": "20:32:50"} +{"current_steps": 1389, "total_steps": 7063, "loss": 0.2739, "reward": 1.417, "learning_rate": 1.0995725653493155e-06, "epoch": 0.2, "percentage": 19.67, "elapsed_time": "5:01:24", "remaining_time": "20:31:15"} +{"current_steps": 1399, "total_steps": 7063, "loss": 0.2953, "reward": 1.4266, "learning_rate": 1.0445411894180397e-06, "epoch": 0.2, "percentage": 19.81, "elapsed_time": "5:03:43", "remaining_time": "20:29:41"} +{"current_steps": 1409, "total_steps": 7063, "loss": 0.2637, "reward": 1.5203, "learning_rate": 9.907615235974206e-07, "epoch": 0.2, "percentage": 19.95, "elapsed_time": "5:06:01", "remaining_time": "20:28:02"} +{"current_steps": 1419, "total_steps": 7063, "loss": 0.2821, "reward": 1.5489, "learning_rate": 9.382505865329972e-07, "epoch": 0.2, "percentage": 20.09, "elapsed_time": "5:08:14", "remaining_time": "20:26:01"} +{"current_steps": 1429, "total_steps": 7063, "loss": 0.2894, "reward": 1.4433, "learning_rate": 8.870249953794418e-07, "epoch": 0.2, "percentage": 20.23, "elapsed_time": "5:10:34", "remaining_time": "20:24:28"} +{"current_steps": 1439, "total_steps": 7063, "loss": 0.3028, "reward": 0.7403, "learning_rate": 8.371009605420277e-07, "epoch": 0.2, "percentage": 20.37, "elapsed_time": "5:12:55", "remaining_time": "20:22:58"} +{"current_steps": 1449, "total_steps": 7063, "loss": 0.2893, "reward": 1.714, "learning_rate": 7.88494280546836e-07, "epoch": 0.21, "percentage": 20.52, "elapsed_time": "5:15:21", "remaining_time": "20:21:49"} +{"current_steps": 1459, "total_steps": 7063, "loss": 0.3189, "reward": 1.4108, "learning_rate": 7.412203370412801e-07, "epoch": 0.21, "percentage": 20.66, "elapsed_time": "5:17:45", "remaining_time": "20:20:29"} +{"current_steps": 1469, "total_steps": 7063, "loss": 0.3155, "reward": 1.8459, "learning_rate": 6.952940899265754e-07, "epoch": 0.21, "percentage": 20.8, "elapsed_time": "5:20:01", "remaining_time": "20:18:41"} +{"current_steps": 1479, "total_steps": 7063, "loss": 0.3231, "reward": 1.6761, "learning_rate": 6.507300726236476e-07, "epoch": 0.21, "percentage": 20.94, "elapsed_time": "5:22:22", "remaining_time": "20:17:09"} +{"current_steps": 1489, "total_steps": 7063, "loss": 0.3309, "reward": 1.4689, "learning_rate": 6.075423874740216e-07, "epoch": 0.21, "percentage": 21.08, "elapsed_time": "5:24:47", "remaining_time": "20:15:51"} +{"current_steps": 1499, "total_steps": 7063, "loss": 0.3089, "reward": 1.1545, "learning_rate": 5.657447012771117e-07, "epoch": 0.21, "percentage": 21.22, "elapsed_time": "5:27:02", "remaining_time": "20:13:54"} +{"current_steps": 1509, "total_steps": 7063, "loss": 0.3586, "reward": 1.3766, "learning_rate": 5.253502409653488e-07, "epoch": 0.21, "percentage": 21.36, "elapsed_time": "5:29:20", "remaining_time": "20:12:09"} +{"current_steps": 1519, "total_steps": 7063, "loss": 0.3262, "reward": 1.49, "learning_rate": 4.863717894184949e-07, "epoch": 0.22, "percentage": 21.51, "elapsed_time": "5:31:43", "remaining_time": "20:10:44"} +{"current_steps": 1529, "total_steps": 7063, "loss": 0.3213, "reward": 1.5069, "learning_rate": 4.4882168141849037e-07, "epoch": 0.22, "percentage": 21.65, "elapsed_time": "5:34:01", "remaining_time": "20:08:57"} +{"current_steps": 1539, "total_steps": 7063, "loss": 0.3275, "reward": 1.4944, "learning_rate": 4.1271179974609167e-07, "epoch": 0.22, "percentage": 21.79, "elapsed_time": "5:36:23", "remaining_time": "20:07:25"} +{"current_steps": 1549, "total_steps": 7063, "loss": 0.3374, "reward": 1.1834, "learning_rate": 3.78053571420553e-07, "epoch": 0.22, "percentage": 21.93, "elapsed_time": "5:38:33", "remaining_time": "20:05:10"} +{"current_steps": 1559, "total_steps": 7063, "loss": 0.3382, "reward": 1.3526, "learning_rate": 3.44857964083527e-07, "epoch": 0.22, "percentage": 22.07, "elapsed_time": "5:40:48", "remaining_time": "20:03:13"} +{"current_steps": 1569, "total_steps": 7063, "loss": 0.3413, "reward": 1.3988, "learning_rate": 3.1313548252834615e-07, "epoch": 0.22, "percentage": 22.21, "elapsed_time": "5:43:06", "remaining_time": "20:01:26"} +{"current_steps": 1579, "total_steps": 7063, "loss": 0.3608, "reward": 1.1628, "learning_rate": 2.8289616537576005e-07, "epoch": 0.22, "percentage": 22.36, "elapsed_time": "5:45:15", "remaining_time": "19:59:07"} +{"current_steps": 1589, "total_steps": 7063, "loss": 0.3299, "reward": 1.0217, "learning_rate": 2.5414958189720837e-07, "epoch": 0.22, "percentage": 22.5, "elapsed_time": "5:47:34", "remaining_time": "19:57:23"} +{"current_steps": 1599, "total_steps": 7063, "loss": 0.4025, "reward": 1.5555, "learning_rate": 2.2690482898660438e-07, "epoch": 0.23, "percentage": 22.64, "elapsed_time": "5:49:57", "remaining_time": "19:55:50"} +{"current_steps": 1609, "total_steps": 7063, "loss": 0.378, "reward": 1.9486, "learning_rate": 2.0117052828161953e-07, "epoch": 0.23, "percentage": 22.78, "elapsed_time": "5:52:19", "remaining_time": "19:54:15"} +{"current_steps": 1619, "total_steps": 7063, "loss": 0.3806, "reward": 1.1219, "learning_rate": 1.7695482343534686e-07, "epoch": 0.23, "percentage": 22.92, "elapsed_time": "5:54:40", "remaining_time": "19:52:38"} +{"current_steps": 1629, "total_steps": 7063, "loss": 0.3702, "reward": 1.4053, "learning_rate": 1.5426537753923775e-07, "epoch": 0.23, "percentage": 23.06, "elapsed_time": "5:57:03", "remaining_time": "19:51:05"} +{"current_steps": 1639, "total_steps": 7063, "loss": 0.3674, "reward": 1.2728, "learning_rate": 1.3310937069810181e-07, "epoch": 0.23, "percentage": 23.21, "elapsed_time": "5:59:27", "remaining_time": "19:49:34"} +{"current_steps": 1649, "total_steps": 7063, "loss": 0.3861, "reward": 1.2004, "learning_rate": 1.1349349775795604e-07, "epoch": 0.23, "percentage": 23.35, "elapsed_time": "6:01:46", "remaining_time": "19:47:47"} +{"current_steps": 1659, "total_steps": 7063, "loss": 0.3939, "reward": 1.2368, "learning_rate": 9.542396618743233e-08, "epoch": 0.23, "percentage": 23.49, "elapsed_time": "6:04:08", "remaining_time": "19:46:09"} +{"current_steps": 1669, "total_steps": 7063, "loss": 0.3778, "reward": 1.7163, "learning_rate": 7.890649411341378e-08, "epoch": 0.24, "percentage": 23.63, "elapsed_time": "6:06:27", "remaining_time": "19:44:20"} +{"current_steps": 1679, "total_steps": 7063, "loss": 0.4324, "reward": 1.3397, "learning_rate": 6.394630851152717e-08, "epoch": 0.24, "percentage": 23.77, "elapsed_time": "6:08:46", "remaining_time": "19:42:31"} +{"current_steps": 1689, "total_steps": 7063, "loss": 0.4204, "reward": 1.0164, "learning_rate": 5.0548143552061055e-08, "epoch": 0.24, "percentage": 23.91, "elapsed_time": "6:11:06", "remaining_time": "19:40:45"} +{"current_steps": 1699, "total_steps": 7063, "loss": 0.4092, "reward": 1.6092, "learning_rate": 3.871623910182865e-08, "epoch": 0.24, "percentage": 24.05, "elapsed_time": "6:13:22", "remaining_time": "19:38:47"} +{"current_steps": 1709, "total_steps": 7063, "loss": 0.4246, "reward": 1.5384, "learning_rate": 2.845433938245823e-08, "epoch": 0.24, "percentage": 24.2, "elapsed_time": "6:15:41", "remaining_time": "19:36:59"} +{"current_steps": 1719, "total_steps": 7063, "loss": 0.3803, "reward": 1.2442, "learning_rate": 1.976569178552934e-08, "epoch": 0.24, "percentage": 24.34, "elapsed_time": "6:17:55", "remaining_time": "19:34:54"} +{"current_steps": 1729, "total_steps": 7063, "loss": 0.4257, "reward": 1.5448, "learning_rate": 1.2653045844930322e-08, "epoch": 0.24, "percentage": 24.48, "elapsed_time": "6:20:18", "remaining_time": "19:33:15"} +{"current_steps": 1739, "total_steps": 7063, "loss": 0.4955, "reward": 1.1579, "learning_rate": 7.1186523667665655e-09, "epoch": 0.25, "percentage": 24.62, "elapsed_time": "6:22:35", "remaining_time": "19:31:19"} +{"current_steps": 1749, "total_steps": 7063, "loss": 0.478, "reward": 1.1204, "learning_rate": 3.164262717086919e-09, "epoch": 0.25, "percentage": 24.76, "elapsed_time": "6:24:56", "remaining_time": "19:29:33"} +{"current_steps": 1759, "total_steps": 7063, "loss": 0.4577, "reward": 1.1286, "learning_rate": 7.911282676653642e-10, "epoch": 0.25, "percentage": 24.9, "elapsed_time": "6:27:11", "remaining_time": "19:27:29"} +{"current_steps": 1769, "total_steps": 7063, "loss": 0.4663, "reward": 1.3502, "learning_rate": 0.0, "epoch": 0.25, "percentage": 25.05, "elapsed_time": "6:29:30", "remaining_time": "19:25:38"} +{"current_steps": 1779, "total_steps": 7063, "loss": 0.4789, "reward": 1.5621, "learning_rate": 7.911282676653642e-10, "epoch": 0.25, "percentage": 25.19, "elapsed_time": "6:31:53", "remaining_time": "19:24:01"} +{"current_steps": 1789, "total_steps": 7063, "loss": 0.4973, "reward": 1.1185, "learning_rate": 3.164262717086919e-09, "epoch": 0.25, "percentage": 25.33, "elapsed_time": "6:34:14", "remaining_time": "19:22:12"} +{"current_steps": 1799, "total_steps": 7063, "loss": 0.4797, "reward": 1.4696, "learning_rate": 7.1186523667665655e-09, "epoch": 0.25, "percentage": 25.47, "elapsed_time": "6:36:33", "remaining_time": "19:20:21"} +{"current_steps": 1809, "total_steps": 7063, "loss": 0.478, "reward": 1.1051, "learning_rate": 1.2653045844930322e-08, "epoch": 0.26, "percentage": 25.61, "elapsed_time": "6:38:56", "remaining_time": "19:18:41"} +{"current_steps": 1819, "total_steps": 7063, "loss": 0.5791, "reward": 1.267, "learning_rate": 1.976569178552934e-08, "epoch": 0.26, "percentage": 25.75, "elapsed_time": "6:41:17", "remaining_time": "19:16:51"} +{"current_steps": 1829, "total_steps": 7063, "loss": 0.4895, "reward": 1.3581, "learning_rate": 2.845433938245823e-08, "epoch": 0.26, "percentage": 25.9, "elapsed_time": "6:43:33", "remaining_time": "19:14:51"} +{"current_steps": 1839, "total_steps": 7063, "loss": 0.4846, "reward": 1.2857, "learning_rate": 3.871623910182809e-08, "epoch": 0.26, "percentage": 26.04, "elapsed_time": "6:45:52", "remaining_time": "19:12:57"} +{"current_steps": 1849, "total_steps": 7063, "loss": 0.5179, "reward": 0.9826, "learning_rate": 5.05481435520605e-08, "epoch": 0.26, "percentage": 26.18, "elapsed_time": "6:48:05", "remaining_time": "19:10:48"} +{"current_steps": 1859, "total_steps": 7063, "loss": 0.5052, "reward": 1.5427, "learning_rate": 6.394630851152661e-08, "epoch": 0.26, "percentage": 26.32, "elapsed_time": "6:50:27", "remaining_time": "19:09:00"} +{"current_steps": 1869, "total_steps": 7063, "loss": 0.508, "reward": 1.4578, "learning_rate": 7.890649411341267e-08, "epoch": 0.26, "percentage": 26.46, "elapsed_time": "6:52:46", "remaining_time": "19:07:06"} +{"current_steps": 1879, "total_steps": 7063, "loss": 0.5538, "reward": 1.3281, "learning_rate": 9.542396618743177e-08, "epoch": 0.27, "percentage": 26.6, "elapsed_time": "6:55:10", "remaining_time": "19:05:26"} +{"current_steps": 1889, "total_steps": 7063, "loss": 0.5131, "reward": 0.9273, "learning_rate": 1.1349349775795659e-07, "epoch": 0.27, "percentage": 26.75, "elapsed_time": "6:57:32", "remaining_time": "19:03:37"} +{"current_steps": 1899, "total_steps": 7063, "loss": 0.5548, "reward": 1.1374, "learning_rate": 1.3310937069810181e-07, "epoch": 0.27, "percentage": 26.89, "elapsed_time": "6:59:47", "remaining_time": "19:01:32"} +{"current_steps": 1909, "total_steps": 7063, "loss": 0.6159, "reward": 1.3267, "learning_rate": 1.542653775392383e-07, "epoch": 0.27, "percentage": 27.03, "elapsed_time": "7:02:05", "remaining_time": "18:59:33"} +{"current_steps": 1919, "total_steps": 7063, "loss": 0.554, "reward": 1.2857, "learning_rate": 1.7695482343534686e-07, "epoch": 0.27, "percentage": 27.17, "elapsed_time": "7:04:21", "remaining_time": "18:57:32"} +{"current_steps": 1929, "total_steps": 7063, "loss": 0.5569, "reward": 1.354, "learning_rate": 2.0117052828161953e-07, "epoch": 0.27, "percentage": 27.31, "elapsed_time": "7:06:40", "remaining_time": "18:55:34"} +{"current_steps": 1939, "total_steps": 7063, "loss": 0.6261, "reward": 1.116, "learning_rate": 2.2690482898660438e-07, "epoch": 0.27, "percentage": 27.45, "elapsed_time": "7:09:00", "remaining_time": "18:53:40"} +{"current_steps": 1949, "total_steps": 7063, "loss": 0.6047, "reward": 1.5964, "learning_rate": 2.5414958189720784e-07, "epoch": 0.28, "percentage": 27.59, "elapsed_time": "7:11:18", "remaining_time": "18:51:41"} +{"current_steps": 1959, "total_steps": 7063, "loss": 0.6132, "reward": 1.2502, "learning_rate": 2.8289616537575947e-07, "epoch": 0.28, "percentage": 27.74, "elapsed_time": "7:13:37", "remaining_time": "18:49:46"} +{"current_steps": 1969, "total_steps": 7063, "loss": 0.5468, "reward": 1.2231, "learning_rate": 3.1313548252834557e-07, "epoch": 0.28, "percentage": 27.88, "elapsed_time": "7:15:51", "remaining_time": "18:47:37"} +{"current_steps": 1979, "total_steps": 7063, "loss": 0.5976, "reward": 1.0455, "learning_rate": 3.4485796408352645e-07, "epoch": 0.28, "percentage": 28.02, "elapsed_time": "7:18:09", "remaining_time": "18:45:36"} +{"current_steps": 1989, "total_steps": 7063, "loss": 0.6308, "reward": 0.9335, "learning_rate": 3.7805357142055245e-07, "epoch": 0.28, "percentage": 28.16, "elapsed_time": "7:20:28", "remaining_time": "18:43:40"} +{"current_steps": 1999, "total_steps": 7063, "loss": 0.6496, "reward": 1.6818, "learning_rate": 4.127117997460911e-07, "epoch": 0.28, "percentage": 28.3, "elapsed_time": "7:22:56", "remaining_time": "18:42:04"} +{"current_steps": 2009, "total_steps": 7063, "loss": 0.6392, "reward": 1.3996, "learning_rate": 4.488216814184898e-07, "epoch": 0.28, "percentage": 28.44, "elapsed_time": "7:25:22", "remaining_time": "18:40:25"} +{"current_steps": 2019, "total_steps": 7063, "loss": 0.6849, "reward": 1.2105, "learning_rate": 4.863717894184944e-07, "epoch": 0.29, "percentage": 28.59, "elapsed_time": "7:27:41", "remaining_time": "18:38:28"} +{"current_steps": 2029, "total_steps": 7063, "loss": 0.6536, "reward": 1.5729, "learning_rate": 5.253502409653483e-07, "epoch": 0.29, "percentage": 28.73, "elapsed_time": "7:30:07", "remaining_time": "18:36:47"} +{"current_steps": 2039, "total_steps": 7063, "loss": 0.6429, "reward": 1.3701, "learning_rate": 5.657447012771112e-07, "epoch": 0.29, "percentage": 28.87, "elapsed_time": "7:32:22", "remaining_time": "18:34:36"} +{"current_steps": 2049, "total_steps": 7063, "loss": 0.6516, "reward": 1.1983, "learning_rate": 6.075423874740211e-07, "epoch": 0.29, "percentage": 29.01, "elapsed_time": "7:34:38", "remaining_time": "18:32:32"} +{"current_steps": 2059, "total_steps": 7063, "loss": 0.676, "reward": 1.1014, "learning_rate": 6.50730072623646e-07, "epoch": 0.29, "percentage": 29.15, "elapsed_time": "7:36:49", "remaining_time": "18:30:13"} +{"current_steps": 2069, "total_steps": 7063, "loss": 0.6676, "reward": 1.1706, "learning_rate": 6.952940899265737e-07, "epoch": 0.29, "percentage": 29.29, "elapsed_time": "7:39:03", "remaining_time": "18:28:01"} +{"current_steps": 2079, "total_steps": 7063, "loss": 0.6, "reward": 1.5086, "learning_rate": 7.365668228328826e-07, "epoch": 0.29, "percentage": 29.44, "elapsed_time": "7:41:23", "remaining_time": "18:26:06"} +{"current_steps": 2089, "total_steps": 7063, "loss": 0.6679, "reward": 1.2791, "learning_rate": 7.837066628946432e-07, "epoch": 0.3, "percentage": 29.58, "elapsed_time": "7:43:42", "remaining_time": "18:24:06"} +{"current_steps": 2099, "total_steps": 7063, "loss": 0.7675, "reward": 0.9776, "learning_rate": 8.321807544939064e-07, "epoch": 0.3, "percentage": 29.72, "elapsed_time": "7:45:59", "remaining_time": "18:22:03"} +{"current_steps": 2109, "total_steps": 7063, "loss": 0.7572, "reward": 0.9887, "learning_rate": 8.819737579410242e-07, "epoch": 0.3, "percentage": 29.86, "elapsed_time": "7:48:13", "remaining_time": "18:19:52"} +{"current_steps": 2119, "total_steps": 7063, "loss": 0.6559, "reward": 1.4601, "learning_rate": 9.330699161749757e-07, "epoch": 0.3, "percentage": 30.0, "elapsed_time": "7:50:28", "remaining_time": "18:17:42"} +{"current_steps": 2129, "total_steps": 7063, "loss": 0.6565, "reward": 0.9818, "learning_rate": 9.854530597496987e-07, "epoch": 0.3, "percentage": 30.14, "elapsed_time": "7:52:42", "remaining_time": "18:15:30"} +{"current_steps": 2139, "total_steps": 7063, "loss": 0.7718, "reward": 1.2949, "learning_rate": 1.0391066119509434e-06, "epoch": 0.3, "percentage": 30.28, "elapsed_time": "7:54:57", "remaining_time": "18:13:22"} +{"current_steps": 2149, "total_steps": 7063, "loss": 0.7361, "reward": 1.2424, "learning_rate": 1.0940135940419849e-06, "epoch": 0.3, "percentage": 30.43, "elapsed_time": "7:57:13", "remaining_time": "18:11:13"} +{"current_steps": 2159, "total_steps": 7063, "loss": 0.7542, "reward": 1.5161, "learning_rate": 1.1501566306365751e-06, "epoch": 0.31, "percentage": 30.57, "elapsed_time": "7:59:33", "remaining_time": "18:09:16"} +{"current_steps": 2169, "total_steps": 7063, "loss": 0.8184, "reward": 1.0929, "learning_rate": 1.2075179551973992e-06, "epoch": 0.31, "percentage": 30.71, "elapsed_time": "8:01:54", "remaining_time": "18:07:20"} +{"current_steps": 2179, "total_steps": 7063, "loss": 0.7978, "reward": 1.0925, "learning_rate": 1.2660794156583274e-06, "epoch": 0.31, "percentage": 30.85, "elapsed_time": "8:04:13", "remaining_time": "18:05:19"} +{"current_steps": 2189, "total_steps": 7063, "loss": 0.827, "reward": 1.184, "learning_rate": 1.3258224801686499e-06, "epoch": 0.31, "percentage": 30.99, "elapsed_time": "8:06:29", "remaining_time": "18:03:13"} +{"current_steps": 2199, "total_steps": 7063, "loss": 0.823, "reward": 1.4141, "learning_rate": 1.3867282429575173e-06, "epoch": 0.31, "percentage": 31.13, "elapsed_time": "8:08:55", "remaining_time": "18:01:27"} +{"current_steps": 2209, "total_steps": 7063, "loss": 0.7578, "reward": 1.3979, "learning_rate": 1.4487774303166852e-06, "epoch": 0.31, "percentage": 31.28, "elapsed_time": "8:11:12", "remaining_time": "17:59:21"} +{"current_steps": 2219, "total_steps": 7063, "loss": 0.7393, "reward": 1.1531, "learning_rate": 1.5119504066997131e-06, "epoch": 0.31, "percentage": 31.42, "elapsed_time": "8:13:27", "remaining_time": "17:57:12"} +{"current_steps": 2229, "total_steps": 7063, "loss": 0.766, "reward": 1.1854, "learning_rate": 1.5762271809356506e-06, "epoch": 0.32, "percentage": 31.56, "elapsed_time": "8:15:49", "remaining_time": "17:55:17"} +{"current_steps": 2239, "total_steps": 7063, "loss": 0.8646, "reward": 1.1649, "learning_rate": 1.6415874125552804e-06, "epoch": 0.32, "percentage": 31.7, "elapsed_time": "8:18:09", "remaining_time": "17:53:17"} +{"current_steps": 2249, "total_steps": 7063, "loss": 0.8664, "reward": 1.2346, "learning_rate": 1.708010418227873e-06, "epoch": 0.32, "percentage": 31.84, "elapsed_time": "8:20:27", "remaining_time": "17:51:13"} +{"current_steps": 2259, "total_steps": 7063, "loss": 1.0222, "reward": 1.5159, "learning_rate": 1.7754751783064666e-06, "epoch": 0.32, "percentage": 31.98, "elapsed_time": "8:22:52", "remaining_time": "17:49:24"} +{"current_steps": 2269, "total_steps": 7063, "loss": 0.8788, "reward": 0.8888, "learning_rate": 1.8439603434795529e-06, "epoch": 0.32, "percentage": 32.13, "elapsed_time": "8:25:08", "remaining_time": "17:47:17"} +{"current_steps": 2279, "total_steps": 7063, "loss": 0.9189, "reward": 1.0559, "learning_rate": 1.9134442415270972e-06, "epoch": 0.32, "percentage": 32.27, "elapsed_time": "8:27:29", "remaining_time": "17:45:18"} +{"current_steps": 2289, "total_steps": 7063, "loss": 0.8378, "reward": 1.3561, "learning_rate": 1.9839048841787427e-06, "epoch": 0.32, "percentage": 32.41, "elapsed_time": "8:29:49", "remaining_time": "17:43:19"} +{"current_steps": 2299, "total_steps": 7063, "loss": 0.9199, "reward": 1.0484, "learning_rate": 2.055319974072026e-06, "epoch": 0.33, "percentage": 32.55, "elapsed_time": "8:32:12", "remaining_time": "17:41:23"} +{"current_steps": 2309, "total_steps": 7063, "loss": 0.914, "reward": 1.1703, "learning_rate": 2.1276669118084022e-06, "epoch": 0.33, "percentage": 32.69, "elapsed_time": "8:34:27", "remaining_time": "17:39:14"} +{"current_steps": 2319, "total_steps": 7063, "loss": 0.911, "reward": 1.1389, "learning_rate": 2.2009228031048692e-06, "epoch": 0.33, "percentage": 32.83, "elapsed_time": "8:36:44", "remaining_time": "17:37:05"} +{"current_steps": 2329, "total_steps": 7063, "loss": 0.958, "reward": 1.4305, "learning_rate": 2.275064466038872e-06, "epoch": 0.33, "percentage": 32.97, "elapsed_time": "8:39:03", "remaining_time": "17:35:03"} +{"current_steps": 2339, "total_steps": 7063, "loss": 0.9604, "reward": 1.1523, "learning_rate": 2.3500684383842686e-06, "epoch": 0.33, "percentage": 33.12, "elapsed_time": "8:41:22", "remaining_time": "17:33:00"} +{"current_steps": 2349, "total_steps": 7063, "loss": 0.9629, "reward": 1.3146, "learning_rate": 2.425910985035971e-06, "epoch": 0.33, "percentage": 33.26, "elapsed_time": "8:43:34", "remaining_time": "17:30:42"} +{"current_steps": 2359, "total_steps": 7063, "loss": 0.9562, "reward": 1.2473, "learning_rate": 2.502568105520966e-06, "epoch": 0.33, "percentage": 33.4, "elapsed_time": "8:45:57", "remaining_time": "17:28:46"} +{"current_steps": 2369, "total_steps": 7063, "loss": 0.9781, "reward": 1.1404, "learning_rate": 2.580015541593278e-06, "epoch": 0.34, "percentage": 33.54, "elapsed_time": "8:48:26", "remaining_time": "17:27:04"} +{"current_steps": 2379, "total_steps": 7063, "loss": 1.0955, "reward": 1.5582, "learning_rate": 2.6582287849105325e-06, "epoch": 0.34, "percentage": 33.68, "elapsed_time": "8:50:44", "remaining_time": "17:24:59"} +{"current_steps": 2389, "total_steps": 7063, "loss": 1.0466, "reward": 0.991, "learning_rate": 2.737183084789652e-06, "epoch": 0.34, "percentage": 33.82, "elapsed_time": "8:52:59", "remaining_time": "17:22:47"} +{"current_steps": 2399, "total_steps": 7063, "loss": 1.0117, "reward": 1.1598, "learning_rate": 2.816853456039237e-06, "epoch": 0.34, "percentage": 33.97, "elapsed_time": "8:55:17", "remaining_time": "17:20:40"} +{"current_steps": 2409, "total_steps": 7063, "loss": 0.995, "reward": 1.3519, "learning_rate": 2.89721468686618e-06, "epoch": 0.34, "percentage": 34.11, "elapsed_time": "8:57:31", "remaining_time": "17:18:27"} +{"current_steps": 2419, "total_steps": 7063, "loss": 1.0312, "reward": 1.2049, "learning_rate": 2.978241346853943e-06, "epoch": 0.34, "percentage": 34.25, "elapsed_time": "8:59:54", "remaining_time": "17:16:30"} +{"current_steps": 2429, "total_steps": 7063, "loss": 0.9939, "reward": 1.6098, "learning_rate": 3.0599077950100663e-06, "epoch": 0.34, "percentage": 34.39, "elapsed_time": "9:02:18", "remaining_time": "17:14:35"} +{"current_steps": 2439, "total_steps": 7063, "loss": 1.034, "reward": 1.6221, "learning_rate": 3.1421881878802834e-06, "epoch": 0.35, "percentage": 34.53, "elapsed_time": "9:04:38", "remaining_time": "17:12:33"} +{"current_steps": 2449, "total_steps": 7063, "loss": 1.1193, "reward": 1.2305, "learning_rate": 3.225056487726733e-06, "epoch": 0.35, "percentage": 34.67, "elapsed_time": "9:06:59", "remaining_time": "17:10:33"} +{"current_steps": 2459, "total_steps": 7063, "loss": 1.0559, "reward": 1.1538, "learning_rate": 3.30848647076761e-06, "epoch": 0.35, "percentage": 34.82, "elapsed_time": "9:09:19", "remaining_time": "17:08:30"} +{"current_steps": 2469, "total_steps": 7063, "loss": 1.0754, "reward": 1.4628, "learning_rate": 3.3924517354757323e-06, "epoch": 0.35, "percentage": 34.96, "elapsed_time": "9:11:36", "remaining_time": "17:06:21"} +{"current_steps": 2479, "total_steps": 7063, "loss": 1.1396, "reward": 1.6104, "learning_rate": 3.4769257109333308e-06, "epoch": 0.35, "percentage": 35.1, "elapsed_time": "9:13:54", "remaining_time": "17:04:14"} +{"current_steps": 2489, "total_steps": 7063, "loss": 1.1429, "reward": 1.0368, "learning_rate": 3.5618816652404684e-06, "epoch": 0.35, "percentage": 35.24, "elapsed_time": "9:16:17", "remaining_time": "17:02:17"} +{"current_steps": 2499, "total_steps": 7063, "loss": 1.1072, "reward": 0.9329, "learning_rate": 3.6472927139743597e-06, "epoch": 0.35, "percentage": 35.38, "elapsed_time": "9:18:36", "remaining_time": "17:00:11"} +{"current_steps": 2509, "total_steps": 7063, "loss": 1.0086, "reward": 1.07, "learning_rate": 3.733131828696984e-06, "epoch": 0.36, "percentage": 35.52, "elapsed_time": "9:20:56", "remaining_time": "16:58:09"} +{"current_steps": 2519, "total_steps": 7063, "loss": 1.0952, "reward": 1.268, "learning_rate": 3.8193718455083016e-06, "epoch": 0.36, "percentage": 35.66, "elapsed_time": "9:23:12", "remaining_time": "16:55:57"} +{"current_steps": 2529, "total_steps": 7063, "loss": 1.0826, "reward": 1.5212, "learning_rate": 3.905985473642267e-06, "epoch": 0.36, "percentage": 35.81, "elapsed_time": "9:25:32", "remaining_time": "16:53:54"} +{"current_steps": 2539, "total_steps": 7063, "loss": 1.1337, "reward": 1.5485, "learning_rate": 3.992945304103048e-06, "epoch": 0.36, "percentage": 35.95, "elapsed_time": "9:27:46", "remaining_time": "16:51:39"} +{"current_steps": 2549, "total_steps": 7063, "loss": 1.1041, "reward": 0.9738, "learning_rate": 4.080223818338628e-06, "epoch": 0.36, "percentage": 36.09, "elapsed_time": "9:30:04", "remaining_time": "16:49:32"} +{"current_steps": 2559, "total_steps": 7063, "loss": 1.131, "reward": 1.4512, "learning_rate": 4.1677933969491075e-06, "epoch": 0.36, "percentage": 36.23, "elapsed_time": "9:32:25", "remaining_time": "16:47:30"} +{"current_steps": 2569, "total_steps": 7063, "loss": 1.2036, "reward": 1.5905, "learning_rate": 4.255626328426871e-06, "epoch": 0.36, "percentage": 36.37, "elapsed_time": "9:34:48", "remaining_time": "16:45:31"} +{"current_steps": 2579, "total_steps": 7063, "loss": 1.2255, "reward": 1.4039, "learning_rate": 4.34369481792595e-06, "epoch": 0.37, "percentage": 36.51, "elapsed_time": "9:37:06", "remaining_time": "16:43:23"} +{"current_steps": 2589, "total_steps": 7063, "loss": 1.1984, "reward": 1.2347, "learning_rate": 4.4319709960577265e-06, "epoch": 0.37, "percentage": 36.66, "elapsed_time": "9:39:27", "remaining_time": "16:41:20"} +{"current_steps": 2599, "total_steps": 7063, "loss": 1.2755, "reward": 1.0769, "learning_rate": 4.520426927710256e-06, "epoch": 0.37, "percentage": 36.8, "elapsed_time": "9:41:48", "remaining_time": "16:39:18"} +{"current_steps": 2609, "total_steps": 7063, "loss": 1.168, "reward": 1.3889, "learning_rate": 4.609034620888345e-06, "epoch": 0.37, "percentage": 36.94, "elapsed_time": "9:44:04", "remaining_time": "16:37:06"} +{"current_steps": 2619, "total_steps": 7063, "loss": 1.2547, "reward": 1.0953, "learning_rate": 4.697766035571672e-06, "epoch": 0.37, "percentage": 37.08, "elapsed_time": "9:46:23", "remaining_time": "16:35:00"} +{"current_steps": 2629, "total_steps": 7063, "loss": 1.2799, "reward": 1.2669, "learning_rate": 4.786593092588078e-06, "epoch": 0.37, "percentage": 37.22, "elapsed_time": "9:48:40", "remaining_time": "16:32:50"} +{"current_steps": 2639, "total_steps": 7063, "loss": 1.2906, "reward": 1.154, "learning_rate": 4.875487682499274e-06, "epoch": 0.37, "percentage": 37.36, "elapsed_time": "9:50:59", "remaining_time": "16:30:44"} +{"current_steps": 2649, "total_steps": 7063, "loss": 1.3511, "reward": 1.0815, "learning_rate": 4.964421674496092e-06, "epoch": 0.38, "percentage": 37.51, "elapsed_time": "9:53:16", "remaining_time": "16:28:33"} +{"current_steps": 2659, "total_steps": 7063, "loss": 1.1756, "reward": 0.9634, "learning_rate": 5.053366925300517e-06, "epoch": 0.38, "percentage": 37.65, "elapsed_time": "9:55:35", "remaining_time": "16:26:26"} +{"current_steps": 2669, "total_steps": 7063, "loss": 1.381, "reward": 1.0239, "learning_rate": 5.142295288071672e-06, "epoch": 0.38, "percentage": 37.79, "elapsed_time": "9:57:55", "remaining_time": "16:24:22"} +{"current_steps": 2679, "total_steps": 7063, "loss": 1.3535, "reward": 1.0717, "learning_rate": 5.231178621312932e-06, "epoch": 0.38, "percentage": 37.93, "elapsed_time": "10:00:13", "remaining_time": "16:22:14"} +{"current_steps": 2689, "total_steps": 7063, "loss": 1.3538, "reward": 1.1411, "learning_rate": 5.319988797777313e-06, "epoch": 0.38, "percentage": 38.07, "elapsed_time": "10:02:33", "remaining_time": "16:20:08"} +{"current_steps": 2699, "total_steps": 7063, "loss": 1.4044, "reward": 1.1376, "learning_rate": 5.408697713368389e-06, "epoch": 0.38, "percentage": 38.21, "elapsed_time": "10:04:53", "remaining_time": "16:18:03"} +{"current_steps": 2709, "total_steps": 7063, "loss": 1.4148, "reward": 1.0292, "learning_rate": 5.497277296033871e-06, "epoch": 0.38, "percentage": 38.35, "elapsed_time": "10:07:18", "remaining_time": "16:16:05"} +{"current_steps": 2719, "total_steps": 7063, "loss": 1.3704, "reward": 1.2713, "learning_rate": 5.585699514649054e-06, "epoch": 0.38, "percentage": 38.5, "elapsed_time": "10:09:36", "remaining_time": "16:13:56"} +{"current_steps": 2729, "total_steps": 7063, "loss": 1.413, "reward": 0.897, "learning_rate": 5.673936387887281e-06, "epoch": 0.39, "percentage": 38.64, "elapsed_time": "10:11:57", "remaining_time": "16:11:51"} +{"current_steps": 2739, "total_steps": 7063, "loss": 1.3321, "reward": 1.7666, "learning_rate": 5.761959993074679e-06, "epoch": 0.39, "percentage": 38.78, "elapsed_time": "10:14:20", "remaining_time": "16:09:51"} +{"current_steps": 2749, "total_steps": 7063, "loss": 1.3719, "reward": 1.0575, "learning_rate": 5.849742475026331e-06, "epoch": 0.39, "percentage": 38.92, "elapsed_time": "10:16:40", "remaining_time": "16:07:45"} +{"current_steps": 2759, "total_steps": 7063, "loss": 1.4532, "reward": 1.2209, "learning_rate": 5.937256054861096e-06, "epoch": 0.39, "percentage": 39.06, "elapsed_time": "10:19:05", "remaining_time": "16:05:47"} +{"current_steps": 2769, "total_steps": 7063, "loss": 1.3653, "reward": 1.3277, "learning_rate": 6.024473038792244e-06, "epoch": 0.39, "percentage": 39.2, "elapsed_time": "10:21:26", "remaining_time": "16:03:42"} +{"current_steps": 2779, "total_steps": 7063, "loss": 1.4962, "reward": 1.511, "learning_rate": 6.1113658268912125e-06, "epoch": 0.39, "percentage": 39.35, "elapsed_time": "10:23:51", "remaining_time": "16:01:43"} +{"current_steps": 2789, "total_steps": 7063, "loss": 1.584, "reward": 1.2515, "learning_rate": 6.197906921821628e-06, "epoch": 0.39, "percentage": 39.49, "elapsed_time": "10:26:08", "remaining_time": "15:59:32"} +{"current_steps": 2799, "total_steps": 7063, "loss": 1.65, "reward": 1.1758, "learning_rate": 6.284068937540895e-06, "epoch": 0.4, "percentage": 39.63, "elapsed_time": "10:28:24", "remaining_time": "15:57:19"} +{"current_steps": 2809, "total_steps": 7063, "loss": 1.5406, "reward": 1.1564, "learning_rate": 6.3698246079665124e-06, "epoch": 0.4, "percentage": 39.77, "elapsed_time": "10:30:47", "remaining_time": "15:55:17"} +{"current_steps": 2819, "total_steps": 7063, "loss": 1.5352, "reward": 0.8774, "learning_rate": 6.455146795604489e-06, "epoch": 0.4, "percentage": 39.91, "elapsed_time": "10:33:01", "remaining_time": "15:53:00"} +{"current_steps": 2829, "total_steps": 7063, "loss": 1.5166, "reward": 1.0879, "learning_rate": 6.5400085001370186e-06, "epoch": 0.4, "percentage": 40.05, "elapsed_time": "10:35:19", "remaining_time": "15:50:51"} +{"current_steps": 2839, "total_steps": 7063, "loss": 1.5348, "reward": 1.0728, "learning_rate": 6.624382866966792e-06, "epoch": 0.4, "percentage": 40.2, "elapsed_time": "10:37:37", "remaining_time": "15:48:41"} +{"current_steps": 2849, "total_steps": 7063, "loss": 1.7907, "reward": 1.0792, "learning_rate": 6.708243195715136e-06, "epoch": 0.4, "percentage": 40.34, "elapsed_time": "10:39:54", "remaining_time": "15:46:30"} +{"current_steps": 2859, "total_steps": 7063, "loss": 1.4988, "reward": 1.1279, "learning_rate": 6.791562948671411e-06, "epoch": 0.4, "percentage": 40.48, "elapsed_time": "10:42:09", "remaining_time": "15:44:15"} +{"current_steps": 2869, "total_steps": 7063, "loss": 1.6644, "reward": 1.101, "learning_rate": 6.874315759190883e-06, "epoch": 0.41, "percentage": 40.62, "elapsed_time": "10:44:38", "remaining_time": "15:42:21"} +{"current_steps": 2879, "total_steps": 7063, "loss": 1.5802, "reward": 1.2685, "learning_rate": 6.956475440038508e-06, "epoch": 0.41, "percentage": 40.76, "elapsed_time": "10:46:59", "remaining_time": "15:40:15"} +{"current_steps": 2889, "total_steps": 7063, "loss": 1.5477, "reward": 1.4426, "learning_rate": 7.029890534302395e-06, "epoch": 0.41, "percentage": 40.9, "elapsed_time": "10:49:25", "remaining_time": "15:38:16"} +{"current_steps": 2899, "total_steps": 7063, "loss": 1.6061, "reward": 1.3666, "learning_rate": 7.110851801807612e-06, "epoch": 0.41, "percentage": 41.04, "elapsed_time": "10:51:42", "remaining_time": "15:36:04"} +{"current_steps": 2909, "total_steps": 7063, "loss": 1.688, "reward": 1.2262, "learning_rate": 7.191145087501121e-06, "epoch": 0.41, "percentage": 41.19, "elapsed_time": "10:54:02", "remaining_time": "15:33:57"} +{"current_steps": 2919, "total_steps": 7063, "loss": 1.5746, "reward": 1.1827, "learning_rate": 7.2707449824677236e-06, "epoch": 0.41, "percentage": 41.33, "elapsed_time": "10:56:23", "remaining_time": "15:31:51"} +{"current_steps": 2929, "total_steps": 7063, "loss": 1.9032, "reward": 1.197, "learning_rate": 7.34962629721661e-06, "epoch": 0.41, "percentage": 41.47, "elapsed_time": "10:58:45", "remaining_time": "15:29:46"} +{"current_steps": 2939, "total_steps": 7063, "loss": 1.6993, "reward": 1.0374, "learning_rate": 7.427764069652624e-06, "epoch": 0.42, "percentage": 41.61, "elapsed_time": "11:01:04", "remaining_time": "15:27:37"} +{"current_steps": 2949, "total_steps": 7063, "loss": 1.8476, "reward": 1.122, "learning_rate": 7.505133572975546e-06, "epoch": 0.42, "percentage": 41.75, "elapsed_time": "11:03:27", "remaining_time": "15:25:33"} +{"current_steps": 2959, "total_steps": 7063, "loss": 1.7903, "reward": 0.9016, "learning_rate": 7.581710323504927e-06, "epoch": 0.42, "percentage": 41.89, "elapsed_time": "11:05:37", "remaining_time": "15:23:12"} +{"current_steps": 2969, "total_steps": 7063, "loss": 1.7651, "reward": 1.4757, "learning_rate": 7.65747008842797e-06, "epoch": 0.42, "percentage": 42.04, "elapsed_time": "11:07:56", "remaining_time": "15:21:02"} +{"current_steps": 2979, "total_steps": 7063, "loss": 1.7973, "reward": 1.1568, "learning_rate": 7.732388893468042e-06, "epoch": 0.42, "percentage": 42.18, "elapsed_time": "11:10:14", "remaining_time": "15:18:50"} +{"current_steps": 2989, "total_steps": 7063, "loss": 1.7356, "reward": 1.043, "learning_rate": 7.806443030471356e-06, "epoch": 0.42, "percentage": 42.32, "elapsed_time": "11:12:27", "remaining_time": "15:16:33"} +{"current_steps": 2999, "total_steps": 7063, "loss": 1.6922, "reward": 1.229, "learning_rate": 7.879609064909478e-06, "epoch": 0.42, "percentage": 42.46, "elapsed_time": "11:14:51", "remaining_time": "15:14:30"} +{"current_steps": 3009, "total_steps": 7063, "loss": 1.5853, "reward": 0.8997, "learning_rate": 7.951863843295167e-06, "epoch": 0.43, "percentage": 42.6, "elapsed_time": "11:17:12", "remaining_time": "15:12:23"} +{"current_steps": 3019, "total_steps": 7063, "loss": 1.8937, "reward": 1.0902, "learning_rate": 8.023184500509366e-06, "epoch": 0.43, "percentage": 42.74, "elapsed_time": "11:19:31", "remaining_time": "15:10:14"} +{"current_steps": 3029, "total_steps": 7063, "loss": 1.8137, "reward": 0.6018, "learning_rate": 8.093548467036875e-06, "epoch": 0.43, "percentage": 42.89, "elapsed_time": "11:21:48", "remaining_time": "15:08:01"} +{"current_steps": 3039, "total_steps": 7063, "loss": 1.8954, "reward": 1.3788, "learning_rate": 8.162933476108515e-06, "epoch": 0.43, "percentage": 43.03, "elapsed_time": "11:24:08", "remaining_time": "15:05:53"} +{"current_steps": 3049, "total_steps": 7063, "loss": 1.8599, "reward": 1.2312, "learning_rate": 8.231317570747481e-06, "epoch": 0.43, "percentage": 43.17, "elapsed_time": "11:26:27", "remaining_time": "15:03:43"} +{"current_steps": 3059, "total_steps": 7063, "loss": 1.9199, "reward": 1.1653, "learning_rate": 8.298679110717638e-06, "epoch": 0.43, "percentage": 43.31, "elapsed_time": "11:28:50", "remaining_time": "15:01:38"} +{"current_steps": 3069, "total_steps": 7063, "loss": 1.9247, "reward": 1.2953, "learning_rate": 8.364996779371618e-06, "epoch": 0.43, "percentage": 43.45, "elapsed_time": "11:31:13", "remaining_time": "14:59:33"} +{"current_steps": 3079, "total_steps": 7063, "loss": 1.8365, "reward": 1.4825, "learning_rate": 8.430249590396486e-06, "epoch": 0.44, "percentage": 43.59, "elapsed_time": "11:33:38", "remaining_time": "14:57:31"} +{"current_steps": 3089, "total_steps": 7063, "loss": 1.9522, "reward": 1.4488, "learning_rate": 8.494416894454915e-06, "epoch": 0.44, "percentage": 43.73, "elapsed_time": "11:35:51", "remaining_time": "14:55:13"} +{"current_steps": 3099, "total_steps": 7063, "loss": 1.8893, "reward": 1.0284, "learning_rate": 8.557478385719662e-06, "epoch": 0.44, "percentage": 43.88, "elapsed_time": "11:38:10", "remaining_time": "14:53:03"} +{"current_steps": 3109, "total_steps": 7063, "loss": 1.9354, "reward": 1.1625, "learning_rate": 8.619414108299387e-06, "epoch": 0.44, "percentage": 44.02, "elapsed_time": "11:40:25", "remaining_time": "14:50:47"} +{"current_steps": 3119, "total_steps": 7063, "loss": 2.0966, "reward": 1.1395, "learning_rate": 8.680204462553725e-06, "epoch": 0.44, "percentage": 44.16, "elapsed_time": "11:42:39", "remaining_time": "14:48:30"} +{"current_steps": 3129, "total_steps": 7063, "loss": 2.0063, "reward": 1.8746, "learning_rate": 8.739830211295619e-06, "epoch": 0.44, "percentage": 44.3, "elapsed_time": "11:45:09", "remaining_time": "14:46:34"} +{"current_steps": 3139, "total_steps": 7063, "loss": 2.0812, "reward": 0.5741, "learning_rate": 8.798272485878943e-06, "epoch": 0.44, "percentage": 44.44, "elapsed_time": "11:47:27", "remaining_time": "14:44:23"} +{"current_steps": 3149, "total_steps": 7063, "loss": 2.2512, "reward": 1.0503, "learning_rate": 8.855512792169513e-06, "epoch": 0.45, "percentage": 44.58, "elapsed_time": "11:49:49", "remaining_time": "14:42:16"} +{"current_steps": 3159, "total_steps": 7063, "loss": 2.2182, "reward": 1.8183, "learning_rate": 8.911533016397595e-06, "epoch": 0.45, "percentage": 44.73, "elapsed_time": "11:52:09", "remaining_time": "14:40:06"} +{"current_steps": 3169, "total_steps": 7063, "loss": 1.9811, "reward": 1.1761, "learning_rate": 8.966315430890007e-06, "epoch": 0.45, "percentage": 44.87, "elapsed_time": "11:54:32", "remaining_time": "14:38:01"} +{"current_steps": 3179, "total_steps": 7063, "loss": 2.1593, "reward": 1.2371, "learning_rate": 9.019842699680076e-06, "epoch": 0.45, "percentage": 45.01, "elapsed_time": "11:56:57", "remaining_time": "14:35:57"} +{"current_steps": 3189, "total_steps": 7063, "loss": 2.0525, "reward": 0.915, "learning_rate": 9.072097883993627e-06, "epoch": 0.45, "percentage": 45.15, "elapsed_time": "11:59:12", "remaining_time": "14:33:42"} +{"current_steps": 3199, "total_steps": 7063, "loss": 2.3064, "reward": 0.7924, "learning_rate": 9.123064447609291e-06, "epoch": 0.45, "percentage": 45.29, "elapsed_time": "12:01:32", "remaining_time": "14:31:32"} +{"current_steps": 3209, "total_steps": 7063, "loss": 2.1051, "reward": 0.9017, "learning_rate": 9.172726262091392e-06, "epoch": 0.45, "percentage": 45.43, "elapsed_time": "12:03:46", "remaining_time": "14:29:14"} +{"current_steps": 3219, "total_steps": 7063, "loss": 2.064, "reward": 1.1497, "learning_rate": 9.221067611893823e-06, "epoch": 0.46, "percentage": 45.58, "elapsed_time": "12:06:04", "remaining_time": "14:27:03"} +{"current_steps": 3229, "total_steps": 7063, "loss": 2.175, "reward": 1.3865, "learning_rate": 9.268073199333256e-06, "epoch": 0.46, "percentage": 45.72, "elapsed_time": "12:08:29", "remaining_time": "14:24:58"} +{"current_steps": 3239, "total_steps": 7063, "loss": 2.0706, "reward": 0.8848, "learning_rate": 9.313728149430105e-06, "epoch": 0.46, "percentage": 45.86, "elapsed_time": "12:10:45", "remaining_time": "14:22:44"} +{"current_steps": 3249, "total_steps": 7063, "loss": 2.1955, "reward": 0.97, "learning_rate": 9.358018014615742e-06, "epoch": 0.46, "percentage": 46.0, "elapsed_time": "12:13:03", "remaining_time": "14:20:31"} +{"current_steps": 3259, "total_steps": 7063, "loss": 2.193, "reward": 0.7877, "learning_rate": 9.400928779304435e-06, "epoch": 0.46, "percentage": 46.14, "elapsed_time": "12:15:20", "remaining_time": "14:18:18"} +{"current_steps": 3269, "total_steps": 7063, "loss": 2.1918, "reward": 1.0283, "learning_rate": 9.44244686432861e-06, "epoch": 0.46, "percentage": 46.28, "elapsed_time": "12:17:35", "remaining_time": "14:16:02"} +{"current_steps": 3279, "total_steps": 7063, "loss": 2.2743, "reward": 1.3865, "learning_rate": 9.482559131235998e-06, "epoch": 0.46, "percentage": 46.43, "elapsed_time": "12:19:53", "remaining_time": "14:13:50"} +{"current_steps": 3289, "total_steps": 7063, "loss": 2.0822, "reward": 1.0819, "learning_rate": 9.521252886447305e-06, "epoch": 0.47, "percentage": 46.57, "elapsed_time": "12:22:15", "remaining_time": "14:11:42"} +{"current_steps": 3299, "total_steps": 7063, "loss": 2.3622, "reward": 0.9896, "learning_rate": 9.55851588527312e-06, "epoch": 0.47, "percentage": 46.71, "elapsed_time": "12:24:34", "remaining_time": "14:09:31"} +{"current_steps": 3309, "total_steps": 7063, "loss": 2.2891, "reward": 0.938, "learning_rate": 9.594336335788757e-06, "epoch": 0.47, "percentage": 46.85, "elapsed_time": "12:26:49", "remaining_time": "14:07:15"} +{"current_steps": 3319, "total_steps": 7063, "loss": 2.5214, "reward": 1.1171, "learning_rate": 9.628702902565832e-06, "epoch": 0.47, "percentage": 46.99, "elapsed_time": "12:29:06", "remaining_time": "14:05:01"} +{"current_steps": 3329, "total_steps": 7063, "loss": 2.3513, "reward": 1.368, "learning_rate": 9.661604710259371e-06, "epoch": 0.47, "percentage": 47.13, "elapsed_time": "12:31:23", "remaining_time": "14:02:48"} +{"current_steps": 3339, "total_steps": 7063, "loss": 2.3446, "reward": 0.889, "learning_rate": 9.693031347049324e-06, "epoch": 0.47, "percentage": 47.27, "elapsed_time": "12:33:41", "remaining_time": "14:00:36"} +{"current_steps": 3349, "total_steps": 7063, "loss": 2.382, "reward": 1.2531, "learning_rate": 9.722972867935398e-06, "epoch": 0.47, "percentage": 47.42, "elapsed_time": "12:36:00", "remaining_time": "13:58:24"} +{"current_steps": 3359, "total_steps": 7063, "loss": 2.6059, "reward": 1.0768, "learning_rate": 9.751419797884179e-06, "epoch": 0.48, "percentage": 47.56, "elapsed_time": "12:38:29", "remaining_time": "13:56:23"} +{"current_steps": 3369, "total_steps": 7063, "loss": 2.5748, "reward": 1.2895, "learning_rate": 9.778363134827496e-06, "epoch": 0.48, "percentage": 47.7, "elapsed_time": "12:40:57", "remaining_time": "13:54:21"} +{"current_steps": 3379, "total_steps": 7063, "loss": 2.446, "reward": 1.58, "learning_rate": 9.803794352511163e-06, "epoch": 0.48, "percentage": 47.84, "elapsed_time": "12:43:18", "remaining_time": "13:52:12"} +{"current_steps": 3389, "total_steps": 7063, "loss": 2.6801, "reward": 1.4807, "learning_rate": 9.8277054031931e-06, "epoch": 0.48, "percentage": 47.98, "elapsed_time": "12:45:36", "remaining_time": "13:49:59"} +{"current_steps": 3399, "total_steps": 7063, "loss": 2.5744, "reward": 1.6297, "learning_rate": 9.850088720190065e-06, "epoch": 0.48, "percentage": 48.12, "elapsed_time": "12:47:50", "remaining_time": "13:47:42"} +{"current_steps": 3409, "total_steps": 7063, "loss": 2.425, "reward": 1.0175, "learning_rate": 9.870937220272141e-06, "epoch": 0.48, "percentage": 48.27, "elapsed_time": "12:50:07", "remaining_time": "13:45:28"} +{"current_steps": 3419, "total_steps": 7063, "loss": 2.5949, "reward": 0.9162, "learning_rate": 9.890244305904224e-06, "epoch": 0.48, "percentage": 48.41, "elapsed_time": "12:52:29", "remaining_time": "13:43:20"} +{"current_steps": 3429, "total_steps": 7063, "loss": 2.7776, "reward": 1.0664, "learning_rate": 9.90800386733383e-06, "epoch": 0.49, "percentage": 48.55, "elapsed_time": "12:54:49", "remaining_time": "13:41:08"} +{"current_steps": 3439, "total_steps": 7063, "loss": 2.5816, "reward": 0.9759, "learning_rate": 9.924210284524536e-06, "epoch": 0.49, "percentage": 48.69, "elapsed_time": "12:57:09", "remaining_time": "13:38:57"} +{"current_steps": 3449, "total_steps": 7063, "loss": 2.6804, "reward": 1.2715, "learning_rate": 9.938858428934433e-06, "epoch": 0.49, "percentage": 48.83, "elapsed_time": "12:59:32", "remaining_time": "13:36:50"} +{"current_steps": 3459, "total_steps": 7063, "loss": 2.9037, "reward": 0.9395, "learning_rate": 9.95194366513908e-06, "epoch": 0.49, "percentage": 48.97, "elapsed_time": "13:01:51", "remaining_time": "13:34:38"} +{"current_steps": 3469, "total_steps": 7063, "loss": 2.7221, "reward": 1.0787, "learning_rate": 9.963461852298375e-06, "epoch": 0.49, "percentage": 49.12, "elapsed_time": "13:04:15", "remaining_time": "13:32:30"} +{"current_steps": 3479, "total_steps": 7063, "loss": 2.739, "reward": 0.8561, "learning_rate": 9.973409345466938e-06, "epoch": 0.49, "percentage": 49.26, "elapsed_time": "13:06:34", "remaining_time": "13:30:19"} +{"current_steps": 3489, "total_steps": 7063, "loss": 2.9711, "reward": 0.6989, "learning_rate": 9.981782996747553e-06, "epoch": 0.49, "percentage": 49.4, "elapsed_time": "13:08:54", "remaining_time": "13:28:08"} +{"current_steps": 3499, "total_steps": 7063, "loss": 2.714, "reward": 1.2962, "learning_rate": 9.988580156287328e-06, "epoch": 0.5, "percentage": 49.54, "elapsed_time": "13:11:08", "remaining_time": "13:25:50"} +{"current_steps": 3509, "total_steps": 7063, "loss": 2.8646, "reward": 1.0722, "learning_rate": 9.99379867311624e-06, "epoch": 0.5, "percentage": 49.68, "elapsed_time": "13:13:31", "remaining_time": "13:23:41"} +{"current_steps": 3519, "total_steps": 7063, "loss": 2.7756, "reward": 1.2141, "learning_rate": 9.99743689582782e-06, "epoch": 0.5, "percentage": 49.82, "elapsed_time": "13:15:51", "remaining_time": "13:21:30"} +{"current_steps": 3529, "total_steps": 7063, "loss": 2.8301, "reward": 0.9352, "learning_rate": 9.999493673101737e-06, "epoch": 0.5, "percentage": 49.96, "elapsed_time": "13:18:05", "remaining_time": "13:19:13"} +{"current_steps": 3539, "total_steps": 7063, "loss": 2.9438, "reward": 1.6458, "learning_rate": 9.999968354068129e-06, "epoch": 0.5, "percentage": 50.11, "elapsed_time": "13:20:25", "remaining_time": "13:17:02"} +{"current_steps": 3549, "total_steps": 7063, "loss": 2.9145, "reward": 0.976, "learning_rate": 9.99886078851359e-06, "epoch": 0.5, "percentage": 50.25, "elapsed_time": "13:22:42", "remaining_time": "13:14:47"} +{"current_steps": 3559, "total_steps": 7063, "loss": 3.1017, "reward": 1.5394, "learning_rate": 9.996171326928681e-06, "epoch": 0.5, "percentage": 50.39, "elapsed_time": "13:24:56", "remaining_time": "13:12:29"} +{"current_steps": 3569, "total_steps": 7063, "loss": 3.2175, "reward": 1.26, "learning_rate": 9.99190082039704e-06, "epoch": 0.51, "percentage": 50.53, "elapsed_time": "13:27:15", "remaining_time": "13:10:17"} +{"current_steps": 3579, "total_steps": 7063, "loss": 3.3646, "reward": 1.0458, "learning_rate": 9.986050620326042e-06, "epoch": 0.51, "percentage": 50.67, "elapsed_time": "13:29:34", "remaining_time": "13:08:04"} +{"current_steps": 3589, "total_steps": 7063, "loss": 3.1451, "reward": 0.8256, "learning_rate": 9.978622578019142e-06, "epoch": 0.51, "percentage": 50.81, "elapsed_time": "13:31:55", "remaining_time": "13:05:54"} +{"current_steps": 3599, "total_steps": 7063, "loss": 3.2034, "reward": 1.0347, "learning_rate": 9.969619044090037e-06, "epoch": 0.51, "percentage": 50.96, "elapsed_time": "13:34:24", "remaining_time": "13:03:51"} +{"current_steps": 3609, "total_steps": 7063, "loss": 3.0279, "reward": 1.2227, "learning_rate": 9.959042867718814e-06, "epoch": 0.51, "percentage": 51.1, "elapsed_time": "13:36:43", "remaining_time": "13:01:38"} +{"current_steps": 3619, "total_steps": 7063, "loss": 3.1472, "reward": 0.9186, "learning_rate": 9.946897395750301e-06, "epoch": 0.51, "percentage": 51.24, "elapsed_time": "13:39:00", "remaining_time": "12:59:24"} +{"current_steps": 3629, "total_steps": 7063, "loss": 3.3048, "reward": 1.1239, "learning_rate": 9.93318647163498e-06, "epoch": 0.51, "percentage": 51.38, "elapsed_time": "13:41:25", "remaining_time": "12:57:17"} +{"current_steps": 3639, "total_steps": 7063, "loss": 3.2461, "reward": 1.0214, "learning_rate": 9.917914434212713e-06, "epoch": 0.52, "percentage": 51.52, "elapsed_time": "13:43:44", "remaining_time": "12:55:04"} +{"current_steps": 3649, "total_steps": 7063, "loss": 3.6282, "reward": 0.8601, "learning_rate": 9.901086116339697e-06, "epoch": 0.52, "percentage": 51.66, "elapsed_time": "13:46:03", "remaining_time": "12:52:51"} +{"current_steps": 3659, "total_steps": 7063, "loss": 3.5003, "reward": 1.4156, "learning_rate": 9.882706843359122e-06, "epoch": 0.52, "percentage": 51.81, "elapsed_time": "13:48:22", "remaining_time": "12:50:38"} +{"current_steps": 3669, "total_steps": 7063, "loss": 3.4925, "reward": 1.298, "learning_rate": 9.862782431415948e-06, "epoch": 0.52, "percentage": 51.95, "elapsed_time": "13:50:38", "remaining_time": "12:48:23"} +{"current_steps": 3679, "total_steps": 7063, "loss": 3.2247, "reward": 1.0714, "learning_rate": 9.84131918561637e-06, "epoch": 0.52, "percentage": 52.09, "elapsed_time": "13:53:05", "remaining_time": "12:46:17"} +{"current_steps": 3689, "total_steps": 7063, "loss": 3.1443, "reward": 0.6895, "learning_rate": 9.818323898032577e-06, "epoch": 0.52, "percentage": 52.23, "elapsed_time": "13:55:32", "remaining_time": "12:44:11"} +{"current_steps": 3699, "total_steps": 7063, "loss": 3.2626, "reward": 0.8729, "learning_rate": 9.79380384555339e-06, "epoch": 0.52, "percentage": 52.37, "elapsed_time": "13:57:47", "remaining_time": "12:41:54"} +{"current_steps": 3709, "total_steps": 7063, "loss": 3.2382, "reward": 0.8079, "learning_rate": 9.767766787581457e-06, "epoch": 0.53, "percentage": 52.51, "elapsed_time": "14:00:03", "remaining_time": "12:39:38"} +{"current_steps": 3719, "total_steps": 7063, "loss": 3.3773, "reward": 0.7502, "learning_rate": 9.740220963577808e-06, "epoch": 0.53, "percentage": 52.65, "elapsed_time": "14:02:22", "remaining_time": "12:37:26"} +{"current_steps": 3729, "total_steps": 7063, "loss": 3.6027, "reward": 0.9402, "learning_rate": 9.71117509045445e-06, "epoch": 0.53, "percentage": 52.8, "elapsed_time": "14:04:44", "remaining_time": "12:35:15"} +{"current_steps": 3739, "total_steps": 7063, "loss": 3.5544, "reward": 1.4967, "learning_rate": 9.680638359815904e-06, "epoch": 0.53, "percentage": 52.94, "elapsed_time": "14:07:08", "remaining_time": "12:33:06"} +{"current_steps": 3749, "total_steps": 7063, "loss": 3.6005, "reward": 0.8315, "learning_rate": 9.648620435050486e-06, "epoch": 0.53, "percentage": 53.08, "elapsed_time": "14:09:24", "remaining_time": "12:30:51"} +{"current_steps": 3759, "total_steps": 7063, "loss": 3.668, "reward": 0.8521, "learning_rate": 9.615131448272347e-06, "epoch": 0.53, "percentage": 53.22, "elapsed_time": "14:11:46", "remaining_time": "12:28:40"} +{"current_steps": 3769, "total_steps": 7063, "loss": 3.67, "reward": 0.8598, "learning_rate": 9.580181997115115e-06, "epoch": 0.53, "percentage": 53.36, "elapsed_time": "14:14:08", "remaining_time": "12:26:29"} +{"current_steps": 3779, "total_steps": 7063, "loss": 3.6283, "reward": 1.1778, "learning_rate": 9.5437831413783e-06, "epoch": 0.54, "percentage": 53.5, "elapsed_time": "14:16:27", "remaining_time": "12:24:16"} +{"current_steps": 3789, "total_steps": 7063, "loss": 3.5786, "reward": 1.1969, "learning_rate": 9.505946399527368e-06, "epoch": 0.54, "percentage": 53.65, "elapsed_time": "14:18:46", "remaining_time": "12:22:02"} +{"current_steps": 3799, "total_steps": 7063, "loss": 3.7483, "reward": 0.853, "learning_rate": 9.466683745048738e-06, "epoch": 0.54, "percentage": 53.79, "elapsed_time": "14:20:59", "remaining_time": "12:19:44"} +{"current_steps": 3809, "total_steps": 7063, "loss": 4.1113, "reward": 0.9846, "learning_rate": 9.426007602660732e-06, "epoch": 0.54, "percentage": 53.93, "elapsed_time": "14:23:14", "remaining_time": "12:17:28"} +{"current_steps": 3819, "total_steps": 7063, "loss": 4.06, "reward": 0.7688, "learning_rate": 9.383930844381784e-06, "epoch": 0.54, "percentage": 54.07, "elapsed_time": "14:25:32", "remaining_time": "12:15:13"} +{"current_steps": 3829, "total_steps": 7063, "loss": 4.0023, "reward": 1.2128, "learning_rate": 9.34046678545704e-06, "epoch": 0.54, "percentage": 54.21, "elapsed_time": "14:27:50", "remaining_time": "12:12:59"} +{"current_steps": 3839, "total_steps": 7063, "loss": 4.136, "reward": 1.1385, "learning_rate": 9.295629180144766e-06, "epoch": 0.54, "percentage": 54.35, "elapsed_time": "14:30:12", "remaining_time": "12:10:47"} +{"current_steps": 3849, "total_steps": 7063, "loss": 4.3136, "reward": 0.6312, "learning_rate": 9.249432217363756e-06, "epoch": 0.54, "percentage": 54.5, "elapsed_time": "14:32:30", "remaining_time": "12:08:34"} +{"current_steps": 3859, "total_steps": 7063, "loss": 3.7442, "reward": 1.0192, "learning_rate": 9.20189051620328e-06, "epoch": 0.55, "percentage": 54.64, "elapsed_time": "14:34:47", "remaining_time": "12:06:18"} +{"current_steps": 3869, "total_steps": 7063, "loss": 3.9179, "reward": 0.9526, "learning_rate": 9.153019121296797e-06, "epoch": 0.55, "percentage": 54.78, "elapsed_time": "14:37:03", "remaining_time": "12:04:02"} +{"current_steps": 3879, "total_steps": 7063, "loss": 4.0701, "reward": 0.8944, "learning_rate": 9.102833498061115e-06, "epoch": 0.55, "percentage": 54.92, "elapsed_time": "14:39:30", "remaining_time": "12:01:55"} +{"current_steps": 3889, "total_steps": 7063, "loss": 4.2683, "reward": 1.1349, "learning_rate": 9.05134952780229e-06, "epoch": 0.55, "percentage": 55.06, "elapsed_time": "14:42:00", "remaining_time": "11:59:50"} +{"current_steps": 3899, "total_steps": 7063, "loss": 4.0388, "reward": 0.9505, "learning_rate": 8.998583502690016e-06, "epoch": 0.55, "percentage": 55.2, "elapsed_time": "14:44:19", "remaining_time": "11:57:37"} +{"current_steps": 3909, "total_steps": 7063, "loss": 4.0588, "reward": 0.8472, "learning_rate": 8.944552120601899e-06, "epoch": 0.55, "percentage": 55.34, "elapsed_time": "14:46:35", "remaining_time": "11:55:20"} +{"current_steps": 3919, "total_steps": 7063, "loss": 4.2467, "reward": 0.9783, "learning_rate": 8.889272479839426e-06, "epoch": 0.55, "percentage": 55.49, "elapsed_time": "14:48:51", "remaining_time": "11:53:04"} +{"current_steps": 3929, "total_steps": 7063, "loss": 4.2177, "reward": 1.1977, "learning_rate": 8.832762073717176e-06, "epoch": 0.56, "percentage": 55.63, "elapsed_time": "14:51:17", "remaining_time": "11:50:56"} +{"current_steps": 3939, "total_steps": 7063, "loss": 4.2541, "reward": 0.8039, "learning_rate": 8.77503878502702e-06, "epoch": 0.56, "percentage": 55.77, "elapsed_time": "14:53:40", "remaining_time": "11:48:45"} +{"current_steps": 3949, "total_steps": 7063, "loss": 4.5727, "reward": 0.8618, "learning_rate": 8.716120880379124e-06, "epoch": 0.56, "percentage": 55.91, "elapsed_time": "14:55:59", "remaining_time": "11:46:32"} +{"current_steps": 3959, "total_steps": 7063, "loss": 4.4046, "reward": 1.1126, "learning_rate": 8.656027004421407e-06, "epoch": 0.56, "percentage": 56.05, "elapsed_time": "14:58:19", "remaining_time": "11:44:18"} +{"current_steps": 3969, "total_steps": 7063, "loss": 4.3201, "reward": 0.986, "learning_rate": 8.59477617393948e-06, "epoch": 0.56, "percentage": 56.19, "elapsed_time": "15:00:34", "remaining_time": "11:42:02"} +{"current_steps": 3979, "total_steps": 7063, "loss": 4.7678, "reward": 0.9263, "learning_rate": 8.532387771838694e-06, "epoch": 0.56, "percentage": 56.34, "elapsed_time": "15:02:59", "remaining_time": "11:39:53"} +{"current_steps": 3989, "total_steps": 7063, "loss": 4.699, "reward": 1.0621, "learning_rate": 8.468881541010453e-06, "epoch": 0.56, "percentage": 56.48, "elapsed_time": "15:05:18", "remaining_time": "11:37:38"} +{"current_steps": 3999, "total_steps": 7063, "loss": 4.3774, "reward": 1.1508, "learning_rate": 8.404277578084491e-06, "epoch": 0.57, "percentage": 56.62, "elapsed_time": "15:07:31", "remaining_time": "11:35:19"} +{"current_steps": 4009, "total_steps": 7063, "loss": 4.5082, "reward": 0.7571, "learning_rate": 8.338596327069336e-06, "epoch": 0.57, "percentage": 56.76, "elapsed_time": "15:09:52", "remaining_time": "11:33:07"} +{"current_steps": 4019, "total_steps": 7063, "loss": 4.6633, "reward": 1.2757, "learning_rate": 8.271858572882709e-06, "epoch": 0.57, "percentage": 56.9, "elapsed_time": "15:12:10", "remaining_time": "11:30:53"} +{"current_steps": 4029, "total_steps": 7063, "loss": 5.087, "reward": 1.2216, "learning_rate": 8.204085434774164e-06, "epoch": 0.57, "percentage": 57.04, "elapsed_time": "15:14:30", "remaining_time": "11:28:39"} +{"current_steps": 4039, "total_steps": 7063, "loss": 4.3665, "reward": 1.0773, "learning_rate": 8.135298359641825e-06, "epoch": 0.57, "percentage": 57.19, "elapsed_time": "15:16:44", "remaining_time": "11:26:21"} +{"current_steps": 4049, "total_steps": 7063, "loss": 4.8474, "reward": 1.1603, "learning_rate": 8.065519115245542e-06, "epoch": 0.57, "percentage": 57.33, "elapsed_time": "15:19:04", "remaining_time": "11:24:08"} +{"current_steps": 4059, "total_steps": 7063, "loss": 4.9222, "reward": 1.3296, "learning_rate": 7.994769783318399e-06, "epoch": 0.57, "percentage": 57.47, "elapsed_time": "15:21:27", "remaining_time": "11:21:57"} +{"current_steps": 4069, "total_steps": 7063, "loss": 4.9875, "reward": 1.1223, "learning_rate": 7.923072752578967e-06, "epoch": 0.58, "percentage": 57.61, "elapsed_time": "15:23:44", "remaining_time": "11:19:41"} +{"current_steps": 4079, "total_steps": 7063, "loss": 5.3475, "reward": 0.9262, "learning_rate": 7.850450711646325e-06, "epoch": 0.58, "percentage": 57.75, "elapsed_time": "15:26:06", "remaining_time": "11:17:29"} +{"current_steps": 4089, "total_steps": 7063, "loss": 5.2078, "reward": 0.9273, "learning_rate": 7.776926641860262e-06, "epoch": 0.58, "percentage": 57.89, "elapsed_time": "15:28:21", "remaining_time": "11:15:12"} +{"current_steps": 4099, "total_steps": 7063, "loss": 5.227, "reward": 1.255, "learning_rate": 7.702523810008753e-06, "epoch": 0.58, "percentage": 58.03, "elapsed_time": "15:30:38", "remaining_time": "11:12:57"} +{"current_steps": 4109, "total_steps": 7063, "loss": 5.5427, "reward": 1.2129, "learning_rate": 7.627265760965192e-06, "epoch": 0.58, "percentage": 58.18, "elapsed_time": "15:32:59", "remaining_time": "11:10:43"} +{"current_steps": 4119, "total_steps": 7063, "loss": 5.0962, "reward": 1.0178, "learning_rate": 7.551176310237556e-06, "epoch": 0.58, "percentage": 58.32, "elapsed_time": "15:35:12", "remaining_time": "11:08:25"} +{"current_steps": 4129, "total_steps": 7063, "loss": 5.2122, "reward": 1.2638, "learning_rate": 7.474279536431992e-06, "epoch": 0.58, "percentage": 58.46, "elapsed_time": "15:37:23", "remaining_time": "11:06:05"} +{"current_steps": 4139, "total_steps": 7063, "loss": 5.7452, "reward": 1.0502, "learning_rate": 7.396599773633082e-06, "epoch": 0.59, "percentage": 58.6, "elapsed_time": "15:39:40", "remaining_time": "11:03:49"} +{"current_steps": 4149, "total_steps": 7063, "loss": 5.3589, "reward": 1.0121, "learning_rate": 7.318161603703284e-06, "epoch": 0.59, "percentage": 58.74, "elapsed_time": "15:41:57", "remaining_time": "11:01:34"} +{"current_steps": 4159, "total_steps": 7063, "loss": 5.2267, "reward": 0.9817, "learning_rate": 7.238989848504011e-06, "epoch": 0.59, "percentage": 58.88, "elapsed_time": "15:44:18", "remaining_time": "10:59:21"} +{"current_steps": 4169, "total_steps": 7063, "loss": 5.964, "reward": 1.487, "learning_rate": 7.159109562040667e-06, "epoch": 0.59, "percentage": 59.03, "elapsed_time": "15:46:41", "remaining_time": "10:57:10"} +{"current_steps": 4179, "total_steps": 7063, "loss": 5.9483, "reward": 0.9745, "learning_rate": 7.078546022534321e-06, "epoch": 0.59, "percentage": 59.17, "elapsed_time": "15:48:59", "remaining_time": "10:54:54"} +{"current_steps": 4189, "total_steps": 7063, "loss": 5.592, "reward": 1.4021, "learning_rate": 6.9973247244223385e-06, "epoch": 0.59, "percentage": 59.31, "elapsed_time": "15:51:13", "remaining_time": "10:52:37"} +{"current_steps": 4199, "total_steps": 7063, "loss": 5.8987, "reward": 1.2619, "learning_rate": 6.915471370290685e-06, "epoch": 0.59, "percentage": 59.45, "elapsed_time": "15:53:32", "remaining_time": "10:50:22"} +{"current_steps": 4209, "total_steps": 7063, "loss": 5.6118, "reward": 0.7087, "learning_rate": 6.8330118627402506e-06, "epoch": 0.6, "percentage": 59.59, "elapsed_time": "15:55:47", "remaining_time": "10:48:05"} +{"current_steps": 4219, "total_steps": 7063, "loss": 5.7941, "reward": 1.0972, "learning_rate": 6.7499722961899895e-06, "epoch": 0.6, "percentage": 59.73, "elapsed_time": "15:58:03", "remaining_time": "10:45:49"} +{"current_steps": 4229, "total_steps": 7063, "loss": 5.8458, "reward": 0.7238, "learning_rate": 6.666378948619261e-06, "epoch": 0.6, "percentage": 59.88, "elapsed_time": "16:00:23", "remaining_time": "10:43:35"} +{"current_steps": 4239, "total_steps": 7063, "loss": 6.0383, "reward": 0.841, "learning_rate": 6.582258273252182e-06, "epoch": 0.6, "percentage": 60.02, "elapsed_time": "16:02:45", "remaining_time": "10:41:23"} +{"current_steps": 4249, "total_steps": 7063, "loss": 5.7172, "reward": 0.8323, "learning_rate": 6.4976368901864125e-06, "epoch": 0.6, "percentage": 60.16, "elapsed_time": "16:05:00", "remaining_time": "10:39:05"} +{"current_steps": 4259, "total_steps": 7063, "loss": 6.9222, "reward": 0.9498, "learning_rate": 6.412541577969238e-06, "epoch": 0.6, "percentage": 60.3, "elapsed_time": "16:07:25", "remaining_time": "10:36:55"} +{"current_steps": 4269, "total_steps": 7063, "loss": 5.6869, "reward": 1.1194, "learning_rate": 6.32699926512342e-06, "epoch": 0.6, "percentage": 60.44, "elapsed_time": "16:09:49", "remaining_time": "10:34:44"} +{"current_steps": 4279, "total_steps": 7063, "loss": 6.5483, "reward": 1.0819, "learning_rate": 6.2410370216256875e-06, "epoch": 0.61, "percentage": 60.58, "elapsed_time": "16:12:05", "remaining_time": "10:32:27"} +{"current_steps": 4289, "total_steps": 7063, "loss": 6.4802, "reward": 0.8398, "learning_rate": 6.154682050340339e-06, "epoch": 0.61, "percentage": 60.72, "elapsed_time": "16:14:25", "remaining_time": "10:30:13"} +{"current_steps": 4299, "total_steps": 7063, "loss": 6.7995, "reward": 1.3388, "learning_rate": 6.06796167841092e-06, "epoch": 0.61, "percentage": 60.87, "elapsed_time": "16:16:43", "remaining_time": "10:27:58"} +{"current_steps": 4309, "total_steps": 7063, "loss": 6.4248, "reward": 1.0594, "learning_rate": 5.980903348612461e-06, "epoch": 0.61, "percentage": 61.01, "elapsed_time": "16:19:05", "remaining_time": "10:25:45"} +{"current_steps": 4319, "total_steps": 7063, "loss": 6.9055, "reward": 1.1437, "learning_rate": 5.893534610667239e-06, "epoch": 0.61, "percentage": 61.15, "elapsed_time": "16:21:23", "remaining_time": "10:23:30"} +{"current_steps": 4329, "total_steps": 7063, "loss": 6.9643, "reward": 0.952, "learning_rate": 5.805883112526556e-06, "epoch": 0.61, "percentage": 61.29, "elapsed_time": "16:23:46", "remaining_time": "10:21:18"} +{"current_steps": 4339, "total_steps": 7063, "loss": 7.0254, "reward": 1.459, "learning_rate": 5.717976591621577e-06, "epoch": 0.61, "percentage": 61.43, "elapsed_time": "16:26:05", "remaining_time": "10:19:03"} +{"current_steps": 4349, "total_steps": 7063, "loss": 7.2209, "reward": 0.9245, "learning_rate": 5.6298428660857275e-06, "epoch": 0.62, "percentage": 61.57, "elapsed_time": "16:28:28", "remaining_time": "10:16:51"} +{"current_steps": 4359, "total_steps": 7063, "loss": 7.022, "reward": 0.9485, "learning_rate": 5.541509825951659e-06, "epoch": 0.62, "percentage": 61.72, "elapsed_time": "16:30:44", "remaining_time": "10:14:34"} +{"current_steps": 4369, "total_steps": 7063, "loss": 6.937, "reward": 0.6953, "learning_rate": 5.453005424325387e-06, "epoch": 0.62, "percentage": 61.86, "elapsed_time": "16:33:10", "remaining_time": "10:12:24"} +{"current_steps": 4379, "total_steps": 7063, "loss": 7.3764, "reward": 0.8788, "learning_rate": 5.364357668540476e-06, "epoch": 0.62, "percentage": 62.0, "elapsed_time": "16:35:27", "remaining_time": "10:10:08"} +{"current_steps": 4389, "total_steps": 7063, "loss": 7.2731, "reward": 1.1819, "learning_rate": 5.275594611295118e-06, "epoch": 0.62, "percentage": 62.14, "elapsed_time": "16:37:40", "remaining_time": "10:07:49"} +{"current_steps": 4399, "total_steps": 7063, "loss": 7.1828, "reward": 0.6716, "learning_rate": 5.186744341774788e-06, "epoch": 0.62, "percentage": 62.28, "elapsed_time": "16:39:59", "remaining_time": "10:05:35"} +{"current_steps": 4409, "total_steps": 7063, "loss": 6.8891, "reward": 1.2351, "learning_rate": 5.097834976763426e-06, "epoch": 0.62, "percentage": 62.42, "elapsed_time": "16:42:15", "remaining_time": "10:03:18"} +{"current_steps": 4419, "total_steps": 7063, "loss": 7.9363, "reward": 0.9614, "learning_rate": 5.008894651745785e-06, "epoch": 0.63, "percentage": 62.57, "elapsed_time": "16:44:31", "remaining_time": "10:01:02"} +{"current_steps": 4429, "total_steps": 7063, "loss": 7.6238, "reward": 1.1227, "learning_rate": 4.9199515120039774e-06, "epoch": 0.63, "percentage": 62.71, "elapsed_time": "16:46:55", "remaining_time": "9:58:49"} +{"current_steps": 4439, "total_steps": 7063, "loss": 7.9218, "reward": 1.1009, "learning_rate": 4.831033703710816e-06, "epoch": 0.63, "percentage": 62.85, "elapsed_time": "16:49:14", "remaining_time": "9:56:35"} +{"current_steps": 4449, "total_steps": 7063, "loss": 7.5313, "reward": 1.0618, "learning_rate": 4.742169365022975e-06, "epoch": 0.63, "percentage": 62.99, "elapsed_time": "16:51:38", "remaining_time": "9:54:23"} +{"current_steps": 4459, "total_steps": 7063, "loss": 7.3797, "reward": 0.7438, "learning_rate": 4.6533866171765685e-06, "epoch": 0.63, "percentage": 63.13, "elapsed_time": "16:53:56", "remaining_time": "9:52:07"} +{"current_steps": 4469, "total_steps": 7063, "loss": 8.207, "reward": 1.3113, "learning_rate": 4.564713555588208e-06, "epoch": 0.63, "percentage": 63.27, "elapsed_time": "16:56:07", "remaining_time": "9:49:47"} +{"current_steps": 4479, "total_steps": 7063, "loss": 7.6581, "reward": 1.1344, "learning_rate": 4.476178240964121e-06, "epoch": 0.63, "percentage": 63.41, "elapsed_time": "16:58:29", "remaining_time": "9:47:35"} +{"current_steps": 4489, "total_steps": 7063, "loss": 7.7654, "reward": 1.3073, "learning_rate": 4.38780869042036e-06, "epoch": 0.64, "percentage": 63.56, "elapsed_time": "17:00:50", "remaining_time": "9:45:20"} +{"current_steps": 4499, "total_steps": 7063, "loss": 7.7657, "reward": 1.0558, "learning_rate": 4.2996328686166846e-06, "epoch": 0.64, "percentage": 63.7, "elapsed_time": "17:03:05", "remaining_time": "9:43:03"} +{"current_steps": 4509, "total_steps": 7063, "loss": 8.169, "reward": 1.2041, "learning_rate": 4.211678678907162e-06, "epoch": 0.64, "percentage": 63.84, "elapsed_time": "17:05:23", "remaining_time": "9:40:48"} +{"current_steps": 4519, "total_steps": 7063, "loss": 8.7798, "reward": 0.8431, "learning_rate": 4.123973954510092e-06, "epoch": 0.64, "percentage": 63.98, "elapsed_time": "17:07:41", "remaining_time": "9:38:32"} +{"current_steps": 4529, "total_steps": 7063, "loss": 8.4096, "reward": 0.7942, "learning_rate": 4.036546449700141e-06, "epoch": 0.64, "percentage": 64.12, "elapsed_time": "17:09:52", "remaining_time": "9:36:13"} +{"current_steps": 4539, "total_steps": 7063, "loss": 8.6166, "reward": 0.8147, "learning_rate": 3.958121586474061e-06, "epoch": 0.64, "percentage": 64.26, "elapsed_time": "17:12:13", "remaining_time": "9:33:59"} +{"current_steps": 4549, "total_steps": 7063, "loss": 8.1236, "reward": 0.9802, "learning_rate": 3.87129694065821e-06, "epoch": 0.64, "percentage": 64.41, "elapsed_time": "17:14:28", "remaining_time": "9:31:42"} +{"current_steps": 4559, "total_steps": 7063, "loss": 8.0437, "reward": 1.0559, "learning_rate": 3.7848294744007763e-06, "epoch": 0.65, "percentage": 64.55, "elapsed_time": "17:16:49", "remaining_time": "9:29:27"} +{"current_steps": 4569, "total_steps": 7063, "loss": 9.4977, "reward": 0.7246, "learning_rate": 3.6987465504444775e-06, "epoch": 0.65, "percentage": 64.69, "elapsed_time": "17:18:55", "remaining_time": "9:27:06"} +{"current_steps": 4579, "total_steps": 7063, "loss": 8.8468, "reward": 0.5876, "learning_rate": 3.6130754098431063e-06, "epoch": 0.65, "percentage": 64.83, "elapsed_time": "17:21:16", "remaining_time": "9:24:52"} +{"current_steps": 4589, "total_steps": 7063, "loss": 9.423, "reward": 1.0855, "learning_rate": 3.527843163341101e-06, "epoch": 0.65, "percentage": 64.97, "elapsed_time": "17:23:35", "remaining_time": "9:22:37"} +{"current_steps": 4599, "total_steps": 7063, "loss": 9.3485, "reward": 1.0619, "learning_rate": 3.4430767827942534e-06, "epoch": 0.65, "percentage": 65.11, "elapsed_time": "17:25:58", "remaining_time": "9:20:24"} +{"current_steps": 4609, "total_steps": 7063, "loss": 9.2073, "reward": 1.4898, "learning_rate": 3.3588030926345015e-06, "epoch": 0.65, "percentage": 65.26, "elapsed_time": "17:28:23", "remaining_time": "9:18:11"} +{"current_steps": 4619, "total_steps": 7063, "loss": 10.2192, "reward": 0.9771, "learning_rate": 3.2750487613812298e-06, "epoch": 0.65, "percentage": 65.4, "elapsed_time": "17:30:33", "remaining_time": "9:15:52"} +{"current_steps": 4629, "total_steps": 7063, "loss": 9.3942, "reward": 1.1159, "learning_rate": 3.1918402932020525e-06, "epoch": 0.66, "percentage": 65.54, "elapsed_time": "17:32:57", "remaining_time": "9:13:39"} +{"current_steps": 4639, "total_steps": 7063, "loss": 9.3759, "reward": 0.708, "learning_rate": 3.1092040195254756e-06, "epoch": 0.66, "percentage": 65.68, "elapsed_time": "17:35:17", "remaining_time": "9:11:25"} +{"current_steps": 4649, "total_steps": 7063, "loss": 9.9824, "reward": 1.0753, "learning_rate": 3.0271660907083157e-06, "epoch": 0.66, "percentage": 65.82, "elapsed_time": "17:37:30", "remaining_time": "9:09:07"} +{"current_steps": 4659, "total_steps": 7063, "loss": 10.3342, "reward": 1.505, "learning_rate": 2.9457524677603665e-06, "epoch": 0.66, "percentage": 65.96, "elapsed_time": "17:39:51", "remaining_time": "9:06:52"} +{"current_steps": 4669, "total_steps": 7063, "loss": 10.0353, "reward": 1.3609, "learning_rate": 2.864988914129041e-06, "epoch": 0.66, "percentage": 66.11, "elapsed_time": "17:42:15", "remaining_time": "9:04:40"} +{"current_steps": 4679, "total_steps": 7063, "loss": 9.5413, "reward": 1.0895, "learning_rate": 2.7849009875464316e-06, "epoch": 0.66, "percentage": 66.25, "elapsed_time": "17:44:31", "remaining_time": "9:02:22"} +{"current_steps": 4689, "total_steps": 7063, "loss": 11.2036, "reward": 0.9274, "learning_rate": 2.7055140319416016e-06, "epoch": 0.66, "percentage": 66.39, "elapsed_time": "17:46:53", "remaining_time": "9:00:09"} +{"current_steps": 4699, "total_steps": 7063, "loss": 10.1615, "reward": 0.7893, "learning_rate": 2.6268531694204016e-06, "epoch": 0.67, "percentage": 66.53, "elapsed_time": "17:49:02", "remaining_time": "8:57:49"} +{"current_steps": 4709, "total_steps": 7063, "loss": 10.3155, "reward": 1.0313, "learning_rate": 2.5489432923156055e-06, "epoch": 0.67, "percentage": 66.67, "elapsed_time": "17:51:23", "remaining_time": "8:55:34"} +{"current_steps": 4719, "total_steps": 7063, "loss": 10.4604, "reward": 1.2473, "learning_rate": 2.471809055309649e-06, "epoch": 0.67, "percentage": 66.81, "elapsed_time": "17:53:40", "remaining_time": "8:53:18"} +{"current_steps": 4729, "total_steps": 7063, "loss": 9.5465, "reward": 1.0535, "learning_rate": 2.3954748676326533e-06, "epoch": 0.67, "percentage": 66.95, "elapsed_time": "17:55:55", "remaining_time": "8:51:01"} +{"current_steps": 4739, "total_steps": 7063, "loss": 10.0163, "reward": 1.0787, "learning_rate": 2.3199648853380735e-06, "epoch": 0.67, "percentage": 67.1, "elapsed_time": "17:58:16", "remaining_time": "8:48:47"} +{"current_steps": 4749, "total_steps": 7063, "loss": 10.9403, "reward": 1.0524, "learning_rate": 2.245303003658512e-06, "epoch": 0.67, "percentage": 67.24, "elapsed_time": "18:00:32", "remaining_time": "8:46:30"} +{"current_steps": 4759, "total_steps": 7063, "loss": 11.018, "reward": 1.0536, "learning_rate": 2.1715128494440217e-06, "epoch": 0.67, "percentage": 67.38, "elapsed_time": "18:02:54", "remaining_time": "8:44:16"} +{"current_steps": 4769, "total_steps": 7063, "loss": 11.7558, "reward": 1.5194, "learning_rate": 2.098617773685337e-06, "epoch": 0.68, "percentage": 67.52, "elapsed_time": "18:05:12", "remaining_time": "8:42:00"} +{"current_steps": 4779, "total_steps": 7063, "loss": 10.2469, "reward": 0.7457, "learning_rate": 2.026640844124475e-06, "epoch": 0.68, "percentage": 67.66, "elapsed_time": "18:07:37", "remaining_time": "8:39:47"} +{"current_steps": 4789, "total_steps": 7063, "loss": 10.7667, "reward": 1.4715, "learning_rate": 1.955604837954867e-06, "epoch": 0.68, "percentage": 67.8, "elapsed_time": "18:09:55", "remaining_time": "8:37:32"} +{"current_steps": 4799, "total_steps": 7063, "loss": 11.7527, "reward": 0.6686, "learning_rate": 1.885532234613514e-06, "epoch": 0.68, "percentage": 67.95, "elapsed_time": "18:12:10", "remaining_time": "8:35:14"} +{"current_steps": 4809, "total_steps": 7063, "loss": 12.3804, "reward": 0.7405, "learning_rate": 1.8164452086673256e-06, "epoch": 0.68, "percentage": 68.09, "elapsed_time": "18:14:27", "remaining_time": "8:32:58"} +{"current_steps": 4819, "total_steps": 7063, "loss": 11.684, "reward": 0.8597, "learning_rate": 1.7483656227959783e-06, "epoch": 0.68, "percentage": 68.23, "elapsed_time": "18:16:46", "remaining_time": "8:30:43"} +{"current_steps": 4829, "total_steps": 7063, "loss": 12.0429, "reward": 1.1068, "learning_rate": 1.6813150208733913e-06, "epoch": 0.68, "percentage": 68.37, "elapsed_time": "18:19:08", "remaining_time": "8:28:29"} +{"current_steps": 4839, "total_steps": 7063, "loss": 12.157, "reward": 1.029, "learning_rate": 1.615314621150197e-06, "epoch": 0.69, "percentage": 68.51, "elapsed_time": "18:21:25", "remaining_time": "8:26:12"} +{"current_steps": 4849, "total_steps": 7063, "loss": 13.4578, "reward": 0.8474, "learning_rate": 1.5503853095391396e-06, "epoch": 0.69, "percentage": 68.65, "elapsed_time": "18:23:51", "remaining_time": "8:24:00"} +{"current_steps": 4859, "total_steps": 7063, "loss": 11.7422, "reward": 0.9289, "learning_rate": 1.4865476330057604e-06, "epoch": 0.69, "percentage": 68.8, "elapsed_time": "18:26:16", "remaining_time": "8:21:47"} +{"current_steps": 4869, "total_steps": 7063, "loss": 11.9643, "reward": 1.1646, "learning_rate": 1.4238217930662312e-06, "epoch": 0.69, "percentage": 68.94, "elapsed_time": "18:28:40", "remaining_time": "8:19:34"} +{"current_steps": 4879, "total_steps": 7063, "loss": 13.1756, "reward": 0.9199, "learning_rate": 1.3622276393945872e-06, "epoch": 0.69, "percentage": 69.08, "elapsed_time": "18:30:57", "remaining_time": "8:17:18"} +{"current_steps": 4889, "total_steps": 7063, "loss": 12.3998, "reward": 1.2203, "learning_rate": 1.3017846635412595e-06, "epoch": 0.69, "percentage": 69.22, "elapsed_time": "18:33:14", "remaining_time": "8:15:01"} +{"current_steps": 4899, "total_steps": 7063, "loss": 13.3518, "reward": 0.9137, "learning_rate": 1.2425119927649727e-06, "epoch": 0.69, "percentage": 69.36, "elapsed_time": "18:35:31", "remaining_time": "8:12:45"} +{"current_steps": 4909, "total_steps": 7063, "loss": 13.3779, "reward": 1.1227, "learning_rate": 1.1844283839798543e-06, "epoch": 0.7, "percentage": 69.5, "elapsed_time": "18:37:46", "remaining_time": "8:10:27"} +{"current_steps": 4919, "total_steps": 7063, "loss": 13.8457, "reward": 0.9895, "learning_rate": 1.1275522178198362e-06, "epoch": 0.7, "percentage": 69.64, "elapsed_time": "18:40:09", "remaining_time": "8:08:14"} +{"current_steps": 4929, "total_steps": 7063, "loss": 12.935, "reward": 1.0785, "learning_rate": 1.0719014928220283e-06, "epoch": 0.7, "percentage": 69.79, "elapsed_time": "18:42:23", "remaining_time": "8:05:56"} +{"current_steps": 4939, "total_steps": 7063, "loss": 14.8186, "reward": 1.18, "learning_rate": 1.0174938197311069e-06, "epoch": 0.7, "percentage": 69.93, "elapsed_time": "18:44:43", "remaining_time": "8:03:41"} +{"current_steps": 4949, "total_steps": 7063, "loss": 13.357, "reward": 1.0398, "learning_rate": 9.643464159263304e-07, "epoch": 0.7, "percentage": 70.07, "elapsed_time": "18:47:03", "remaining_time": "8:01:25"} +{"current_steps": 4959, "total_steps": 7063, "loss": 13.1203, "reward": 0.7378, "learning_rate": 9.124760999731003e-07, "epoch": 0.7, "percentage": 70.21, "elapsed_time": "18:49:13", "remaining_time": "7:59:06"} +{"current_steps": 4969, "total_steps": 7063, "loss": 13.6538, "reward": 1.0154, "learning_rate": 8.618992863006926e-07, "epoch": 0.7, "percentage": 70.35, "elapsed_time": "18:51:33", "remaining_time": "7:56:51"} +{"current_steps": 4979, "total_steps": 7063, "loss": 14.3607, "reward": 1.1812, "learning_rate": 8.126319800079063e-07, "epoch": 0.7, "percentage": 70.49, "elapsed_time": "18:53:59", "remaining_time": "7:54:38"} +{"current_steps": 4989, "total_steps": 7063, "loss": 13.1016, "reward": 1.1047, "learning_rate": 7.646897717982188e-07, "epoch": 0.71, "percentage": 70.64, "elapsed_time": "18:56:20", "remaining_time": "7:52:23"} +{"current_steps": 4999, "total_steps": 7063, "loss": 14.7443, "reward": 0.7647, "learning_rate": 7.180878330460711e-07, "epoch": 0.71, "percentage": 70.78, "elapsed_time": "18:58:41", "remaining_time": "7:50:08"} +{"current_steps": 5009, "total_steps": 7063, "loss": 14.3599, "reward": 1.174, "learning_rate": 6.728409109959033e-07, "epoch": 0.71, "percentage": 70.92, "elapsed_time": "19:00:53", "remaining_time": "7:47:50"} +{"current_steps": 5019, "total_steps": 7063, "loss": 15.0008, "reward": 1.2305, "learning_rate": 6.289633240953291e-07, "epoch": 0.71, "percentage": 71.06, "elapsed_time": "19:03:18", "remaining_time": "7:45:37"} +{"current_steps": 5029, "total_steps": 7063, "loss": 14.599, "reward": 0.6941, "learning_rate": 5.864689574640836e-07, "epoch": 0.71, "percentage": 71.2, "elapsed_time": "19:05:33", "remaining_time": "7:43:19"} +{"current_steps": 5039, "total_steps": 7063, "loss": 14.595, "reward": 1.0988, "learning_rate": 5.453712585000254e-07, "epoch": 0.71, "percentage": 71.34, "elapsed_time": "19:08:05", "remaining_time": "7:41:08"} +{"current_steps": 5049, "total_steps": 7063, "loss": 15.145, "reward": 0.9199, "learning_rate": 5.056832326237087e-07, "epoch": 0.71, "percentage": 71.49, "elapsed_time": "19:10:26", "remaining_time": "7:38:54"} +{"current_steps": 5059, "total_steps": 7063, "loss": 16.1126, "reward": 0.7843, "learning_rate": 4.674174391627939e-07, "epoch": 0.72, "percentage": 71.63, "elapsed_time": "19:12:46", "remaining_time": "7:36:38"} +{"current_steps": 5069, "total_steps": 7063, "loss": 15.0736, "reward": 1.0982, "learning_rate": 4.3058598737764466e-07, "epoch": 0.72, "percentage": 71.77, "elapsed_time": "19:15:13", "remaining_time": "7:34:26"} +{"current_steps": 5079, "total_steps": 7063, "loss": 16.5611, "reward": 0.878, "learning_rate": 3.952005326293129e-07, "epoch": 0.72, "percentage": 71.91, "elapsed_time": "19:17:37", "remaining_time": "7:32:12"} +{"current_steps": 5089, "total_steps": 7063, "loss": 16.3597, "reward": 0.8769, "learning_rate": 3.612722726912116e-07, "epoch": 0.72, "percentage": 72.05, "elapsed_time": "19:19:52", "remaining_time": "7:29:54"} +{"current_steps": 5099, "total_steps": 7063, "loss": 16.8776, "reward": 1.0628, "learning_rate": 3.288119442055371e-07, "epoch": 0.72, "percentage": 72.19, "elapsed_time": "19:22:14", "remaining_time": "7:27:39"} +{"current_steps": 5109, "total_steps": 7063, "loss": 17.8894, "reward": 1.1461, "learning_rate": 2.9782981928567435e-07, "epoch": 0.72, "percentage": 72.33, "elapsed_time": "19:24:27", "remaining_time": "7:25:21"} +{"current_steps": 5119, "total_steps": 7063, "loss": 17.7502, "reward": 1.0463, "learning_rate": 2.6833570226554526e-07, "epoch": 0.72, "percentage": 72.48, "elapsed_time": "19:26:50", "remaining_time": "7:23:07"} +{"current_steps": 5129, "total_steps": 7063, "loss": 17.9116, "reward": 0.7333, "learning_rate": 2.4033892659703293e-07, "epoch": 0.73, "percentage": 72.62, "elapsed_time": "19:29:02", "remaining_time": "7:20:48"} +{"current_steps": 5139, "total_steps": 7063, "loss": 16.8411, "reward": 0.864, "learning_rate": 2.138483518963874e-07, "epoch": 0.73, "percentage": 72.76, "elapsed_time": "19:31:27", "remaining_time": "7:18:35"} +{"current_steps": 5149, "total_steps": 7063, "loss": 17.8577, "reward": 1.0259, "learning_rate": 1.8887236114060404e-07, "epoch": 0.73, "percentage": 72.9, "elapsed_time": "19:33:48", "remaining_time": "7:16:19"} +{"current_steps": 5159, "total_steps": 7063, "loss": 17.8102, "reward": 1.1068, "learning_rate": 1.6541885801459713e-07, "epoch": 0.73, "percentage": 73.04, "elapsed_time": "19:36:05", "remaining_time": "7:14:03"} +{"current_steps": 5169, "total_steps": 7063, "loss": 18.7215, "reward": 1.0976, "learning_rate": 1.434952644100912e-07, "epoch": 0.73, "percentage": 73.18, "elapsed_time": "19:38:28", "remaining_time": "7:11:48"} +{"current_steps": 5179, "total_steps": 7063, "loss": 19.5751, "reward": 0.8139, "learning_rate": 1.2310851807693314e-07, "epoch": 0.73, "percentage": 73.33, "elapsed_time": "19:40:46", "remaining_time": "7:09:32"} +{"current_steps": 5189, "total_steps": 7063, "loss": 18.8442, "reward": 0.8037, "learning_rate": 1.0426507042765155e-07, "epoch": 0.73, "percentage": 73.47, "elapsed_time": "19:43:04", "remaining_time": "7:07:15"} +{"current_steps": 5199, "total_steps": 7063, "loss": 19.6249, "reward": 0.9081, "learning_rate": 8.69708844958822e-08, "epoch": 0.74, "percentage": 73.61, "elapsed_time": "19:45:22", "remaining_time": "7:04:59"} +{"current_steps": 5209, "total_steps": 7063, "loss": 19.3005, "reward": 0.9154, "learning_rate": 7.12314330493663e-08, "epoch": 0.74, "percentage": 73.75, "elapsed_time": "19:47:41", "remaining_time": "7:02:43"} +{"current_steps": 5219, "total_steps": 7063, "loss": 19.015, "reward": 0.5151, "learning_rate": 5.705169685809031e-08, "epoch": 0.74, "percentage": 73.89, "elapsed_time": "19:49:55", "remaining_time": "7:00:25"} +{"current_steps": 5229, "total_steps": 7063, "loss": 20.8741, "reward": 0.777, "learning_rate": 4.4436163118102284e-08, "epoch": 0.74, "percentage": 74.03, "elapsed_time": "19:52:18", "remaining_time": "6:58:11"} +{"current_steps": 5239, "total_steps": 7063, "loss": 21.1491, "reward": 1.3843, "learning_rate": 3.338882403154664e-08, "epoch": 0.74, "percentage": 74.18, "elapsed_time": "19:54:42", "remaining_time": "6:55:56"} +{"current_steps": 5249, "total_steps": 7063, "loss": 20.2964, "reward": 0.7921, "learning_rate": 2.3913175543314183e-08, "epoch": 0.74, "percentage": 74.32, "elapsed_time": "19:57:01", "remaining_time": "6:53:40"} +{"current_steps": 5259, "total_steps": 7063, "loss": 19.0763, "reward": 1.1708, "learning_rate": 1.6012216234756463e-08, "epoch": 0.74, "percentage": 74.46, "elapsed_time": "19:59:21", "remaining_time": "6:51:24"} +{"current_steps": 5269, "total_steps": 7063, "loss": 20.3877, "reward": 0.914, "learning_rate": 9.688446374773176e-09, "epoch": 0.75, "percentage": 74.6, "elapsed_time": "20:01:38", "remaining_time": "6:49:08"} +{"current_steps": 5279, "total_steps": 7063, "loss": 21.6992, "reward": 0.7239, "learning_rate": 4.943867128601176e-09, "epoch": 0.75, "percentage": 74.74, "elapsed_time": "20:04:00", "remaining_time": "6:46:53"} +{"current_steps": 5289, "total_steps": 7063, "loss": 22.0992, "reward": 1.3829, "learning_rate": 1.7799799245449279e-09, "epoch": 0.75, "percentage": 74.88, "elapsed_time": "20:06:22", "remaining_time": "6:44:37"} +{"current_steps": 5299, "total_steps": 7063, "loss": 19.1273, "reward": 0.8702, "learning_rate": 1.9778597884545504e-10, "epoch": 0.75, "percentage": 75.02, "elapsed_time": "20:08:38", "remaining_time": "6:42:21"} +{"current_steps": 5309, "total_steps": 7063, "loss": 21.5772, "reward": 1.2798, "learning_rate": 1.9778597884545504e-10, "epoch": 0.75, "percentage": 75.17, "elapsed_time": "20:10:56", "remaining_time": "6:40:04"} +{"current_steps": 5319, "total_steps": 7063, "loss": 22.6161, "reward": 1.1608, "learning_rate": 1.7799799245449279e-09, "epoch": 0.75, "percentage": 75.31, "elapsed_time": "20:13:14", "remaining_time": "6:37:47"} +{"current_steps": 5329, "total_steps": 7063, "loss": 23.0654, "reward": 1.237, "learning_rate": 4.943867128601176e-09, "epoch": 0.75, "percentage": 75.45, "elapsed_time": "20:15:32", "remaining_time": "6:35:31"} +{"current_steps": 5339, "total_steps": 7063, "loss": 23.4224, "reward": 0.7672, "learning_rate": 9.68844637477262e-09, "epoch": 0.76, "percentage": 75.59, "elapsed_time": "20:17:54", "remaining_time": "6:33:16"} +{"current_steps": 5349, "total_steps": 7063, "loss": 22.5207, "reward": 0.8759, "learning_rate": 1.6012216234756463e-08, "epoch": 0.76, "percentage": 75.73, "elapsed_time": "20:20:09", "remaining_time": "6:30:58"} +{"current_steps": 5359, "total_steps": 7063, "loss": 22.4022, "reward": 0.9359, "learning_rate": 2.3913175543313627e-08, "epoch": 0.76, "percentage": 75.87, "elapsed_time": "20:22:26", "remaining_time": "6:28:41"} +{"current_steps": 5369, "total_steps": 7063, "loss": 24.2224, "reward": 1.14, "learning_rate": 3.3388824031546087e-08, "epoch": 0.76, "percentage": 76.02, "elapsed_time": "20:24:44", "remaining_time": "6:26:25"} +{"current_steps": 5379, "total_steps": 7063, "loss": 23.5394, "reward": 0.9193, "learning_rate": 4.443616311810173e-08, "epoch": 0.76, "percentage": 76.16, "elapsed_time": "20:27:06", "remaining_time": "6:24:10"} +{"current_steps": 5389, "total_steps": 7063, "loss": 25.0942, "reward": 1.0297, "learning_rate": 5.7051696858089754e-08, "epoch": 0.76, "percentage": 76.3, "elapsed_time": "20:29:28", "remaining_time": "6:21:54"} +{"current_steps": 5399, "total_steps": 7063, "loss": 25.1026, "reward": 0.7989, "learning_rate": 7.123143304936575e-08, "epoch": 0.76, "percentage": 76.44, "elapsed_time": "20:31:48", "remaining_time": "6:19:39"} +{"current_steps": 5409, "total_steps": 7063, "loss": 24.8735, "reward": 0.8812, "learning_rate": 8.697088449588164e-08, "epoch": 0.77, "percentage": 76.58, "elapsed_time": "20:34:06", "remaining_time": "6:17:22"} +{"current_steps": 5419, "total_steps": 7063, "loss": 24.6814, "reward": 0.7561, "learning_rate": 1.0426507042765099e-07, "epoch": 0.77, "percentage": 76.72, "elapsed_time": "20:36:23", "remaining_time": "6:15:05"} +{"current_steps": 5429, "total_steps": 7063, "loss": 25.0014, "reward": 0.7191, "learning_rate": 1.2310851807693258e-07, "epoch": 0.77, "percentage": 76.87, "elapsed_time": "20:38:43", "remaining_time": "6:12:49"} +{"current_steps": 5439, "total_steps": 7063, "loss": 25.0474, "reward": 0.9854, "learning_rate": 1.434952644100901e-07, "epoch": 0.77, "percentage": 77.01, "elapsed_time": "20:41:05", "remaining_time": "6:10:34"} +{"current_steps": 5449, "total_steps": 7063, "loss": 24.6665, "reward": 1.0098, "learning_rate": 1.6541885801459601e-07, "epoch": 0.77, "percentage": 77.15, "elapsed_time": "20:43:24", "remaining_time": "6:08:17"} +{"current_steps": 5459, "total_steps": 7063, "loss": 29.131, "reward": 0.8288, "learning_rate": 1.8887236114060293e-07, "epoch": 0.77, "percentage": 77.29, "elapsed_time": "20:45:45", "remaining_time": "6:06:02"} +{"current_steps": 5469, "total_steps": 7063, "loss": 26.0392, "reward": 1.0817, "learning_rate": 2.138483518963863e-07, "epoch": 0.77, "percentage": 77.43, "elapsed_time": "20:48:02", "remaining_time": "6:03:45"} +{"current_steps": 5479, "total_steps": 7063, "loss": 29.0327, "reward": 0.8201, "learning_rate": 2.403389265970318e-07, "epoch": 0.78, "percentage": 77.57, "elapsed_time": "20:50:21", "remaining_time": "6:01:29"} +{"current_steps": 5489, "total_steps": 7063, "loss": 26.231, "reward": 1.3976, "learning_rate": 2.683357022655442e-07, "epoch": 0.78, "percentage": 77.71, "elapsed_time": "20:52:49", "remaining_time": "5:59:15"} +{"current_steps": 5499, "total_steps": 7063, "loss": 28.3856, "reward": 1.2167, "learning_rate": 2.9782981928567324e-07, "epoch": 0.78, "percentage": 77.86, "elapsed_time": "20:55:07", "remaining_time": "5:56:58"} +{"current_steps": 5509, "total_steps": 7063, "loss": 27.6945, "reward": 1.041, "learning_rate": 3.28811944205536e-07, "epoch": 0.78, "percentage": 78.0, "elapsed_time": "20:57:25", "remaining_time": "5:54:41"} +{"current_steps": 5519, "total_steps": 7063, "loss": 29.5519, "reward": 1.19, "learning_rate": 3.612722726912099e-07, "epoch": 0.78, "percentage": 78.14, "elapsed_time": "20:59:46", "remaining_time": "5:52:26"} +{"current_steps": 5529, "total_steps": 7063, "loss": 29.1206, "reward": 1.5435, "learning_rate": 3.9520053262931177e-07, "epoch": 0.78, "percentage": 78.28, "elapsed_time": "21:02:04", "remaining_time": "5:50:09"} +{"current_steps": 5539, "total_steps": 7063, "loss": 28.6978, "reward": 1.1417, "learning_rate": 4.3058598737764355e-07, "epoch": 0.78, "percentage": 78.42, "elapsed_time": "21:04:23", "remaining_time": "5:47:53"} +{"current_steps": 5549, "total_steps": 7063, "loss": 32.4786, "reward": 1.1774, "learning_rate": 4.6741743916279223e-07, "epoch": 0.79, "percentage": 78.56, "elapsed_time": "21:06:44", "remaining_time": "5:45:37"} +{"current_steps": 5559, "total_steps": 7063, "loss": 28.9053, "reward": 0.8577, "learning_rate": 5.056832326237071e-07, "epoch": 0.79, "percentage": 78.71, "elapsed_time": "21:08:58", "remaining_time": "5:43:19"} +{"current_steps": 5569, "total_steps": 7063, "loss": 33.1146, "reward": 0.9498, "learning_rate": 5.453712585000237e-07, "epoch": 0.79, "percentage": 78.85, "elapsed_time": "21:11:21", "remaining_time": "5:41:04"} +{"current_steps": 5579, "total_steps": 7063, "loss": 32.011, "reward": 1.2069, "learning_rate": 5.864689574640819e-07, "epoch": 0.79, "percentage": 78.99, "elapsed_time": "21:13:44", "remaining_time": "5:38:48"} +{"current_steps": 5589, "total_steps": 7063, "loss": 32.2405, "reward": 0.8766, "learning_rate": 6.289633240953275e-07, "epoch": 0.79, "percentage": 79.13, "elapsed_time": "21:16:03", "remaining_time": "5:36:32"} +{"current_steps": 5599, "total_steps": 7063, "loss": 31.6486, "reward": 1.0483, "learning_rate": 6.728409109959011e-07, "epoch": 0.79, "percentage": 79.27, "elapsed_time": "21:18:22", "remaining_time": "5:34:15"} +{"current_steps": 5609, "total_steps": 7063, "loss": 32.3604, "reward": 1.1886, "learning_rate": 7.180878330460695e-07, "epoch": 0.79, "percentage": 79.41, "elapsed_time": "21:20:43", "remaining_time": "5:31:59"} +{"current_steps": 5619, "total_steps": 7063, "loss": 34.0581, "reward": 1.0957, "learning_rate": 7.646897717982165e-07, "epoch": 0.8, "percentage": 79.56, "elapsed_time": "21:22:58", "remaining_time": "5:29:42"} +{"current_steps": 5629, "total_steps": 7063, "loss": 32.3892, "reward": 1.29, "learning_rate": 8.126319800079046e-07, "epoch": 0.8, "percentage": 79.7, "elapsed_time": "21:25:12", "remaining_time": "5:27:24"} +{"current_steps": 5639, "total_steps": 7063, "loss": 36.4483, "reward": 0.5732, "learning_rate": 8.618992863006909e-07, "epoch": 0.8, "percentage": 79.84, "elapsed_time": "21:27:31", "remaining_time": "5:25:08"} +{"current_steps": 5649, "total_steps": 7063, "loss": 32.9463, "reward": 0.8512, "learning_rate": 9.073599436940772e-07, "epoch": 0.8, "percentage": 79.98, "elapsed_time": "21:29:44", "remaining_time": "5:22:50"} +{"current_steps": 5659, "total_steps": 7063, "loss": 31.3008, "reward": 0.9925, "learning_rate": 9.59101641354881e-07, "epoch": 0.8, "percentage": 80.12, "elapsed_time": "21:31:59", "remaining_time": "5:20:32"} +{"current_steps": 5669, "total_steps": 7063, "loss": 35.8149, "reward": 1.0764, "learning_rate": 1.0121220865829944e-06, "epoch": 0.8, "percentage": 80.26, "elapsed_time": "21:34:25", "remaining_time": "5:18:17"} +{"current_steps": 5679, "total_steps": 7063, "loss": 32.4432, "reward": 1.0298, "learning_rate": 1.0664045009892344e-06, "epoch": 0.8, "percentage": 80.4, "elapsed_time": "21:36:42", "remaining_time": "5:16:00"} +{"current_steps": 5689, "total_steps": 7063, "loss": 36.0975, "reward": 0.7165, "learning_rate": 1.1219317068326002e-06, "epoch": 0.81, "percentage": 80.55, "elapsed_time": "21:39:10", "remaining_time": "5:13:46"} +{"current_steps": 5699, "total_steps": 7063, "loss": 36.9366, "reward": 1.1632, "learning_rate": 1.1786861324562364e-06, "epoch": 0.81, "percentage": 80.69, "elapsed_time": "21:41:31", "remaining_time": "5:11:30"} +{"current_steps": 5709, "total_steps": 7063, "loss": 37.8448, "reward": 1.0953, "learning_rate": 1.2366498178479608e-06, "epoch": 0.81, "percentage": 80.83, "elapsed_time": "21:43:47", "remaining_time": "5:09:13"} +{"current_steps": 5719, "total_steps": 7063, "loss": 35.1652, "reward": 0.526, "learning_rate": 1.2958044203237746e-06, "epoch": 0.81, "percentage": 80.97, "elapsed_time": "21:45:58", "remaining_time": "5:06:54"} +{"current_steps": 5729, "total_steps": 7063, "loss": 36.9202, "reward": 0.9273, "learning_rate": 1.3561312203324056e-06, "epoch": 0.81, "percentage": 81.11, "elapsed_time": "21:48:18", "remaining_time": "5:04:38"} +{"current_steps": 5739, "total_steps": 7063, "loss": 40.3897, "reward": 1.1308, "learning_rate": 1.417611127379146e-06, "epoch": 0.81, "percentage": 81.25, "elapsed_time": "21:50:44", "remaining_time": "5:02:23"} +{"current_steps": 5749, "total_steps": 7063, "loss": 39.9514, "reward": 1.2518, "learning_rate": 1.48022468606704e-06, "epoch": 0.81, "percentage": 81.4, "elapsed_time": "21:53:02", "remaining_time": "5:00:06"} +{"current_steps": 5759, "total_steps": 7063, "loss": 41.6909, "reward": 1.2512, "learning_rate": 1.5439520822536135e-06, "epoch": 0.82, "percentage": 81.54, "elapsed_time": "21:55:20", "remaining_time": "4:57:49"} +{"current_steps": 5769, "total_steps": 7063, "loss": 40.2497, "reward": 1.2501, "learning_rate": 1.608773149321035e-06, "epoch": 0.82, "percentage": 81.68, "elapsed_time": "21:57:42", "remaining_time": "4:55:33"} +{"current_steps": 5779, "total_steps": 7063, "loss": 40.7741, "reward": 1.0252, "learning_rate": 1.6746673745579178e-06, "epoch": 0.82, "percentage": 81.82, "elapsed_time": "21:59:59", "remaining_time": "4:53:16"} +{"current_steps": 5789, "total_steps": 7063, "loss": 42.1902, "reward": 1.2204, "learning_rate": 1.741613905650545e-06, "epoch": 0.82, "percentage": 81.96, "elapsed_time": "22:02:16", "remaining_time": "4:50:59"} +{"current_steps": 5799, "total_steps": 7063, "loss": 38.8444, "reward": 0.6678, "learning_rate": 1.8095915572816547e-06, "epoch": 0.82, "percentage": 82.1, "elapsed_time": "22:04:31", "remaining_time": "4:48:42"} +{"current_steps": 5809, "total_steps": 7063, "loss": 42.2058, "reward": 0.8402, "learning_rate": 1.878578817834545e-06, "epoch": 0.82, "percentage": 82.25, "elapsed_time": "22:06:49", "remaining_time": "4:46:25"} +{"current_steps": 5819, "total_steps": 7063, "loss": 41.4845, "reward": 1.6395, "learning_rate": 1.948553856200428e-06, "epoch": 0.82, "percentage": 82.39, "elapsed_time": "22:09:16", "remaining_time": "4:44:10"} +{"current_steps": 5829, "total_steps": 7063, "loss": 39.207, "reward": 0.7803, "learning_rate": 2.0194945286869657e-06, "epoch": 0.83, "percentage": 82.53, "elapsed_time": "22:11:33", "remaining_time": "4:41:53"} +{"current_steps": 5839, "total_steps": 7063, "loss": 42.8872, "reward": 1.2898, "learning_rate": 2.09137838602561e-06, "epoch": 0.83, "percentage": 82.67, "elapsed_time": "22:13:53", "remaining_time": "4:39:37"} +{"current_steps": 5849, "total_steps": 7063, "loss": 44.517, "reward": 1.1872, "learning_rate": 2.164182680475766e-06, "epoch": 0.83, "percentage": 82.81, "elapsed_time": "22:16:09", "remaining_time": "4:37:19"} +{"current_steps": 5859, "total_steps": 7063, "loss": 42.6027, "reward": 0.9368, "learning_rate": 2.237884373023278e-06, "epoch": 0.83, "percentage": 82.95, "elapsed_time": "22:18:26", "remaining_time": "4:35:02"} +{"current_steps": 5869, "total_steps": 7063, "loss": 43.3298, "reward": 1.2487, "learning_rate": 2.312460140671215e-06, "epoch": 0.83, "percentage": 83.1, "elapsed_time": "22:20:51", "remaining_time": "4:32:47"} +{"current_steps": 5879, "total_steps": 7063, "loss": 47.8902, "reward": 1.2663, "learning_rate": 2.387886383820429e-06, "epoch": 0.83, "percentage": 83.24, "elapsed_time": "22:23:10", "remaining_time": "4:30:30"} +{"current_steps": 5889, "total_steps": 7063, "loss": 45.3621, "reward": 0.9799, "learning_rate": 2.4641392337376947e-06, "epoch": 0.83, "percentage": 83.38, "elapsed_time": "22:25:32", "remaining_time": "4:28:14"} +{"current_steps": 5899, "total_steps": 7063, "loss": 46.7022, "reward": 0.6368, "learning_rate": 2.541194560108975e-06, "epoch": 0.84, "percentage": 83.52, "elapsed_time": "22:27:55", "remaining_time": "4:25:58"} +{"current_steps": 5909, "total_steps": 7063, "loss": 46.4664, "reward": 1.1432, "learning_rate": 2.619027978675538e-06, "epoch": 0.84, "percentage": 83.66, "elapsed_time": "22:30:15", "remaining_time": "4:23:41"} +{"current_steps": 5919, "total_steps": 7063, "loss": 50.6503, "reward": 1.2844, "learning_rate": 2.697614858950329e-06, "epoch": 0.84, "percentage": 83.8, "elapsed_time": "22:32:41", "remaining_time": "4:21:26"} +{"current_steps": 5929, "total_steps": 7063, "loss": 45.3292, "reward": 1.3174, "learning_rate": 2.7769303320123823e-06, "epoch": 0.84, "percentage": 83.94, "elapsed_time": "22:35:07", "remaining_time": "4:19:11"} +{"current_steps": 5939, "total_steps": 7063, "loss": 50.5686, "reward": 1.3988, "learning_rate": 2.8569492983765625e-06, "epoch": 0.84, "percentage": 84.09, "elapsed_time": "22:37:26", "remaining_time": "4:16:54"} +{"current_steps": 5949, "total_steps": 7063, "loss": 48.6937, "reward": 0.7872, "learning_rate": 2.9376464359363793e-06, "epoch": 0.84, "percentage": 84.23, "elapsed_time": "22:39:44", "remaining_time": "4:14:37"} +{"current_steps": 5959, "total_steps": 7063, "loss": 47.5586, "reward": 1.1002, "learning_rate": 3.0189962079771767e-06, "epoch": 0.84, "percentage": 84.37, "elapsed_time": "22:41:58", "remaining_time": "4:12:19"} +{"current_steps": 5969, "total_steps": 7063, "loss": 49.5598, "reward": 0.872, "learning_rate": 3.100972871257271e-06, "epoch": 0.85, "percentage": 84.51, "elapsed_time": "22:44:14", "remaining_time": "4:10:02"} +{"current_steps": 5979, "total_steps": 7063, "loss": 46.403, "reward": 0.4291, "learning_rate": 3.1835504841544017e-06, "epoch": 0.85, "percentage": 84.65, "elapsed_time": "22:46:39", "remaining_time": "4:07:46"} +{"current_steps": 5989, "total_steps": 7063, "loss": 50.4731, "reward": 0.7624, "learning_rate": 3.2667029148750496e-06, "epoch": 0.85, "percentage": 84.79, "elapsed_time": "22:48:57", "remaining_time": "4:05:29"} +{"current_steps": 5999, "total_steps": 7063, "loss": 48.9891, "reward": 0.6599, "learning_rate": 3.3504038497238113e-06, "epoch": 0.85, "percentage": 84.94, "elapsed_time": "22:51:19", "remaining_time": "4:03:13"} +{"current_steps": 6009, "total_steps": 7063, "loss": 49.4926, "reward": 0.8475, "learning_rate": 3.4346268014304675e-06, "epoch": 0.85, "percentage": 85.08, "elapsed_time": "22:53:35", "remaining_time": "4:00:55"} +{"current_steps": 6019, "total_steps": 7063, "loss": 48.2269, "reward": 1.0827, "learning_rate": 3.519345117531848e-06, "epoch": 0.85, "percentage": 85.22, "elapsed_time": "22:55:50", "remaining_time": "3:58:38"} +{"current_steps": 6029, "total_steps": 7063, "loss": 57.1803, "reward": 1.0233, "learning_rate": 3.6045319888061005e-06, "epoch": 0.85, "percentage": 85.36, "elapsed_time": "22:58:14", "remaining_time": "3:56:22"} +{"current_steps": 6039, "total_steps": 7063, "loss": 62.3532, "reward": 1.2877, "learning_rate": 3.690160457756472e-06, "epoch": 0.86, "percentage": 85.5, "elapsed_time": "23:00:32", "remaining_time": "3:54:05"} +{"current_steps": 6049, "total_steps": 7063, "loss": 58.1885, "reward": 1.3709, "learning_rate": 3.7762034271420256e-06, "epoch": 0.86, "percentage": 85.64, "elapsed_time": "23:02:54", "remaining_time": "3:51:49"} +{"current_steps": 6059, "total_steps": 7063, "loss": 52.3508, "reward": 1.2359, "learning_rate": 3.862633668552651e-06, "epoch": 0.86, "percentage": 85.79, "elapsed_time": "23:05:05", "remaining_time": "3:49:30"} +{"current_steps": 6069, "total_steps": 7063, "loss": 57.1927, "reward": 1.1431, "learning_rate": 3.949423831025467e-06, "epoch": 0.86, "percentage": 85.93, "elapsed_time": "23:07:27", "remaining_time": "3:47:14"} +{"current_steps": 6079, "total_steps": 7063, "loss": 56.5176, "reward": 0.8168, "learning_rate": 4.036546449700138e-06, "epoch": 0.86, "percentage": 86.07, "elapsed_time": "23:09:45", "remaining_time": "3:44:57"} +{"current_steps": 6089, "total_steps": 7063, "loss": 54.7755, "reward": 1.1523, "learning_rate": 4.123973954510088e-06, "epoch": 0.86, "percentage": 86.21, "elapsed_time": "23:12:08", "remaining_time": "3:42:41"} +{"current_steps": 6099, "total_steps": 7063, "loss": 60.7935, "reward": 1.2583, "learning_rate": 4.2116786789071585e-06, "epoch": 0.86, "percentage": 86.35, "elapsed_time": "23:14:28", "remaining_time": "3:40:24"} +{"current_steps": 6109, "total_steps": 7063, "loss": 61.5845, "reward": 1.4049, "learning_rate": 4.299632868616677e-06, "epoch": 0.86, "percentage": 86.49, "elapsed_time": "23:16:47", "remaining_time": "3:38:07"} +{"current_steps": 6119, "total_steps": 7063, "loss": 55.3898, "reward": 0.5877, "learning_rate": 4.387808690420356e-06, "epoch": 0.87, "percentage": 86.63, "elapsed_time": "23:19:10", "remaining_time": "3:35:51"} +{"current_steps": 6129, "total_steps": 7063, "loss": 59.5955, "reward": 0.807, "learning_rate": 4.476178240964118e-06, "epoch": 0.87, "percentage": 86.78, "elapsed_time": "23:21:28", "remaining_time": "3:33:34"} +{"current_steps": 6139, "total_steps": 7063, "loss": 57.5526, "reward": 1.0594, "learning_rate": 4.564713555588204e-06, "epoch": 0.87, "percentage": 86.92, "elapsed_time": "23:23:41", "remaining_time": "3:31:16"} +{"current_steps": 6149, "total_steps": 7063, "loss": 60.5878, "reward": 0.856, "learning_rate": 4.653386617176564e-06, "epoch": 0.87, "percentage": 87.06, "elapsed_time": "23:26:00", "remaining_time": "3:28:59"} +{"current_steps": 6159, "total_steps": 7063, "loss": 64.5847, "reward": 1.2434, "learning_rate": 4.742169365022975e-06, "epoch": 0.87, "percentage": 87.2, "elapsed_time": "23:28:25", "remaining_time": "3:26:43"} +{"current_steps": 6169, "total_steps": 7063, "loss": 56.7269, "reward": 0.5983, "learning_rate": 4.8310337037108165e-06, "epoch": 0.87, "percentage": 87.34, "elapsed_time": "23:30:42", "remaining_time": "3:24:26"} +{"current_steps": 6179, "total_steps": 7063, "loss": 63.8131, "reward": 0.9494, "learning_rate": 4.919951512003969e-06, "epoch": 0.87, "percentage": 87.48, "elapsed_time": "23:33:01", "remaining_time": "3:22:09"} +{"current_steps": 6189, "total_steps": 7063, "loss": 64.3347, "reward": 0.8326, "learning_rate": 5.008894651745777e-06, "epoch": 0.88, "percentage": 87.63, "elapsed_time": "23:35:19", "remaining_time": "3:19:52"} +{"current_steps": 6199, "total_steps": 7063, "loss": 64.7426, "reward": 1.1951, "learning_rate": 5.097834976763422e-06, "epoch": 0.88, "percentage": 87.77, "elapsed_time": "23:37:38", "remaining_time": "3:17:35"} +{"current_steps": 6209, "total_steps": 7063, "loss": 68.0429, "reward": 1.0377, "learning_rate": 5.186744341774786e-06, "epoch": 0.88, "percentage": 87.91, "elapsed_time": "23:39:56", "remaining_time": "3:15:18"} +{"current_steps": 6219, "total_steps": 7063, "loss": 64.5824, "reward": 0.901, "learning_rate": 5.275594611295115e-06, "epoch": 0.88, "percentage": 88.05, "elapsed_time": "23:42:16", "remaining_time": "3:13:01"} +{"current_steps": 6229, "total_steps": 7063, "loss": 66.7038, "reward": 0.9975, "learning_rate": 5.364357668540472e-06, "epoch": 0.88, "percentage": 88.19, "elapsed_time": "23:44:42", "remaining_time": "3:10:45"} +{"current_steps": 6239, "total_steps": 7063, "loss": 67.0337, "reward": 1.3321, "learning_rate": 5.453005424325388e-06, "epoch": 0.88, "percentage": 88.33, "elapsed_time": "23:47:03", "remaining_time": "3:08:28"} +{"current_steps": 6249, "total_steps": 7063, "loss": 63.6395, "reward": 0.9098, "learning_rate": 5.541509825951659e-06, "epoch": 0.88, "percentage": 88.48, "elapsed_time": "23:49:27", "remaining_time": "3:06:12"} +{"current_steps": 6259, "total_steps": 7063, "loss": 67.9033, "reward": 1.1441, "learning_rate": 5.629842866085728e-06, "epoch": 0.89, "percentage": 88.62, "elapsed_time": "23:51:48", "remaining_time": "3:03:55"} +{"current_steps": 6269, "total_steps": 7063, "loss": 70.9057, "reward": 0.8948, "learning_rate": 5.717976591621569e-06, "epoch": 0.89, "percentage": 88.76, "elapsed_time": "23:54:04", "remaining_time": "3:01:37"} +{"current_steps": 6279, "total_steps": 7063, "loss": 71.5159, "reward": 1.16, "learning_rate": 5.805883112526548e-06, "epoch": 0.89, "percentage": 88.9, "elapsed_time": "23:56:29", "remaining_time": "2:59:21"} +{"current_steps": 6289, "total_steps": 7063, "loss": 76.1282, "reward": 0.7389, "learning_rate": 5.8935346106672345e-06, "epoch": 0.89, "percentage": 89.04, "elapsed_time": "23:58:47", "remaining_time": "2:57:04"} +{"current_steps": 6299, "total_steps": 7063, "loss": 70.8815, "reward": 0.7639, "learning_rate": 5.980903348612458e-06, "epoch": 0.89, "percentage": 89.18, "elapsed_time": "1 day, 0:01:05", "remaining_time": "2:54:47"} +{"current_steps": 6309, "total_steps": 7063, "loss": 77.9602, "reward": 1.1827, "learning_rate": 6.067961678410916e-06, "epoch": 0.89, "percentage": 89.32, "elapsed_time": "1 day, 0:03:23", "remaining_time": "2:52:30"} +{"current_steps": 6319, "total_steps": 7063, "loss": 72.6772, "reward": 0.9062, "learning_rate": 6.154682050340336e-06, "epoch": 0.89, "percentage": 89.47, "elapsed_time": "1 day, 0:05:48", "remaining_time": "2:50:13"} +{"current_steps": 6329, "total_steps": 7063, "loss": 79.4807, "reward": 0.9883, "learning_rate": 6.2410370216256875e-06, "epoch": 0.9, "percentage": 89.61, "elapsed_time": "1 day, 0:08:07", "remaining_time": "2:47:56"} +{"current_steps": 6339, "total_steps": 7063, "loss": 74.7358, "reward": 1.2483, "learning_rate": 6.326999265123421e-06, "epoch": 0.9, "percentage": 89.75, "elapsed_time": "1 day, 0:10:28", "remaining_time": "2:45:39"} +{"current_steps": 6349, "total_steps": 7063, "loss": 83.4259, "reward": 1.0581, "learning_rate": 6.412541577969229e-06, "epoch": 0.9, "percentage": 89.89, "elapsed_time": "1 day, 0:12:40", "remaining_time": "2:43:21"} +{"current_steps": 6359, "total_steps": 7063, "loss": 89.0968, "reward": 0.8648, "learning_rate": 6.497636890186405e-06, "epoch": 0.9, "percentage": 90.03, "elapsed_time": "1 day, 0:15:06", "remaining_time": "2:41:05"} +{"current_steps": 6369, "total_steps": 7063, "loss": 77.8441, "reward": 0.5074, "learning_rate": 6.582258273252179e-06, "epoch": 0.9, "percentage": 90.17, "elapsed_time": "1 day, 0:17:23", "remaining_time": "2:38:48"} +{"current_steps": 6379, "total_steps": 7063, "loss": 78.9905, "reward": 0.8686, "learning_rate": 6.666378948619256e-06, "epoch": 0.9, "percentage": 90.32, "elapsed_time": "1 day, 0:19:41", "remaining_time": "2:36:31"} +{"current_steps": 6389, "total_steps": 7063, "loss": 78.3821, "reward": 0.954, "learning_rate": 6.749972296189986e-06, "epoch": 0.9, "percentage": 90.46, "elapsed_time": "1 day, 0:22:01", "remaining_time": "2:34:14"} +{"current_steps": 6399, "total_steps": 7063, "loss": 82.9752, "reward": 0.6261, "learning_rate": 6.833011862740247e-06, "epoch": 0.91, "percentage": 90.6, "elapsed_time": "1 day, 0:24:23", "remaining_time": "2:31:57"} +{"current_steps": 6409, "total_steps": 7063, "loss": 80.6557, "reward": 1.1273, "learning_rate": 6.915471370290685e-06, "epoch": 0.91, "percentage": 90.74, "elapsed_time": "1 day, 0:26:41", "remaining_time": "2:29:39"} +{"current_steps": 6419, "total_steps": 7063, "loss": 80.5177, "reward": 0.5851, "learning_rate": 6.9973247244223385e-06, "epoch": 0.91, "percentage": 90.88, "elapsed_time": "1 day, 0:28:57", "remaining_time": "2:27:22"} +{"current_steps": 6429, "total_steps": 7063, "loss": 85.0407, "reward": 0.8287, "learning_rate": 7.078546022534314e-06, "epoch": 0.91, "percentage": 91.02, "elapsed_time": "1 day, 0:31:19", "remaining_time": "2:25:05"} +{"current_steps": 6439, "total_steps": 7063, "loss": 84.4299, "reward": 0.8082, "learning_rate": 7.1591095620406605e-06, "epoch": 0.91, "percentage": 91.17, "elapsed_time": "1 day, 0:33:37", "remaining_time": "2:22:48"} +{"current_steps": 6449, "total_steps": 7063, "loss": 90.9555, "reward": 1.3112, "learning_rate": 7.2389898485040076e-06, "epoch": 0.91, "percentage": 91.31, "elapsed_time": "1 day, 0:35:54", "remaining_time": "2:20:31"} +{"current_steps": 6459, "total_steps": 7063, "loss": 96.0831, "reward": 0.7038, "learning_rate": 7.318161603703281e-06, "epoch": 0.91, "percentage": 91.45, "elapsed_time": "1 day, 0:38:16", "remaining_time": "2:18:14"} +{"current_steps": 6469, "total_steps": 7063, "loss": 90.6466, "reward": 1.1145, "learning_rate": 7.39659977363308e-06, "epoch": 0.92, "percentage": 91.59, "elapsed_time": "1 day, 0:40:31", "remaining_time": "2:15:56"} +{"current_steps": 6479, "total_steps": 7063, "loss": 95.4607, "reward": 0.9177, "learning_rate": 7.474279536431989e-06, "epoch": 0.92, "percentage": 91.73, "elapsed_time": "1 day, 0:42:49", "remaining_time": "2:13:39"} +{"current_steps": 6489, "total_steps": 7063, "loss": 98.4857, "reward": 1.0056, "learning_rate": 7.551176310237556e-06, "epoch": 0.92, "percentage": 91.87, "elapsed_time": "1 day, 0:45:11", "remaining_time": "2:11:22"} +{"current_steps": 6499, "total_steps": 7063, "loss": 90.1566, "reward": 0.8314, "learning_rate": 7.627265760965185e-06, "epoch": 0.92, "percentage": 92.01, "elapsed_time": "1 day, 0:47:33", "remaining_time": "2:09:05"} +{"current_steps": 6509, "total_steps": 7063, "loss": 97.2564, "reward": 1.4078, "learning_rate": 7.702523810008747e-06, "epoch": 0.92, "percentage": 92.16, "elapsed_time": "1 day, 0:49:54", "remaining_time": "2:06:48"} +{"current_steps": 6519, "total_steps": 7063, "loss": 98.7758, "reward": 1.3081, "learning_rate": 7.776926641860259e-06, "epoch": 0.92, "percentage": 92.3, "elapsed_time": "1 day, 0:52:16", "remaining_time": "2:04:31"} +{"current_steps": 6529, "total_steps": 7063, "loss": 103.4797, "reward": 1.0738, "learning_rate": 7.850450711646322e-06, "epoch": 0.92, "percentage": 92.44, "elapsed_time": "1 day, 0:54:28", "remaining_time": "2:02:13"} +{"current_steps": 6539, "total_steps": 7063, "loss": 103.8763, "reward": 0.9816, "learning_rate": 7.923072752578964e-06, "epoch": 0.93, "percentage": 92.58, "elapsed_time": "1 day, 0:56:48", "remaining_time": "1:59:56"} +{"current_steps": 6549, "total_steps": 7063, "loss": 100.5021, "reward": 0.948, "learning_rate": 7.994769783318397e-06, "epoch": 0.93, "percentage": 92.72, "elapsed_time": "1 day, 0:59:08", "remaining_time": "1:57:39"} +{"current_steps": 6559, "total_steps": 7063, "loss": 103.6766, "reward": 0.7321, "learning_rate": 8.065519115245542e-06, "epoch": 0.93, "percentage": 92.86, "elapsed_time": "1 day, 1:01:30", "remaining_time": "1:55:22"} +{"current_steps": 6569, "total_steps": 7063, "loss": 97.4703, "reward": 1.2891, "learning_rate": 8.135298359641825e-06, "epoch": 0.93, "percentage": 93.01, "elapsed_time": "1 day, 1:03:54", "remaining_time": "1:53:05"} +{"current_steps": 6579, "total_steps": 7063, "loss": 96.2505, "reward": 1.2956, "learning_rate": 8.204085434774159e-06, "epoch": 0.93, "percentage": 93.15, "elapsed_time": "1 day, 1:06:18", "remaining_time": "1:50:48"} +{"current_steps": 6589, "total_steps": 7063, "loss": 99.8216, "reward": 0.7402, "learning_rate": 8.271858572882704e-06, "epoch": 0.93, "percentage": 93.29, "elapsed_time": "1 day, 1:08:40", "remaining_time": "1:48:31"} +{"current_steps": 6599, "total_steps": 7063, "loss": 99.5118, "reward": 0.9592, "learning_rate": 8.338596327069332e-06, "epoch": 0.93, "percentage": 93.43, "elapsed_time": "1 day, 1:10:59", "remaining_time": "1:46:14"} +{"current_steps": 6609, "total_steps": 7063, "loss": 105.8172, "reward": 0.7717, "learning_rate": 8.404277578084488e-06, "epoch": 0.94, "percentage": 93.57, "elapsed_time": "1 day, 1:13:16", "remaining_time": "1:43:57"} +{"current_steps": 6619, "total_steps": 7063, "loss": 102.5928, "reward": 1.1715, "learning_rate": 8.46888154101045e-06, "epoch": 0.94, "percentage": 93.71, "elapsed_time": "1 day, 1:15:34", "remaining_time": "1:41:39"} +{"current_steps": 6629, "total_steps": 7063, "loss": 96.6503, "reward": 0.8611, "learning_rate": 8.532387771838693e-06, "epoch": 0.94, "percentage": 93.86, "elapsed_time": "1 day, 1:17:54", "remaining_time": "1:39:22"} +{"current_steps": 6639, "total_steps": 7063, "loss": 108.7798, "reward": 1.0045, "learning_rate": 8.594776173939482e-06, "epoch": 0.94, "percentage": 94.0, "elapsed_time": "1 day, 1:20:15", "remaining_time": "1:37:05"} +{"current_steps": 6649, "total_steps": 7063, "loss": 102.2718, "reward": 1.2318, "learning_rate": 8.656027004421407e-06, "epoch": 0.94, "percentage": 94.14, "elapsed_time": "1 day, 1:22:31", "remaining_time": "1:34:48"} +{"current_steps": 6659, "total_steps": 7063, "loss": 114.8146, "reward": 1.2226, "learning_rate": 8.716120880379119e-06, "epoch": 0.94, "percentage": 94.28, "elapsed_time": "1 day, 1:24:49", "remaining_time": "1:32:30"} +{"current_steps": 6669, "total_steps": 7063, "loss": 118.673, "reward": 1.2495, "learning_rate": 8.775038785027017e-06, "epoch": 0.94, "percentage": 94.42, "elapsed_time": "1 day, 1:27:07", "remaining_time": "1:30:13"} +{"current_steps": 6679, "total_steps": 7063, "loss": 105.3002, "reward": 1.0926, "learning_rate": 8.832762073717173e-06, "epoch": 0.95, "percentage": 94.56, "elapsed_time": "1 day, 1:29:24", "remaining_time": "1:27:55"} +{"current_steps": 6689, "total_steps": 7063, "loss": 105.6608, "reward": 0.9392, "learning_rate": 8.889272479839424e-06, "epoch": 0.95, "percentage": 94.7, "elapsed_time": "1 day, 1:31:42", "remaining_time": "1:25:38"} +{"current_steps": 6699, "total_steps": 7063, "loss": 113.6069, "reward": 0.7844, "learning_rate": 8.944552120601897e-06, "epoch": 0.95, "percentage": 94.85, "elapsed_time": "1 day, 1:33:59", "remaining_time": "1:23:21"} +{"current_steps": 6709, "total_steps": 7063, "loss": 112.2587, "reward": 0.794, "learning_rate": 8.998583502690018e-06, "epoch": 0.95, "percentage": 94.99, "elapsed_time": "1 day, 1:36:17", "remaining_time": "1:21:03"} +{"current_steps": 6719, "total_steps": 7063, "loss": 114.1278, "reward": 0.6663, "learning_rate": 9.05134952780229e-06, "epoch": 0.95, "percentage": 95.13, "elapsed_time": "1 day, 1:38:37", "remaining_time": "1:18:46"} +{"current_steps": 6729, "total_steps": 7063, "loss": 120.6909, "reward": 1.0002, "learning_rate": 9.10283349806111e-06, "epoch": 0.95, "percentage": 95.27, "elapsed_time": "1 day, 1:40:55", "remaining_time": "1:16:29"} +{"current_steps": 6739, "total_steps": 7063, "loss": 115.4897, "reward": 0.8669, "learning_rate": 9.153019121296794e-06, "epoch": 0.95, "percentage": 95.41, "elapsed_time": "1 day, 1:43:13", "remaining_time": "1:14:11"} +{"current_steps": 6749, "total_steps": 7063, "loss": 119.1285, "reward": 1.1836, "learning_rate": 9.201890516203278e-06, "epoch": 0.96, "percentage": 95.55, "elapsed_time": "1 day, 1:45:38", "remaining_time": "1:11:54"} +{"current_steps": 6759, "total_steps": 7063, "loss": 138.4996, "reward": 1.1062, "learning_rate": 9.249432217363754e-06, "epoch": 0.96, "percentage": 95.7, "elapsed_time": "1 day, 1:47:55", "remaining_time": "1:09:37"} +{"current_steps": 6769, "total_steps": 7063, "loss": 120.5787, "reward": 0.9629, "learning_rate": 9.295629180144762e-06, "epoch": 0.96, "percentage": 95.84, "elapsed_time": "1 day, 1:50:15", "remaining_time": "1:07:19"} +{"current_steps": 6779, "total_steps": 7063, "loss": 122.6466, "reward": 0.6222, "learning_rate": 9.340466785457039e-06, "epoch": 0.96, "percentage": 95.98, "elapsed_time": "1 day, 1:52:36", "remaining_time": "1:05:02"} +{"current_steps": 6789, "total_steps": 7063, "loss": 119.8385, "reward": 1.0842, "learning_rate": 9.383930844381784e-06, "epoch": 0.96, "percentage": 96.12, "elapsed_time": "1 day, 1:54:54", "remaining_time": "1:02:45"} +{"current_steps": 6799, "total_steps": 7063, "loss": 127.7601, "reward": 1.1969, "learning_rate": 9.426007602660732e-06, "epoch": 0.96, "percentage": 96.26, "elapsed_time": "1 day, 1:57:11", "remaining_time": "1:00:27"} +{"current_steps": 6809, "total_steps": 7063, "loss": 139.4465, "reward": 0.7267, "learning_rate": 9.466683745048734e-06, "epoch": 0.96, "percentage": 96.4, "elapsed_time": "1 day, 1:59:29", "remaining_time": "0:58:10"} +{"current_steps": 6819, "total_steps": 7063, "loss": 133.0782, "reward": 0.308, "learning_rate": 9.505946399527365e-06, "epoch": 0.97, "percentage": 96.55, "elapsed_time": "1 day, 2:01:48", "remaining_time": "0:55:53"} +{"current_steps": 6829, "total_steps": 7063, "loss": 137.3628, "reward": 1.474, "learning_rate": 9.543783141378298e-06, "epoch": 0.97, "percentage": 96.69, "elapsed_time": "1 day, 2:04:13", "remaining_time": "0:53:35"} +{"current_steps": 6839, "total_steps": 7063, "loss": 132.7314, "reward": 0.8059, "learning_rate": 9.580181997115115e-06, "epoch": 0.97, "percentage": 96.83, "elapsed_time": "1 day, 2:06:33", "remaining_time": "0:51:18"} +{"current_steps": 6849, "total_steps": 7063, "loss": 127.4448, "reward": 0.97, "learning_rate": 9.615131448272345e-06, "epoch": 0.97, "percentage": 96.97, "elapsed_time": "1 day, 2:08:53", "remaining_time": "0:49:01"} +{"current_steps": 6859, "total_steps": 7063, "loss": 141.4186, "reward": 0.8046, "learning_rate": 9.648620435050486e-06, "epoch": 0.97, "percentage": 97.11, "elapsed_time": "1 day, 2:11:13", "remaining_time": "0:46:43"} +{"current_steps": 6869, "total_steps": 7063, "loss": 141.4553, "reward": 0.6626, "learning_rate": 9.680638359815904e-06, "epoch": 0.97, "percentage": 97.25, "elapsed_time": "1 day, 2:13:29", "remaining_time": "0:44:26"} +{"current_steps": 6879, "total_steps": 7063, "loss": 144.86, "reward": 0.9381, "learning_rate": 9.71117509045445e-06, "epoch": 0.97, "percentage": 97.39, "elapsed_time": "1 day, 2:15:46", "remaining_time": "0:42:08"} +{"current_steps": 6889, "total_steps": 7063, "loss": 142.082, "reward": 0.8949, "learning_rate": 9.740220963577806e-06, "epoch": 0.98, "percentage": 97.54, "elapsed_time": "1 day, 2:18:03", "remaining_time": "0:39:51"} +{"current_steps": 6899, "total_steps": 7063, "loss": 148.9801, "reward": 0.4727, "learning_rate": 9.767766787581455e-06, "epoch": 0.98, "percentage": 97.68, "elapsed_time": "1 day, 2:20:24", "remaining_time": "0:37:34"} +{"current_steps": 6909, "total_steps": 7063, "loss": 150.5426, "reward": 0.9139, "learning_rate": 9.793803845553388e-06, "epoch": 0.98, "percentage": 97.82, "elapsed_time": "1 day, 2:22:36", "remaining_time": "0:35:16"} +{"current_steps": 6919, "total_steps": 7063, "loss": 139.7014, "reward": 0.6917, "learning_rate": 9.815940382948672e-06, "epoch": 0.98, "percentage": 97.96, "elapsed_time": "1 day, 2:24:53", "remaining_time": "0:32:59"} +{"current_steps": 6929, "total_steps": 7063, "loss": 145.0367, "reward": 0.9368, "learning_rate": 9.83908848246203e-06, "epoch": 0.98, "percentage": 98.1, "elapsed_time": "1 day, 2:27:09", "remaining_time": "0:30:41"} +{"current_steps": 6939, "total_steps": 7063, "loss": 147.8457, "reward": 1.0734, "learning_rate": 9.860705246100099e-06, "epoch": 0.98, "percentage": 98.24, "elapsed_time": "1 day, 2:29:29", "remaining_time": "0:28:24"} +{"current_steps": 6949, "total_steps": 7063, "loss": 156.1528, "reward": 0.8522, "learning_rate": 9.88078383320978e-06, "epoch": 0.98, "percentage": 98.39, "elapsed_time": "1 day, 2:31:48", "remaining_time": "0:26:06"} +{"current_steps": 6959, "total_steps": 7063, "loss": 156.5533, "reward": 1.1273, "learning_rate": 9.899317889895932e-06, "epoch": 0.99, "percentage": 98.53, "elapsed_time": "1 day, 2:34:06", "remaining_time": "0:23:49"} +{"current_steps": 6969, "total_steps": 7063, "loss": 158.8596, "reward": 0.642, "learning_rate": 9.916301551032096e-06, "epoch": 0.99, "percentage": 98.67, "elapsed_time": "1 day, 2:36:26", "remaining_time": "0:21:31"} +{"current_steps": 6979, "total_steps": 7063, "loss": 163.7104, "reward": 1.1445, "learning_rate": 9.931729442116505e-06, "epoch": 0.99, "percentage": 98.81, "elapsed_time": "1 day, 2:38:41", "remaining_time": "0:19:14"} +{"current_steps": 6989, "total_steps": 7063, "loss": 159.2521, "reward": 1.2233, "learning_rate": 9.94559668097286e-06, "epoch": 0.99, "percentage": 98.95, "elapsed_time": "1 day, 2:41:02", "remaining_time": "0:16:57"} +{"current_steps": 6999, "total_steps": 7063, "loss": 163.0544, "reward": 1.0569, "learning_rate": 9.957898879295298e-06, "epoch": 0.99, "percentage": 99.09, "elapsed_time": "1 day, 2:43:27", "remaining_time": "0:14:39"} +{"current_steps": 7009, "total_steps": 7063, "loss": 168.6254, "reward": 0.9953, "learning_rate": 9.968632144037083e-06, "epoch": 0.99, "percentage": 99.24, "elapsed_time": "1 day, 2:45:42", "remaining_time": "0:12:22"} +{"current_steps": 7019, "total_steps": 7063, "loss": 182.9738, "reward": 1.1448, "learning_rate": 9.977793078642554e-06, "epoch": 0.99, "percentage": 99.38, "elapsed_time": "1 day, 2:47:55", "remaining_time": "0:10:04"} +{"current_steps": 7029, "total_steps": 7063, "loss": 180.7952, "reward": 0.6883, "learning_rate": 9.985378784121987e-06, "epoch": 1.0, "percentage": 99.52, "elapsed_time": "1 day, 2:50:13", "remaining_time": "0:07:47"} +{"current_steps": 7039, "total_steps": 7063, "loss": 181.614, "reward": 1.3635, "learning_rate": 9.991386859968964e-06, "epoch": 1.0, "percentage": 99.66, "elapsed_time": "1 day, 2:52:36", "remaining_time": "0:05:29"} +{"current_steps": 7049, "total_steps": 7063, "loss": 182.2882, "reward": 0.869, "learning_rate": 9.995815404920029e-06, "epoch": 1.0, "percentage": 99.8, "elapsed_time": "1 day, 2:54:53", "remaining_time": "0:03:12"} +{"current_steps": 7059, "total_steps": 7063, "loss": 176.3692, "reward": 1.3847, "learning_rate": 9.998663017556347e-06, "epoch": 1.0, "percentage": 99.94, "elapsed_time": "1 day, 2:57:18", "remaining_time": "0:00:54"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bb78e2a108c0ddab307e84221c56e7ac8933fad8 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,4958 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": null, + "global_step": 0, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 9.999612341935039e-06, + "loss": 0.2855, + "reward": 0.7376, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 9.99771375326247e-06, + "loss": 0.275, + "reward": 0.722, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 9.99423363154034e-06, + "loss": 0.2593, + "reward": 1.2159, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 9.98975021111248e-06, + "loss": 0.2415, + "reward": 1.1798, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 9.983268624014507e-06, + "loss": 0.2528, + "reward": 1.3194, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 9.975210075047007e-06, + "loss": 0.2665, + "reward": 1.1497, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 9.965577114348328e-06, + "loss": 0.2498, + "reward": 0.8864, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 9.954372790281476e-06, + "loss": 0.2489, + "reward": 1.3557, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 9.941600648469446e-06, + "loss": 0.2236, + "reward": 0.8862, + "step": 89 + }, + { + "epoch": 0.01, + "learning_rate": 9.92726473067321e-06, + "loss": 0.1953, + "reward": 0.9721, + "step": 99 + }, + { + "epoch": 0.02, + "learning_rate": 9.91136957351269e-06, + "loss": 0.2028, + "reward": 1.3082, + "step": 109 + }, + { + "epoch": 0.02, + "learning_rate": 9.893920207031147e-06, + "loss": 0.2024, + "reward": 1.037, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 9.874922153103414e-06, + "loss": 0.1851, + "reward": 1.3262, + "step": 129 + }, + { + "epoch": 0.02, + "learning_rate": 9.854381423688484e-06, + "loss": 0.1869, + "reward": 1.4129, + "step": 139 + }, + { + "epoch": 0.02, + "learning_rate": 9.832304518927032e-06, + "loss": 0.174, + "reward": 1.7339, + "step": 149 + }, + { + "epoch": 0.02, + "learning_rate": 9.808698425084422e-06, + "loss": 0.1867, + "reward": 1.6142, + "step": 159 + }, + { + "epoch": 0.02, + "learning_rate": 9.783570612339908e-06, + "loss": 0.1922, + "reward": 1.2096, + "step": 169 + }, + { + "epoch": 0.03, + "learning_rate": 9.756929032422675e-06, + "loss": 0.1731, + "reward": 1.2908, + "step": 179 + }, + { + "epoch": 0.03, + "learning_rate": 9.728782116095511e-06, + "loss": 0.1698, + "reward": 1.8801, + "step": 189 + }, + { + "epoch": 0.03, + "learning_rate": 9.69913877048688e-06, + "loss": 0.1775, + "reward": 1.3951, + "step": 199 + }, + { + "epoch": 0.03, + "learning_rate": 9.668008376272242e-06, + "loss": 0.1776, + "reward": 1.7341, + "step": 209 + }, + { + "epoch": 0.03, + "learning_rate": 9.635400784705537e-06, + "loss": 0.1701, + "reward": 1.2821, + "step": 219 + }, + { + "epoch": 0.03, + "learning_rate": 9.601326314501736e-06, + "loss": 0.1645, + "reward": 1.606, + "step": 229 + }, + { + "epoch": 0.03, + "learning_rate": 9.565795748571473e-06, + "loss": 0.1665, + "reward": 1.6989, + "step": 239 + }, + { + "epoch": 0.04, + "learning_rate": 9.52882033060878e-06, + "loss": 0.162, + "reward": 1.9307, + "step": 249 + }, + { + "epoch": 0.04, + "learning_rate": 9.490411761532994e-06, + "loss": 0.1672, + "reward": 1.6651, + "step": 259 + }, + { + "epoch": 0.04, + "learning_rate": 9.450582195786009e-06, + "loss": 0.1824, + "reward": 1.155, + "step": 269 + }, + { + "epoch": 0.04, + "learning_rate": 9.409344237485962e-06, + "loss": 0.1994, + "reward": 1.5433, + "step": 279 + }, + { + "epoch": 0.04, + "learning_rate": 9.366710936438656e-06, + "loss": 0.1849, + "reward": 1.6171, + "step": 289 + }, + { + "epoch": 0.04, + "learning_rate": 9.322695784007936e-06, + "loss": 0.1827, + "reward": 1.7545, + "step": 299 + }, + { + "epoch": 0.04, + "learning_rate": 9.277312708846318e-06, + "loss": 0.2003, + "reward": 1.5035, + "step": 309 + }, + { + "epoch": 0.05, + "learning_rate": 9.230576072487254e-06, + "loss": 0.1857, + "reward": 1.62, + "step": 319 + }, + { + "epoch": 0.05, + "learning_rate": 9.182500664800408e-06, + "loss": 0.1695, + "reward": 1.5519, + "step": 329 + }, + { + "epoch": 0.05, + "learning_rate": 9.133101699311382e-06, + "loss": 0.1876, + "reward": 1.7547, + "step": 339 + }, + { + "epoch": 0.05, + "learning_rate": 9.082394808387372e-06, + "loss": 0.1843, + "reward": 1.3446, + "step": 349 + }, + { + "epoch": 0.05, + "learning_rate": 9.030396038290285e-06, + "loss": 0.1749, + "reward": 1.6229, + "step": 359 + }, + { + "epoch": 0.05, + "learning_rate": 8.977121844098891e-06, + "loss": 0.1981, + "reward": 1.2249, + "step": 369 + }, + { + "epoch": 0.05, + "learning_rate": 8.922589084501567e-06, + "loss": 0.1955, + "reward": 1.3132, + "step": 379 + }, + { + "epoch": 0.06, + "learning_rate": 8.866815016461373e-06, + "loss": 0.1977, + "reward": 1.5104, + "step": 389 + }, + { + "epoch": 0.06, + "learning_rate": 8.809817289755034e-06, + "loss": 0.1719, + "reward": 1.6746, + "step": 399 + }, + { + "epoch": 0.06, + "learning_rate": 8.751613941387669e-06, + "loss": 0.17, + "reward": 1.6719, + "step": 409 + }, + { + "epoch": 0.06, + "learning_rate": 8.692223389884945e-06, + "loss": 0.1927, + "reward": 1.6034, + "step": 419 + }, + { + "epoch": 0.06, + "learning_rate": 8.63166442946451e-06, + "loss": 0.1759, + "reward": 1.746, + "step": 429 + }, + { + "epoch": 0.06, + "learning_rate": 8.569956224088549e-06, + "loss": 0.1667, + "reward": 1.7022, + "step": 439 + }, + { + "epoch": 0.06, + "learning_rate": 8.507118301399305e-06, + "loss": 0.1903, + "reward": 1.5584, + "step": 449 + }, + { + "epoch": 0.06, + "learning_rate": 8.443170546539546e-06, + "loss": 0.1737, + "reward": 1.7671, + "step": 459 + }, + { + "epoch": 0.07, + "learning_rate": 8.378133195859885e-06, + "loss": 0.1814, + "reward": 2.2036, + "step": 469 + }, + { + "epoch": 0.07, + "learning_rate": 8.31202683051495e-06, + "loss": 0.1912, + "reward": 1.7684, + "step": 479 + }, + { + "epoch": 0.07, + "learning_rate": 8.24487236995046e-06, + "loss": 0.1997, + "reward": 1.9846, + "step": 489 + }, + { + "epoch": 0.07, + "learning_rate": 8.176691065283236e-06, + "loss": 0.1778, + "reward": 1.7488, + "step": 499 + }, + { + "epoch": 0.07, + "learning_rate": 8.107504492576258e-06, + "loss": 0.1843, + "reward": 1.6116, + "step": 509 + }, + { + "epoch": 0.07, + "learning_rate": 8.03733454601089e-06, + "loss": 0.1809, + "reward": 1.9922, + "step": 519 + }, + { + "epoch": 0.07, + "learning_rate": 7.96620343095844e-06, + "loss": 0.1781, + "reward": 2.1922, + "step": 529 + }, + { + "epoch": 0.08, + "learning_rate": 7.894133656953241e-06, + "loss": 0.1959, + "reward": 1.9254, + "step": 539 + }, + { + "epoch": 0.08, + "learning_rate": 7.821148030569475e-06, + "loss": 0.2071, + "reward": 1.6299, + "step": 549 + }, + { + "epoch": 0.08, + "learning_rate": 7.747269648204006e-06, + "loss": 0.1883, + "reward": 1.9824, + "step": 559 + }, + { + "epoch": 0.08, + "learning_rate": 7.672521888767501e-06, + "loss": 0.2009, + "reward": 1.9649, + "step": 569 + }, + { + "epoch": 0.08, + "learning_rate": 7.596928406286133e-06, + "loss": 0.1975, + "reward": 1.7986, + "step": 579 + }, + { + "epoch": 0.08, + "learning_rate": 7.520513122416237e-06, + "loss": 0.185, + "reward": 2.3423, + "step": 589 + }, + { + "epoch": 0.08, + "learning_rate": 7.443300218874276e-06, + "loss": 0.1919, + "reward": 1.9097, + "step": 599 + }, + { + "epoch": 0.09, + "learning_rate": 7.365314129784498e-06, + "loss": 0.1971, + "reward": 1.6558, + "step": 609 + }, + { + "epoch": 0.09, + "learning_rate": 7.286579533946727e-06, + "loss": 0.1985, + "reward": 1.7381, + "step": 619 + }, + { + "epoch": 0.09, + "learning_rate": 7.207121347026728e-06, + "loss": 0.2122, + "reward": 1.6083, + "step": 629 + }, + { + "epoch": 0.09, + "learning_rate": 7.126964713671609e-06, + "loss": 0.1916, + "reward": 2.0961, + "step": 639 + }, + { + "epoch": 0.09, + "learning_rate": 7.046134999552765e-06, + "loss": 0.1757, + "reward": 1.895, + "step": 649 + }, + { + "epoch": 0.09, + "learning_rate": 6.964657783338879e-06, + "loss": 0.2049, + "reward": 2.0765, + "step": 659 + }, + { + "epoch": 0.09, + "learning_rate": 6.882558848601516e-06, + "loss": 0.1991, + "reward": 1.7684, + "step": 669 + }, + { + "epoch": 0.1, + "learning_rate": 6.799864175655886e-06, + "loss": 0.1916, + "reward": 1.4878, + "step": 679 + }, + { + "epoch": 0.1, + "learning_rate": 6.716599933339328e-06, + "loss": 0.1762, + "reward": 1.9724, + "step": 689 + }, + { + "epoch": 0.1, + "learning_rate": 6.632792470730155e-06, + "loss": 0.1778, + "reward": 1.5174, + "step": 699 + }, + { + "epoch": 0.1, + "learning_rate": 6.5484683088094525e-06, + "loss": 0.1683, + "reward": 1.9022, + "step": 709 + }, + { + "epoch": 0.1, + "learning_rate": 6.4636541320684755e-06, + "loss": 0.1777, + "reward": 1.8625, + "step": 719 + }, + { + "epoch": 0.1, + "learning_rate": 6.378376780064313e-06, + "loss": 0.1821, + "reward": 1.8226, + "step": 729 + }, + { + "epoch": 0.1, + "learning_rate": 6.292663238926471e-06, + "loss": 0.1752, + "reward": 1.535, + "step": 739 + }, + { + "epoch": 0.11, + "learning_rate": 6.206540632817073e-06, + "loss": 0.1853, + "reward": 1.6309, + "step": 749 + }, + { + "epoch": 0.11, + "learning_rate": 6.12003621534739e-06, + "loss": 0.1836, + "reward": 1.4308, + "step": 759 + }, + { + "epoch": 0.11, + "learning_rate": 6.033177360953402e-06, + "loss": 0.1656, + "reward": 1.6336, + "step": 769 + }, + { + "epoch": 0.11, + "learning_rate": 5.9459915562331075e-06, + "loss": 0.1735, + "reward": 1.3411, + "step": 779 + }, + { + "epoch": 0.11, + "learning_rate": 5.8585063912483694e-06, + "loss": 0.163, + "reward": 1.3965, + "step": 789 + }, + { + "epoch": 0.11, + "learning_rate": 5.770749550793997e-06, + "loss": 0.1873, + "reward": 1.2479, + "step": 799 + }, + { + "epoch": 0.11, + "learning_rate": 5.682748805636855e-06, + "loss": 0.1717, + "reward": 1.7367, + "step": 809 + }, + { + "epoch": 0.12, + "learning_rate": 5.594532003727772e-06, + "loss": 0.1735, + "reward": 1.9298, + "step": 819 + }, + { + "epoch": 0.12, + "learning_rate": 5.506127061389015e-06, + "loss": 0.1944, + "reward": 1.7795, + "step": 829 + }, + { + "epoch": 0.12, + "learning_rate": 5.417561954480141e-06, + "loss": 0.1597, + "reward": 1.8013, + "step": 839 + }, + { + "epoch": 0.12, + "learning_rate": 5.32886470954499e-06, + "loss": 0.1848, + "reward": 1.5395, + "step": 849 + }, + { + "epoch": 0.12, + "learning_rate": 5.240063394942656e-06, + "loss": 0.1773, + "reward": 1.6115, + "step": 859 + }, + { + "epoch": 0.12, + "learning_rate": 5.151186111965214e-06, + "loss": 0.1817, + "reward": 1.1288, + "step": 869 + }, + { + "epoch": 0.12, + "learning_rate": 5.06226098594503e-06, + "loss": 0.1786, + "reward": 1.5076, + "step": 879 + }, + { + "epoch": 0.13, + "learning_rate": 4.973316157354464e-06, + "loss": 0.1925, + "reward": 1.6208, + "step": 889 + }, + { + "epoch": 0.13, + "learning_rate": 4.8843797729007815e-06, + "loss": 0.1887, + "reward": 1.0692, + "step": 899 + }, + { + "epoch": 0.13, + "learning_rate": 4.795479976619088e-06, + "loss": 0.1736, + "reward": 1.3587, + "step": 909 + }, + { + "epoch": 0.13, + "learning_rate": 4.7066449009661146e-06, + "loss": 0.1914, + "reward": 1.6322, + "step": 919 + }, + { + "epoch": 0.13, + "learning_rate": 4.617902657917662e-06, + "loss": 0.1851, + "reward": 1.6663, + "step": 929 + }, + { + "epoch": 0.13, + "learning_rate": 4.5292813300725406e-06, + "loss": 0.1947, + "reward": 1.5436, + "step": 939 + }, + { + "epoch": 0.13, + "learning_rate": 4.440808961765778e-06, + "loss": 0.1975, + "reward": 1.591, + "step": 949 + }, + { + "epoch": 0.14, + "learning_rate": 4.352513550193965e-06, + "loss": 0.1912, + "reward": 1.5074, + "step": 959 + }, + { + "epoch": 0.14, + "learning_rate": 4.2644230365555e-06, + "loss": 0.1783, + "reward": 0.999, + "step": 969 + }, + { + "epoch": 0.14, + "learning_rate": 4.176565297208565e-06, + "loss": 0.205, + "reward": 1.3848, + "step": 979 + }, + { + "epoch": 0.14, + "learning_rate": 4.088968134849611e-06, + "loss": 0.2145, + "reward": 1.1693, + "step": 989 + }, + { + "epoch": 0.14, + "learning_rate": 4.001659269715164e-06, + "loss": 0.2206, + "reward": 1.2571, + "step": 999 + }, + { + "epoch": 0.14, + "learning_rate": 3.914666330809712e-06, + "loss": 0.2008, + "reward": 1.6519, + "step": 1009 + }, + { + "epoch": 0.14, + "learning_rate": 3.828016847162479e-06, + "loss": 0.2291, + "reward": 1.3704, + "step": 1019 + }, + { + "epoch": 0.15, + "learning_rate": 3.7417382391158208e-06, + "loss": 0.1886, + "reward": 1.7832, + "step": 1029 + }, + { + "epoch": 0.15, + "learning_rate": 3.6558578096480236e-06, + "loss": 0.2089, + "reward": 1.6251, + "step": 1039 + }, + { + "epoch": 0.15, + "learning_rate": 3.5704027357332476e-06, + "loss": 0.1983, + "reward": 1.2402, + "step": 1049 + }, + { + "epoch": 0.15, + "learning_rate": 3.4854000597413275e-06, + "loss": 0.2186, + "reward": 1.3914, + "step": 1059 + }, + { + "epoch": 0.15, + "learning_rate": 3.400876680880184e-06, + "loss": 0.2031, + "reward": 1.6209, + "step": 1069 + }, + { + "epoch": 0.15, + "learning_rate": 3.316859346683536e-06, + "loss": 0.2185, + "reward": 1.0933, + "step": 1079 + }, + { + "epoch": 0.15, + "learning_rate": 3.233374644546607e-06, + "loss": 0.2212, + "reward": 1.3092, + "step": 1089 + }, + { + "epoch": 0.16, + "learning_rate": 3.1504489933125066e-06, + "loss": 0.2131, + "reward": 1.0979, + "step": 1099 + }, + { + "epoch": 0.16, + "learning_rate": 3.068108634911958e-06, + "loss": 0.2027, + "reward": 1.3274, + "step": 1109 + }, + { + "epoch": 0.16, + "learning_rate": 2.9863796260590017e-06, + "loss": 0.2107, + "reward": 1.7021, + "step": 1119 + }, + { + "epoch": 0.16, + "learning_rate": 2.9052878300053122e-06, + "loss": 0.2234, + "reward": 1.5654, + "step": 1129 + }, + { + "epoch": 0.16, + "learning_rate": 2.8248589083557478e-06, + "loss": 0.2363, + "reward": 1.3214, + "step": 1139 + }, + { + "epoch": 0.16, + "learning_rate": 2.7451183129476843e-06, + "loss": 0.2293, + "reward": 1.3435, + "step": 1149 + }, + { + "epoch": 0.16, + "learning_rate": 2.666091277796769e-06, + "loss": 0.245, + "reward": 1.4686, + "step": 1159 + }, + { + "epoch": 0.17, + "learning_rate": 2.5878028111115692e-06, + "loss": 0.2216, + "reward": 1.38, + "step": 1169 + }, + { + "epoch": 0.17, + "learning_rate": 2.510277687379693e-06, + "loss": 0.2204, + "reward": 1.3663, + "step": 1179 + }, + { + "epoch": 0.17, + "learning_rate": 2.4335404395278793e-06, + "loss": 0.2143, + "reward": 1.5055, + "step": 1189 + }, + { + "epoch": 0.17, + "learning_rate": 2.357615351158507e-06, + "loss": 0.2343, + "reward": 1.2318, + "step": 1199 + }, + { + "epoch": 0.17, + "learning_rate": 2.282526448865034e-06, + "loss": 0.2296, + "reward": 1.3596, + "step": 1209 + }, + { + "epoch": 0.17, + "learning_rate": 2.208297494628734e-06, + "loss": 0.2347, + "reward": 1.8732, + "step": 1219 + }, + { + "epoch": 0.17, + "learning_rate": 2.1349519782991995e-06, + "loss": 0.2326, + "reward": 1.3454, + "step": 1229 + }, + { + "epoch": 0.18, + "learning_rate": 2.062513110160938e-06, + "loss": 0.244, + "reward": 1.8838, + "step": 1239 + }, + { + "epoch": 0.18, + "learning_rate": 1.9910038135884573e-06, + "loss": 0.2634, + "reward": 1.5223, + "step": 1249 + }, + { + "epoch": 0.18, + "learning_rate": 1.920446717792122e-06, + "loss": 0.2283, + "reward": 1.7335, + "step": 1259 + }, + { + "epoch": 0.18, + "learning_rate": 1.8508641506571213e-06, + "loss": 0.2352, + "reward": 1.1295, + "step": 1269 + }, + { + "epoch": 0.18, + "learning_rate": 1.7822781316777738e-06, + "loss": 0.2543, + "reward": 1.2374, + "step": 1279 + }, + { + "epoch": 0.18, + "learning_rate": 1.7147103649894336e-06, + "loss": 0.2583, + "reward": 1.191, + "step": 1289 + }, + { + "epoch": 0.18, + "learning_rate": 1.6481822325001817e-06, + "loss": 0.2475, + "reward": 1.1935, + "step": 1299 + }, + { + "epoch": 0.19, + "learning_rate": 1.5827147871245042e-06, + "loss": 0.2583, + "reward": 1.3764, + "step": 1309 + }, + { + "epoch": 0.19, + "learning_rate": 1.5183287461210578e-06, + "loss": 0.2756, + "reward": 0.9899, + "step": 1319 + }, + { + "epoch": 0.19, + "learning_rate": 1.4550444845366767e-06, + "loss": 0.2693, + "reward": 1.9678, + "step": 1329 + }, + { + "epoch": 0.19, + "learning_rate": 1.3928820287586542e-06, + "loss": 0.2551, + "reward": 1.3028, + "step": 1339 + }, + { + "epoch": 0.19, + "learning_rate": 1.3318610501773743e-06, + "loss": 0.2811, + "reward": 1.1528, + "step": 1349 + }, + { + "epoch": 0.19, + "learning_rate": 1.2720008589612642e-06, + "loss": 0.2728, + "reward": 1.3348, + "step": 1359 + }, + { + "epoch": 0.19, + "learning_rate": 1.213320397946079e-06, + "loss": 0.2606, + "reward": 1.3202, + "step": 1369 + }, + { + "epoch": 0.2, + "learning_rate": 1.1558382366404014e-06, + "loss": 0.2795, + "reward": 1.5301, + "step": 1379 + }, + { + "epoch": 0.2, + "learning_rate": 1.0995725653493155e-06, + "loss": 0.2739, + "reward": 1.417, + "step": 1389 + }, + { + "epoch": 0.2, + "learning_rate": 1.0445411894180397e-06, + "loss": 0.2953, + "reward": 1.4266, + "step": 1399 + }, + { + "epoch": 0.2, + "learning_rate": 9.907615235974206e-07, + "loss": 0.2637, + "reward": 1.5203, + "step": 1409 + }, + { + "epoch": 0.2, + "learning_rate": 9.382505865329972e-07, + "loss": 0.2821, + "reward": 1.5489, + "step": 1419 + }, + { + "epoch": 0.2, + "learning_rate": 8.870249953794418e-07, + "loss": 0.2894, + "reward": 1.4433, + "step": 1429 + }, + { + "epoch": 0.2, + "learning_rate": 8.371009605420277e-07, + "loss": 0.3028, + "reward": 0.7403, + "step": 1439 + }, + { + "epoch": 0.21, + "learning_rate": 7.88494280546836e-07, + "loss": 0.2893, + "reward": 1.714, + "step": 1449 + }, + { + "epoch": 0.21, + "learning_rate": 7.412203370412801e-07, + "loss": 0.3189, + "reward": 1.4108, + "step": 1459 + }, + { + "epoch": 0.21, + "learning_rate": 6.952940899265754e-07, + "loss": 0.3155, + "reward": 1.8459, + "step": 1469 + }, + { + "epoch": 0.21, + "learning_rate": 6.507300726236476e-07, + "loss": 0.3231, + "reward": 1.6761, + "step": 1479 + }, + { + "epoch": 0.21, + "learning_rate": 6.075423874740216e-07, + "loss": 0.3309, + "reward": 1.4689, + "step": 1489 + }, + { + "epoch": 0.21, + "learning_rate": 5.657447012771117e-07, + "loss": 0.3089, + "reward": 1.1545, + "step": 1499 + }, + { + "epoch": 0.21, + "learning_rate": 5.253502409653488e-07, + "loss": 0.3586, + "reward": 1.3766, + "step": 1509 + }, + { + "epoch": 0.22, + "learning_rate": 4.863717894184949e-07, + "loss": 0.3262, + "reward": 1.49, + "step": 1519 + }, + { + "epoch": 0.22, + "learning_rate": 4.4882168141849037e-07, + "loss": 0.3213, + "reward": 1.5069, + "step": 1529 + }, + { + "epoch": 0.22, + "learning_rate": 4.1271179974609167e-07, + "loss": 0.3275, + "reward": 1.4944, + "step": 1539 + }, + { + "epoch": 0.22, + "learning_rate": 3.78053571420553e-07, + "loss": 0.3374, + "reward": 1.1834, + "step": 1549 + }, + { + "epoch": 0.22, + "learning_rate": 3.44857964083527e-07, + "loss": 0.3382, + "reward": 1.3526, + "step": 1559 + }, + { + "epoch": 0.22, + "learning_rate": 3.1313548252834615e-07, + "loss": 0.3413, + "reward": 1.3988, + "step": 1569 + }, + { + "epoch": 0.22, + "learning_rate": 2.8289616537576005e-07, + "loss": 0.3608, + "reward": 1.1628, + "step": 1579 + }, + { + "epoch": 0.22, + "learning_rate": 2.5414958189720837e-07, + "loss": 0.3299, + "reward": 1.0217, + "step": 1589 + }, + { + "epoch": 0.23, + "learning_rate": 2.2690482898660438e-07, + "loss": 0.4025, + "reward": 1.5555, + "step": 1599 + }, + { + "epoch": 0.23, + "learning_rate": 2.0117052828161953e-07, + "loss": 0.378, + "reward": 1.9486, + "step": 1609 + }, + { + "epoch": 0.23, + "learning_rate": 1.7695482343534686e-07, + "loss": 0.3806, + "reward": 1.1219, + "step": 1619 + }, + { + "epoch": 0.23, + "learning_rate": 1.5426537753923775e-07, + "loss": 0.3702, + "reward": 1.4053, + "step": 1629 + }, + { + "epoch": 0.23, + "learning_rate": 1.3310937069810181e-07, + "loss": 0.3674, + "reward": 1.2728, + "step": 1639 + }, + { + "epoch": 0.23, + "learning_rate": 1.1349349775795604e-07, + "loss": 0.3861, + "reward": 1.2004, + "step": 1649 + }, + { + "epoch": 0.23, + "learning_rate": 9.542396618743233e-08, + "loss": 0.3939, + "reward": 1.2368, + "step": 1659 + }, + { + "epoch": 0.24, + "learning_rate": 7.890649411341378e-08, + "loss": 0.3778, + "reward": 1.7163, + "step": 1669 + }, + { + "epoch": 0.24, + "learning_rate": 6.394630851152717e-08, + "loss": 0.4324, + "reward": 1.3397, + "step": 1679 + }, + { + "epoch": 0.24, + "learning_rate": 5.0548143552061055e-08, + "loss": 0.4204, + "reward": 1.0164, + "step": 1689 + }, + { + "epoch": 0.24, + "learning_rate": 3.871623910182865e-08, + "loss": 0.4092, + "reward": 1.6092, + "step": 1699 + }, + { + "epoch": 0.24, + "learning_rate": 2.845433938245823e-08, + "loss": 0.4246, + "reward": 1.5384, + "step": 1709 + }, + { + "epoch": 0.24, + "learning_rate": 1.976569178552934e-08, + "loss": 0.3803, + "reward": 1.2442, + "step": 1719 + }, + { + "epoch": 0.24, + "learning_rate": 1.2653045844930322e-08, + "loss": 0.4257, + "reward": 1.5448, + "step": 1729 + }, + { + "epoch": 0.25, + "learning_rate": 7.1186523667665655e-09, + "loss": 0.4955, + "reward": 1.1579, + "step": 1739 + }, + { + "epoch": 0.25, + "learning_rate": 3.164262717086919e-09, + "loss": 0.478, + "reward": 1.1204, + "step": 1749 + }, + { + "epoch": 0.25, + "learning_rate": 7.911282676653642e-10, + "loss": 0.4577, + "reward": 1.1286, + "step": 1759 + }, + { + "epoch": 0.25, + "learning_rate": 0.0, + "loss": 0.4663, + "reward": 1.3502, + "step": 1769 + }, + { + "epoch": 0.25, + "learning_rate": 7.911282676653642e-10, + "loss": 0.4789, + "reward": 1.5621, + "step": 1779 + }, + { + "epoch": 0.25, + "learning_rate": 3.164262717086919e-09, + "loss": 0.4973, + "reward": 1.1185, + "step": 1789 + }, + { + "epoch": 0.25, + "learning_rate": 7.1186523667665655e-09, + "loss": 0.4797, + "reward": 1.4696, + "step": 1799 + }, + { + "epoch": 0.26, + "learning_rate": 1.2653045844930322e-08, + "loss": 0.478, + "reward": 1.1051, + "step": 1809 + }, + { + "epoch": 0.26, + "learning_rate": 1.976569178552934e-08, + "loss": 0.5791, + "reward": 1.267, + "step": 1819 + }, + { + "epoch": 0.26, + "learning_rate": 2.845433938245823e-08, + "loss": 0.4895, + "reward": 1.3581, + "step": 1829 + }, + { + "epoch": 0.26, + "learning_rate": 3.871623910182809e-08, + "loss": 0.4846, + "reward": 1.2857, + "step": 1839 + }, + { + "epoch": 0.26, + "learning_rate": 5.05481435520605e-08, + "loss": 0.5179, + "reward": 0.9826, + "step": 1849 + }, + { + "epoch": 0.26, + "learning_rate": 6.394630851152661e-08, + "loss": 0.5052, + "reward": 1.5427, + "step": 1859 + }, + { + "epoch": 0.26, + "learning_rate": 7.890649411341267e-08, + "loss": 0.508, + "reward": 1.4578, + "step": 1869 + }, + { + "epoch": 0.27, + "learning_rate": 9.542396618743177e-08, + "loss": 0.5538, + "reward": 1.3281, + "step": 1879 + }, + { + "epoch": 0.27, + "learning_rate": 1.1349349775795659e-07, + "loss": 0.5131, + "reward": 0.9273, + "step": 1889 + }, + { + "epoch": 0.27, + "learning_rate": 1.3310937069810181e-07, + "loss": 0.5548, + "reward": 1.1374, + "step": 1899 + }, + { + "epoch": 0.27, + "learning_rate": 1.542653775392383e-07, + "loss": 0.6159, + "reward": 1.3267, + "step": 1909 + }, + { + "epoch": 0.27, + "learning_rate": 1.7695482343534686e-07, + "loss": 0.554, + "reward": 1.2857, + "step": 1919 + }, + { + "epoch": 0.27, + "learning_rate": 2.0117052828161953e-07, + "loss": 0.5569, + "reward": 1.354, + "step": 1929 + }, + { + "epoch": 0.27, + "learning_rate": 2.2690482898660438e-07, + "loss": 0.6261, + "reward": 1.116, + "step": 1939 + }, + { + "epoch": 0.28, + "learning_rate": 2.5414958189720784e-07, + "loss": 0.6047, + "reward": 1.5964, + "step": 1949 + }, + { + "epoch": 0.28, + "learning_rate": 2.8289616537575947e-07, + "loss": 0.6132, + "reward": 1.2502, + "step": 1959 + }, + { + "epoch": 0.28, + "learning_rate": 3.1313548252834557e-07, + "loss": 0.5468, + "reward": 1.2231, + "step": 1969 + }, + { + "epoch": 0.28, + "learning_rate": 3.4485796408352645e-07, + "loss": 0.5976, + "reward": 1.0455, + "step": 1979 + }, + { + "epoch": 0.28, + "learning_rate": 3.7805357142055245e-07, + "loss": 0.6308, + "reward": 0.9335, + "step": 1989 + }, + { + "epoch": 0.28, + "learning_rate": 4.127117997460911e-07, + "loss": 0.6496, + "reward": 1.6818, + "step": 1999 + }, + { + "epoch": 0.28, + "learning_rate": 4.488216814184898e-07, + "loss": 0.6392, + "reward": 1.3996, + "step": 2009 + }, + { + "epoch": 0.29, + "learning_rate": 4.863717894184944e-07, + "loss": 0.6849, + "reward": 1.2105, + "step": 2019 + }, + { + "epoch": 0.29, + "learning_rate": 5.253502409653483e-07, + "loss": 0.6536, + "reward": 1.5729, + "step": 2029 + }, + { + "epoch": 0.29, + "learning_rate": 5.657447012771112e-07, + "loss": 0.6429, + "reward": 1.3701, + "step": 2039 + }, + { + "epoch": 0.29, + "learning_rate": 6.075423874740211e-07, + "loss": 0.6516, + "reward": 1.1983, + "step": 2049 + }, + { + "epoch": 0.29, + "learning_rate": 6.50730072623646e-07, + "loss": 0.676, + "reward": 1.1014, + "step": 2059 + }, + { + "epoch": 0.29, + "learning_rate": 6.952940899265737e-07, + "loss": 0.6676, + "reward": 1.1706, + "step": 2069 + }, + { + "epoch": 0.29, + "learning_rate": 7.365668228328826e-07, + "loss": 0.6, + "reward": 1.5086, + "step": 2079 + }, + { + "epoch": 0.3, + "learning_rate": 7.837066628946432e-07, + "loss": 0.6679, + "reward": 1.2791, + "step": 2089 + }, + { + "epoch": 0.3, + "learning_rate": 8.321807544939064e-07, + "loss": 0.7675, + "reward": 0.9776, + "step": 2099 + }, + { + "epoch": 0.3, + "learning_rate": 8.819737579410242e-07, + "loss": 0.7572, + "reward": 0.9887, + "step": 2109 + }, + { + "epoch": 0.3, + "learning_rate": 9.330699161749757e-07, + "loss": 0.6559, + "reward": 1.4601, + "step": 2119 + }, + { + "epoch": 0.3, + "learning_rate": 9.854530597496987e-07, + "loss": 0.6565, + "reward": 0.9818, + "step": 2129 + }, + { + "epoch": 0.3, + "learning_rate": 1.0391066119509434e-06, + "loss": 0.7718, + "reward": 1.2949, + "step": 2139 + }, + { + "epoch": 0.3, + "learning_rate": 1.0940135940419849e-06, + "loss": 0.7361, + "reward": 1.2424, + "step": 2149 + }, + { + "epoch": 0.31, + "learning_rate": 1.1501566306365751e-06, + "loss": 0.7542, + "reward": 1.5161, + "step": 2159 + }, + { + "epoch": 0.31, + "learning_rate": 1.2075179551973992e-06, + "loss": 0.8184, + "reward": 1.0929, + "step": 2169 + }, + { + "epoch": 0.31, + "learning_rate": 1.2660794156583274e-06, + "loss": 0.7978, + "reward": 1.0925, + "step": 2179 + }, + { + "epoch": 0.31, + "learning_rate": 1.3258224801686499e-06, + "loss": 0.827, + "reward": 1.184, + "step": 2189 + }, + { + "epoch": 0.31, + "learning_rate": 1.3867282429575173e-06, + "loss": 0.823, + "reward": 1.4141, + "step": 2199 + }, + { + "epoch": 0.31, + "learning_rate": 1.4487774303166852e-06, + "loss": 0.7578, + "reward": 1.3979, + "step": 2209 + }, + { + "epoch": 0.31, + "learning_rate": 1.5119504066997131e-06, + "loss": 0.7393, + "reward": 1.1531, + "step": 2219 + }, + { + "epoch": 0.32, + "learning_rate": 1.5762271809356506e-06, + "loss": 0.766, + "reward": 1.1854, + "step": 2229 + }, + { + "epoch": 0.32, + "learning_rate": 1.6415874125552804e-06, + "loss": 0.8646, + "reward": 1.1649, + "step": 2239 + }, + { + "epoch": 0.32, + "learning_rate": 1.708010418227873e-06, + "loss": 0.8664, + "reward": 1.2346, + "step": 2249 + }, + { + "epoch": 0.32, + "learning_rate": 1.7754751783064666e-06, + "loss": 1.0222, + "reward": 1.5159, + "step": 2259 + }, + { + "epoch": 0.32, + "learning_rate": 1.8439603434795529e-06, + "loss": 0.8788, + "reward": 0.8888, + "step": 2269 + }, + { + "epoch": 0.32, + "learning_rate": 1.9134442415270972e-06, + "loss": 0.9189, + "reward": 1.0559, + "step": 2279 + }, + { + "epoch": 0.32, + "learning_rate": 1.9839048841787427e-06, + "loss": 0.8378, + "reward": 1.3561, + "step": 2289 + }, + { + "epoch": 0.33, + "learning_rate": 2.055319974072026e-06, + "loss": 0.9199, + "reward": 1.0484, + "step": 2299 + }, + { + "epoch": 0.33, + "learning_rate": 2.1276669118084022e-06, + "loss": 0.914, + "reward": 1.1703, + "step": 2309 + }, + { + "epoch": 0.33, + "learning_rate": 2.2009228031048692e-06, + "loss": 0.911, + "reward": 1.1389, + "step": 2319 + }, + { + "epoch": 0.33, + "learning_rate": 2.275064466038872e-06, + "loss": 0.958, + "reward": 1.4305, + "step": 2329 + }, + { + "epoch": 0.33, + "learning_rate": 2.3500684383842686e-06, + "loss": 0.9604, + "reward": 1.1523, + "step": 2339 + }, + { + "epoch": 0.33, + "learning_rate": 2.425910985035971e-06, + "loss": 0.9629, + "reward": 1.3146, + "step": 2349 + }, + { + "epoch": 0.33, + "learning_rate": 2.502568105520966e-06, + "loss": 0.9562, + "reward": 1.2473, + "step": 2359 + }, + { + "epoch": 0.34, + "learning_rate": 2.580015541593278e-06, + "loss": 0.9781, + "reward": 1.1404, + "step": 2369 + }, + { + "epoch": 0.34, + "learning_rate": 2.6582287849105325e-06, + "loss": 1.0955, + "reward": 1.5582, + "step": 2379 + }, + { + "epoch": 0.34, + "learning_rate": 2.737183084789652e-06, + "loss": 1.0466, + "reward": 0.991, + "step": 2389 + }, + { + "epoch": 0.34, + "learning_rate": 2.816853456039237e-06, + "loss": 1.0117, + "reward": 1.1598, + "step": 2399 + }, + { + "epoch": 0.34, + "learning_rate": 2.89721468686618e-06, + "loss": 0.995, + "reward": 1.3519, + "step": 2409 + }, + { + "epoch": 0.34, + "learning_rate": 2.978241346853943e-06, + "loss": 1.0312, + "reward": 1.2049, + "step": 2419 + }, + { + "epoch": 0.34, + "learning_rate": 3.0599077950100663e-06, + "loss": 0.9939, + "reward": 1.6098, + "step": 2429 + }, + { + "epoch": 0.35, + "learning_rate": 3.1421881878802834e-06, + "loss": 1.034, + "reward": 1.6221, + "step": 2439 + }, + { + "epoch": 0.35, + "learning_rate": 3.225056487726733e-06, + "loss": 1.1193, + "reward": 1.2305, + "step": 2449 + }, + { + "epoch": 0.35, + "learning_rate": 3.30848647076761e-06, + "loss": 1.0559, + "reward": 1.1538, + "step": 2459 + }, + { + "epoch": 0.35, + "learning_rate": 3.3924517354757323e-06, + "loss": 1.0754, + "reward": 1.4628, + "step": 2469 + }, + { + "epoch": 0.35, + "learning_rate": 3.4769257109333308e-06, + "loss": 1.1396, + "reward": 1.6104, + "step": 2479 + }, + { + "epoch": 0.35, + "learning_rate": 3.5618816652404684e-06, + "loss": 1.1429, + "reward": 1.0368, + "step": 2489 + }, + { + "epoch": 0.35, + "learning_rate": 3.6472927139743597e-06, + "loss": 1.1072, + "reward": 0.9329, + "step": 2499 + }, + { + "epoch": 0.36, + "learning_rate": 3.733131828696984e-06, + "loss": 1.0086, + "reward": 1.07, + "step": 2509 + }, + { + "epoch": 0.36, + "learning_rate": 3.8193718455083016e-06, + "loss": 1.0952, + "reward": 1.268, + "step": 2519 + }, + { + "epoch": 0.36, + "learning_rate": 3.905985473642267e-06, + "loss": 1.0826, + "reward": 1.5212, + "step": 2529 + }, + { + "epoch": 0.36, + "learning_rate": 3.992945304103048e-06, + "loss": 1.1337, + "reward": 1.5485, + "step": 2539 + }, + { + "epoch": 0.36, + "learning_rate": 4.080223818338628e-06, + "loss": 1.1041, + "reward": 0.9738, + "step": 2549 + }, + { + "epoch": 0.36, + "learning_rate": 4.1677933969491075e-06, + "loss": 1.131, + "reward": 1.4512, + "step": 2559 + }, + { + "epoch": 0.36, + "learning_rate": 4.255626328426871e-06, + "loss": 1.2036, + "reward": 1.5905, + "step": 2569 + }, + { + "epoch": 0.37, + "learning_rate": 4.34369481792595e-06, + "loss": 1.2255, + "reward": 1.4039, + "step": 2579 + }, + { + "epoch": 0.37, + "learning_rate": 4.4319709960577265e-06, + "loss": 1.1984, + "reward": 1.2347, + "step": 2589 + }, + { + "epoch": 0.37, + "learning_rate": 4.520426927710256e-06, + "loss": 1.2755, + "reward": 1.0769, + "step": 2599 + }, + { + "epoch": 0.37, + "learning_rate": 4.609034620888345e-06, + "loss": 1.168, + "reward": 1.3889, + "step": 2609 + }, + { + "epoch": 0.37, + "learning_rate": 4.697766035571672e-06, + "loss": 1.2547, + "reward": 1.0953, + "step": 2619 + }, + { + "epoch": 0.37, + "learning_rate": 4.786593092588078e-06, + "loss": 1.2799, + "reward": 1.2669, + "step": 2629 + }, + { + "epoch": 0.37, + "learning_rate": 4.875487682499274e-06, + "loss": 1.2906, + "reward": 1.154, + "step": 2639 + }, + { + "epoch": 0.38, + "learning_rate": 4.964421674496092e-06, + "loss": 1.3511, + "reward": 1.0815, + "step": 2649 + }, + { + "epoch": 0.38, + "learning_rate": 5.053366925300517e-06, + "loss": 1.1756, + "reward": 0.9634, + "step": 2659 + }, + { + "epoch": 0.38, + "learning_rate": 5.142295288071672e-06, + "loss": 1.381, + "reward": 1.0239, + "step": 2669 + }, + { + "epoch": 0.38, + "learning_rate": 5.231178621312932e-06, + "loss": 1.3535, + "reward": 1.0717, + "step": 2679 + }, + { + "epoch": 0.38, + "learning_rate": 5.319988797777313e-06, + "loss": 1.3538, + "reward": 1.1411, + "step": 2689 + }, + { + "epoch": 0.38, + "learning_rate": 5.408697713368389e-06, + "loss": 1.4044, + "reward": 1.1376, + "step": 2699 + }, + { + "epoch": 0.38, + "learning_rate": 5.497277296033871e-06, + "loss": 1.4148, + "reward": 1.0292, + "step": 2709 + }, + { + "epoch": 0.38, + "learning_rate": 5.585699514649054e-06, + "loss": 1.3704, + "reward": 1.2713, + "step": 2719 + }, + { + "epoch": 0.39, + "learning_rate": 5.673936387887281e-06, + "loss": 1.413, + "reward": 0.897, + "step": 2729 + }, + { + "epoch": 0.39, + "learning_rate": 5.761959993074679e-06, + "loss": 1.3321, + "reward": 1.7666, + "step": 2739 + }, + { + "epoch": 0.39, + "learning_rate": 5.849742475026331e-06, + "loss": 1.3719, + "reward": 1.0575, + "step": 2749 + }, + { + "epoch": 0.39, + "learning_rate": 5.937256054861096e-06, + "loss": 1.4532, + "reward": 1.2209, + "step": 2759 + }, + { + "epoch": 0.39, + "learning_rate": 6.024473038792244e-06, + "loss": 1.3653, + "reward": 1.3277, + "step": 2769 + }, + { + "epoch": 0.39, + "learning_rate": 6.1113658268912125e-06, + "loss": 1.4962, + "reward": 1.511, + "step": 2779 + }, + { + "epoch": 0.39, + "learning_rate": 6.197906921821628e-06, + "loss": 1.584, + "reward": 1.2515, + "step": 2789 + }, + { + "epoch": 0.4, + "learning_rate": 6.284068937540895e-06, + "loss": 1.65, + "reward": 1.1758, + "step": 2799 + }, + { + "epoch": 0.4, + "learning_rate": 6.3698246079665124e-06, + "loss": 1.5406, + "reward": 1.1564, + "step": 2809 + }, + { + "epoch": 0.4, + "learning_rate": 6.455146795604489e-06, + "loss": 1.5352, + "reward": 0.8774, + "step": 2819 + }, + { + "epoch": 0.4, + "learning_rate": 6.5400085001370186e-06, + "loss": 1.5166, + "reward": 1.0879, + "step": 2829 + }, + { + "epoch": 0.4, + "learning_rate": 6.624382866966792e-06, + "loss": 1.5348, + "reward": 1.0728, + "step": 2839 + }, + { + "epoch": 0.4, + "learning_rate": 6.708243195715136e-06, + "loss": 1.7907, + "reward": 1.0792, + "step": 2849 + }, + { + "epoch": 0.4, + "learning_rate": 6.791562948671411e-06, + "loss": 1.4988, + "reward": 1.1279, + "step": 2859 + }, + { + "epoch": 0.41, + "learning_rate": 6.874315759190883e-06, + "loss": 1.6644, + "reward": 1.101, + "step": 2869 + }, + { + "epoch": 0.41, + "learning_rate": 6.956475440038508e-06, + "loss": 1.5802, + "reward": 1.2685, + "step": 2879 + }, + { + "epoch": 0.41, + "learning_rate": 7.029890534302395e-06, + "loss": 1.5477, + "reward": 1.4426, + "step": 2889 + }, + { + "epoch": 0.41, + "learning_rate": 7.110851801807612e-06, + "loss": 1.6061, + "reward": 1.3666, + "step": 2899 + }, + { + "epoch": 0.41, + "learning_rate": 7.191145087501121e-06, + "loss": 1.688, + "reward": 1.2262, + "step": 2909 + }, + { + "epoch": 0.41, + "learning_rate": 7.2707449824677236e-06, + "loss": 1.5746, + "reward": 1.1827, + "step": 2919 + }, + { + "epoch": 0.41, + "learning_rate": 7.34962629721661e-06, + "loss": 1.9032, + "reward": 1.197, + "step": 2929 + }, + { + "epoch": 0.42, + "learning_rate": 7.427764069652624e-06, + "loss": 1.6993, + "reward": 1.0374, + "step": 2939 + }, + { + "epoch": 0.42, + "learning_rate": 7.505133572975546e-06, + "loss": 1.8476, + "reward": 1.122, + "step": 2949 + }, + { + "epoch": 0.42, + "learning_rate": 7.581710323504927e-06, + "loss": 1.7903, + "reward": 0.9016, + "step": 2959 + }, + { + "epoch": 0.42, + "learning_rate": 7.65747008842797e-06, + "loss": 1.7651, + "reward": 1.4757, + "step": 2969 + }, + { + "epoch": 0.42, + "learning_rate": 7.732388893468042e-06, + "loss": 1.7973, + "reward": 1.1568, + "step": 2979 + }, + { + "epoch": 0.42, + "learning_rate": 7.806443030471356e-06, + "loss": 1.7356, + "reward": 1.043, + "step": 2989 + }, + { + "epoch": 0.42, + "learning_rate": 7.879609064909478e-06, + "loss": 1.6922, + "reward": 1.229, + "step": 2999 + }, + { + "epoch": 0.43, + "learning_rate": 7.951863843295167e-06, + "loss": 1.5853, + "reward": 0.8997, + "step": 3009 + }, + { + "epoch": 0.43, + "learning_rate": 8.023184500509366e-06, + "loss": 1.8937, + "reward": 1.0902, + "step": 3019 + }, + { + "epoch": 0.43, + "learning_rate": 8.093548467036875e-06, + "loss": 1.8137, + "reward": 0.6018, + "step": 3029 + }, + { + "epoch": 0.43, + "learning_rate": 8.162933476108515e-06, + "loss": 1.8954, + "reward": 1.3788, + "step": 3039 + }, + { + "epoch": 0.43, + "learning_rate": 8.231317570747481e-06, + "loss": 1.8599, + "reward": 1.2312, + "step": 3049 + }, + { + "epoch": 0.43, + "learning_rate": 8.298679110717638e-06, + "loss": 1.9199, + "reward": 1.1653, + "step": 3059 + }, + { + "epoch": 0.43, + "learning_rate": 8.364996779371618e-06, + "loss": 1.9247, + "reward": 1.2953, + "step": 3069 + }, + { + "epoch": 0.44, + "learning_rate": 8.430249590396486e-06, + "loss": 1.8365, + "reward": 1.4825, + "step": 3079 + }, + { + "epoch": 0.44, + "learning_rate": 8.494416894454915e-06, + "loss": 1.9522, + "reward": 1.4488, + "step": 3089 + }, + { + "epoch": 0.44, + "learning_rate": 8.557478385719662e-06, + "loss": 1.8893, + "reward": 1.0284, + "step": 3099 + }, + { + "epoch": 0.44, + "learning_rate": 8.619414108299387e-06, + "loss": 1.9354, + "reward": 1.1625, + "step": 3109 + }, + { + "epoch": 0.44, + "learning_rate": 8.680204462553725e-06, + "loss": 2.0966, + "reward": 1.1395, + "step": 3119 + }, + { + "epoch": 0.44, + "learning_rate": 8.739830211295619e-06, + "loss": 2.0063, + "reward": 1.8746, + "step": 3129 + }, + { + "epoch": 0.44, + "learning_rate": 8.798272485878943e-06, + "loss": 2.0812, + "reward": 0.5741, + "step": 3139 + }, + { + "epoch": 0.45, + "learning_rate": 8.855512792169513e-06, + "loss": 2.2512, + "reward": 1.0503, + "step": 3149 + }, + { + "epoch": 0.45, + "learning_rate": 8.911533016397595e-06, + "loss": 2.2182, + "reward": 1.8183, + "step": 3159 + }, + { + "epoch": 0.45, + "learning_rate": 8.966315430890007e-06, + "loss": 1.9811, + "reward": 1.1761, + "step": 3169 + }, + { + "epoch": 0.45, + "learning_rate": 9.019842699680076e-06, + "loss": 2.1593, + "reward": 1.2371, + "step": 3179 + }, + { + "epoch": 0.45, + "learning_rate": 9.072097883993627e-06, + "loss": 2.0525, + "reward": 0.915, + "step": 3189 + }, + { + "epoch": 0.45, + "learning_rate": 9.123064447609291e-06, + "loss": 2.3064, + "reward": 0.7924, + "step": 3199 + }, + { + "epoch": 0.45, + "learning_rate": 9.172726262091392e-06, + "loss": 2.1051, + "reward": 0.9017, + "step": 3209 + }, + { + "epoch": 0.46, + "learning_rate": 9.221067611893823e-06, + "loss": 2.064, + "reward": 1.1497, + "step": 3219 + }, + { + "epoch": 0.46, + "learning_rate": 9.268073199333256e-06, + "loss": 2.175, + "reward": 1.3865, + "step": 3229 + }, + { + "epoch": 0.46, + "learning_rate": 9.313728149430105e-06, + "loss": 2.0706, + "reward": 0.8848, + "step": 3239 + }, + { + "epoch": 0.46, + "learning_rate": 9.358018014615742e-06, + "loss": 2.1955, + "reward": 0.97, + "step": 3249 + }, + { + "epoch": 0.46, + "learning_rate": 9.400928779304435e-06, + "loss": 2.193, + "reward": 0.7877, + "step": 3259 + }, + { + "epoch": 0.46, + "learning_rate": 9.44244686432861e-06, + "loss": 2.1918, + "reward": 1.0283, + "step": 3269 + }, + { + "epoch": 0.46, + "learning_rate": 9.482559131235998e-06, + "loss": 2.2743, + "reward": 1.3865, + "step": 3279 + }, + { + "epoch": 0.47, + "learning_rate": 9.521252886447305e-06, + "loss": 2.0822, + "reward": 1.0819, + "step": 3289 + }, + { + "epoch": 0.47, + "learning_rate": 9.55851588527312e-06, + "loss": 2.3622, + "reward": 0.9896, + "step": 3299 + }, + { + "epoch": 0.47, + "learning_rate": 9.594336335788757e-06, + "loss": 2.2891, + "reward": 0.938, + "step": 3309 + }, + { + "epoch": 0.47, + "learning_rate": 9.628702902565832e-06, + "loss": 2.5214, + "reward": 1.1171, + "step": 3319 + }, + { + "epoch": 0.47, + "learning_rate": 9.661604710259371e-06, + "loss": 2.3513, + "reward": 1.368, + "step": 3329 + }, + { + "epoch": 0.47, + "learning_rate": 9.693031347049324e-06, + "loss": 2.3446, + "reward": 0.889, + "step": 3339 + }, + { + "epoch": 0.47, + "learning_rate": 9.722972867935398e-06, + "loss": 2.382, + "reward": 1.2531, + "step": 3349 + }, + { + "epoch": 0.48, + "learning_rate": 9.751419797884179e-06, + "loss": 2.6059, + "reward": 1.0768, + "step": 3359 + }, + { + "epoch": 0.48, + "learning_rate": 9.778363134827496e-06, + "loss": 2.5748, + "reward": 1.2895, + "step": 3369 + }, + { + "epoch": 0.48, + "learning_rate": 9.803794352511163e-06, + "loss": 2.446, + "reward": 1.58, + "step": 3379 + }, + { + "epoch": 0.48, + "learning_rate": 9.8277054031931e-06, + "loss": 2.6801, + "reward": 1.4807, + "step": 3389 + }, + { + "epoch": 0.48, + "learning_rate": 9.850088720190065e-06, + "loss": 2.5744, + "reward": 1.6297, + "step": 3399 + }, + { + "epoch": 0.48, + "learning_rate": 9.870937220272141e-06, + "loss": 2.425, + "reward": 1.0175, + "step": 3409 + }, + { + "epoch": 0.48, + "learning_rate": 9.890244305904224e-06, + "loss": 2.5949, + "reward": 0.9162, + "step": 3419 + }, + { + "epoch": 0.49, + "learning_rate": 9.90800386733383e-06, + "loss": 2.7776, + "reward": 1.0664, + "step": 3429 + }, + { + "epoch": 0.49, + "learning_rate": 9.924210284524536e-06, + "loss": 2.5816, + "reward": 0.9759, + "step": 3439 + }, + { + "epoch": 0.49, + "learning_rate": 9.938858428934433e-06, + "loss": 2.6804, + "reward": 1.2715, + "step": 3449 + }, + { + "epoch": 0.49, + "learning_rate": 9.95194366513908e-06, + "loss": 2.9037, + "reward": 0.9395, + "step": 3459 + }, + { + "epoch": 0.49, + "learning_rate": 9.963461852298375e-06, + "loss": 2.7221, + "reward": 1.0787, + "step": 3469 + }, + { + "epoch": 0.49, + "learning_rate": 9.973409345466938e-06, + "loss": 2.739, + "reward": 0.8561, + "step": 3479 + }, + { + "epoch": 0.49, + "learning_rate": 9.981782996747553e-06, + "loss": 2.9711, + "reward": 0.6989, + "step": 3489 + }, + { + "epoch": 0.5, + "learning_rate": 9.988580156287328e-06, + "loss": 2.714, + "reward": 1.2962, + "step": 3499 + }, + { + "epoch": 0.5, + "learning_rate": 9.99379867311624e-06, + "loss": 2.8646, + "reward": 1.0722, + "step": 3509 + }, + { + "epoch": 0.5, + "learning_rate": 9.99743689582782e-06, + "loss": 2.7756, + "reward": 1.2141, + "step": 3519 + }, + { + "epoch": 0.5, + "learning_rate": 9.999493673101737e-06, + "loss": 2.8301, + "reward": 0.9352, + "step": 3529 + }, + { + "epoch": 0.5, + "learning_rate": 9.999968354068129e-06, + "loss": 2.9438, + "reward": 1.6458, + "step": 3539 + }, + { + "epoch": 0.5, + "learning_rate": 9.99886078851359e-06, + "loss": 2.9145, + "reward": 0.976, + "step": 3549 + }, + { + "epoch": 0.5, + "learning_rate": 9.996171326928681e-06, + "loss": 3.1017, + "reward": 1.5394, + "step": 3559 + }, + { + "epoch": 0.51, + "learning_rate": 9.99190082039704e-06, + "loss": 3.2175, + "reward": 1.26, + "step": 3569 + }, + { + "epoch": 0.51, + "learning_rate": 9.986050620326042e-06, + "loss": 3.3646, + "reward": 1.0458, + "step": 3579 + }, + { + "epoch": 0.51, + "learning_rate": 9.978622578019142e-06, + "loss": 3.1451, + "reward": 0.8256, + "step": 3589 + }, + { + "epoch": 0.51, + "learning_rate": 9.969619044090037e-06, + "loss": 3.2034, + "reward": 1.0347, + "step": 3599 + }, + { + "epoch": 0.51, + "learning_rate": 9.959042867718814e-06, + "loss": 3.0279, + "reward": 1.2227, + "step": 3609 + }, + { + "epoch": 0.51, + "learning_rate": 9.946897395750301e-06, + "loss": 3.1472, + "reward": 0.9186, + "step": 3619 + }, + { + "epoch": 0.51, + "learning_rate": 9.93318647163498e-06, + "loss": 3.3048, + "reward": 1.1239, + "step": 3629 + }, + { + "epoch": 0.52, + "learning_rate": 9.917914434212713e-06, + "loss": 3.2461, + "reward": 1.0214, + "step": 3639 + }, + { + "epoch": 0.52, + "learning_rate": 9.901086116339697e-06, + "loss": 3.6282, + "reward": 0.8601, + "step": 3649 + }, + { + "epoch": 0.52, + "learning_rate": 9.882706843359122e-06, + "loss": 3.5003, + "reward": 1.4156, + "step": 3659 + }, + { + "epoch": 0.52, + "learning_rate": 9.862782431415948e-06, + "loss": 3.4925, + "reward": 1.298, + "step": 3669 + }, + { + "epoch": 0.52, + "learning_rate": 9.84131918561637e-06, + "loss": 3.2247, + "reward": 1.0714, + "step": 3679 + }, + { + "epoch": 0.52, + "learning_rate": 9.818323898032577e-06, + "loss": 3.1443, + "reward": 0.6895, + "step": 3689 + }, + { + "epoch": 0.52, + "learning_rate": 9.79380384555339e-06, + "loss": 3.2626, + "reward": 0.8729, + "step": 3699 + }, + { + "epoch": 0.53, + "learning_rate": 9.767766787581457e-06, + "loss": 3.2382, + "reward": 0.8079, + "step": 3709 + }, + { + "epoch": 0.53, + "learning_rate": 9.740220963577808e-06, + "loss": 3.3773, + "reward": 0.7502, + "step": 3719 + }, + { + "epoch": 0.53, + "learning_rate": 9.71117509045445e-06, + "loss": 3.6027, + "reward": 0.9402, + "step": 3729 + }, + { + "epoch": 0.53, + "learning_rate": 9.680638359815904e-06, + "loss": 3.5544, + "reward": 1.4967, + "step": 3739 + }, + { + "epoch": 0.53, + "learning_rate": 9.648620435050486e-06, + "loss": 3.6005, + "reward": 0.8315, + "step": 3749 + }, + { + "epoch": 0.53, + "learning_rate": 9.615131448272347e-06, + "loss": 3.668, + "reward": 0.8521, + "step": 3759 + }, + { + "epoch": 0.53, + "learning_rate": 9.580181997115115e-06, + "loss": 3.67, + "reward": 0.8598, + "step": 3769 + }, + { + "epoch": 0.54, + "learning_rate": 9.5437831413783e-06, + "loss": 3.6283, + "reward": 1.1778, + "step": 3779 + }, + { + "epoch": 0.54, + "learning_rate": 9.505946399527368e-06, + "loss": 3.5786, + "reward": 1.1969, + "step": 3789 + }, + { + "epoch": 0.54, + "learning_rate": 9.466683745048738e-06, + "loss": 3.7483, + "reward": 0.853, + "step": 3799 + }, + { + "epoch": 0.54, + "learning_rate": 9.426007602660732e-06, + "loss": 4.1113, + "reward": 0.9846, + "step": 3809 + }, + { + "epoch": 0.54, + "learning_rate": 9.383930844381784e-06, + "loss": 4.06, + "reward": 0.7688, + "step": 3819 + }, + { + "epoch": 0.54, + "learning_rate": 9.34046678545704e-06, + "loss": 4.0023, + "reward": 1.2128, + "step": 3829 + }, + { + "epoch": 0.54, + "learning_rate": 9.295629180144766e-06, + "loss": 4.136, + "reward": 1.1385, + "step": 3839 + }, + { + "epoch": 0.54, + "learning_rate": 9.249432217363756e-06, + "loss": 4.3136, + "reward": 0.6312, + "step": 3849 + }, + { + "epoch": 0.55, + "learning_rate": 9.20189051620328e-06, + "loss": 3.7442, + "reward": 1.0192, + "step": 3859 + }, + { + "epoch": 0.55, + "learning_rate": 9.153019121296797e-06, + "loss": 3.9179, + "reward": 0.9526, + "step": 3869 + }, + { + "epoch": 0.55, + "learning_rate": 9.102833498061115e-06, + "loss": 4.0701, + "reward": 0.8944, + "step": 3879 + }, + { + "epoch": 0.55, + "learning_rate": 9.05134952780229e-06, + "loss": 4.2683, + "reward": 1.1349, + "step": 3889 + }, + { + "epoch": 0.55, + "learning_rate": 8.998583502690016e-06, + "loss": 4.0388, + "reward": 0.9505, + "step": 3899 + }, + { + "epoch": 0.55, + "learning_rate": 8.944552120601899e-06, + "loss": 4.0588, + "reward": 0.8472, + "step": 3909 + }, + { + "epoch": 0.55, + "learning_rate": 8.889272479839426e-06, + "loss": 4.2467, + "reward": 0.9783, + "step": 3919 + }, + { + "epoch": 0.56, + "learning_rate": 8.832762073717176e-06, + "loss": 4.2177, + "reward": 1.1977, + "step": 3929 + }, + { + "epoch": 0.56, + "learning_rate": 8.77503878502702e-06, + "loss": 4.2541, + "reward": 0.8039, + "step": 3939 + }, + { + "epoch": 0.56, + "learning_rate": 8.716120880379124e-06, + "loss": 4.5727, + "reward": 0.8618, + "step": 3949 + }, + { + "epoch": 0.56, + "learning_rate": 8.656027004421407e-06, + "loss": 4.4046, + "reward": 1.1126, + "step": 3959 + }, + { + "epoch": 0.56, + "learning_rate": 8.59477617393948e-06, + "loss": 4.3201, + "reward": 0.986, + "step": 3969 + }, + { + "epoch": 0.56, + "learning_rate": 8.532387771838694e-06, + "loss": 4.7678, + "reward": 0.9263, + "step": 3979 + }, + { + "epoch": 0.56, + "learning_rate": 8.468881541010453e-06, + "loss": 4.699, + "reward": 1.0621, + "step": 3989 + }, + { + "epoch": 0.57, + "learning_rate": 8.404277578084491e-06, + "loss": 4.3774, + "reward": 1.1508, + "step": 3999 + }, + { + "epoch": 0.57, + "learning_rate": 8.338596327069336e-06, + "loss": 4.5082, + "reward": 0.7571, + "step": 4009 + }, + { + "epoch": 0.57, + "learning_rate": 8.271858572882709e-06, + "loss": 4.6633, + "reward": 1.2757, + "step": 4019 + }, + { + "epoch": 0.57, + "learning_rate": 8.204085434774164e-06, + "loss": 5.087, + "reward": 1.2216, + "step": 4029 + }, + { + "epoch": 0.57, + "learning_rate": 8.135298359641825e-06, + "loss": 4.3665, + "reward": 1.0773, + "step": 4039 + }, + { + "epoch": 0.57, + "learning_rate": 8.065519115245542e-06, + "loss": 4.8474, + "reward": 1.1603, + "step": 4049 + }, + { + "epoch": 0.57, + "learning_rate": 7.994769783318399e-06, + "loss": 4.9222, + "reward": 1.3296, + "step": 4059 + }, + { + "epoch": 0.58, + "learning_rate": 7.923072752578967e-06, + "loss": 4.9875, + "reward": 1.1223, + "step": 4069 + }, + { + "epoch": 0.58, + "learning_rate": 7.850450711646325e-06, + "loss": 5.3475, + "reward": 0.9262, + "step": 4079 + }, + { + "epoch": 0.58, + "learning_rate": 7.776926641860262e-06, + "loss": 5.2078, + "reward": 0.9273, + "step": 4089 + }, + { + "epoch": 0.58, + "learning_rate": 7.702523810008753e-06, + "loss": 5.227, + "reward": 1.255, + "step": 4099 + }, + { + "epoch": 0.58, + "learning_rate": 7.627265760965192e-06, + "loss": 5.5427, + "reward": 1.2129, + "step": 4109 + }, + { + "epoch": 0.58, + "learning_rate": 7.551176310237556e-06, + "loss": 5.0962, + "reward": 1.0178, + "step": 4119 + }, + { + "epoch": 0.58, + "learning_rate": 7.474279536431992e-06, + "loss": 5.2122, + "reward": 1.2638, + "step": 4129 + }, + { + "epoch": 0.59, + "learning_rate": 7.396599773633082e-06, + "loss": 5.7452, + "reward": 1.0502, + "step": 4139 + }, + { + "epoch": 0.59, + "learning_rate": 7.318161603703284e-06, + "loss": 5.3589, + "reward": 1.0121, + "step": 4149 + }, + { + "epoch": 0.59, + "learning_rate": 7.238989848504011e-06, + "loss": 5.2267, + "reward": 0.9817, + "step": 4159 + }, + { + "epoch": 0.59, + "learning_rate": 7.159109562040667e-06, + "loss": 5.964, + "reward": 1.487, + "step": 4169 + }, + { + "epoch": 0.59, + "learning_rate": 7.078546022534321e-06, + "loss": 5.9483, + "reward": 0.9745, + "step": 4179 + }, + { + "epoch": 0.59, + "learning_rate": 6.9973247244223385e-06, + "loss": 5.592, + "reward": 1.4021, + "step": 4189 + }, + { + "epoch": 0.59, + "learning_rate": 6.915471370290685e-06, + "loss": 5.8987, + "reward": 1.2619, + "step": 4199 + }, + { + "epoch": 0.6, + "learning_rate": 6.8330118627402506e-06, + "loss": 5.6118, + "reward": 0.7087, + "step": 4209 + }, + { + "epoch": 0.6, + "learning_rate": 6.7499722961899895e-06, + "loss": 5.7941, + "reward": 1.0972, + "step": 4219 + }, + { + "epoch": 0.6, + "learning_rate": 6.666378948619261e-06, + "loss": 5.8458, + "reward": 0.7238, + "step": 4229 + }, + { + "epoch": 0.6, + "learning_rate": 6.582258273252182e-06, + "loss": 6.0383, + "reward": 0.841, + "step": 4239 + }, + { + "epoch": 0.6, + "learning_rate": 6.4976368901864125e-06, + "loss": 5.7172, + "reward": 0.8323, + "step": 4249 + }, + { + "epoch": 0.6, + "learning_rate": 6.412541577969238e-06, + "loss": 6.9222, + "reward": 0.9498, + "step": 4259 + }, + { + "epoch": 0.6, + "learning_rate": 6.32699926512342e-06, + "loss": 5.6869, + "reward": 1.1194, + "step": 4269 + }, + { + "epoch": 0.61, + "learning_rate": 6.2410370216256875e-06, + "loss": 6.5483, + "reward": 1.0819, + "step": 4279 + }, + { + "epoch": 0.61, + "learning_rate": 6.154682050340339e-06, + "loss": 6.4802, + "reward": 0.8398, + "step": 4289 + }, + { + "epoch": 0.61, + "learning_rate": 6.06796167841092e-06, + "loss": 6.7995, + "reward": 1.3388, + "step": 4299 + }, + { + "epoch": 0.61, + "learning_rate": 5.980903348612461e-06, + "loss": 6.4248, + "reward": 1.0594, + "step": 4309 + }, + { + "epoch": 0.61, + "learning_rate": 5.893534610667239e-06, + "loss": 6.9055, + "reward": 1.1437, + "step": 4319 + }, + { + "epoch": 0.61, + "learning_rate": 5.805883112526556e-06, + "loss": 6.9643, + "reward": 0.952, + "step": 4329 + }, + { + "epoch": 0.61, + "learning_rate": 5.717976591621577e-06, + "loss": 7.0254, + "reward": 1.459, + "step": 4339 + }, + { + "epoch": 0.62, + "learning_rate": 5.6298428660857275e-06, + "loss": 7.2209, + "reward": 0.9245, + "step": 4349 + }, + { + "epoch": 0.62, + "learning_rate": 5.541509825951659e-06, + "loss": 7.022, + "reward": 0.9485, + "step": 4359 + }, + { + "epoch": 0.62, + "learning_rate": 5.453005424325387e-06, + "loss": 6.937, + "reward": 0.6953, + "step": 4369 + }, + { + "epoch": 0.62, + "learning_rate": 5.364357668540476e-06, + "loss": 7.3764, + "reward": 0.8788, + "step": 4379 + }, + { + "epoch": 0.62, + "learning_rate": 5.275594611295118e-06, + "loss": 7.2731, + "reward": 1.1819, + "step": 4389 + }, + { + "epoch": 0.62, + "learning_rate": 5.186744341774788e-06, + "loss": 7.1828, + "reward": 0.6716, + "step": 4399 + }, + { + "epoch": 0.62, + "learning_rate": 5.097834976763426e-06, + "loss": 6.8891, + "reward": 1.2351, + "step": 4409 + }, + { + "epoch": 0.63, + "learning_rate": 5.008894651745785e-06, + "loss": 7.9363, + "reward": 0.9614, + "step": 4419 + }, + { + "epoch": 0.63, + "learning_rate": 4.9199515120039774e-06, + "loss": 7.6238, + "reward": 1.1227, + "step": 4429 + }, + { + "epoch": 0.63, + "learning_rate": 4.831033703710816e-06, + "loss": 7.9218, + "reward": 1.1009, + "step": 4439 + }, + { + "epoch": 0.63, + "learning_rate": 4.742169365022975e-06, + "loss": 7.5313, + "reward": 1.0618, + "step": 4449 + }, + { + "epoch": 0.63, + "learning_rate": 4.6533866171765685e-06, + "loss": 7.3797, + "reward": 0.7438, + "step": 4459 + }, + { + "epoch": 0.63, + "learning_rate": 4.564713555588208e-06, + "loss": 8.207, + "reward": 1.3113, + "step": 4469 + }, + { + "epoch": 0.63, + "learning_rate": 4.476178240964121e-06, + "loss": 7.6581, + "reward": 1.1344, + "step": 4479 + }, + { + "epoch": 0.64, + "learning_rate": 4.38780869042036e-06, + "loss": 7.7654, + "reward": 1.3073, + "step": 4489 + }, + { + "epoch": 0.64, + "learning_rate": 4.2996328686166846e-06, + "loss": 7.7657, + "reward": 1.0558, + "step": 4499 + }, + { + "epoch": 0.64, + "learning_rate": 4.211678678907162e-06, + "loss": 8.169, + "reward": 1.2041, + "step": 4509 + }, + { + "epoch": 0.64, + "learning_rate": 4.123973954510092e-06, + "loss": 8.7798, + "reward": 0.8431, + "step": 4519 + }, + { + "epoch": 0.64, + "learning_rate": 4.036546449700141e-06, + "loss": 8.4096, + "reward": 0.7942, + "step": 4529 + }, + { + "epoch": 0.64, + "learning_rate": 3.958121586474061e-06, + "loss": 8.6166, + "reward": 0.8147, + "step": 4539 + }, + { + "epoch": 0.64, + "learning_rate": 3.87129694065821e-06, + "loss": 8.1236, + "reward": 0.9802, + "step": 4549 + }, + { + "epoch": 0.65, + "learning_rate": 3.7848294744007763e-06, + "loss": 8.0437, + "reward": 1.0559, + "step": 4559 + }, + { + "epoch": 0.65, + "learning_rate": 3.6987465504444775e-06, + "loss": 9.4977, + "reward": 0.7246, + "step": 4569 + }, + { + "epoch": 0.65, + "learning_rate": 3.6130754098431063e-06, + "loss": 8.8468, + "reward": 0.5876, + "step": 4579 + }, + { + "epoch": 0.65, + "learning_rate": 3.527843163341101e-06, + "loss": 9.423, + "reward": 1.0855, + "step": 4589 + }, + { + "epoch": 0.65, + "learning_rate": 3.4430767827942534e-06, + "loss": 9.3485, + "reward": 1.0619, + "step": 4599 + }, + { + "epoch": 0.65, + "learning_rate": 3.3588030926345015e-06, + "loss": 9.2073, + "reward": 1.4898, + "step": 4609 + }, + { + "epoch": 0.65, + "learning_rate": 3.2750487613812298e-06, + "loss": 10.2192, + "reward": 0.9771, + "step": 4619 + }, + { + "epoch": 0.66, + "learning_rate": 3.1918402932020525e-06, + "loss": 9.3942, + "reward": 1.1159, + "step": 4629 + }, + { + "epoch": 0.66, + "learning_rate": 3.1092040195254756e-06, + "loss": 9.3759, + "reward": 0.708, + "step": 4639 + }, + { + "epoch": 0.66, + "learning_rate": 3.0271660907083157e-06, + "loss": 9.9824, + "reward": 1.0753, + "step": 4649 + }, + { + "epoch": 0.66, + "learning_rate": 2.9457524677603665e-06, + "loss": 10.3342, + "reward": 1.505, + "step": 4659 + }, + { + "epoch": 0.66, + "learning_rate": 2.864988914129041e-06, + "loss": 10.0353, + "reward": 1.3609, + "step": 4669 + }, + { + "epoch": 0.66, + "learning_rate": 2.7849009875464316e-06, + "loss": 9.5413, + "reward": 1.0895, + "step": 4679 + }, + { + "epoch": 0.66, + "learning_rate": 2.7055140319416016e-06, + "loss": 11.2036, + "reward": 0.9274, + "step": 4689 + }, + { + "epoch": 0.67, + "learning_rate": 2.6268531694204016e-06, + "loss": 10.1615, + "reward": 0.7893, + "step": 4699 + }, + { + "epoch": 0.67, + "learning_rate": 2.5489432923156055e-06, + "loss": 10.3155, + "reward": 1.0313, + "step": 4709 + }, + { + "epoch": 0.67, + "learning_rate": 2.471809055309649e-06, + "loss": 10.4604, + "reward": 1.2473, + "step": 4719 + }, + { + "epoch": 0.67, + "learning_rate": 2.3954748676326533e-06, + "loss": 9.5465, + "reward": 1.0535, + "step": 4729 + }, + { + "epoch": 0.67, + "learning_rate": 2.3199648853380735e-06, + "loss": 10.0163, + "reward": 1.0787, + "step": 4739 + }, + { + "epoch": 0.67, + "learning_rate": 2.245303003658512e-06, + "loss": 10.9403, + "reward": 1.0524, + "step": 4749 + }, + { + "epoch": 0.67, + "learning_rate": 2.1715128494440217e-06, + "loss": 11.018, + "reward": 1.0536, + "step": 4759 + }, + { + "epoch": 0.68, + "learning_rate": 2.098617773685337e-06, + "loss": 11.7558, + "reward": 1.5194, + "step": 4769 + }, + { + "epoch": 0.68, + "learning_rate": 2.026640844124475e-06, + "loss": 10.2469, + "reward": 0.7457, + "step": 4779 + }, + { + "epoch": 0.68, + "learning_rate": 1.955604837954867e-06, + "loss": 10.7667, + "reward": 1.4715, + "step": 4789 + }, + { + "epoch": 0.68, + "learning_rate": 1.885532234613514e-06, + "loss": 11.7527, + "reward": 0.6686, + "step": 4799 + }, + { + "epoch": 0.68, + "learning_rate": 1.8164452086673256e-06, + "loss": 12.3804, + "reward": 0.7405, + "step": 4809 + }, + { + "epoch": 0.68, + "learning_rate": 1.7483656227959783e-06, + "loss": 11.684, + "reward": 0.8597, + "step": 4819 + }, + { + "epoch": 0.68, + "learning_rate": 1.6813150208733913e-06, + "loss": 12.0429, + "reward": 1.1068, + "step": 4829 + }, + { + "epoch": 0.69, + "learning_rate": 1.615314621150197e-06, + "loss": 12.157, + "reward": 1.029, + "step": 4839 + }, + { + "epoch": 0.69, + "learning_rate": 1.5503853095391396e-06, + "loss": 13.4578, + "reward": 0.8474, + "step": 4849 + }, + { + "epoch": 0.69, + "learning_rate": 1.4865476330057604e-06, + "loss": 11.7422, + "reward": 0.9289, + "step": 4859 + }, + { + "epoch": 0.69, + "learning_rate": 1.4238217930662312e-06, + "loss": 11.9643, + "reward": 1.1646, + "step": 4869 + }, + { + "epoch": 0.69, + "learning_rate": 1.3622276393945872e-06, + "loss": 13.1756, + "reward": 0.9199, + "step": 4879 + }, + { + "epoch": 0.69, + "learning_rate": 1.3017846635412595e-06, + "loss": 12.3998, + "reward": 1.2203, + "step": 4889 + }, + { + "epoch": 0.69, + "learning_rate": 1.2425119927649727e-06, + "loss": 13.3518, + "reward": 0.9137, + "step": 4899 + }, + { + "epoch": 0.7, + "learning_rate": 1.1844283839798543e-06, + "loss": 13.3779, + "reward": 1.1227, + "step": 4909 + }, + { + "epoch": 0.7, + "learning_rate": 1.1275522178198362e-06, + "loss": 13.8457, + "reward": 0.9895, + "step": 4919 + }, + { + "epoch": 0.7, + "learning_rate": 1.0719014928220283e-06, + "loss": 12.935, + "reward": 1.0785, + "step": 4929 + }, + { + "epoch": 0.7, + "learning_rate": 1.0174938197311069e-06, + "loss": 14.8186, + "reward": 1.18, + "step": 4939 + }, + { + "epoch": 0.7, + "learning_rate": 9.643464159263304e-07, + "loss": 13.357, + "reward": 1.0398, + "step": 4949 + }, + { + "epoch": 0.7, + "learning_rate": 9.124760999731003e-07, + "loss": 13.1203, + "reward": 0.7378, + "step": 4959 + }, + { + "epoch": 0.7, + "learning_rate": 8.618992863006926e-07, + "loss": 13.6538, + "reward": 1.0154, + "step": 4969 + }, + { + "epoch": 0.7, + "learning_rate": 8.126319800079063e-07, + "loss": 14.3607, + "reward": 1.1812, + "step": 4979 + }, + { + "epoch": 0.71, + "learning_rate": 7.646897717982188e-07, + "loss": 13.1016, + "reward": 1.1047, + "step": 4989 + }, + { + "epoch": 0.71, + "learning_rate": 7.180878330460711e-07, + "loss": 14.7443, + "reward": 0.7647, + "step": 4999 + }, + { + "epoch": 0.71, + "learning_rate": 6.728409109959033e-07, + "loss": 14.3599, + "reward": 1.174, + "step": 5009 + }, + { + "epoch": 0.71, + "learning_rate": 6.289633240953291e-07, + "loss": 15.0008, + "reward": 1.2305, + "step": 5019 + }, + { + "epoch": 0.71, + "learning_rate": 5.864689574640836e-07, + "loss": 14.599, + "reward": 0.6941, + "step": 5029 + }, + { + "epoch": 0.71, + "learning_rate": 5.453712585000254e-07, + "loss": 14.595, + "reward": 1.0988, + "step": 5039 + }, + { + "epoch": 0.71, + "learning_rate": 5.056832326237087e-07, + "loss": 15.145, + "reward": 0.9199, + "step": 5049 + }, + { + "epoch": 0.72, + "learning_rate": 4.674174391627939e-07, + "loss": 16.1126, + "reward": 0.7843, + "step": 5059 + }, + { + "epoch": 0.72, + "learning_rate": 4.3058598737764466e-07, + "loss": 15.0736, + "reward": 1.0982, + "step": 5069 + }, + { + "epoch": 0.72, + "learning_rate": 3.952005326293129e-07, + "loss": 16.5611, + "reward": 0.878, + "step": 5079 + }, + { + "epoch": 0.72, + "learning_rate": 3.612722726912116e-07, + "loss": 16.3597, + "reward": 0.8769, + "step": 5089 + }, + { + "epoch": 0.72, + "learning_rate": 3.288119442055371e-07, + "loss": 16.8776, + "reward": 1.0628, + "step": 5099 + }, + { + "epoch": 0.72, + "learning_rate": 2.9782981928567435e-07, + "loss": 17.8894, + "reward": 1.1461, + "step": 5109 + }, + { + "epoch": 0.72, + "learning_rate": 2.6833570226554526e-07, + "loss": 17.7502, + "reward": 1.0463, + "step": 5119 + }, + { + "epoch": 0.73, + "learning_rate": 2.4033892659703293e-07, + "loss": 17.9116, + "reward": 0.7333, + "step": 5129 + }, + { + "epoch": 0.73, + "learning_rate": 2.138483518963874e-07, + "loss": 16.8411, + "reward": 0.864, + "step": 5139 + }, + { + "epoch": 0.73, + "learning_rate": 1.8887236114060404e-07, + "loss": 17.8577, + "reward": 1.0259, + "step": 5149 + }, + { + "epoch": 0.73, + "learning_rate": 1.6541885801459713e-07, + "loss": 17.8102, + "reward": 1.1068, + "step": 5159 + }, + { + "epoch": 0.73, + "learning_rate": 1.434952644100912e-07, + "loss": 18.7215, + "reward": 1.0976, + "step": 5169 + }, + { + "epoch": 0.73, + "learning_rate": 1.2310851807693314e-07, + "loss": 19.5751, + "reward": 0.8139, + "step": 5179 + }, + { + "epoch": 0.73, + "learning_rate": 1.0426507042765155e-07, + "loss": 18.8442, + "reward": 0.8037, + "step": 5189 + }, + { + "epoch": 0.74, + "learning_rate": 8.69708844958822e-08, + "loss": 19.6249, + "reward": 0.9081, + "step": 5199 + }, + { + "epoch": 0.74, + "learning_rate": 7.12314330493663e-08, + "loss": 19.3005, + "reward": 0.9154, + "step": 5209 + }, + { + "epoch": 0.74, + "learning_rate": 5.705169685809031e-08, + "loss": 19.015, + "reward": 0.5151, + "step": 5219 + }, + { + "epoch": 0.74, + "learning_rate": 4.4436163118102284e-08, + "loss": 20.8741, + "reward": 0.777, + "step": 5229 + }, + { + "epoch": 0.74, + "learning_rate": 3.338882403154664e-08, + "loss": 21.1491, + "reward": 1.3843, + "step": 5239 + }, + { + "epoch": 0.74, + "learning_rate": 2.3913175543314183e-08, + "loss": 20.2964, + "reward": 0.7921, + "step": 5249 + }, + { + "epoch": 0.74, + "learning_rate": 1.6012216234756463e-08, + "loss": 19.0763, + "reward": 1.1708, + "step": 5259 + }, + { + "epoch": 0.75, + "learning_rate": 9.688446374773176e-09, + "loss": 20.3877, + "reward": 0.914, + "step": 5269 + }, + { + "epoch": 0.75, + "learning_rate": 4.943867128601176e-09, + "loss": 21.6992, + "reward": 0.7239, + "step": 5279 + }, + { + "epoch": 0.75, + "learning_rate": 1.7799799245449279e-09, + "loss": 22.0992, + "reward": 1.3829, + "step": 5289 + }, + { + "epoch": 0.75, + "learning_rate": 1.9778597884545504e-10, + "loss": 19.1273, + "reward": 0.8702, + "step": 5299 + }, + { + "epoch": 0.75, + "learning_rate": 1.9778597884545504e-10, + "loss": 21.5772, + "reward": 1.2798, + "step": 5309 + }, + { + "epoch": 0.75, + "learning_rate": 1.7799799245449279e-09, + "loss": 22.6161, + "reward": 1.1608, + "step": 5319 + }, + { + "epoch": 0.75, + "learning_rate": 4.943867128601176e-09, + "loss": 23.0654, + "reward": 1.237, + "step": 5329 + }, + { + "epoch": 0.76, + "learning_rate": 9.68844637477262e-09, + "loss": 23.4224, + "reward": 0.7672, + "step": 5339 + }, + { + "epoch": 0.76, + "learning_rate": 1.6012216234756463e-08, + "loss": 22.5207, + "reward": 0.8759, + "step": 5349 + }, + { + "epoch": 0.76, + "learning_rate": 2.3913175543313627e-08, + "loss": 22.4022, + "reward": 0.9359, + "step": 5359 + }, + { + "epoch": 0.76, + "learning_rate": 3.3388824031546087e-08, + "loss": 24.2224, + "reward": 1.14, + "step": 5369 + }, + { + "epoch": 0.76, + "learning_rate": 4.443616311810173e-08, + "loss": 23.5394, + "reward": 0.9193, + "step": 5379 + }, + { + "epoch": 0.76, + "learning_rate": 5.7051696858089754e-08, + "loss": 25.0942, + "reward": 1.0297, + "step": 5389 + }, + { + "epoch": 0.76, + "learning_rate": 7.123143304936575e-08, + "loss": 25.1026, + "reward": 0.7989, + "step": 5399 + }, + { + "epoch": 0.77, + "learning_rate": 8.697088449588164e-08, + "loss": 24.8735, + "reward": 0.8812, + "step": 5409 + }, + { + "epoch": 0.77, + "learning_rate": 1.0426507042765099e-07, + "loss": 24.6814, + "reward": 0.7561, + "step": 5419 + }, + { + "epoch": 0.77, + "learning_rate": 1.2310851807693258e-07, + "loss": 25.0014, + "reward": 0.7191, + "step": 5429 + }, + { + "epoch": 0.77, + "learning_rate": 1.434952644100901e-07, + "loss": 25.0474, + "reward": 0.9854, + "step": 5439 + }, + { + "epoch": 0.77, + "learning_rate": 1.6541885801459601e-07, + "loss": 24.6665, + "reward": 1.0098, + "step": 5449 + }, + { + "epoch": 0.77, + "learning_rate": 1.8887236114060293e-07, + "loss": 29.131, + "reward": 0.8288, + "step": 5459 + }, + { + "epoch": 0.77, + "learning_rate": 2.138483518963863e-07, + "loss": 26.0392, + "reward": 1.0817, + "step": 5469 + }, + { + "epoch": 0.78, + "learning_rate": 2.403389265970318e-07, + "loss": 29.0327, + "reward": 0.8201, + "step": 5479 + }, + { + "epoch": 0.78, + "learning_rate": 2.683357022655442e-07, + "loss": 26.231, + "reward": 1.3976, + "step": 5489 + }, + { + "epoch": 0.78, + "learning_rate": 2.9782981928567324e-07, + "loss": 28.3856, + "reward": 1.2167, + "step": 5499 + }, + { + "epoch": 0.78, + "learning_rate": 3.28811944205536e-07, + "loss": 27.6945, + "reward": 1.041, + "step": 5509 + }, + { + "epoch": 0.78, + "learning_rate": 3.612722726912099e-07, + "loss": 29.5519, + "reward": 1.19, + "step": 5519 + }, + { + "epoch": 0.78, + "learning_rate": 3.9520053262931177e-07, + "loss": 29.1206, + "reward": 1.5435, + "step": 5529 + }, + { + "epoch": 0.78, + "learning_rate": 4.3058598737764355e-07, + "loss": 28.6978, + "reward": 1.1417, + "step": 5539 + }, + { + "epoch": 0.79, + "learning_rate": 4.6741743916279223e-07, + "loss": 32.4786, + "reward": 1.1774, + "step": 5549 + }, + { + "epoch": 0.79, + "learning_rate": 5.056832326237071e-07, + "loss": 28.9053, + "reward": 0.8577, + "step": 5559 + }, + { + "epoch": 0.79, + "learning_rate": 5.453712585000237e-07, + "loss": 33.1146, + "reward": 0.9498, + "step": 5569 + }, + { + "epoch": 0.79, + "learning_rate": 5.864689574640819e-07, + "loss": 32.011, + "reward": 1.2069, + "step": 5579 + }, + { + "epoch": 0.79, + "learning_rate": 6.289633240953275e-07, + "loss": 32.2405, + "reward": 0.8766, + "step": 5589 + }, + { + "epoch": 0.79, + "learning_rate": 6.728409109959011e-07, + "loss": 31.6486, + "reward": 1.0483, + "step": 5599 + }, + { + "epoch": 0.79, + "learning_rate": 7.180878330460695e-07, + "loss": 32.3604, + "reward": 1.1886, + "step": 5609 + }, + { + "epoch": 0.8, + "learning_rate": 7.646897717982165e-07, + "loss": 34.0581, + "reward": 1.0957, + "step": 5619 + }, + { + "epoch": 0.8, + "learning_rate": 8.126319800079046e-07, + "loss": 32.3892, + "reward": 1.29, + "step": 5629 + }, + { + "epoch": 0.8, + "learning_rate": 8.618992863006909e-07, + "loss": 36.4483, + "reward": 0.5732, + "step": 5639 + }, + { + "epoch": 0.8, + "learning_rate": 9.073599436940772e-07, + "loss": 32.9463, + "reward": 0.8512, + "step": 5649 + }, + { + "epoch": 0.8, + "learning_rate": 9.59101641354881e-07, + "loss": 31.3008, + "reward": 0.9925, + "step": 5659 + }, + { + "epoch": 0.8, + "learning_rate": 1.0121220865829944e-06, + "loss": 35.8149, + "reward": 1.0764, + "step": 5669 + }, + { + "epoch": 0.8, + "learning_rate": 1.0664045009892344e-06, + "loss": 32.4432, + "reward": 1.0298, + "step": 5679 + }, + { + "epoch": 0.81, + "learning_rate": 1.1219317068326002e-06, + "loss": 36.0975, + "reward": 0.7165, + "step": 5689 + }, + { + "epoch": 0.81, + "learning_rate": 1.1786861324562364e-06, + "loss": 36.9366, + "reward": 1.1632, + "step": 5699 + }, + { + "epoch": 0.81, + "learning_rate": 1.2366498178479608e-06, + "loss": 37.8448, + "reward": 1.0953, + "step": 5709 + }, + { + "epoch": 0.81, + "learning_rate": 1.2958044203237746e-06, + "loss": 35.1652, + "reward": 0.526, + "step": 5719 + }, + { + "epoch": 0.81, + "learning_rate": 1.3561312203324056e-06, + "loss": 36.9202, + "reward": 0.9273, + "step": 5729 + }, + { + "epoch": 0.81, + "learning_rate": 1.417611127379146e-06, + "loss": 40.3897, + "reward": 1.1308, + "step": 5739 + }, + { + "epoch": 0.81, + "learning_rate": 1.48022468606704e-06, + "loss": 39.9514, + "reward": 1.2518, + "step": 5749 + }, + { + "epoch": 0.82, + "learning_rate": 1.5439520822536135e-06, + "loss": 41.6909, + "reward": 1.2512, + "step": 5759 + }, + { + "epoch": 0.82, + "learning_rate": 1.608773149321035e-06, + "loss": 40.2497, + "reward": 1.2501, + "step": 5769 + }, + { + "epoch": 0.82, + "learning_rate": 1.6746673745579178e-06, + "loss": 40.7741, + "reward": 1.0252, + "step": 5779 + }, + { + "epoch": 0.82, + "learning_rate": 1.741613905650545e-06, + "loss": 42.1902, + "reward": 1.2204, + "step": 5789 + }, + { + "epoch": 0.82, + "learning_rate": 1.8095915572816547e-06, + "loss": 38.8444, + "reward": 0.6678, + "step": 5799 + }, + { + "epoch": 0.82, + "learning_rate": 1.878578817834545e-06, + "loss": 42.2058, + "reward": 0.8402, + "step": 5809 + }, + { + "epoch": 0.82, + "learning_rate": 1.948553856200428e-06, + "loss": 41.4845, + "reward": 1.6395, + "step": 5819 + }, + { + "epoch": 0.83, + "learning_rate": 2.0194945286869657e-06, + "loss": 39.207, + "reward": 0.7803, + "step": 5829 + }, + { + "epoch": 0.83, + "learning_rate": 2.09137838602561e-06, + "loss": 42.8872, + "reward": 1.2898, + "step": 5839 + }, + { + "epoch": 0.83, + "learning_rate": 2.164182680475766e-06, + "loss": 44.517, + "reward": 1.1872, + "step": 5849 + }, + { + "epoch": 0.83, + "learning_rate": 2.237884373023278e-06, + "loss": 42.6027, + "reward": 0.9368, + "step": 5859 + }, + { + "epoch": 0.83, + "learning_rate": 2.312460140671215e-06, + "loss": 43.3298, + "reward": 1.2487, + "step": 5869 + }, + { + "epoch": 0.83, + "learning_rate": 2.387886383820429e-06, + "loss": 47.8902, + "reward": 1.2663, + "step": 5879 + }, + { + "epoch": 0.83, + "learning_rate": 2.4641392337376947e-06, + "loss": 45.3621, + "reward": 0.9799, + "step": 5889 + }, + { + "epoch": 0.84, + "learning_rate": 2.541194560108975e-06, + "loss": 46.7022, + "reward": 0.6368, + "step": 5899 + }, + { + "epoch": 0.84, + "learning_rate": 2.619027978675538e-06, + "loss": 46.4664, + "reward": 1.1432, + "step": 5909 + }, + { + "epoch": 0.84, + "learning_rate": 2.697614858950329e-06, + "loss": 50.6503, + "reward": 1.2844, + "step": 5919 + }, + { + "epoch": 0.84, + "learning_rate": 2.7769303320123823e-06, + "loss": 45.3292, + "reward": 1.3174, + "step": 5929 + }, + { + "epoch": 0.84, + "learning_rate": 2.8569492983765625e-06, + "loss": 50.5686, + "reward": 1.3988, + "step": 5939 + }, + { + "epoch": 0.84, + "learning_rate": 2.9376464359363793e-06, + "loss": 48.6937, + "reward": 0.7872, + "step": 5949 + }, + { + "epoch": 0.84, + "learning_rate": 3.0189962079771767e-06, + "loss": 47.5586, + "reward": 1.1002, + "step": 5959 + }, + { + "epoch": 0.85, + "learning_rate": 3.100972871257271e-06, + "loss": 49.5598, + "reward": 0.872, + "step": 5969 + }, + { + "epoch": 0.85, + "learning_rate": 3.1835504841544017e-06, + "loss": 46.403, + "reward": 0.4291, + "step": 5979 + }, + { + "epoch": 0.85, + "learning_rate": 3.2667029148750496e-06, + "loss": 50.4731, + "reward": 0.7624, + "step": 5989 + }, + { + "epoch": 0.85, + "learning_rate": 3.3504038497238113e-06, + "loss": 48.9891, + "reward": 0.6599, + "step": 5999 + }, + { + "epoch": 0.85, + "learning_rate": 3.4346268014304675e-06, + "loss": 49.4926, + "reward": 0.8475, + "step": 6009 + }, + { + "epoch": 0.85, + "learning_rate": 3.519345117531848e-06, + "loss": 48.2269, + "reward": 1.0827, + "step": 6019 + }, + { + "epoch": 0.85, + "learning_rate": 3.6045319888061005e-06, + "loss": 57.1803, + "reward": 1.0233, + "step": 6029 + }, + { + "epoch": 0.86, + "learning_rate": 3.690160457756472e-06, + "loss": 62.3532, + "reward": 1.2877, + "step": 6039 + }, + { + "epoch": 0.86, + "learning_rate": 3.7762034271420256e-06, + "loss": 58.1885, + "reward": 1.3709, + "step": 6049 + }, + { + "epoch": 0.86, + "learning_rate": 3.862633668552651e-06, + "loss": 52.3508, + "reward": 1.2359, + "step": 6059 + }, + { + "epoch": 0.86, + "learning_rate": 3.949423831025467e-06, + "loss": 57.1927, + "reward": 1.1431, + "step": 6069 + }, + { + "epoch": 0.86, + "learning_rate": 4.036546449700138e-06, + "loss": 56.5176, + "reward": 0.8168, + "step": 6079 + }, + { + "epoch": 0.86, + "learning_rate": 4.123973954510088e-06, + "loss": 54.7755, + "reward": 1.1523, + "step": 6089 + }, + { + "epoch": 0.86, + "learning_rate": 4.2116786789071585e-06, + "loss": 60.7935, + "reward": 1.2583, + "step": 6099 + }, + { + "epoch": 0.86, + "learning_rate": 4.299632868616677e-06, + "loss": 61.5845, + "reward": 1.4049, + "step": 6109 + }, + { + "epoch": 0.87, + "learning_rate": 4.387808690420356e-06, + "loss": 55.3898, + "reward": 0.5877, + "step": 6119 + }, + { + "epoch": 0.87, + "learning_rate": 4.476178240964118e-06, + "loss": 59.5955, + "reward": 0.807, + "step": 6129 + }, + { + "epoch": 0.87, + "learning_rate": 4.564713555588204e-06, + "loss": 57.5526, + "reward": 1.0594, + "step": 6139 + }, + { + "epoch": 0.87, + "learning_rate": 4.653386617176564e-06, + "loss": 60.5878, + "reward": 0.856, + "step": 6149 + }, + { + "epoch": 0.87, + "learning_rate": 4.742169365022975e-06, + "loss": 64.5847, + "reward": 1.2434, + "step": 6159 + }, + { + "epoch": 0.87, + "learning_rate": 4.8310337037108165e-06, + "loss": 56.7269, + "reward": 0.5983, + "step": 6169 + }, + { + "epoch": 0.87, + "learning_rate": 4.919951512003969e-06, + "loss": 63.8131, + "reward": 0.9494, + "step": 6179 + }, + { + "epoch": 0.88, + "learning_rate": 5.008894651745777e-06, + "loss": 64.3347, + "reward": 0.8326, + "step": 6189 + }, + { + "epoch": 0.88, + "learning_rate": 5.097834976763422e-06, + "loss": 64.7426, + "reward": 1.1951, + "step": 6199 + }, + { + "epoch": 0.88, + "learning_rate": 5.186744341774786e-06, + "loss": 68.0429, + "reward": 1.0377, + "step": 6209 + }, + { + "epoch": 0.88, + "learning_rate": 5.275594611295115e-06, + "loss": 64.5824, + "reward": 0.901, + "step": 6219 + }, + { + "epoch": 0.88, + "learning_rate": 5.364357668540472e-06, + "loss": 66.7038, + "reward": 0.9975, + "step": 6229 + }, + { + "epoch": 0.88, + "learning_rate": 5.453005424325388e-06, + "loss": 67.0337, + "reward": 1.3321, + "step": 6239 + }, + { + "epoch": 0.88, + "learning_rate": 5.541509825951659e-06, + "loss": 63.6395, + "reward": 0.9098, + "step": 6249 + }, + { + "epoch": 0.89, + "learning_rate": 5.629842866085728e-06, + "loss": 67.9033, + "reward": 1.1441, + "step": 6259 + }, + { + "epoch": 0.89, + "learning_rate": 5.717976591621569e-06, + "loss": 70.9057, + "reward": 0.8948, + "step": 6269 + }, + { + "epoch": 0.89, + "learning_rate": 5.805883112526548e-06, + "loss": 71.5159, + "reward": 1.16, + "step": 6279 + }, + { + "epoch": 0.89, + "learning_rate": 5.8935346106672345e-06, + "loss": 76.1282, + "reward": 0.7389, + "step": 6289 + }, + { + "epoch": 0.89, + "learning_rate": 5.980903348612458e-06, + "loss": 70.8815, + "reward": 0.7639, + "step": 6299 + }, + { + "epoch": 0.89, + "learning_rate": 6.067961678410916e-06, + "loss": 77.9602, + "reward": 1.1827, + "step": 6309 + }, + { + "epoch": 0.89, + "learning_rate": 6.154682050340336e-06, + "loss": 72.6772, + "reward": 0.9062, + "step": 6319 + }, + { + "epoch": 0.9, + "learning_rate": 6.2410370216256875e-06, + "loss": 79.4807, + "reward": 0.9883, + "step": 6329 + }, + { + "epoch": 0.9, + "learning_rate": 6.326999265123421e-06, + "loss": 74.7358, + "reward": 1.2483, + "step": 6339 + }, + { + "epoch": 0.9, + "learning_rate": 6.412541577969229e-06, + "loss": 83.4259, + "reward": 1.0581, + "step": 6349 + }, + { + "epoch": 0.9, + "learning_rate": 6.497636890186405e-06, + "loss": 89.0968, + "reward": 0.8648, + "step": 6359 + }, + { + "epoch": 0.9, + "learning_rate": 6.582258273252179e-06, + "loss": 77.8441, + "reward": 0.5074, + "step": 6369 + }, + { + "epoch": 0.9, + "learning_rate": 6.666378948619256e-06, + "loss": 78.9905, + "reward": 0.8686, + "step": 6379 + }, + { + "epoch": 0.9, + "learning_rate": 6.749972296189986e-06, + "loss": 78.3821, + "reward": 0.954, + "step": 6389 + }, + { + "epoch": 0.91, + "learning_rate": 6.833011862740247e-06, + "loss": 82.9752, + "reward": 0.6261, + "step": 6399 + }, + { + "epoch": 0.91, + "learning_rate": 6.915471370290685e-06, + "loss": 80.6557, + "reward": 1.1273, + "step": 6409 + }, + { + "epoch": 0.91, + "learning_rate": 6.9973247244223385e-06, + "loss": 80.5177, + "reward": 0.5851, + "step": 6419 + }, + { + "epoch": 0.91, + "learning_rate": 7.078546022534314e-06, + "loss": 85.0407, + "reward": 0.8287, + "step": 6429 + }, + { + "epoch": 0.91, + "learning_rate": 7.1591095620406605e-06, + "loss": 84.4299, + "reward": 0.8082, + "step": 6439 + }, + { + "epoch": 0.91, + "learning_rate": 7.2389898485040076e-06, + "loss": 90.9555, + "reward": 1.3112, + "step": 6449 + }, + { + "epoch": 0.91, + "learning_rate": 7.318161603703281e-06, + "loss": 96.0831, + "reward": 0.7038, + "step": 6459 + }, + { + "epoch": 0.92, + "learning_rate": 7.39659977363308e-06, + "loss": 90.6466, + "reward": 1.1145, + "step": 6469 + }, + { + "epoch": 0.92, + "learning_rate": 7.474279536431989e-06, + "loss": 95.4607, + "reward": 0.9177, + "step": 6479 + }, + { + "epoch": 0.92, + "learning_rate": 7.551176310237556e-06, + "loss": 98.4857, + "reward": 1.0056, + "step": 6489 + }, + { + "epoch": 0.92, + "learning_rate": 7.627265760965185e-06, + "loss": 90.1566, + "reward": 0.8314, + "step": 6499 + }, + { + "epoch": 0.92, + "learning_rate": 7.702523810008747e-06, + "loss": 97.2564, + "reward": 1.4078, + "step": 6509 + }, + { + "epoch": 0.92, + "learning_rate": 7.776926641860259e-06, + "loss": 98.7758, + "reward": 1.3081, + "step": 6519 + }, + { + "epoch": 0.92, + "learning_rate": 7.850450711646322e-06, + "loss": 103.4797, + "reward": 1.0738, + "step": 6529 + }, + { + "epoch": 0.93, + "learning_rate": 7.923072752578964e-06, + "loss": 103.8763, + "reward": 0.9816, + "step": 6539 + }, + { + "epoch": 0.93, + "learning_rate": 7.994769783318397e-06, + "loss": 100.5021, + "reward": 0.948, + "step": 6549 + }, + { + "epoch": 0.93, + "learning_rate": 8.065519115245542e-06, + "loss": 103.6766, + "reward": 0.7321, + "step": 6559 + }, + { + "epoch": 0.93, + "learning_rate": 8.135298359641825e-06, + "loss": 97.4703, + "reward": 1.2891, + "step": 6569 + }, + { + "epoch": 0.93, + "learning_rate": 8.204085434774159e-06, + "loss": 96.2505, + "reward": 1.2956, + "step": 6579 + }, + { + "epoch": 0.93, + "learning_rate": 8.271858572882704e-06, + "loss": 99.8216, + "reward": 0.7402, + "step": 6589 + }, + { + "epoch": 0.93, + "learning_rate": 8.338596327069332e-06, + "loss": 99.5118, + "reward": 0.9592, + "step": 6599 + }, + { + "epoch": 0.94, + "learning_rate": 8.404277578084488e-06, + "loss": 105.8172, + "reward": 0.7717, + "step": 6609 + }, + { + "epoch": 0.94, + "learning_rate": 8.46888154101045e-06, + "loss": 102.5928, + "reward": 1.1715, + "step": 6619 + }, + { + "epoch": 0.94, + "learning_rate": 8.532387771838693e-06, + "loss": 96.6503, + "reward": 0.8611, + "step": 6629 + }, + { + "epoch": 0.94, + "learning_rate": 8.594776173939482e-06, + "loss": 108.7798, + "reward": 1.0045, + "step": 6639 + }, + { + "epoch": 0.94, + "learning_rate": 8.656027004421407e-06, + "loss": 102.2718, + "reward": 1.2318, + "step": 6649 + }, + { + "epoch": 0.94, + "learning_rate": 8.716120880379119e-06, + "loss": 114.8146, + "reward": 1.2226, + "step": 6659 + }, + { + "epoch": 0.94, + "learning_rate": 8.775038785027017e-06, + "loss": 118.673, + "reward": 1.2495, + "step": 6669 + }, + { + "epoch": 0.95, + "learning_rate": 8.832762073717173e-06, + "loss": 105.3002, + "reward": 1.0926, + "step": 6679 + }, + { + "epoch": 0.95, + "learning_rate": 8.889272479839424e-06, + "loss": 105.6608, + "reward": 0.9392, + "step": 6689 + }, + { + "epoch": 0.95, + "learning_rate": 8.944552120601897e-06, + "loss": 113.6069, + "reward": 0.7844, + "step": 6699 + }, + { + "epoch": 0.95, + "learning_rate": 8.998583502690018e-06, + "loss": 112.2587, + "reward": 0.794, + "step": 6709 + }, + { + "epoch": 0.95, + "learning_rate": 9.05134952780229e-06, + "loss": 114.1278, + "reward": 0.6663, + "step": 6719 + }, + { + "epoch": 0.95, + "learning_rate": 9.10283349806111e-06, + "loss": 120.6909, + "reward": 1.0002, + "step": 6729 + }, + { + "epoch": 0.95, + "learning_rate": 9.153019121296794e-06, + "loss": 115.4897, + "reward": 0.8669, + "step": 6739 + }, + { + "epoch": 0.96, + "learning_rate": 9.201890516203278e-06, + "loss": 119.1285, + "reward": 1.1836, + "step": 6749 + }, + { + "epoch": 0.96, + "learning_rate": 9.249432217363754e-06, + "loss": 138.4996, + "reward": 1.1062, + "step": 6759 + }, + { + "epoch": 0.96, + "learning_rate": 9.295629180144762e-06, + "loss": 120.5787, + "reward": 0.9629, + "step": 6769 + }, + { + "epoch": 0.96, + "learning_rate": 9.340466785457039e-06, + "loss": 122.6466, + "reward": 0.6222, + "step": 6779 + }, + { + "epoch": 0.96, + "learning_rate": 9.383930844381784e-06, + "loss": 119.8385, + "reward": 1.0842, + "step": 6789 + }, + { + "epoch": 0.96, + "learning_rate": 9.426007602660732e-06, + "loss": 127.7601, + "reward": 1.1969, + "step": 6799 + }, + { + "epoch": 0.96, + "learning_rate": 9.466683745048734e-06, + "loss": 139.4465, + "reward": 0.7267, + "step": 6809 + }, + { + "epoch": 0.97, + "learning_rate": 9.505946399527365e-06, + "loss": 133.0782, + "reward": 0.308, + "step": 6819 + }, + { + "epoch": 0.97, + "learning_rate": 9.543783141378298e-06, + "loss": 137.3628, + "reward": 1.474, + "step": 6829 + }, + { + "epoch": 0.97, + "learning_rate": 9.580181997115115e-06, + "loss": 132.7314, + "reward": 0.8059, + "step": 6839 + }, + { + "epoch": 0.97, + "learning_rate": 9.615131448272345e-06, + "loss": 127.4448, + "reward": 0.97, + "step": 6849 + }, + { + "epoch": 0.97, + "learning_rate": 9.648620435050486e-06, + "loss": 141.4186, + "reward": 0.8046, + "step": 6859 + }, + { + "epoch": 0.97, + "learning_rate": 9.680638359815904e-06, + "loss": 141.4553, + "reward": 0.6626, + "step": 6869 + }, + { + "epoch": 0.97, + "learning_rate": 9.71117509045445e-06, + "loss": 144.86, + "reward": 0.9381, + "step": 6879 + }, + { + "epoch": 0.98, + "learning_rate": 9.740220963577806e-06, + "loss": 142.082, + "reward": 0.8949, + "step": 6889 + }, + { + "epoch": 0.98, + "learning_rate": 9.767766787581455e-06, + "loss": 148.9801, + "reward": 0.4727, + "step": 6899 + }, + { + "epoch": 0.98, + "learning_rate": 9.793803845553388e-06, + "loss": 150.5426, + "reward": 0.9139, + "step": 6909 + }, + { + "epoch": 0.98, + "learning_rate": 9.815940382948672e-06, + "loss": 139.7014, + "reward": 0.6917, + "step": 6919 + }, + { + "epoch": 0.98, + "learning_rate": 9.83908848246203e-06, + "loss": 145.0367, + "reward": 0.9368, + "step": 6929 + }, + { + "epoch": 0.98, + "learning_rate": 9.860705246100099e-06, + "loss": 147.8457, + "reward": 1.0734, + "step": 6939 + }, + { + "epoch": 0.98, + "learning_rate": 9.88078383320978e-06, + "loss": 156.1528, + "reward": 0.8522, + "step": 6949 + }, + { + "epoch": 0.99, + "learning_rate": 9.899317889895932e-06, + "loss": 156.5533, + "reward": 1.1273, + "step": 6959 + }, + { + "epoch": 0.99, + "learning_rate": 9.916301551032096e-06, + "loss": 158.8596, + "reward": 0.642, + "step": 6969 + }, + { + "epoch": 0.99, + "learning_rate": 9.931729442116505e-06, + "loss": 163.7104, + "reward": 1.1445, + "step": 6979 + }, + { + "epoch": 0.99, + "learning_rate": 9.94559668097286e-06, + "loss": 159.2521, + "reward": 1.2233, + "step": 6989 + }, + { + "epoch": 0.99, + "learning_rate": 9.957898879295298e-06, + "loss": 163.0544, + "reward": 1.0569, + "step": 6999 + }, + { + "epoch": 0.99, + "learning_rate": 9.968632144037083e-06, + "loss": 168.6254, + "reward": 0.9953, + "step": 7009 + }, + { + "epoch": 0.99, + "learning_rate": 9.977793078642554e-06, + "loss": 182.9738, + "reward": 1.1448, + "step": 7019 + }, + { + "epoch": 1.0, + "learning_rate": 9.985378784121987e-06, + "loss": 180.7952, + "reward": 0.6883, + "step": 7029 + }, + { + "epoch": 1.0, + "learning_rate": 9.991386859968964e-06, + "loss": 181.614, + "reward": 1.3635, + "step": 7039 + }, + { + "epoch": 1.0, + "learning_rate": 9.995815404920029e-06, + "loss": 182.2882, + "reward": 0.869, + "step": 7049 + }, + { + "epoch": 1.0, + "learning_rate": 9.998663017556347e-06, + "loss": 176.3692, + "reward": 1.3847, + "step": 7059 + } + ], + "max_steps": 7063, + "num_train_epochs": 1.0, + "total_flos": 0, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd78626984cdda75a6796940b1d9b43bbbca3274 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0601825e97bfd3f110a95de205bee2e904e126713799c8a249c6751f1ca4d299 +size 3272 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..68b9574c5977fe3d80238dfb094f818ac32965fe Binary files /dev/null and b/training_loss.png differ diff --git a/training_reward.png b/training_reward.png new file mode 100644 index 0000000000000000000000000000000000000000..c510c3263675973d39012f652b0dbe92ac475c5f Binary files /dev/null and b/training_reward.png differ diff --git a/value_head.bin b/value_head.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a212d18eedc1df2504d3b15ca910795fa4ec6a9 --- /dev/null +++ b/value_head.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584f283225bcd417b47828772c9babc12d81fd7336dadc84d8f567a4f33cb7dc +size 17395