maxstahl committed
Commit dc518ae
1 Parent(s): 6885c4c

A2C Training 3h

README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
       type: PongNoFrameskip-v4
     metrics:
     - type: mean_reward
-      value: 21.00 +/- 0.00
+      value: 19.60 +/- 0.80
       name: mean_reward
       verified: false
 ---
a2c_6h.zip ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b94bf42f575a5bd8fb371a051a927f46fd403cf41b87bd43592bf40f3131daf3
+size 13593773
a2c_6h/_stable_baselines3_version ADDED
@@ -0,0 +1 @@
+2.0.0a5
a2c_6h/data ADDED
The diff for this file is too large to render. See raw diff
 
a2c_6h/policy.optimizer.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e72be3f0e5c470df35fd90d92db34d43daddd89ca1f97a89d6f990481e2b5c14
+size 6733134
a2c_6h/policy.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99c3cf74b6a5840405db23046e5fed0fe7b4b045c82897fbcc2a6ef9ffefcaa4
+size 6733298
a2c_6h/pytorch_variables.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebdad4b9cfe9cd22a3abadb5623bf7bb1f6eb2e408740245eb3f2044b0adc018
+size 864
a2c_6h/system_info.txt ADDED
@@ -0,0 +1,9 @@
+- OS: macOS-14.7-arm64-arm-64bit Darwin Kernel Version 23.6.0: Wed Jul 31 20:48:52 PDT 2024; root:xnu-10063.141.1.700.5~1/RELEASE_ARM64_T6020
+- Python: 3.10.15
+- Stable-Baselines3: 2.0.0a5
+- PyTorch: 2.4.1
+- GPU Enabled: False
+- Numpy: 1.26.4
+- Cloudpickle: 2.2.1
+- Gymnasium: 0.28.1
+- OpenAI Gym: 0.25.2
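system_info.txt uses the same layout that Stable-Baselines3's get_system_info() helper prints; a sketch of regenerating such a file (whether the packaging script used exactly this call is an assumption):

```python
from stable_baselines3.common.utils import get_system_info

# Returns a dict of fields plus the formatted "- Key: value" report shown above.
env_info, env_info_str = get_system_info(print_info=False)

with open("system_info.txt", "w") as f:
    f.write(env_info_str)
```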
config.json CHANGED
The diff for this file is too large to render. See raw diff
 
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": 21.0, "std_reward": 0.0, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-10-12T09:57:55.671113"}
 
1
+ {"mean_reward": 19.6, "std_reward": 0.8, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-10-15T07:24:31.313852"}