maxstahl committed on
Commit
6885c4c
1 Parent(s): def2335

A2C Training

Browse files
Files changed (8) hide show
  1. README.md +1 -1
  2. a2c.zip +2 -2
  3. a2c/data +0 -0
  4. a2c/policy.optimizer.pth +1 -1
  5. a2c/policy.pth +1 -1
  6. config.json +0 -0
  7. replay.mp4 +0 -0
  8. results.json +1 -1
README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
16
  type: PongNoFrameskip-v4
17
  metrics:
18
  - type: mean_reward
19
- value: -1.80 +/- 12.85
20
  name: mean_reward
21
  verified: false
22
  ---
 
16
  type: PongNoFrameskip-v4
17
  metrics:
18
  - type: mean_reward
19
+ value: 21.00 +/- 0.00
20
  name: mean_reward
21
  verified: false
22
  ---
a2c.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e23248cc8af3f7e224f84d2791921e0fb670d7afe96ce4b40eea145271e805dd
3
- size 13593746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cf576da31a00950a735be1c001c51fef07818577f5a38e4fa843e5540542683
3
+ size 13593773
a2c/data CHANGED
The diff for this file is too large to render. See raw diff
 
a2c/policy.optimizer.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03b396c069b65622f812acfc5e87a0c85eb847fe535db255e86018c2d4ab5651
3
  size 6733134
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f14bd316e55e99d01dd318494d5b03acb038e41c75483b5425693edfb17d34d
3
  size 6733134
a2c/policy.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c994e3f4a7c1dc9a7d7ca6d4fd0ede856ff4e908cb53e21eccbaf407a585ce3
3
  size 6733298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12da88898b0299f07e8d2a5f1c5c580b45b3c65ce9b0242d137f02b868941dff
3
  size 6733298
config.json CHANGED
The diff for this file is too large to render. See raw diff
 
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": -1.8, "std_reward": 12.851459061133877, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-10-04T19:35:15.343716"}
 
1
+ {"mean_reward": 21.0, "std_reward": 0.0, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-10-12T09:57:55.671113"}