Commit
·
647840c
1
Parent(s):
d1afe9c
pushing model
Browse files
- DQPN_baseline.cleanrl_model +0 -0
- README.md +3 -3
- dqpn_duncan.py +3 -2
- events.out.tfevents.1676248128.wycliffeduncan-Victus-by-HP-Gaming-Laptop-15-fa0xxx.40260.0 → events.out.tfevents.1676955102.portal.2502556.0 +2 -2
- replay.mp4 +0 -0
- videos/CartPole-v1__DQPN_baseline__1__1676248121-eval/rl-video-episode-0.mp4 +0 -0
- videos/CartPole-v1__DQPN_baseline__1__1676248121-eval/rl-video-episode-1.mp4 +0 -0
- videos/CartPole-v1__DQPN_baseline__1__1676248121-eval/rl-video-episode-8.mp4 +0 -0
- videos/CartPole-v1__DQPN_baseline__1__1676955098-eval/rl-video-episode-0.mp4 +0 -0
- videos/CartPole-v1__DQPN_baseline__1__1676955098-eval/rl-video-episode-1.mp4 +0 -0
- videos/CartPole-v1__DQPN_baseline__1__1676955098-eval/rl-video-episode-8.mp4 +0 -0
DQPN_baseline.cleanrl_model
CHANGED
Binary files a/DQPN_baseline.cleanrl_model and b/DQPN_baseline.cleanrl_model differ
|
|
README.md
CHANGED
@@ -16,7 +16,7 @@ model-index:
|
|
16 |
type: CartPole-v1
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
-
value:
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
@@ -46,7 +46,7 @@ curl -OL https://huggingface.co/pfunk/CartPole-v1-DQPN_baseline-seed1/raw/main/d
|
|
46 |
curl -OL https://huggingface.co/pfunk/CartPole-v1-DQPN_baseline-seed1/raw/main/pyproject.toml
|
47 |
curl -OL https://huggingface.co/pfunk/CartPole-v1-DQPN_baseline-seed1/raw/main/poetry.lock
|
48 |
poetry install --all-extras
|
49 |
-
python dqpn_duncan.py --exp-name DQPN_baseline --target-tau 1 --policy-tau 1 --track --wandb-entity pfunk --wandb-project-name dqpn --save-model true --upload-model true --hf-entity pfunk --env-id CartPole-v1 --seed 1 --total-timesteps
|
50 |
```
|
51 |
|
52 |
# Hyperparameters
|
@@ -72,7 +72,7 @@ python dqpn_duncan.py --exp-name DQPN_baseline --target-tau 1 --policy-tau 1 --t
|
|
72 |
'target_network_frequency': 100,
|
73 |
'target_tau': 1.0,
|
74 |
'torch_deterministic': True,
|
75 |
-
'total_timesteps':
|
76 |
'track': True,
|
77 |
'train_frequency': 10,
|
78 |
'update_scalar': False,
|
|
|
16 |
type: CartPole-v1
|
17 |
metrics:
|
18 |
- type: mean_reward
|
19 |
+
value: 381.50 +/- 58.54
|
20 |
name: mean_reward
|
21 |
verified: false
|
22 |
---
|
|
|
46 |
curl -OL https://huggingface.co/pfunk/CartPole-v1-DQPN_baseline-seed1/raw/main/pyproject.toml
|
47 |
curl -OL https://huggingface.co/pfunk/CartPole-v1-DQPN_baseline-seed1/raw/main/poetry.lock
|
48 |
poetry install --all-extras
|
49 |
+
python dqpn_duncan.py --exp-name DQPN_baseline --target-tau 1 --policy-tau 1 --track --wandb-entity pfunk --wandb-project-name dqpn --save-model true --upload-model true --hf-entity pfunk --env-id CartPole-v1 --seed 1 --total-timesteps 25000000
|
50 |
```
|
51 |
|
52 |
# Hyperparameters
|
|
|
72 |
'target_network_frequency': 100,
|
73 |
'target_tau': 1.0,
|
74 |
'torch_deterministic': True,
|
75 |
+
'total_timesteps': 25000000,
|
76 |
'track': True,
|
77 |
'train_frequency': 10,
|
78 |
'update_scalar': False,
|
dqpn_duncan.py
CHANGED
@@ -78,6 +78,7 @@ def parse_args():
|
|
78 |
help="if the min TD error is within one std dev of mean -> update policy network")
|
79 |
parser.add_argument("--update-scalar", type=bool, default=False,
|
80 |
help="scalar = mean/max/0.5 and scales the # of steps between policy network updates")
|
|
|
81 |
|
82 |
args = parser.parse_args()
|
83 |
# fmt: on
|
@@ -225,8 +226,8 @@ if __name__ == "__main__":
|
|
225 |
td_target = data.rewards.flatten() + args.gamma * target_max * (1 - data.dones.flatten())
|
226 |
old_val = q_network(data.observations).gather(1, data.actions).squeeze()
|
227 |
#DUNCAN - calculate the error statistics
|
228 |
-
prev = old_val.detach().numpy()
|
229 |
-
new = td_target.detach().numpy()
|
230 |
diff = np.abs(prev-new)
|
231 |
mean = np.mean(diff)
|
232 |
maximum = np.max(diff)
|
|
|
78 |
help="if the min TD error is within one std dev of mean -> update policy network")
|
79 |
parser.add_argument("--update-scalar", type=bool, default=False,
|
80 |
help="scalar = mean/max/0.5 and scales the # of steps between policy network updates")
|
81 |
+
#DUNCAN - end
|
82 |
|
83 |
args = parser.parse_args()
|
84 |
# fmt: on
|
|
|
226 |
td_target = data.rewards.flatten() + args.gamma * target_max * (1 - data.dones.flatten())
|
227 |
old_val = q_network(data.observations).gather(1, data.actions).squeeze()
|
228 |
#DUNCAN - calculate the error statistics
|
229 |
+
prev = old_val.detach().cpu().numpy()
|
230 |
+
new = td_target.detach().cpu().numpy()
|
231 |
diff = np.abs(prev-new)
|
232 |
mean = np.mean(diff)
|
233 |
maximum = np.max(diff)
|
events.out.tfevents.1676248128.wycliffeduncan-Victus-by-HP-Gaming-Laptop-15-fa0xxx.40260.0 → events.out.tfevents.1676955102.portal.2502556.0
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac2d6df0226f7ba89a5201410b4b0cf715b38f46bedb84ff0c639160b6c389ef
|
3 |
+
size 106534158
|
replay.mp4
CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
|
|
videos/CartPole-v1__DQPN_baseline__1__1676248121-eval/rl-video-episode-0.mp4
DELETED
Binary file (38.7 kB)
|
|
videos/CartPole-v1__DQPN_baseline__1__1676248121-eval/rl-video-episode-1.mp4
DELETED
Binary file (48.2 kB)
|
|
videos/CartPole-v1__DQPN_baseline__1__1676248121-eval/rl-video-episode-8.mp4
DELETED
Binary file (42.6 kB)
|
|
videos/CartPole-v1__DQPN_baseline__1__1676955098-eval/rl-video-episode-0.mp4
ADDED
File without changes
|
videos/CartPole-v1__DQPN_baseline__1__1676955098-eval/rl-video-episode-1.mp4
ADDED
File without changes
|
videos/CartPole-v1__DQPN_baseline__1__1676955098-eval/rl-video-episode-8.mp4
ADDED
File without changes
|