mmorales34 committed on
Commit
647840c
·
1 Parent(s): d1afe9c

pushing model

Browse files
DQPN_baseline.cleanrl_model CHANGED
Binary files a/DQPN_baseline.cleanrl_model and b/DQPN_baseline.cleanrl_model differ
 
README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
16
  type: CartPole-v1
17
  metrics:
18
  - type: mean_reward
19
- value: 480.40 +/- 45.13
20
  name: mean_reward
21
  verified: false
22
  ---
@@ -46,7 +46,7 @@ curl -OL https://huggingface.co/pfunk/CartPole-v1-DQPN_baseline-seed1/raw/main/d
46
  curl -OL https://huggingface.co/pfunk/CartPole-v1-DQPN_baseline-seed1/raw/main/pyproject.toml
47
  curl -OL https://huggingface.co/pfunk/CartPole-v1-DQPN_baseline-seed1/raw/main/poetry.lock
48
  poetry install --all-extras
49
- python dqpn_duncan.py --exp-name DQPN_baseline --target-tau 1 --policy-tau 1 --track --wandb-entity pfunk --wandb-project-name dqpn --save-model true --upload-model true --hf-entity pfunk --env-id CartPole-v1 --seed 1 --total-timesteps 100000
50
  ```
51
 
52
  # Hyperparameters
@@ -72,7 +72,7 @@ python dqpn_duncan.py --exp-name DQPN_baseline --target-tau 1 --policy-tau 1 --t
72
  'target_network_frequency': 100,
73
  'target_tau': 1.0,
74
  'torch_deterministic': True,
75
- 'total_timesteps': 100000,
76
  'track': True,
77
  'train_frequency': 10,
78
  'update_scalar': False,
 
16
  type: CartPole-v1
17
  metrics:
18
  - type: mean_reward
19
+ value: 381.50 +/- 58.54
20
  name: mean_reward
21
  verified: false
22
  ---
 
46
  curl -OL https://huggingface.co/pfunk/CartPole-v1-DQPN_baseline-seed1/raw/main/pyproject.toml
47
  curl -OL https://huggingface.co/pfunk/CartPole-v1-DQPN_baseline-seed1/raw/main/poetry.lock
48
  poetry install --all-extras
49
+ python dqpn_duncan.py --exp-name DQPN_baseline --target-tau 1 --policy-tau 1 --track --wandb-entity pfunk --wandb-project-name dqpn --save-model true --upload-model true --hf-entity pfunk --env-id CartPole-v1 --seed 1 --total-timesteps 25000000
50
  ```
51
 
52
  # Hyperparameters
 
72
  'target_network_frequency': 100,
73
  'target_tau': 1.0,
74
  'torch_deterministic': True,
75
+ 'total_timesteps': 25000000,
76
  'track': True,
77
  'train_frequency': 10,
78
  'update_scalar': False,
dqpn_duncan.py CHANGED
@@ -78,6 +78,7 @@ def parse_args():
78
  help="if the min TD error is within one std dev of mean -> update policy network")
79
  parser.add_argument("--update-scalar", type=bool, default=False,
80
  help="scalar = mean/max/0.5 and scales the # of steps between policy network updates")
 
81
 
82
  args = parser.parse_args()
83
  # fmt: on
@@ -225,8 +226,8 @@ if __name__ == "__main__":
225
  td_target = data.rewards.flatten() + args.gamma * target_max * (1 - data.dones.flatten())
226
  old_val = q_network(data.observations).gather(1, data.actions).squeeze()
227
  #DUNCAN - calculate the error statistics
228
- prev = old_val.detach().numpy()
229
- new = td_target.detach().numpy()
230
  diff = np.abs(prev-new)
231
  mean = np.mean(diff)
232
  maximum = np.max(diff)
 
78
  help="if the min TD error is within one std dev of mean -> update policy network")
79
  parser.add_argument("--update-scalar", type=bool, default=False,
80
  help="scalar = mean/max/0.5 and scales the # of steps between policy network updates")
81
+ #DUNCAN - end
82
 
83
  args = parser.parse_args()
84
  # fmt: on
 
226
  td_target = data.rewards.flatten() + args.gamma * target_max * (1 - data.dones.flatten())
227
  old_val = q_network(data.observations).gather(1, data.actions).squeeze()
228
  #DUNCAN - calculate the error statistics
229
+ prev = old_val.detach().cpu().numpy()
230
+ new = td_target.detach().cpu().numpy()
231
  diff = np.abs(prev-new)
232
  mean = np.mean(diff)
233
  maximum = np.max(diff)
events.out.tfevents.1676248128.wycliffeduncan-Victus-by-HP-Gaming-Laptop-15-fa0xxx.40260.0 → events.out.tfevents.1676955102.portal.2502556.0 RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c88daa5228da5b3ad929b8146656dd0bde4cc97489c7dd31feffd7e3f085a5e7
3
- size 361364
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac2d6df0226f7ba89a5201410b4b0cf715b38f46bedb84ff0c639160b6c389ef
3
+ size 106534158
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
videos/CartPole-v1__DQPN_baseline__1__1676248121-eval/rl-video-episode-0.mp4 DELETED
Binary file (38.7 kB)
 
videos/CartPole-v1__DQPN_baseline__1__1676248121-eval/rl-video-episode-1.mp4 DELETED
Binary file (48.2 kB)
 
videos/CartPole-v1__DQPN_baseline__1__1676248121-eval/rl-video-episode-8.mp4 DELETED
Binary file (42.6 kB)
 
videos/CartPole-v1__DQPN_baseline__1__1676955098-eval/rl-video-episode-0.mp4 ADDED
File without changes
videos/CartPole-v1__DQPN_baseline__1__1676955098-eval/rl-video-episode-1.mp4 ADDED
File without changes
videos/CartPole-v1__DQPN_baseline__1__1676955098-eval/rl-video-episode-8.mp4 ADDED
File without changes