upload via upload_folder 2025-08-14T10:21:06.581228+00:00
Browse files- README.md +50 -0
- eval_result.json +6 -0
- full_model.pt +3 -0
- params.json +52 -0
- replay.mp4 +0 -0
- state_dict.pt +3 -0
- tensorboard/events.out.tfevents.1755110428.winkindeMacBook-Air.local.24455.0 +3 -0
README.md
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
env_name: LunarLander-v3
|
3 |
+
tags:
|
4 |
+
- LunarLander-v3
|
5 |
+
- rainbow-dqn
|
6 |
+
- reinforcement-learning
|
7 |
+
- custom-implementation
|
8 |
+
- deep-q-learning
|
9 |
+
- pytorch
|
10 |
+
- rainbow
|
11 |
+
- dqn
|
12 |
+
model-index:
|
13 |
+
- name: Rainbow-2d-LunarLander-v3
|
14 |
+
results:
|
15 |
+
- task:
|
16 |
+
type: reinforcement-learning
|
17 |
+
name: reinforcement-learning
|
18 |
+
dataset:
|
19 |
+
name: LunarLander-v3
|
20 |
+
type: LunarLander-v3
|
21 |
+
metrics:
|
22 |
+
- type: mean_reward
|
23 |
+
value: 192.34 +/- 127.62
|
24 |
+
name: mean_reward
|
25 |
+
verified: false
|
26 |
+
---
|
27 |
+
|
28 |
+
# **Rainbow-DQN** Agent playing **LunarLander-v3**
|
29 |
+
This is a trained model of a **Rainbow-DQN** agent playing **LunarLander-v3**.
|
30 |
+
|
31 |
+
## Usage
|
32 |
+
### Create the conda env inside a clone of https://github.com/GeneHit/drl_practice
|
33 |
+
```bash
|
34 |
+
conda create -n drl python=3.12
|
35 |
+
conda activate drl
|
36 |
+
python -m pip install -r requirements.txt
|
37 |
+
```
|
38 |
+
|
39 |
+
### Play with the full model
|
40 |
+
```python
|
41 |
+
# load the full model
|
42 |
+
model = load_from_hub(repo_id="winkin119/Rainbow-2d-LunarLander-v3", filename="full_model.pt")
|
43 |
+
|
44 |
+
# Create the environment. Don't forget to check the necessary wrappers in the env setup.
|
45 |
+
env = gym.make("LunarLander-v3")
|
46 |
+
state, _ = env.reset()
|
47 |
+
action = model.action(state)
|
48 |
+
...
|
49 |
+
```
|
50 |
+
There is also a state-dict version of the model (`state_dict.pt`); see the corresponding model definition in the repo to load it.
|
eval_result.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"mean_reward": 192.33588980155548,
|
3 |
+
"std_reward": 127.62317107151691,
|
4 |
+
"datetime": "2025-08-13T23:25:51.510828+00:00",
|
5 |
+
"train_duration_min": "283.82"
|
6 |
+
}
|
full_model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a1a95bbf540d677ec6b8011d3c4adceb5913382360dca955519344a98fe52a1
|
3 |
+
size 4717877
|
params.json
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"env_config": {
|
3 |
+
"env_id": "LunarLander-v3",
|
4 |
+
"env_kwargs": {},
|
5 |
+
"max_steps": null,
|
6 |
+
"normalize_obs": false,
|
7 |
+
"use_image": true,
|
8 |
+
"vector_env_num": 6,
|
9 |
+
"use_multi_processing": true,
|
10 |
+
"image_shape": [
|
11 |
+
84,
|
12 |
+
84
|
13 |
+
],
|
14 |
+
"frame_stack": 4,
|
15 |
+
"frame_skip": 2,
|
16 |
+
"training_render_mode": "rgb_array"
|
17 |
+
},
|
18 |
+
"device": "mps",
|
19 |
+
"learning_rate": 0.0001,
|
20 |
+
"gamma": 0.99,
|
21 |
+
"checkpoint_pathname": "",
|
22 |
+
"max_grad_norm": 0.5,
|
23 |
+
"log_interval": 100,
|
24 |
+
"eval_episodes": 100,
|
25 |
+
"eval_random_seed": 42,
|
26 |
+
"eval_video_num": 10,
|
27 |
+
"timesteps": 225000,
|
28 |
+
"epsilon_schedule": {
|
29 |
+
"_type": "ConstantSchedule",
|
30 |
+
"_module": "practice.utils_for_coding.scheduler_utils",
|
31 |
+
"value": 0.0
|
32 |
+
},
|
33 |
+
"replay_buffer_capacity": 0,
|
34 |
+
"batch_size": 64,
|
35 |
+
"train_interval": 1,
|
36 |
+
"target_update_interval": 250,
|
37 |
+
"update_start_step": 2000,
|
38 |
+
"dqn_algorithm": "rainbow",
|
39 |
+
"noisy_std": 0.5,
|
40 |
+
"per_buffer_config": {
|
41 |
+
"capacity": 135000,
|
42 |
+
"n_step": 3,
|
43 |
+
"gamma": 0.99,
|
44 |
+
"use_uniform_sampling": true,
|
45 |
+
"alpha": 0.6,
|
46 |
+
"beta": 0.4,
|
47 |
+
"beta_increment": 2.424242424242424e-06
|
48 |
+
},
|
49 |
+
"v_min": -300.0,
|
50 |
+
"v_max": 300.0,
|
51 |
+
"num_atoms": 51
|
52 |
+
}
|
replay.mp4
ADDED
Binary file (17.7 kB). View file
|
|
state_dict.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:109b8a5b744c269487c2e4d80ad1bccdf73323184fff603bb0949d4ba7b12676
|
3 |
+
size 4714165
|
tensorboard/events.out.tfevents.1755110428.winkindeMacBook-Air.local.24455.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e695b6d38591d9c1548aa5b23d0591ef6f639609889b351fe5f69e5883bf903
|
3 |
+
size 2166680
|