winkin119 committed on
Commit
405aaee
·
verified ·
1 Parent(s): 3caae95

upload via upload_folder 2025-08-14T10:21:06.581228+00:00

Browse files
README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ env_name: LunarLander-v3
3
+ tags:
4
+ - LunarLander-v3
5
+ - rainbow-dqn
6
+ - reinforcement-learning
7
+ - custom-implementation
8
+ - deep-q-learning
9
+ - pytorch
10
+ - rainbow
11
+ - dqn
12
+ model-index:
13
+ - name: Rainbow-2d-LunarLander-v3
14
+ results:
15
+ - task:
16
+ type: reinforcement-learning
17
+ name: reinforcement-learning
18
+ dataset:
19
+ name: LunarLander-v3
20
+ type: LunarLander-v3
21
+ metrics:
22
+ - type: mean_reward
23
+ value: 192.34 +/- 127.62
24
+ name: mean_reward
25
+ verified: false
26
+ ---
27
+
28
+ # **Rainbow-DQN** Agent playing **LunarLander-v3**
29
+ This is a trained model of a **Rainbow-DQN** agent playing **LunarLander-v3**.
30
+
31
+ ## Usage
32
+ ### Create the conda env (inside a clone of https://github.com/GeneHit/drl_practice)
33
+ ```bash
34
+ conda create -n drl python=3.12
35
+ conda activate drl
36
+ python -m pip install -r requirements.txt
37
+ ```
38
+
39
+ ### Play with the full model
40
+ ```python
41
+ # load the full model
42
+ model = load_from_hub(repo_id="winkin119/Rainbow-2d-LunarLander-v3", filename="full_model.pt")
43
+
44
+ # Create the environment. Don't forget to check the necessary wrappers in the env setup.
45
+ env = gym.make("LunarLander-v3")
46
+ state, _ = env.reset()
47
+ action = model.action(state)
48
+ ...
49
+ ```
50
+ There is also a state-dict version of the model; you can find the corresponding definition in the repo.
eval_result.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "mean_reward": 192.33588980155548,
3
+ "std_reward": 127.62317107151691,
4
+ "datetime": "2025-08-13T23:25:51.510828+00:00",
5
+ "train_duration_min": "283.82"
6
+ }
full_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a1a95bbf540d677ec6b8011d3c4adceb5913382360dca955519344a98fe52a1
3
+ size 4717877
params.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env_config": {
3
+ "env_id": "LunarLander-v3",
4
+ "env_kwargs": {},
5
+ "max_steps": null,
6
+ "normalize_obs": false,
7
+ "use_image": true,
8
+ "vector_env_num": 6,
9
+ "use_multi_processing": true,
10
+ "image_shape": [
11
+ 84,
12
+ 84
13
+ ],
14
+ "frame_stack": 4,
15
+ "frame_skip": 2,
16
+ "training_render_mode": "rgb_array"
17
+ },
18
+ "device": "mps",
19
+ "learning_rate": 0.0001,
20
+ "gamma": 0.99,
21
+ "checkpoint_pathname": "",
22
+ "max_grad_norm": 0.5,
23
+ "log_interval": 100,
24
+ "eval_episodes": 100,
25
+ "eval_random_seed": 42,
26
+ "eval_video_num": 10,
27
+ "timesteps": 225000,
28
+ "epsilon_schedule": {
29
+ "_type": "ConstantSchedule",
30
+ "_module": "practice.utils_for_coding.scheduler_utils",
31
+ "value": 0.0
32
+ },
33
+ "replay_buffer_capacity": 0,
34
+ "batch_size": 64,
35
+ "train_interval": 1,
36
+ "target_update_interval": 250,
37
+ "update_start_step": 2000,
38
+ "dqn_algorithm": "rainbow",
39
+ "noisy_std": 0.5,
40
+ "per_buffer_config": {
41
+ "capacity": 135000,
42
+ "n_step": 3,
43
+ "gamma": 0.99,
44
+ "use_uniform_sampling": true,
45
+ "alpha": 0.6,
46
+ "beta": 0.4,
47
+ "beta_increment": 2.424242424242424e-06
48
+ },
49
+ "v_min": -300.0,
50
+ "v_max": 300.0,
51
+ "num_atoms": 51
52
+ }
replay.mp4 ADDED
Binary file (17.7 kB). View file
 
state_dict.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:109b8a5b744c269487c2e4d80ad1bccdf73323184fff603bb0949d4ba7b12676
3
+ size 4714165
tensorboard/events.out.tfevents.1755110428.winkindeMacBook-Air.local.24455.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e695b6d38591d9c1548aa5b23d0591ef6f639609889b351fe5f69e5883bf903
3
+ size 2166680