Megalino111 committed
Commit 5f67c1a • 1 Parent(s): 55a7968
Update README.md

README.md CHANGED
@@ -30,8 +30,74 @@ TODO: Add your code
 
 
 ```python
-
-
+import os
+
+import gymnasium as gym
+import panda_gym
+
+from huggingface_sb3 import load_from_hub, package_to_hub
+
+from stable_baselines3 import A2C
+from stable_baselines3.common.evaluation import evaluate_policy
+from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
+from stable_baselines3.common.env_util import make_vec_env
+
+from huggingface_hub import notebook_login
+
+
+env_id = "PandaReachDense-v3"
+
+# Create the env
+env = gym.make(env_id)
+
+# Get the state space and action space
+s_size = env.observation_space.shape
+a_size = env.action_space
+
+print("_____OBSERVATION SPACE_____ \n")
+print("The State Space is: ", s_size)
+print("Sample observation", env.observation_space.sample()) # Get a random observation
+
+print("\n _____ACTION SPACE_____ \n")
+print("The Action Space is: ", a_size)
+print("Action Space Sample", env.action_space.sample()) # Take a random action
+
+env = make_vec_env(env_id, n_envs=4)
+
+env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)
+
+model = A2C("MultiInputPolicy", env, verbose=1) # Create the A2C model and try to find the best parameters
+
+model.learn(1_000_000)
+
+# Save the model and VecNormalize statistics when saving the agent
+model.save("a2c-PandaReachDense-v3")
+env.save("vec_normalize.pkl")
+
+
+from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
+
+# Load the saved statistics
+eval_env = DummyVecEnv([lambda: gym.make("PandaReachDense-v3")])
+eval_env = VecNormalize.load("vec_normalize.pkl", eval_env)
+
+# We need to override the render_mode
+eval_env.render_mode = "rgb_array"
+
+# do not update them at test time
+eval_env.training = False
+# reward normalization is not needed at test time
+eval_env.norm_reward = False
+
+# Load the agent
+model = A2C.load("a2c-PandaReachDense-v3")
+
+mean_reward, std_reward = evaluate_policy(model, eval_env)
+
+print(f"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}")
+
+
+
 
 ...
 ```
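The block above imports `load_from_hub`, `package_to_hub`, and `notebook_login` but never calls them. A minimal sketch of how the trained agent could then be pushed to the Hub with these helpers is shown below; it assumes the `model`, `env_id`, and `eval_env` variables from the block above, and the `repo_id` is a placeholder.

```python
from huggingface_hub import notebook_login
from huggingface_sb3 import package_to_hub

# Authenticate with the Hugging Face Hub (prompts for a write token in a notebook).
notebook_login()

# Package the trained model, the evaluation env, and a generated model card,
# then upload everything to the repo below.
package_to_hub(
    model=model,
    model_name="a2c-PandaReachDense-v3",
    model_architecture="A2C",
    env_id=env_id,
    eval_env=eval_env,
    repo_id="username/a2c-PandaReachDense-v3",  # placeholder: replace with your own Hub repo
    commit_message="Initial commit",
)
```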