diff --git "a/rx_rover_PPO.ipynb" "b/rx_rover_PPO.ipynb" new file mode 100644--- /dev/null +++ "b/rx_rover_PPO.ipynb" @@ -0,0 +1,1263 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Proximal Policy Optimization" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "from gym import spaces\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib.patches import Rectangle\n", + "\n", + "import os\n", + "import random\n", + "import imageio\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "from torch.distributions import Categorical" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "class RoverGridEnv(gym.Env):\n", + " metadata={'render.modes': ['human']} \n", + " def __init__(self,max_ts=20): \n", + " super(RoverGridEnv,self).__init__()\n", + " self.max_ts=max_ts # Note: The Max_Timestamps is set to 20 by default.\n", + " self.grid_size=(15,15) \n", + " self.action_space=spaces.Discrete(5) \n", + " self.observation_space=spaces.MultiDiscrete([15,15,15,15,15,15])\n", + " self.rover_positions=np.array([[6,4],[10,4]])\n", + " self.operation_desks=np.array([[6,3],[10,3]])\n", + " self.rooms=np.array([[4,7],[4,10],[4,13],[8,7],[8,10],[8,13],[12,7],[12,10],[12,13]])\n", + " self.human_position=np.array([8,9])\n", + " self.targets=np.array([[5,10],[9,13]])\n", + " self.actions=[(0,-1),(0,1),(-1,0),(1,0),(0,0)] # Down,Up,Left,Right,Wait\n", + " self.rover_done=[False,False] \n", + " self.reset()\n", + " \n", + " def seed(self,seed=None):\n", + " np.random.seed(seed)\n", + " random.seed(seed)\n", + " \n", + " def reset(self):\n", + " self.current_step=0\n", + " self.rover_positions=np.array([[6,4],[10,4]])\n", + " self.rover_done=[False,False]\n", + " self.human_position=np.array([7,8])\n", + " self.current_step=0\n", + " return self._get_obs()\n", + " \n", + " def _get_obs(self):\n", + " return np.concatenate((self.rover_positions.flatten(),self.human_position))\n", + " \n", + " def step(self,actions):\n", + " rewards=np.zeros(2)\n", + " done=[False,False]\n", + " info={'message': ''} \n", + " for i,action in enumerate(actions):\n", + " if self.rover_done[i]:\n", + " done[i]=True \n", + " continue\n", + " prev_distance=np.linalg.norm(self.targets[i]-self.rover_positions[i])\n", + " if self._is_human_adjacent(self.rover_positions[i]):\n", + " rewards[i] -= 5\n", + " else:\n", + " delta=np.array(self.actions[action])\n", + " new_position=self.rover_positions[i]+delta\n", + " if self._out_of_bounds(new_position):\n", + " rewards[i] -= 15\n", + " continue\n", + " if self._collision(new_position,i):\n", + " rewards[i] -= 15\n", + " continue\n", + " self.rover_positions[i]=new_position\n", + " new_distance=np.linalg.norm(self.targets[i]-new_position)\n", + " if new_distance < prev_distance:\n", + " rewards[i]+=30 \n", + " else:\n", + " rewards[i] -= 20 \n", + " if np.array_equal(new_position,self.targets[i]):\n", + " rewards[i]+=100\n", + " self.rover_done[i]=True \n", + " done[i]=True\n", + "\n", + " # move human randomly\n", + " self._move_human()\n", + " self.current_step+=1\n", + " all_done=all(done) or self.current_step >= self.max_ts\n", + " if all_done and not all(done): # if the maximum number of steps is reached but not all targets were reached\n", + " info['message']='Maximum number of timestamps reached'\n", + " return self._get_obs(),rewards,all_done,info\n", + "\n", + " def _is_human_adjacent(self,position):\n", + " for delta in [(1,1),(1,-1),(-1,1),(-1,-1)]:\n", + " adjacent_position=position+np.array(delta)\n", + " if np.array_equal(adjacent_position,self.human_position):\n", + " return True\n", + " return False\n", + "\n", + " def _out_of_bounds(self,position):\n", + " return not (0 <= position[0] < self.grid_size[0] and 0 <= position[1] < self.grid_size[1])\n", + " \n", + " def _collision(self,new_position,rover_index):\n", + " if any(np.array_equal(new_position,pos) for pos in np.delete(self.rover_positions,rover_index,axis=0)):\n", + " return True # Collision with the other rover\n", + " if any(np.array_equal(new_position,pos) for pos in self.rooms):\n", + " return True # Collision with a room\n", + " if any(np.array_equal(new_position,pos) for pos in self.operation_desks):\n", + " return True # Collision with an operation desk\n", + " if np.array_equal(new_position,self.human_position):\n", + " return True # Collision with the human\n", + " return False\n", + " \n", + " def _move_human(self):\n", + " valid_moves=[move for move in self.actions if not self._out_of_bounds(self.human_position+np.array(move))]\n", + " self.human_position+=np.array(valid_moves[np.random.choice(len(valid_moves))])\n", + " \n", + " def render(self,mode='human',save_path=None):\n", + " fig,ax=plt.subplots(figsize=(7,7))\n", + " ax.set_xlim(0,self.grid_size[0])\n", + " ax.set_ylim(0,self.grid_size[1])\n", + " ax.set_xticks(np.arange(0,15,1))\n", + " ax.set_yticks(np.arange(0,15,1))\n", + " ax.grid(which='both')\n", + "\n", + " # draw elements\n", + " for pos in self.rover_positions:\n", + " ax.add_patch(Rectangle((pos[0]-0.5,pos[1]-0.5),1,1,color='blue'))\n", + " for pos in self.operation_desks:\n", + " ax.add_patch(Rectangle((pos[0]-0.5,pos[1]-0.5),1,1,color='darkgreen'))\n", + " for pos in self.rooms:\n", + " ax.add_patch(Rectangle((pos[0]-0.5,pos[1]-0.5),1,1,color='black'))\n", + " ax.add_patch(Rectangle((self.human_position[0]-0.5,self.human_position[1]-0.5),1,1,color='purple'))\n", + " for pos in self.targets:\n", + " ax.add_patch(Rectangle((pos[0]-0.5,pos[1]-0.5),1,1,color='yellow',alpha=0.5))\n", + "\n", + " if save_path is not None:\n", + " plt.savefig(save_path)\n", + " plt.close()\n", + " \n", + " def close(self):\n", + " plt.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Initial Setup\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "env=RoverGridEnv()\n", + "print(\"Initial Setup\")\n", + "observation=env.reset()\n", + "env.render()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PPO" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "class ActorCritic(nn.Module):\n", + " def __init__(self,\n", + " input_dim,\n", + " n_actions):\n", + " super(ActorCritic,self).__init__()\n", + " self.fc1=nn.Linear(input_dim,128)\n", + " self.fc2=nn.Linear(128,64)\n", + " self.actor=nn.Linear(64,n_actions)\n", + " self.critic=nn.Linear(64,1)\n", + "\n", + " def forward(self,x):\n", + " x=torch.relu(self.fc1(x))\n", + " x=torch.relu(self.fc2(x))\n", + " policy_logits=self.actor(x)\n", + " value=self.critic(x)\n", + " return policy_logits,value\n", + "\n", + "def compute_advantages(rewards,\n", + " values,\n", + " next_values,\n", + " gamma=0.99,\n", + " lambda_=0.95):\n", + " deltas=rewards+gamma*next_values-values\n", + " advantages=[]\n", + " advantage=0\n", + " for delta in reversed(deltas):\n", + " advantage=delta+gamma*lambda_*advantage\n", + " advantages.insert(0,advantage)\n", + " return torch.tensor(advantages,dtype=torch.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "def train_ppo(env,\n", + " actor_critic,\n", + " optimizer,\n", + " total_timesteps=10000,\n", + " gamma=0.99,\n", + " lambda_=0.95,\n", + " epsilon=0.2,\n", + " epochs=3,\n", + " batch_size=64):\n", + " \n", + " episode_rwds_ppo=[]\n", + " for _ in range(total_timesteps // batch_size):\n", + " obs=env.reset()\n", + " obs_list,action_list,reward_list,value_list,logprob_list=[],[],[],[],[]\n", + " total_episode_reward=0\n", + " for _ in range(batch_size):\n", + " obs_tensor=torch.tensor(obs,\n", + " dtype=torch.float32).unsqueeze(0)\n", + " policy_logits,value=actor_critic(obs_tensor)\n", + " dist=Categorical(logits=policy_logits)\n", + " action=dist.sample()\n", + " obs_list.append(obs)\n", + " action_list.append(action.item())\n", + " reward_list.append(0)\n", + " value_list.append(value.item())\n", + " logprob_list.append(dist.log_prob(action).item())\n", + " obs,rewards,done,_=env.step([action.item(),\n", + " action.item()])\n", + " reward_list[-1]=rewards.sum()\n", + " total_episode_reward+=rewards.sum()\n", + " if done:\n", + " episode_rwds_ppo.append(total_episode_reward)\n", + " print(f\"Episode {len(episode_rwds_ppo)} ended with reward: {total_episode_reward}\")\n", + " obs=env.reset()\n", + " total_episode_reward=0\n", + " break\n", + " obs_tensor=torch.tensor(np.array(obs_list),\n", + " dtype=torch.float32)\n", + " action_tensor=torch.tensor(action_list)\n", + "\n", + " reward_tensor=torch.tensor(reward_list,\n", + " dtype=torch.float32)\n", + " value_tensor=torch.tensor(value_list,\n", + " dtype=torch.float32)\n", + " logprob_tensor=torch.tensor(logprob_list,\n", + " dtype=torch.float32)\n", + " advantages=compute_advantages(reward_tensor,\n", + " value_tensor,\n", + " torch.cat((value_tensor[1:],\n", + " torch.tensor([0])),\n", + " axis=0),\n", + " gamma,\n", + " lambda_)\n", + "\n", + " for _ in range(epochs):\n", + " new_policy_logits,new_values=actor_critic(obs_tensor)\n", + " new_dist=Categorical(logits=new_policy_logits)\n", + " new_logprobs=new_dist.log_prob(action_tensor)\n", + " ratio=torch.exp(new_logprobs-logprob_tensor)\n", + " surr1=ratio*advantages\n", + " surr2=torch.clamp(ratio,\n", + " 1-epsilon,\n", + " 1+epsilon)*advantages\n", + " policy_loss=-torch.min(surr1,surr2).mean()\n", + " value_loss=nn.MSELoss()(new_values.squeeze(),\n", + " reward_tensor)\n", + " loss=policy_loss+0.5*value_loss\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " return episode_rwds_ppo" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Episode 1 ended with reward: -370.0\n", + "Episode 2 ended with reward: -30.0\n", + "Episode 3 ended with reward: -90.0\n", + "Episode 4 ended with reward: -285.0\n", + "Episode 5 ended with reward: 35.0\n", + "Episode 6 ended with reward: 355.0\n", + "Episode 7 ended with reward: 190.0\n", + "Episode 8 ended with reward: 425.0\n", + "Episode 9 ended with reward: 710.0\n", + "Episode 10 ended with reward: 455.0\n", + "Episode 11 ended with reward: 110.0\n", + "Episode 12 ended with reward: 150.0\n", + "Episode 13 ended with reward: 140.0\n", + "Episode 14 ended with reward: 50.0\n", + "Episode 15 ended with reward: 150.0\n", + "Episode 16 ended with reward: 60.0\n", + "Episode 17 ended with reward: 60.0\n", + "Episode 18 ended with reward: 220.0\n", + "Episode 19 ended with reward: 160.0\n", + "Episode 20 ended with reward: 120.0\n", + "Episode 21 ended with reward: 345.0\n", + "Episode 22 ended with reward: 50.0\n", + "Episode 23 ended with reward: 170.0\n", + "Episode 24 ended with reward: 130.0\n", + "Episode 25 ended with reward: 115.0\n", + "Episode 26 ended with reward: 375.0\n", + "Episode 27 ended with reward: 150.0\n", + "Episode 28 ended with reward: 110.0\n", + "Episode 29 ended with reward: 120.0\n", + "Episode 30 ended with reward: 90.0\n", + "Episode 31 ended with reward: 120.0\n", + "Episode 32 ended with reward: 405.0\n", + "Episode 33 ended with reward: 710.0\n", + "Episode 34 ended with reward: 85.0\n", + "Episode 35 ended with reward: 380.0\n", + "Episode 36 ended with reward: 170.0\n", + "Episode 37 ended with reward: 105.0\n", + "Episode 38 ended with reward: 80.0\n", + "Episode 39 ended with reward: -115.0\n", + "Episode 40 ended with reward: 380.0\n", + "Episode 41 ended with reward: 675.0\n", + "Episode 42 ended with reward: 475.0\n", + "Episode 43 ended with reward: 220.0\n", + "Episode 44 ended with reward: 70.0\n", + "Episode 45 ended with reward: 580.0\n", + "Episode 46 ended with reward: 400.0\n", + "Episode 47 ended with reward: 355.0\n", + "Episode 48 ended with reward: 460.0\n", + "Episode 49 ended with reward: 420.0\n", + "Episode 50 ended with reward: 535.0\n", + "Episode 51 ended with reward: 470.0\n", + "Episode 52 ended with reward: 745.0\n", + "Episode 53 ended with reward: 460.0\n", + "Episode 54 ended with reward: 600.0\n", + "Episode 55 ended with reward: 750.0\n", + "Episode 56 ended with reward: 290.0\n", + "Episode 57 ended with reward: 745.0\n", + "Episode 58 ended with reward: 500.0\n", + "Episode 59 ended with reward: 550.0\n", + "Episode 60 ended with reward: 610.0\n", + "Episode 61 ended with reward: 465.0\n", + "Episode 62 ended with reward: 610.0\n", + "Episode 63 ended with reward: 785.0\n", + "Episode 64 ended with reward: 710.0\n", + "Episode 65 ended with reward: 275.0\n", + "Episode 66 ended with reward: 145.0\n", + "Episode 67 ended with reward: 300.0\n", + "Episode 68 ended with reward: 685.0\n", + "Episode 69 ended with reward: 660.0\n", + "Episode 70 ended with reward: 665.0\n", + "Episode 71 ended with reward: 585.0\n", + "Episode 72 ended with reward: 625.0\n", + "Episode 73 ended with reward: 615.0\n", + "Episode 74 ended with reward: 565.0\n", + "Episode 75 ended with reward: 690.0\n", + "Episode 76 ended with reward: 700.0\n", + "Episode 77 ended with reward: 465.0\n", + "Episode 78 ended with reward: 380.0\n", + "Episode 79 ended with reward: 395.0\n", + "Episode 80 ended with reward: 560.0\n", + "Episode 81 ended with reward: 480.0\n", + "Episode 82 ended with reward: 515.0\n", + "Episode 83 ended with reward: 470.0\n", + "Episode 84 ended with reward: 495.0\n", + "Episode 85 ended with reward: 510.0\n", + "Episode 86 ended with reward: 365.0\n", + "Episode 87 ended with reward: 460.0\n", + "Episode 88 ended with reward: 480.0\n", + "Episode 89 ended with reward: 515.0\n", + "Episode 90 ended with reward: 785.0\n", + "Episode 91 ended with reward: 715.0\n", + "Episode 92 ended with reward: 635.0\n", + "Episode 93 ended with reward: 540.0\n", + "Episode 94 ended with reward: 745.0\n", + "Episode 95 ended with reward: 690.0\n", + "Episode 96 ended with reward: 750.0\n", + "Episode 97 ended with reward: 530.0\n", + "Episode 98 ended with reward: 555.0\n", + "Episode 99 ended with reward: 665.0\n", + "Episode 100 ended with reward: 445.0\n", + "Episode 101 ended with reward: 370.0\n", + "Episode 102 ended with reward: 410.0\n", + "Episode 103 ended with reward: 120.0\n", + "Episode 104 ended with reward: -95.0\n", + "Episode 105 ended with reward: -250.0\n", + "Episode 106 ended with reward: -170.0\n", + "Episode 107 ended with reward: 380.0\n", + "Episode 108 ended with reward: 75.0\n", + "Episode 109 ended with reward: 265.0\n", + "Episode 110 ended with reward: 260.0\n", + "Episode 111 ended with reward: 265.0\n", + "Episode 112 ended with reward: 325.0\n", + "Episode 113 ended with reward: 335.0\n", + "Episode 114 ended with reward: 250.0\n", + "Episode 115 ended with reward: 400.0\n", + "Episode 116 ended with reward: 375.0\n", + "Episode 117 ended with reward: 320.0\n", + "Episode 118 ended with reward: 370.0\n", + "Episode 119 ended with reward: 405.0\n", + "Episode 120 ended with reward: 410.0\n", + "Episode 121 ended with reward: 510.0\n", + "Episode 122 ended with reward: 465.0\n", + "Episode 123 ended with reward: 530.0\n", + "Episode 124 ended with reward: 465.0\n", + "Episode 125 ended with reward: 520.0\n", + "Episode 126 ended with reward: 500.0\n", + "Episode 127 ended with reward: 490.0\n", + "Episode 128 ended with reward: 465.0\n", + "Episode 129 ended with reward: 380.0\n", + "Episode 130 ended with reward: 515.0\n", + "Episode 131 ended with reward: 500.0\n", + "Episode 132 ended with reward: 500.0\n", + "Episode 133 ended with reward: 430.0\n", + "Episode 134 ended with reward: 480.0\n", + "Episode 135 ended with reward: 295.0\n", + "Episode 136 ended with reward: 465.0\n", + "Episode 137 ended with reward: 350.0\n", + "Episode 138 ended with reward: 420.0\n", + "Episode 139 ended with reward: 420.0\n", + "Episode 140 ended with reward: 465.0\n", + "Episode 141 ended with reward: 430.0\n", + "Episode 142 ended with reward: 385.0\n", + "Episode 143 ended with reward: 420.0\n", + "Episode 144 ended with reward: 310.0\n", + "Episode 145 ended with reward: 445.0\n", + "Episode 146 ended with reward: 360.0\n", + "Episode 147 ended with reward: 400.0\n", + "Episode 148 ended with reward: 470.0\n", + "Episode 149 ended with reward: 420.0\n", + "Episode 150 ended with reward: 445.0\n", + "Episode 151 ended with reward: 455.0\n", + "Episode 152 ended with reward: 405.0\n", + "Episode 153 ended with reward: 395.0\n", + "Episode 154 ended with reward: 445.0\n", + "Episode 155 ended with reward: 445.0\n", + "Episode 156 ended with reward: 515.0\n", + "Episode 157 ended with reward: 435.0\n", + "Episode 158 ended with reward: 485.0\n", + "Episode 159 ended with reward: 500.0\n", + "Episode 160 ended with reward: 420.0\n", + "Episode 161 ended with reward: 500.0\n", + "Episode 162 ended with reward: 440.0\n", + "Episode 163 ended with reward: 405.0\n", + "Episode 164 ended with reward: 500.0\n", + "Episode 165 ended with reward: 420.0\n", + "Episode 166 ended with reward: 450.0\n", + "Episode 167 ended with reward: 485.0\n", + "Episode 168 ended with reward: 455.0\n", + "Episode 169 ended with reward: 465.0\n", + "Episode 170 ended with reward: 490.0\n", + "Episode 171 ended with reward: 655.0\n", + "Episode 172 ended with reward: 565.0\n", + "Episode 173 ended with reward: 575.0\n", + "Episode 174 ended with reward: 510.0\n", + "Episode 175 ended with reward: 680.0\n", + "Episode 176 ended with reward: 300.0\n", + "Episode 177 ended with reward: 730.0\n", + "Episode 178 ended with reward: 375.0\n", + "Episode 179 ended with reward: 540.0\n", + "Episode 180 ended with reward: 695.0\n", + "Episode 181 ended with reward: 350.0\n", + "Episode 182 ended with reward: 590.0\n", + "Episode 183 ended with reward: 705.0\n", + "Episode 184 ended with reward: 210.0\n", + "Episode 185 ended with reward: 785.0\n", + "Episode 186 ended with reward: 730.0\n", + "Episode 187 ended with reward: 610.0\n", + "Episode 188 ended with reward: 510.0\n", + "Episode 189 ended with reward: 140.0\n", + "Episode 190 ended with reward: 565.0\n", + "Episode 191 ended with reward: 775.0\n", + "Episode 192 ended with reward: 625.0\n", + "Episode 193 ended with reward: 620.0\n", + "Episode 194 ended with reward: 450.0\n", + "Episode 195 ended with reward: 555.0\n", + "Episode 196 ended with reward: 570.0\n", + "Episode 197 ended with reward: 510.0\n", + "Episode 198 ended with reward: 450.0\n", + "Episode 199 ended with reward: 450.0\n", + "Episode 200 ended with reward: 505.0\n", + "Episode 201 ended with reward: 645.0\n", + "Episode 202 ended with reward: 740.0\n", + "Episode 203 ended with reward: 515.0\n", + "Episode 204 ended with reward: 710.0\n", + "Episode 205 ended with reward: 290.0\n", + "Episode 206 ended with reward: 560.0\n", + "Episode 207 ended with reward: 380.0\n", + "Episode 208 ended with reward: 200.0\n", + "Episode 209 ended with reward: 500.0\n", + "Episode 210 ended with reward: 110.0\n", + "Episode 211 ended with reward: 320.0\n", + "Episode 212 ended with reward: -175.0\n", + "Episode 213 ended with reward: 160.0\n", + "Episode 214 ended with reward: 490.0\n", + "Episode 215 ended with reward: 445.0\n", + "Episode 216 ended with reward: 685.0\n", + "Episode 217 ended with reward: 470.0\n", + "Episode 218 ended with reward: 475.0\n", + "Episode 219 ended with reward: 745.0\n", + "Episode 220 ended with reward: 800.0\n", + "Episode 221 ended with reward: 560.0\n", + "Episode 222 ended with reward: 500.0\n", + "Episode 223 ended with reward: 570.0\n", + "Episode 224 ended with reward: 260.0\n", + "Episode 225 ended with reward: 645.0\n", + "Episode 226 ended with reward: 110.0\n", + "Episode 227 ended with reward: 665.0\n", + "Episode 228 ended with reward: 500.0\n", + "Episode 229 ended with reward: 515.0\n", + "Episode 230 ended with reward: 470.0\n", + "Episode 231 ended with reward: 420.0\n", + "Episode 232 ended with reward: 470.0\n", + "Episode 233 ended with reward: 420.0\n", + "Episode 234 ended with reward: 505.0\n", + "Episode 235 ended with reward: 415.0\n", + "Episode 236 ended with reward: 455.0\n", + "Episode 237 ended with reward: 755.0\n", + "Episode 238 ended with reward: 525.0\n", + "Episode 239 ended with reward: 465.0\n", + "Episode 240 ended with reward: 485.0\n", + "Episode 241 ended with reward: 610.0\n", + "Episode 242 ended with reward: 480.0\n", + "Episode 243 ended with reward: 675.0\n", + "Episode 244 ended with reward: 335.0\n", + "Episode 245 ended with reward: 195.0\n", + "Episode 246 ended with reward: 440.0\n", + "Episode 247 ended with reward: 370.0\n", + "Episode 248 ended with reward: 355.0\n", + "Episode 249 ended with reward: 405.0\n", + "Episode 250 ended with reward: 365.0\n", + "Episode 251 ended with reward: 750.0\n", + "Episode 252 ended with reward: 390.0\n", + "Episode 253 ended with reward: 585.0\n", + "Episode 254 ended with reward: 660.0\n", + "Episode 255 ended with reward: 445.0\n", + "Episode 256 ended with reward: 685.0\n", + "Episode 257 ended with reward: 395.0\n", + "Episode 258 ended with reward: 550.0\n", + "Episode 259 ended with reward: 725.0\n", + "Episode 260 ended with reward: 455.0\n", + "Episode 261 ended with reward: 520.0\n", + "Episode 262 ended with reward: 535.0\n", + "Episode 263 ended with reward: 530.0\n", + "Episode 264 ended with reward: 550.0\n", + "Episode 265 ended with reward: 620.0\n", + "Episode 266 ended with reward: 515.0\n", + "Episode 267 ended with reward: 465.0\n", + "Episode 268 ended with reward: 425.0\n", + "Episode 269 ended with reward: 535.0\n", + "Episode 270 ended with reward: 565.0\n", + "Episode 271 ended with reward: 725.0\n", + "Episode 272 ended with reward: 505.0\n", + "Episode 273 ended with reward: 560.0\n", + "Episode 274 ended with reward: 270.0\n", + "Episode 275 ended with reward: 175.0\n", + "Episode 276 ended with reward: 490.0\n", + "Episode 277 ended with reward: 355.0\n", + "Episode 278 ended with reward: 505.0\n", + "Episode 279 ended with reward: 480.0\n", + "Episode 280 ended with reward: 500.0\n", + "Episode 281 ended with reward: 520.0\n", + "Episode 282 ended with reward: 465.0\n", + "Episode 283 ended with reward: 465.0\n", + "Episode 284 ended with reward: 485.0\n", + "Episode 285 ended with reward: 530.0\n", + "Episode 286 ended with reward: 465.0\n", + "Episode 287 ended with reward: 275.0\n", + "Episode 288 ended with reward: 410.0\n", + "Episode 289 ended with reward: 355.0\n", + "Episode 290 ended with reward: 455.0\n", + "Episode 291 ended with reward: 345.0\n", + "Episode 292 ended with reward: 265.0\n", + "Episode 293 ended with reward: 500.0\n", + "Episode 294 ended with reward: 530.0\n", + "Episode 295 ended with reward: 500.0\n", + "Episode 296 ended with reward: 515.0\n", + "Episode 297 ended with reward: 500.0\n", + "Episode 298 ended with reward: 500.0\n", + "Episode 299 ended with reward: 480.0\n", + "Episode 300 ended with reward: 500.0\n", + "Episode 301 ended with reward: 460.0\n", + "Episode 302 ended with reward: 390.0\n", + "Episode 303 ended with reward: 485.0\n", + "Episode 304 ended with reward: 440.0\n", + "Episode 305 ended with reward: 330.0\n", + "Episode 306 ended with reward: 365.0\n", + "Episode 307 ended with reward: 440.0\n", + "Episode 308 ended with reward: 360.0\n", + "Episode 309 ended with reward: 500.0\n", + "Episode 310 ended with reward: 450.0\n", + "Episode 311 ended with reward: 315.0\n", + "Episode 312 ended with reward: 420.0\n", + "Episode 313 ended with reward: 500.0\n", + "Episode 314 ended with reward: 340.0\n", + "Episode 315 ended with reward: 565.0\n", + "Episode 316 ended with reward: 560.0\n", + "Episode 317 ended with reward: 515.0\n", + "Episode 318 ended with reward: 520.0\n", + "Episode 319 ended with reward: 500.0\n", + "Episode 320 ended with reward: 595.0\n", + "Episode 321 ended with reward: 780.0\n", + "Episode 322 ended with reward: 500.0\n", + "Episode 323 ended with reward: 530.0\n", + "Episode 324 ended with reward: 470.0\n", + "Episode 325 ended with reward: 320.0\n", + "Episode 326 ended with reward: 460.0\n", + "Episode 327 ended with reward: 475.0\n", + "Episode 328 ended with reward: 555.0\n", + "Episode 329 ended with reward: 695.0\n", + "Episode 330 ended with reward: 515.0\n", + "Episode 331 ended with reward: 615.0\n", + "Episode 332 ended with reward: 500.0\n", + "Episode 333 ended with reward: 510.0\n", + "Episode 334 ended with reward: 575.0\n", + "Episode 335 ended with reward: 300.0\n", + "Episode 336 ended with reward: 445.0\n", + "Episode 337 ended with reward: 535.0\n", + "Episode 338 ended with reward: 400.0\n", + "Episode 339 ended with reward: 455.0\n", + "Episode 340 ended with reward: 565.0\n", + "Episode 341 ended with reward: 570.0\n", + "Episode 342 ended with reward: 260.0\n", + "Episode 343 ended with reward: 260.0\n", + "Episode 344 ended with reward: 570.0\n", + "Episode 345 ended with reward: 510.0\n", + "Episode 346 ended with reward: 555.0\n", + "Episode 347 ended with reward: 515.0\n", + "Episode 348 ended with reward: 230.0\n", + "Episode 349 ended with reward: 525.0\n", + "Episode 350 ended with reward: 360.0\n", + "Episode 351 ended with reward: 130.0\n", + "Episode 352 ended with reward: 505.0\n", + "Episode 353 ended with reward: 520.0\n", + "Episode 354 ended with reward: 290.0\n", + "Episode 355 ended with reward: 760.0\n", + "Episode 356 ended with reward: 470.0\n", + "Episode 357 ended with reward: 615.0\n", + "Episode 358 ended with reward: 150.0\n", + "Episode 359 ended with reward: 505.0\n", + "Episode 360 ended with reward: 140.0\n", + "Episode 361 ended with reward: 770.0\n", + "Episode 362 ended with reward: 760.0\n", + "Episode 363 ended with reward: 570.0\n", + "Episode 364 ended with reward: 270.0\n", + "Episode 365 ended with reward: 575.0\n", + "Episode 366 ended with reward: 365.0\n", + "Episode 367 ended with reward: 500.0\n", + "Episode 368 ended with reward: 555.0\n", + "Episode 369 ended with reward: 140.0\n", + "Episode 370 ended with reward: 725.0\n", + "Episode 371 ended with reward: 585.0\n", + "Episode 372 ended with reward: 310.0\n", + "Episode 373 ended with reward: 500.0\n", + "Episode 374 ended with reward: 515.0\n", + "Episode 375 ended with reward: 535.0\n", + "Episode 376 ended with reward: 415.0\n", + "Episode 377 ended with reward: 150.0\n", + "Episode 378 ended with reward: 735.0\n", + "Episode 379 ended with reward: 480.0\n", + "Episode 380 ended with reward: 505.0\n", + "Episode 381 ended with reward: 595.0\n", + "Episode 382 ended with reward: 495.0\n", + "Episode 383 ended with reward: 150.0\n", + "Episode 384 ended with reward: 475.0\n", + "Episode 385 ended with reward: 200.0\n", + "Episode 386 ended with reward: 295.0\n", + "Episode 387 ended with reward: 305.0\n", + "Episode 388 ended with reward: 695.0\n", + "Episode 389 ended with reward: 610.0\n", + "Episode 390 ended with reward: 220.0\n", + "Episode 391 ended with reward: 595.0\n", + "Episode 392 ended with reward: 565.0\n", + "Episode 393 ended with reward: 110.0\n", + "Episode 394 ended with reward: 730.0\n", + "Episode 395 ended with reward: 355.0\n", + "Episode 396 ended with reward: 760.0\n", + "Episode 397 ended with reward: 790.0\n", + "Episode 398 ended with reward: 680.0\n", + "Episode 399 ended with reward: 575.0\n", + "Episode 400 ended with reward: 760.0\n", + "Episode 401 ended with reward: 450.0\n", + "Episode 402 ended with reward: 625.0\n", + "Episode 403 ended with reward: 345.0\n", + "Episode 404 ended with reward: 485.0\n", + "Episode 405 ended with reward: 525.0\n", + "Episode 406 ended with reward: 670.0\n", + "Episode 407 ended with reward: 565.0\n", + "Episode 408 ended with reward: 405.0\n", + "Episode 409 ended with reward: 50.0\n", + "Episode 410 ended with reward: 410.0\n", + "Episode 411 ended with reward: 300.0\n", + "Episode 412 ended with reward: 415.0\n", + "Episode 413 ended with reward: 485.0\n", + "Episode 414 ended with reward: 735.0\n", + "Episode 415 ended with reward: 630.0\n", + "Episode 416 ended with reward: 530.0\n", + "Episode 417 ended with reward: 500.0\n", + "Episode 418 ended with reward: 595.0\n", + "Episode 419 ended with reward: 500.0\n", + "Episode 420 ended with reward: 315.0\n", + "Episode 421 ended with reward: 515.0\n", + "Episode 422 ended with reward: 445.0\n", + "Episode 423 ended with reward: 395.0\n", + "Episode 424 ended with reward: 500.0\n", + "Episode 425 ended with reward: 430.0\n", + "Episode 426 ended with reward: 365.0\n", + "Episode 427 ended with reward: 560.0\n", + "Episode 428 ended with reward: 435.0\n", + "Episode 429 ended with reward: 465.0\n", + "Episode 430 ended with reward: 500.0\n", + "Episode 431 ended with reward: 485.0\n", + "Episode 432 ended with reward: 500.0\n", + "Episode 433 ended with reward: 520.0\n", + "Episode 434 ended with reward: 485.0\n", + "Episode 435 ended with reward: 455.0\n", + "Episode 436 ended with reward: 460.0\n", + "Episode 437 ended with reward: 330.0\n", + "Episode 438 ended with reward: 360.0\n", + "Episode 439 ended with reward: 455.0\n", + "Episode 440 ended with reward: 485.0\n", + "Episode 441 ended with reward: 340.0\n", + "Episode 442 ended with reward: 460.0\n", + "Episode 443 ended with reward: 500.0\n", + "Episode 444 ended with reward: 320.0\n", + "Episode 445 ended with reward: 490.0\n", + "Episode 446 ended with reward: 455.0\n", + "Episode 447 ended with reward: 480.0\n", + "Episode 448 ended with reward: 455.0\n", + "Episode 449 ended with reward: 500.0\n", + "Episode 450 ended with reward: 415.0\n", + "Episode 451 ended with reward: 515.0\n", + "Episode 452 ended with reward: 550.0\n", + "Episode 453 ended with reward: 720.0\n", + "Episode 454 ended with reward: 570.0\n", + "Episode 455 ended with reward: 500.0\n", + "Episode 456 ended with reward: 535.0\n", + "Episode 457 ended with reward: 725.0\n", + "Episode 458 ended with reward: 530.0\n", + "Episode 459 ended with reward: 760.0\n", + "Episode 460 ended with reward: 130.0\n", + "Episode 461 ended with reward: 595.0\n", + "Episode 462 ended with reward: 735.0\n", + "Episode 463 ended with reward: 730.0\n", + "Episode 464 ended with reward: 615.0\n", + "Episode 465 ended with reward: 500.0\n", + "Episode 466 ended with reward: 725.0\n", + "Episode 467 ended with reward: 720.0\n", + "Episode 468 ended with reward: 465.0\n", + "Episode 469 ended with reward: 465.0\n", + "Episode 470 ended with reward: 760.0\n", + "Episode 471 ended with reward: 255.0\n", + "Episode 472 ended with reward: 125.0\n", + "Episode 473 ended with reward: 500.0\n", + "Episode 474 ended with reward: 705.0\n", + "Episode 475 ended with reward: 500.0\n", + "Episode 476 ended with reward: 485.0\n", + "Episode 477 ended with reward: 500.0\n", + "Episode 478 ended with reward: 500.0\n", + "Episode 479 ended with reward: 470.0\n", + "Episode 480 ended with reward: 635.0\n", + "Episode 481 ended with reward: 440.0\n", + "Episode 482 ended with reward: 275.0\n", + "Episode 483 ended with reward: 305.0\n", + "Episode 484 ended with reward: 600.0\n", + "Episode 485 ended with reward: 465.0\n", + "Episode 486 ended with reward: 370.0\n", + "Episode 487 ended with reward: 775.0\n", + "Episode 488 ended with reward: 300.0\n", + "Episode 489 ended with reward: 415.0\n", + "Episode 490 ended with reward: 460.0\n", + "Episode 491 ended with reward: 620.0\n", + "Episode 492 ended with reward: 355.0\n", + "Episode 493 ended with reward: 455.0\n", + "Episode 494 ended with reward: 365.0\n", + "Episode 495 ended with reward: 720.0\n", + "Episode 496 ended with reward: 500.0\n", + "Episode 497 ended with reward: 120.0\n", + "Episode 498 ended with reward: 500.0\n", + "Episode 499 ended with reward: 355.0\n", + "Episode 500 ended with reward: 605.0\n", + "Episode 501 ended with reward: 475.0\n", + "Episode 502 ended with reward: 415.0\n", + "Episode 503 ended with reward: 700.0\n", + "Episode 504 ended with reward: 715.0\n", + "Episode 505 ended with reward: 720.0\n", + "Episode 506 ended with reward: 335.0\n", + "Episode 507 ended with reward: 240.0\n", + "Episode 508 ended with reward: 425.0\n", + "Episode 509 ended with reward: 705.0\n", + "Episode 510 ended with reward: 435.0\n", + "Episode 511 ended with reward: 205.0\n", + "Episode 512 ended with reward: 485.0\n", + "Episode 513 ended with reward: 730.0\n", + "Episode 514 ended with reward: 380.0\n", + "Episode 515 ended with reward: 470.0\n", + "Episode 516 ended with reward: 490.0\n", + "Episode 517 ended with reward: 480.0\n", + "Episode 518 ended with reward: 440.0\n", + "Episode 519 ended with reward: 570.0\n", + "Episode 520 ended with reward: 405.0\n", + "Episode 521 ended with reward: 430.0\n", + "Episode 522 ended with reward: 370.0\n", + "Episode 523 ended with reward: 710.0\n", + "Episode 524 ended with reward: 420.0\n", + "Episode 525 ended with reward: 465.0\n", + "Episode 526 ended with reward: 510.0\n", + "Episode 527 ended with reward: 685.0\n", + "Episode 528 ended with reward: 360.0\n", + "Episode 529 ended with reward: 490.0\n", + "Episode 530 ended with reward: 465.0\n", + "Episode 531 ended with reward: 365.0\n", + "Episode 532 ended with reward: 640.0\n", + "Episode 533 ended with reward: 575.0\n", + "Episode 534 ended with reward: 520.0\n", + "Episode 535 ended with reward: 340.0\n", + "Episode 536 ended with reward: 440.0\n", + "Episode 537 ended with reward: 230.0\n", + "Episode 538 ended with reward: 455.0\n", + "Episode 539 ended with reward: 530.0\n", + "Episode 540 ended with reward: 475.0\n", + "Episode 541 ended with reward: 435.0\n", + "Episode 542 ended with reward: 385.0\n", + "Episode 543 ended with reward: 405.0\n", + "Episode 544 ended with reward: 745.0\n", + "Episode 545 ended with reward: 500.0\n", + "Episode 546 ended with reward: 395.0\n", + "Episode 547 ended with reward: 230.0\n", + "Episode 548 ended with reward: 400.0\n", + "Episode 549 ended with reward: 490.0\n", + "Episode 550 ended with reward: 465.0\n", + "Episode 551 ended with reward: 725.0\n", + "Episode 552 ended with reward: 630.0\n", + "Episode 553 ended with reward: 150.0\n", + "Episode 554 ended with reward: 470.0\n", + "Episode 555 ended with reward: 535.0\n", + "Episode 556 ended with reward: 640.0\n", + "Episode 557 ended with reward: 280.0\n", + "Episode 558 ended with reward: 405.0\n", + "Episode 559 ended with reward: 215.0\n", + "Episode 560 ended with reward: 140.0\n", + "Episode 561 ended with reward: 410.0\n", + "Episode 562 ended with reward: 550.0\n", + "Episode 563 ended with reward: 780.0\n", + "Episode 564 ended with reward: 465.0\n", + "Episode 565 ended with reward: 500.0\n", + "Episode 566 ended with reward: 315.0\n", + "Episode 567 ended with reward: 650.0\n", + "Episode 568 ended with reward: 735.0\n", + "Episode 569 ended with reward: 470.0\n", + "Episode 570 ended with reward: 500.0\n", + "Episode 571 ended with reward: 500.0\n", + "Episode 572 ended with reward: 745.0\n", + "Episode 573 ended with reward: 605.0\n", + "Episode 574 ended with reward: 515.0\n", + "Episode 575 ended with reward: 220.0\n", + "Episode 576 ended with reward: 710.0\n", + "Episode 577 ended with reward: 780.0\n", + "Episode 578 ended with reward: 320.0\n", + "Episode 579 ended with reward: 500.0\n", + "Episode 580 ended with reward: 380.0\n", + "Episode 581 ended with reward: 330.0\n", + "Episode 582 ended with reward: 675.0\n", + "Episode 583 ended with reward: 465.0\n", + "Episode 584 ended with reward: 275.0\n", + "Episode 585 ended with reward: 630.0\n", + "Episode 586 ended with reward: 515.0\n", + "Episode 587 ended with reward: 285.0\n", + "Episode 588 ended with reward: 575.0\n", + "Episode 589 ended with reward: 690.0\n", + "Episode 590 ended with reward: 685.0\n", + "Episode 591 ended with reward: 575.0\n", + "Episode 592 ended with reward: 720.0\n", + "Episode 593 ended with reward: 610.0\n", + "Episode 594 ended with reward: 285.0\n", + "Episode 595 ended with reward: 405.0\n", + "Episode 596 ended with reward: 680.0\n", + "Episode 597 ended with reward: 310.0\n", + "Episode 598 ended with reward: 340.0\n", + "Episode 599 ended with reward: 195.0\n", + "Episode 600 ended with reward: -145.0\n", + "Episode 601 ended with reward: 285.0\n", + "Episode 602 ended with reward: 695.0\n", + "Episode 603 ended with reward: 775.0\n", + "Episode 604 ended with reward: 430.0\n", + "Episode 605 ended with reward: 560.0\n", + "Episode 606 ended with reward: 555.0\n", + "Episode 607 ended with reward: 655.0\n", + "Episode 608 ended with reward: 775.0\n", + "Episode 609 ended with reward: 725.0\n", + "Episode 610 ended with reward: 500.0\n", + "Episode 611 ended with reward: 235.0\n", + "Episode 612 ended with reward: 735.0\n", + "Episode 613 ended with reward: 440.0\n", + "Episode 614 ended with reward: 650.0\n", + "Episode 615 ended with reward: 310.0\n", + "Episode 616 ended with reward: 330.0\n", + "Episode 617 ended with reward: 430.0\n", + "Episode 618 ended with reward: 540.0\n", + "Episode 619 ended with reward: 460.0\n", + "Episode 620 ended with reward: 405.0\n", + "Episode 621 ended with reward: 500.0\n", + "Episode 622 ended with reward: 500.0\n", + "Episode 623 ended with reward: 580.0\n", + "Episode 624 ended with reward: 725.0\n", + "Episode 625 ended with reward: 525.0\n", + "Episode 626 ended with reward: 140.0\n", + "Episode 627 ended with reward: 605.0\n", + "Episode 628 ended with reward: 480.0\n", + "Episode 629 ended with reward: 450.0\n", + "Episode 630 ended with reward: 715.0\n", + "Episode 631 ended with reward: 500.0\n", + "Episode 632 ended with reward: 500.0\n", + "Episode 633 ended with reward: 550.0\n", + "Episode 634 ended with reward: 325.0\n", + "Episode 635 ended with reward: 465.0\n", + "Episode 636 ended with reward: 310.0\n", + "Episode 637 ended with reward: 720.0\n", + "Episode 638 ended with reward: 675.0\n", + "Episode 639 ended with reward: 435.0\n", + "Episode 640 ended with reward: 575.0\n", + "Episode 641 ended with reward: 340.0\n", + "Episode 642 ended with reward: 620.0\n", + "Episode 643 ended with reward: 685.0\n", + "Episode 644 ended with reward: 485.0\n", + "Episode 645 ended with reward: 285.0\n", + "Episode 646 ended with reward: 100.0\n", + "Episode 647 ended with reward: 565.0\n", + "Episode 648 ended with reward: 220.0\n", + "Episode 649 ended with reward: 535.0\n", + "Episode 650 ended with reward: 555.0\n", + "Episode 651 ended with reward: 500.0\n", + "Episode 652 ended with reward: 525.0\n", + "Episode 653 ended with reward: 435.0\n", + "Episode 654 ended with reward: 500.0\n", + "Episode 655 ended with reward: 355.0\n", + "Episode 656 ended with reward: 545.0\n", + "Episode 657 ended with reward: 750.0\n", + "Episode 658 ended with reward: 745.0\n", + "Episode 659 ended with reward: 220.0\n", + "Episode 660 ended with reward: 500.0\n", + "Episode 661 ended with reward: 480.0\n", + "Episode 662 ended with reward: 470.0\n", + "Episode 663 ended with reward: 100.0\n", + "Episode 664 ended with reward: 370.0\n", + "Episode 665 ended with reward: 390.0\n", + "Episode 666 ended with reward: 500.0\n", + "Episode 667 ended with reward: 365.0\n", + "Episode 668 ended with reward: 465.0\n", + "Episode 669 ended with reward: 495.0\n", + "Episode 670 ended with reward: 745.0\n", + "Episode 671 ended with reward: 515.0\n", + "Episode 672 ended with reward: 745.0\n", + "Episode 673 ended with reward: 510.0\n", + "Episode 674 ended with reward: 745.0\n", + "Episode 675 ended with reward: 760.0\n", + "Episode 676 ended with reward: 705.0\n", + "Episode 677 ended with reward: 605.0\n", + "Episode 678 ended with reward: 715.0\n", + "Episode 679 ended with reward: 735.0\n", + "Episode 680 ended with reward: 700.0\n", + "Episode 681 ended with reward: 390.0\n", + "Episode 682 ended with reward: 630.0\n", + "Episode 683 ended with reward: 690.0\n", + "Episode 684 ended with reward: 735.0\n", + "Episode 685 ended with reward: 255.0\n", + "Episode 686 ended with reward: 425.0\n", + "Episode 687 ended with reward: 185.0\n", + "Episode 688 ended with reward: 500.0\n", + "Episode 689 ended with reward: 370.0\n", + "Episode 690 ended with reward: 465.0\n", + "Episode 691 ended with reward: 515.0\n", + "Episode 692 ended with reward: 750.0\n", + "Episode 693 ended with reward: 500.0\n", + "Episode 694 ended with reward: 425.0\n", + "Episode 695 ended with reward: 500.0\n", + "Episode 696 ended with reward: 470.0\n", + "Episode 697 ended with reward: 465.0\n", + "Episode 698 ended with reward: 610.0\n", + "Episode 699 ended with reward: 470.0\n", + "Episode 700 ended with reward: 650.0\n", + "Episode 701 ended with reward: 440.0\n", + "Episode 702 ended with reward: 465.0\n", + "Episode 703 ended with reward: 520.0\n", + "Episode 704 ended with reward: 675.0\n", + "Episode 705 ended with reward: 690.0\n", + "Episode 706 ended with reward: 450.0\n", + "Episode 707 ended with reward: 550.0\n", + "Episode 708 ended with reward: 410.0\n", + "Episode 709 ended with reward: 640.0\n", + "Episode 710 ended with reward: 780.0\n", + "Episode 711 ended with reward: 630.0\n", + "Episode 712 ended with reward: 140.0\n", + "Episode 713 ended with reward: 640.0\n", + "Episode 714 ended with reward: 730.0\n", + "Episode 715 ended with reward: 430.0\n", + "Episode 716 ended with reward: 425.0\n", + "Episode 717 ended with reward: 500.0\n", + "Episode 718 ended with reward: 710.0\n", + "Episode 719 ended with reward: 370.0\n", + "Episode 720 ended with reward: 470.0\n", + "Episode 721 ended with reward: 395.0\n", + "Episode 722 ended with reward: 480.0\n", + "Episode 723 ended with reward: 445.0\n", + "Episode 724 ended with reward: 640.0\n", + "Episode 725 ended with reward: 515.0\n", + "Episode 726 ended with reward: 280.0\n", + "Episode 727 ended with reward: 560.0\n", + "Episode 728 ended with reward: 515.0\n", + "Episode 729 ended with reward: 730.0\n", + "Episode 730 ended with reward: 685.0\n", + "Episode 731 ended with reward: 480.0\n", + "Episode 732 ended with reward: 505.0\n", + "Episode 733 ended with reward: 185.0\n", + "Episode 734 ended with reward: 675.0\n", + "Episode 735 ended with reward: 330.0\n", + "Episode 736 ended with reward: 400.0\n", + "Episode 737 ended with reward: 405.0\n", + "Episode 738 ended with reward: 490.0\n", + "Episode 739 ended with reward: 60.0\n", + "Episode 740 ended with reward: 465.0\n", + "Episode 741 ended with reward: 670.0\n", + "Episode 742 ended with reward: 550.0\n", + "Episode 743 ended with reward: 710.0\n", + "Episode 744 ended with reward: 220.0\n", + "Episode 745 ended with reward: 560.0\n", + "Episode 746 ended with reward: 520.0\n", + "Episode 747 ended with reward: 320.0\n", + "Episode 748 ended with reward: 540.0\n", + "Episode 749 ended with reward: 485.0\n", + "Episode 750 ended with reward: 230.0\n", + "Episode 751 ended with reward: 585.0\n", + "Episode 752 ended with reward: 655.0\n", + "Episode 753 ended with reward: 740.0\n", + "Episode 754 ended with reward: 395.0\n", + "Episode 755 ended with reward: 440.0\n", + "Episode 756 ended with reward: 730.0\n", + "Episode 757 ended with reward: 790.0\n", + "Episode 758 ended with reward: 715.0\n", + "Episode 759 ended with reward: 390.0\n", + "Episode 760 ended with reward: 735.0\n", + "Episode 761 ended with reward: 485.0\n", + "Episode 762 ended with reward: 290.0\n", + "Episode 763 ended with reward: 445.0\n", + "Episode 764 ended with reward: 555.0\n", + "Episode 765 ended with reward: 445.0\n", + "Episode 766 ended with reward: 345.0\n", + "Episode 767 ended with reward: 440.0\n", + "Episode 768 ended with reward: 515.0\n", + "Episode 769 ended with reward: 500.0\n", + "Episode 770 ended with reward: 515.0\n", + "Episode 771 ended with reward: 500.0\n", + "Episode 772 ended with reward: 595.0\n", + "Episode 773 ended with reward: 485.0\n", + "Episode 774 ended with reward: 680.0\n", + "Episode 775 ended with reward: 455.0\n", + "Episode 776 ended with reward: 505.0\n", + "Episode 777 ended with reward: 490.0\n", + "Episode 778 ended with reward: 500.0\n", + "Episode 779 ended with reward: 475.0\n", + "Episode 780 ended with reward: 470.0\n", + "Episode 781 ended with reward: 470.0\n" + ] + } + ], + "source": [ + "env=RoverGridEnv()\n", + "input_dim=env.observation_space.shape[0]\n", + "n_actions=env.action_space.n\n", + "actor_critic=ActorCritic(input_dim,\n", + " n_actions)\n", + "optimizer=optim.Adam(actor_critic.parameters(),\n", + " lr=1e-3)\n", + "episode_rwds_ppo=train_ppo(env,\n", + " actor_critic,\n", + " optimizer,\n", + " total_timesteps=50000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(episode_rwds_ppo)\n", + "plt.xlabel(\"Episode\")\n", + "plt.ylabel(\"Total Reward\")\n", + "plt.title(\"Total Rewards Per Episode\")\n", + "plt.grid(True)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\91740\\AppData\\Local\\Temp\\ipykernel_8108\\1561640575.py:31: DeprecationWarning: Starting with ImageIO v3 the behavior of this function will switch to that of iio.v3.imread. To keep the current behavior (and make this warning disappear) use `import imageio.v2 as imageio` or call `imageio.v2.imread` directly.\n", + " frames = [imageio.imread(path) for path in frames_paths]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Episode 1 evaluated, GIF saved to eval_gifs\\episode_1.gif.\n", + "Episode 2 evaluated, GIF saved to eval_gifs\\episode_2.gif.\n", + "Episode 3 evaluated, GIF saved to eval_gifs\\episode_3.gif.\n", + "Episode 4 evaluated, GIF saved to eval_gifs\\episode_4.gif.\n", + "Episode 5 evaluated, GIF saved to eval_gifs\\episode_5.gif.\n", + "Episode 6 evaluated, GIF saved to eval_gifs\\episode_6.gif.\n", + "Episode 7 evaluated, GIF saved to eval_gifs\\episode_7.gif.\n", + "Episode 8 evaluated, GIF saved to eval_gifs\\episode_8.gif.\n", + "Episode 9 evaluated, GIF saved to eval_gifs\\episode_9.gif.\n", + "Episode 10 evaluated, GIF saved to eval_gifs\\episode_10.gif.\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def evaluate_ppo_agent(env,\n", + " actor_critic,\n", + " num_episodes=10,\n", + " output_dir='eval_gifs'):\n", + " if not os.path.exists(output_dir):\n", + " os.makedirs(output_dir)\n", + " \n", + " eval_episode_rwds=[] \n", + " for episode in range(num_episodes):\n", + " obs=env.reset()\n", + " episode_rwds_ppo=np.zeros((2,))\n", + " frames_paths=[]\n", + " done=False\n", + " timestep=0\n", + " while not done:\n", + " with torch.no_grad():\n", + " obs_tensor=torch.tensor(obs,\n", + " dtype=torch.float32).unsqueeze(0)\n", + " policy_logits,_=actor_critic(obs_tensor)\n", + " action1=Categorical(logits=policy_logits).sample().item()\n", + " action2=Categorical(logits=policy_logits).sample().item()\n", + " next_obs,rewards,done,_=env.step([action1,\n", + " action2])\n", + " episode_rwds_ppo+=rewards \n", + " obs=next_obs\n", + " frame_path=os.path.join(output_dir,\n", + " f\"episode_{episode+1}_frame_{timestep}.png\")\n", + " env.render(save_path=frame_path) \n", + " frames_paths.append(frame_path) \n", + " timestep+=1\n", + " eval_episode_rwds.append(episode_rwds_ppo) \n", + " \n", + " frames=[imageio.imread(path) for path in frames_paths]\n", + " gif_path=os.path.join(output_dir,f\"episode_{episode+1}.gif\")\n", + " imageio.mimsave(gif_path,frames,fps=10) \n", + " for path in frames_paths:\n", + " os.remove(path)\n", + " print(f\"Episode {episode+1} evaluated, GIF saved to {gif_path}.\")\n", + "\n", + " eval_episode_rwds=np.array(eval_episode_rwds) \n", + " plt.figure(figsize=(12,6))\n", + " for agent_index in range(2):\n", + " plt.plot(range(1,num_episodes+1),eval_episode_rwds[:,agent_index],marker='o',label=f'Agent {agent_index+1}')\n", + " plt.title('Total Rewards per Episode for Each Agent')\n", + " plt.xlabel('Episode')\n", + " plt.ylabel('Total Reward')\n", + " plt.legend()\n", + " plt.grid(True)\n", + " plt.savefig(os.path.join(output_dir,'rewards_plot.png'))\n", + " plt.show()\n", + "\n", + "evaluate_ppo_agent(env,actor_critic,num_episodes=10)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}