Commit
fixed plotting bug (missing variables) referenced in Issue #5
qfettes committed Nov 29, 2018
1 parent 592c21d commit 1996cca
Showing 16 changed files with 79 additions and 30 deletions.
4 changes: 2 additions & 2 deletions 01.DQN.ipynb
@@ -386,7 +386,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -420,7 +420,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
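This hunk is the fix named in the commit message, and the same one-line change repeats in each of the remaining notebooks below: the training loop passed plot() the names all_rewards and losses, which are never defined in the notebook (the statistics live on the agent object), and it omitted the sigma argument that the five-parameter plot() in dqn_devel.py expects. A minimal runnable sketch of the mismatch and the fix; the Model stand-in and its sample values are hypothetical:

    from datetime import timedelta
    from timeit import default_timer as timer

    import numpy as np

    class Model:
        # Hypothetical stand-in for the tutorial's agent, which tracks these
        # statistics on itself; that is why the fixed call reads them off
        # `model` instead of loop-local variables.
        def __init__(self):
            self.rewards = [18.0, 19.5, 20.1]            # per-episode returns
            self.losses = [0.9, 0.5, 0.2]                # per-update losses
            self.sigma_parameter_mag = [0.4, 0.3, 0.2]   # noisy-layer sigma magnitudes

    def plot(frame_idx, rewards, losses, sigma, elapsed_time):
        # Same five-parameter signature as plot() in dqn_devel.py, with the
        # plotting body reduced to a print for this sketch.
        print('frame %s. reward: %s. time: %s'
              % (frame_idx, np.mean(rewards[-10:]), elapsed_time))

    start = timer()
    model = Model()

    # Before this commit the notebooks called
    #   plot(frame_idx, all_rewards, losses, timedelta(...))
    # which raises NameError (all_rewards, losses are undefined) and is one
    # argument short. The fixed call:
    plot(100000, model.rewards, model.losses, model.sigma_parameter_mag,
         timedelta(seconds=int(timer() - start)))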
4 changes: 2 additions & 2 deletions 02.NStep_DQN.ipynb
@@ -342,7 +342,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -376,7 +376,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 03.Double_DQN.ipynb
@@ -193,7 +193,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -227,7 +227,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 04.Dueling_DQN.ipynb
@@ -243,7 +243,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -277,7 +277,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 05.DQN-NoisyNets.ipynb
@@ -314,7 +314,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -348,7 +348,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 06.DQN_PriorityReplay.ipynb
@@ -294,7 +294,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -328,7 +328,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 07.Categorical-DQN.ipynb
@@ -298,7 +298,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -332,7 +332,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 08.Rainbow.ipynb
@@ -321,7 +321,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -355,7 +355,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 09.QuantileRegression-DQN.ipynb
@@ -285,7 +285,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -319,7 +319,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 10.Quantile-Rainbow.ipynb
@@ -302,7 +302,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -336,7 +336,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
6 changes: 3 additions & 3 deletions 11.DRQN.ipynb (large diff not rendered by default)
4 changes: 2 additions & 2 deletions agents/DQN.py
@@ -58,8 +58,8 @@ def __init__(self, static_policy=False, env=None, config=None):
         self.nstep_buffer = []

     def declare_networks(self):
-        self.model = DQN(self.num_feats, self.num_actions, noisy=self.noisy, sigma_init=self.sigma_init, body=AtariBody)
-        self.target_model = DQN(self.num_feats, self.num_actions, noisy=self.noisy, sigma_init=self.sigma_init, body=AtariBody)
+        self.model = DQN(self.num_feats, self.num_actions, noisy=self.noisy, sigma_init=self.sigma_init, body=SimpleBody)
+        self.target_model = DQN(self.num_feats, self.num_actions, noisy=self.noisy, sigma_init=self.sigma_init, body=SimpleBody)

     def declare_memory(self):
         self.memory = ExperienceReplayMemory(self.experience_replay_size) if not self.priority_replay else PrioritizedReplayMemory(self.experience_replay_size, self.priority_alpha, self.priority_beta_start, self.priority_beta_frames)
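The body swap pairs with the environment change in dqn_devel.py below: AtariBody is presumably the convolutional feature extractor for stacked image frames, while SimpleBody handles flat, low-dimensional observations such as CartPole's four-element state. The diff does not show SimpleBody itself; the sketch below is hypothetical, with the constructor signature, layer widths, and method names all assumed:

    import torch.nn as nn
    import torch.nn.functional as F

    class SimpleBody(nn.Module):
        # Hypothetical MLP feature body for flat observations; the real
        # SimpleBody in networks/ may differ in widths and interface.
        def __init__(self, input_shape, num_actions, noisy=False, sigma_init=0.5):
            super(SimpleBody, self).__init__()
            self.fc1 = nn.Linear(input_shape[0], 128)

        def forward(self, x):
            return F.relu(self.fc1(x))

        def feature_size(self):
            return self.fc1.out_features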
10 changes: 5 additions & 5 deletions dqn_devel.py
@@ -3,7 +3,7 @@

 from IPython.display import clear_output
 import matplotlib
-matplotlib.use("agg")
+#matplotlib.use("agg")
 from matplotlib import pyplot as plt
 #%matplotlib inline

@@ -76,17 +76,17 @@ def plot(frame_idx, rewards, losses, sigma, elapsed_time):
     plt.title('noisy param magnitude')
     plt.plot(sigma)
     plt.show()
-    #print('frame %s. reward: %s. time: %s' % (frame_idx, np.mean(rewards[-10:]), elapsed_time))
+    print('frame %s. reward: %s. time: %s' % (frame_idx, np.mean(rewards[-10:]), elapsed_time))


 if __name__=='__main__':
     start=timer()

-    env_id = "PongNoFrameskip-v4"
+    '''env_id = "PongNoFrameskip-v4"
     env = make_atari(env_id)
     env = wrap_deepmind(env, frame_stack=False)
-    env = wrap_pytorch(env)
-    #env = gym.make('CartPole-v0')
+    env = wrap_pytorch(env)'''
+    env = gym.make('CartPole-v0')
     #env = wrappers.Monitor(env, 'Delete', force=True)
     model = Model(env=env, config=config)
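With the Atari setup fenced off inside a string literal, the development script now exercises the agent on CartPole, which is far quicker to iterate on and is why agents/DQN.py above switches its default body to SimpleBody. A small sketch of the swap under the classic (pre-0.26) gym API; make_atari, wrap_deepmind, and wrap_pytorch are the repo's wrapper names exactly as they appear in the diff:

    import gym

    # Quick-iteration environment used by the updated script:
    env = gym.make('CartPole-v0')
    print(env.observation_space.shape)  # (4,)  flat input, handled by SimpleBody
    print(env.action_space.n)           # 2

    # The Atari pipeline the commit comments out; uncomment to restore Pong:
    # env = make_atari("PongNoFrameskip-v4")
    # env = wrap_deepmind(env, frame_stack=False)
    # env = wrap_pytorch(env)   # image input, handled by AtariBody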
49 changes: 49 additions & 0 deletions networks/networks.py
@@ -289,6 +289,55 @@ def forward(self, inputs):
     def feature_size(self, input_shape):
         return self.conv3(self.conv2(self.conv1(torch.zeros(1, *input_shape)))).view(1, -1).size(1)

     def layer_init(self, module, weight_init, bias_init, gain=1):
         weight_init(module.weight.data, gain=gain)
         bias_init(module.bias.data)
         return module
+
+
+class ActorCriticER(nn.Module):
+    def __init__(self, input_shape, num_actions):
+        super(ActorCriticER, self).__init__()
+
+        init_ = lambda m: self.layer_init(m, nn.init.orthogonal_,
+                                          lambda x: nn.init.constant_(x, 0),
+                                          nn.init.calculate_gain('relu'))
+
+        self.conv1 = init_(nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4))
+        self.conv2 = init_(nn.Conv2d(32, 64, kernel_size=4, stride=2))
+        self.conv3 = init_(nn.Conv2d(64, 32, kernel_size=3, stride=1))
+        self.fc1 = init_(nn.Linear(self.feature_size(input_shape), 512))
+
+        init_ = lambda m: self.layer_init(m, nn.init.orthogonal_,
+                                          lambda x: nn.init.constant_(x, 0))
+
+        self.critic_linear = init_(nn.Linear(512, num_actions))
+
+        init_ = lambda m: self.layer_init(m, nn.init.orthogonal_,
+                                          lambda x: nn.init.constant_(x, 0), gain=0.01)
+
+        self.actor_linear = init_(nn.Linear(512, num_actions))
+
+        self.train()
+
+    def forward(self, inputs):
+        x = F.relu(self.conv1(inputs/255.0))
+        x = F.relu(self.conv2(x))
+        x = F.relu(self.conv3(x))
+        x = x.view(x.size(0), -1)
+
+        x = F.relu(self.fc1(x))
+
+        q_value = self.critic_linear(x)
+        logits = self.actor_linear(x)
+        policy = F.softmax(logits, dim=1)
+        value = (policy * q_value).sum(-1, keepdim=True)
+
+        return logits, policy, value, q_value
+
+    def feature_size(self, input_shape):
+        return self.conv3(self.conv2(self.conv1(torch.zeros(1, *input_shape)))).view(1, -1).size(1)
+
+    def layer_init(self, module, weight_init, bias_init, gain=1):
+        weight_init(module.weight.data, gain=gain)
+        bias_init(module.bias.data)
+        return module
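The new ActorCriticER module returns four tensors: the raw actor logits, the softmax policy, the critic's per-action Q-values, and a state value computed as the policy-weighted sum of the Q-values, V(s) = sum_a pi(a|s) * Q(s, a). That estimator, plus the ER suffix, suggests an actor-critic method with experience replay in the style of ACER. A usage sketch; the import path follows the diff header, while the input shape and action count are illustrative:

    import torch
    from networks.networks import ActorCriticER  # module path per the diff above

    net = ActorCriticER(input_shape=(4, 84, 84), num_actions=6)
    frames = torch.zeros(2, 4, 84, 84)  # batch of two stacked-frame observations

    logits, policy, value, q_value = net(frames)
    print(policy.shape, q_value.shape, value.shape)
    # torch.Size([2, 6]) torch.Size([2, 6]) torch.Size([2, 1])

    # value is exactly the expectation of q_value under the policy:
    assert torch.allclose(value, (policy * q_value).sum(-1, keepdim=True))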
Binary file modified (not shown): saved_agents/model.dump
Binary file modified (not shown): saved_agents/optim.dump
