From 9e9a89b3010e1682f1c19a7c4a8c9551c3a4f37f Mon Sep 17 00:00:00 2001
From: quintin
Date: Wed, 20 Jun 2018 14:58:01 -0400
Subject: [PATCH] fixed plotting bug

---
 01.DQN.ipynb                    | 6 ++----
 02.NStep_DQN.ipynb              | 6 ++----
 03.Double_DQN.ipynb             | 4 +---
 04.Dueling_DQN.ipynb            | 4 +---
 05.DQN-NoisyNets.ipynb          | 4 +---
 06.DQN_PriorityReplay.ipynb     | 4 +---
 07.Categorical-DQN.ipynb        | 4 +---
 08.Rainbow.ipynb                | 4 +---
 09.QuantileRegression-DQN.ipynb | 4 +---
 10.Quantile-Rainbow.ipynb       | 4 +---
 11.DRQN.ipynb                   | 4 +---
 11 files changed, 13 insertions(+), 35 deletions(-)

diff --git a/01.DQN.ipynb b/01.DQN.ipynb
index 2465d6e..9909383 100644
--- a/01.DQN.ipynb
+++ b/01.DQN.ipynb
@@ -41,7 +41,7 @@
     "from utils.wrappers import make_atari, wrap_deepmind, wrap_pytorch\n",
     "\n",
     "from utils.hyperparameters import Config\n",
-    "from agents import BaseAgent"
+    "from agents.BaseAgent import BaseAgent"
    ]
   },
   {
@@ -366,8 +366,6 @@
     "env = wrap_pytorch(env)\n",
     "model = Model(env=env, config=config)\n",
     "\n",
-    "losses = []\n",
-    "all_rewards = []\n",
     "episode_reward = 0\n",
     "\n",
     "observation = env.reset()\n",
@@ -389,7 +387,7 @@
     "        model.save_reward(episode_reward)\n",
     "        episode_reward = 0\n",
     "        \n",
-    "        if np.mean(all_rewards[-10:]) > 19:\n",
+    "        if np.mean(model.rewards[-10:]) > 19:\n",
     "            plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
     "            break\n",
     "\n",
diff --git a/02.NStep_DQN.ipynb b/02.NStep_DQN.ipynb
index 6c48cae..a08d2dc 100644
--- a/02.NStep_DQN.ipynb
+++ b/02.NStep_DQN.ipynb
@@ -39,7 +39,7 @@
     "from utils.ReplayMemory import ExperienceReplayMemory\n",
     "\n",
     "from utils.hyperparameters import Config\n",
-    "from agents import BaseAgent"
+    "from agents.BaseAgent import BaseAgent"
    ]
   },
   {
@@ -321,8 +321,6 @@
     "#env = wrappers.Monitor(env, 'Delete', force=True)\n",
     "model = Model(env=env, config=config)\n",
     "\n",
-    "losses = []\n",
-    "all_rewards = []\n",
     "episode_reward = 0\n",
     "\n",
     "observation = env.reset()\n",
@@ -344,7 +342,7 @@
     "        model.save_reward(episode_reward)\n",
     "        episode_reward = 0\n",
     "        \n",
-    "        if np.mean(all_rewards[-10:]) > 19:\n",
+    "        if np.mean(model.rewards[-10:]) > 19:\n",
     "            plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
     "            break\n",
     "\n",
diff --git a/03.Double_DQN.ipynb b/03.Double_DQN.ipynb
index e974414..223ffd3 100644
--- a/03.Double_DQN.ipynb
+++ b/03.Double_DQN.ipynb
@@ -171,8 +171,6 @@
     "env = wrap_pytorch(env)\n",
     "model = Model(env=env, config=config)\n",
     "\n",
-    "losses = []\n",
-    "all_rewards = []\n",
     "episode_reward = 0\n",
     "\n",
     "observation = env.reset()\n",
@@ -194,7 +192,7 @@
     "        model.save_reward(episode_reward)\n",
     "        episode_reward = 0\n",
     "        \n",
-    "        if np.mean(all_rewards[-10:]) > 19:\n",
+    "        if np.mean(model.rewards[-10:]) > 19:\n",
     "            plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
     "            break\n",
     "\n",
diff --git a/04.Dueling_DQN.ipynb b/04.Dueling_DQN.ipynb
index 2974108..7992833 100644
--- a/04.Dueling_DQN.ipynb
+++ b/04.Dueling_DQN.ipynb
@@ -221,8 +221,6 @@
     "env = wrap_pytorch(env)\n",
     "model = Model(env=env, config=config)\n",
     "\n",
-    "losses = []\n",
-    "all_rewards = []\n",
     "episode_reward = 0\n",
     "\n",
     "observation = env.reset()\n",
@@ -244,7 +242,7 @@
     "        model.save_reward(episode_reward)\n",
     "        episode_reward = 0\n",
     "        \n",
-    "        if np.mean(all_rewards[-10:]) > 19:\n",
+    "        if np.mean(model.rewards[-10:]) > 19:\n",
     "            plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
     "            break\n",
     "\n",
diff --git a/05.DQN-NoisyNets.ipynb b/05.DQN-NoisyNets.ipynb
index b3f95aa..eb9c9c4 100644
--- a/05.DQN-NoisyNets.ipynb
+++ b/05.DQN-NoisyNets.ipynb
@@ -294,8 +294,6 @@
     "env = wrap_pytorch(env)\n",
     "model = Model(env=env, config=config)\n",
     "\n",
-    "losses = []\n",
-    "all_rewards = []\n",
     "episode_reward = 0\n",
     "\n",
     "observation = env.reset()\n",
@@ -315,7 +313,7 @@
     "        model.save_reward(episode_reward)\n",
     "        episode_reward = 0\n",
     "        \n",
-    "        if np.mean(all_rewards[-10:]) > 19:\n",
+    "        if np.mean(model.rewards[-10:]) > 19:\n",
     "            plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
     "            break\n",
     "\n",
diff --git a/06.DQN_PriorityReplay.ipynb b/06.DQN_PriorityReplay.ipynb
index beeafda..9d90a1c 100644
--- a/06.DQN_PriorityReplay.ipynb
+++ b/06.DQN_PriorityReplay.ipynb
@@ -272,8 +272,6 @@
     "env = wrap_pytorch(env)\n",
     "model = Model(env=env, config=config)\n",
     "\n",
-    "losses = []\n",
-    "all_rewards = []\n",
     "episode_reward = 0\n",
     "\n",
     "observation = env.reset()\n",
@@ -295,7 +293,7 @@
     "        model.save_reward(episode_reward)\n",
     "        episode_reward = 0\n",
     "        \n",
-    "        if np.mean(all_rewards[-10:]) > 19:\n",
+    "        if np.mean(model.rewards[-10:]) > 19:\n",
     "            plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
     "            break\n",
     "\n",
diff --git a/07.Categorical-DQN.ipynb b/07.Categorical-DQN.ipynb
index e36c793..750631c 100644
--- a/07.Categorical-DQN.ipynb
+++ b/07.Categorical-DQN.ipynb
@@ -276,8 +276,6 @@
     "env = wrap_pytorch(env)\n",
     "model = Model(env=env, config=config)\n",
     "\n",
-    "losses = []\n",
-    "all_rewards = []\n",
     "episode_reward = 0\n",
     "\n",
     "observation = env.reset()\n",
@@ -299,7 +297,7 @@
     "        model.save_reward(episode_reward)\n",
     "        episode_reward = 0\n",
     "        \n",
-    "        if np.mean(all_rewards[-10:]) > 19:\n",
+    "        if np.mean(model.rewards[-10:]) > 19:\n",
     "            plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
     "            break\n",
     "\n",
diff --git a/08.Rainbow.ipynb b/08.Rainbow.ipynb
index 94b90c1..ece8811 100644
--- a/08.Rainbow.ipynb
+++ b/08.Rainbow.ipynb
@@ -301,8 +301,6 @@
     "env = wrap_pytorch(env)\n",
     "model = Model(env=env, config=config)\n",
     "\n",
-    "losses = []\n",
-    "all_rewards = []\n",
     "episode_reward = 0\n",
     "\n",
     "observation = env.reset()\n",
@@ -322,7 +320,7 @@
     "        model.save_reward(episode_reward)\n",
     "        episode_reward = 0\n",
     "        \n",
-    "        if np.mean(all_rewards[-10:]) > 19:\n",
+    "        if np.mean(model.rewards[-10:]) > 19:\n",
     "            plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
     "            break\n",
     "\n",
diff --git a/09.QuantileRegression-DQN.ipynb b/09.QuantileRegression-DQN.ipynb
index 0955e3d..1d93f88 100644
--- a/09.QuantileRegression-DQN.ipynb
+++ b/09.QuantileRegression-DQN.ipynb
@@ -263,8 +263,6 @@
     "env = wrap_pytorch(env)\n",
     "model = Model(env=env, config=config)\n",
     "\n",
-    "losses = []\n",
-    "all_rewards = []\n",
     "episode_reward = 0\n",
     "\n",
     "observation = env.reset()\n",
@@ -286,7 +284,7 @@
     "        model.save_reward(episode_reward)\n",
     "        episode_reward = 0\n",
     "        \n",
-    "        if np.mean(all_rewards[-10:]) > 19:\n",
+    "        if np.mean(model.rewards[-10:]) > 19:\n",
     "            plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
     "            break\n",
     "\n",
diff --git a/10.Quantile-Rainbow.ipynb b/10.Quantile-Rainbow.ipynb
index dbbb9a3..a940c60 100644
--- a/10.Quantile-Rainbow.ipynb
+++ b/10.Quantile-Rainbow.ipynb
@@ -282,8 +282,6 @@
     "env = wrap_pytorch(env)\n",
     "model = Model(env=env, config=config)\n",
     "\n",
-    "losses = []\n",
-    "all_rewards = []\n",
     "episode_reward = 0\n",
     "\n",
     "observation = env.reset()\n",
@@ -303,7 +301,7 @@
     "        model.save_reward(episode_reward)\n",
     "        episode_reward = 0\n",
     "        \n",
-    "        if np.mean(all_rewards[-10:]) > 19:\n",
+    "        if np.mean(model.rewards[-10:]) > 19:\n",
     "            plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
     "            break\n",
     "\n",
diff --git a/11.DRQN.ipynb b/11.DRQN.ipynb
index 65f1e71..94cacc5 100644
--- a/11.DRQN.ipynb
+++ b/11.DRQN.ipynb
@@ -353,8 +353,6 @@
     "#env = gym.make('CartPole-v1')\n",
     "model = Model(env=env, config=config)\n",
     "\n",
-    "losses = []\n",
-    "all_rewards = []\n",
     "episode_reward = 0\n",
     "\n",
     "observation = env.reset()\n",
@@ -376,7 +374,7 @@
     "        model.save_reward(episode_reward)\n",
     "        episode_reward = 0\n",
     "        \n",
-    "        if np.mean(all_rewards[-10:]) > 19:\n",
+    "        if np.mean(model.rewards[-10:]) > 19:\n",
     "            plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
     "            break\n",
     "\n",
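
Note on the change above: reward bookkeeping moves from notebook-local lists onto the agent, which is why the per-notebook losses/all_rewards lists are dropped and the stopping check now reads model.rewards[-10:]. The sketch below is a minimal illustration of the kind of bookkeeping the agents.BaseAgent import is assumed to provide; only save_reward() and the rewards attribute are attested by this patch, while save_loss()/losses are hypothetical names included so the plot() call could read its data from the agent the same way.

    # Illustrative sketch only, not the actual agents/BaseAgent.py in this repo.
    class BaseAgent:
        def __init__(self):
            self.rewards = []   # per-episode returns; notebooks read model.rewards[-10:]
            self.losses = []    # per-update losses (hypothetical name)

        def save_reward(self, reward):
            # called once per finished episode from the training loop
            self.rewards.append(reward)

        def save_loss(self, loss):
            # hypothetical counterpart for loss tracking
            self.losses.append(loss)

With an interface like this, each training loop can stop once np.mean(model.rewards[-10:]) > 19 and could pass model.rewards / model.losses (rather than the removed local lists) to plot().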