Commit
fixed plotting bug (missing variables) referenced in Issue #5
qfettes committed Nov 29, 2018
1 parent 592c21d commit 1996cca
Showing 16 changed files with 79 additions and 30 deletions.
4 changes: 2 additions & 2 deletions 01.DQN.ipynb
@@ -386,7 +386,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -420,7 +420,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
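This hunk is the fix named in the commit message, and the same one-line change repeats in each of the remaining notebooks below: the training loop passed plot() the names all_rewards and losses, which are never defined in the notebook (the statistics live on the agent object), and it omitted the sigma argument that the five-parameter plot() in dqn_devel.py expects. A minimal runnable sketch of the mismatch and the fix; the Model stand-in and its sample values are hypothetical:

    from datetime import timedelta
    from timeit import default_timer as timer

    import numpy as np

    class Model:
        # Hypothetical stand-in for the tutorial's agent, which tracks these
        # statistics on itself; that is why the fixed call reads them off
        # `model` instead of loop-local variables.
        def __init__(self):
            self.rewards = [18.0, 19.5, 20.1]            # per-episode returns
            self.losses = [0.9, 0.5, 0.2]                # per-update losses
            self.sigma_parameter_mag = [0.4, 0.3, 0.2]   # noisy-layer sigma magnitudes

    def plot(frame_idx, rewards, losses, sigma, elapsed_time):
        # Same five-parameter signature as plot() in dqn_devel.py, with the
        # plotting body reduced to a print for this sketch.
        print('frame %s. reward: %s. time: %s'
              % (frame_idx, np.mean(rewards[-10:]), elapsed_time))

    start = timer()
    model = Model()

    # Before this commit the notebooks called
    #   plot(frame_idx, all_rewards, losses, timedelta(...))
    # which raises NameError (all_rewards, losses are undefined) and is one
    # argument short. The fixed call:
    plot(100000, model.rewards, model.losses, model.sigma_parameter_mag,
         timedelta(seconds=int(timer() - start)))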
4 changes: 2 additions & 2 deletions 02.NStep_DQN.ipynb
@@ -342,7 +342,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -376,7 +376,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 03.Double_DQN.ipynb
@@ -193,7 +193,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -227,7 +227,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 04.Dueling_DQN.ipynb
@@ -243,7 +243,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -277,7 +277,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 05.DQN-NoisyNets.ipynb
@@ -314,7 +314,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -348,7 +348,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 06.DQN_PriorityReplay.ipynb
@@ -294,7 +294,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -328,7 +328,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 07.Categorical-DQN.ipynb
@@ -298,7 +298,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -332,7 +332,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 08.Rainbow.ipynb
@@ -321,7 +321,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -355,7 +355,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 09.QuantileRegression-DQN.ipynb
@@ -285,7 +285,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -319,7 +319,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
4 changes: 2 additions & 2 deletions 10.Quantile-Rainbow.ipynb
@@ -302,7 +302,7 @@
 " episode_reward = 0\n",
 " \n",
 " if np.mean(model.rewards[-10:]) > 19:\n",
-" plot(frame_idx, all_rewards, losses, timedelta(seconds=int(timer()-start)))\n",
+" plot(frame_idx, model.rewards, model.losses, model.sigma_parameter_mag, timedelta(seconds=int(timer()-start)))\n",
 " break\n",
 "\n",
 " if frame_idx % 10000 == 0:\n",
@@ -336,7 +336,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.3"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,
6 changes: 3 additions & 3 deletions 11.DRQN.ipynb (large diff not rendered by default)
4 changes: 2 additions & 2 deletions agents/DQN.py
@@ -58,8 +58,8 @@ def __init__(self, static_policy=False, env=None, config=None):
         self.nstep_buffer = []

     def declare_networks(self):
-        self.model = DQN(self.num_feats, self.num_actions, noisy=self.noisy, sigma_init=self.sigma_init, body=AtariBody)
-        self.target_model = DQN(self.num_feats, self.num_actions, noisy=self.noisy, sigma_init=self.sigma_init, body=AtariBody)
+        self.model = DQN(self.num_feats, self.num_actions, noisy=self.noisy, sigma_init=self.sigma_init, body=SimpleBody)
+        self.target_model = DQN(self.num_feats, self.num_actions, noisy=self.noisy, sigma_init=self.sigma_init, body=SimpleBody)

     def declare_memory(self):
         self.memory = ExperienceReplayMemory(self.experience_replay_size) if not self.priority_replay else PrioritizedReplayMemory(self.experience_replay_size, self.priority_alpha, self.priority_beta_start, self.priority_beta_frames)
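The body swap pairs with the environment change in dqn_devel.py below: AtariBody is presumably the convolutional feature extractor for stacked image frames, while SimpleBody handles flat, low-dimensional observations such as CartPole's four-element state. The diff does not show SimpleBody itself; the sketch below is hypothetical, with the constructor signature, layer widths, and method names all assumed:

    import torch.nn as nn
    import torch.nn.functional as F

    class SimpleBody(nn.Module):
        # Hypothetical MLP feature body for flat observations; the real
        # SimpleBody in networks/ may differ in widths and interface.
        def __init__(self, input_shape, num_actions, noisy=False, sigma_init=0.5):
            super(SimpleBody, self).__init__()
            self.fc1 = nn.Linear(input_shape[0], 128)

        def forward(self, x):
            return F.relu(self.fc1(x))

        def feature_size(self):
            return self.fc1.out_features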
10 changes: 5 additions & 5 deletions dqn_devel.py
@@ -3,7 +3,7 @@

 from IPython.display import clear_output
 import matplotlib
-matplotlib.use("agg")
+#matplotlib.use("agg")
 from matplotlib import pyplot as plt
 #%matplotlib inline

@@ -76,17 +76,17 @@ def plot(frame_idx, rewards, losses, sigma, elapsed_time):
     plt.title('noisy param magnitude')
     plt.plot(sigma)
     plt.show()
-    #print('frame %s. reward: %s. time: %s' % (frame_idx, np.mean(rewards[-10:]), elapsed_time))
+    print('frame %s. reward: %s. time: %s' % (frame_idx, np.mean(rewards[-10:]), elapsed_time))


 if __name__=='__main__':
     start=timer()

-    env_id = "PongNoFrameskip-v4"
+    '''env_id = "PongNoFrameskip-v4"
     env = make_atari(env_id)
     env = wrap_deepmind(env, frame_stack=False)
-    env = wrap_pytorch(env)
-    #env = gym.make('CartPole-v0')
+    env = wrap_pytorch(env)'''
+    env = gym.make('CartPole-v0')
     #env = wrappers.Monitor(env, 'Delete', force=True)
     model = Model(env=env, config=config)
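With the Atari setup fenced off inside a string literal, the development script now exercises the agent on CartPole, which is far quicker to iterate on and is why agents/DQN.py above switches its default body to SimpleBody. A small sketch of the swap under the classic (pre-0.26) gym API; make_atari, wrap_deepmind, and wrap_pytorch are the repo's wrapper names exactly as they appear in the diff:

    import gym

    # Quick-iteration environment used by the updated script:
    env = gym.make('CartPole-v0')
    print(env.observation_space.shape)  # (4,)  flat input, handled by SimpleBody
    print(env.action_space.n)           # 2

    # The Atari pipeline the commit comments out; uncomment to restore Pong:
    # env = make_atari("PongNoFrameskip-v4")
    # env = wrap_deepmind(env, frame_stack=False)
    # env = wrap_pytorch(env)   # image input, handled by AtariBody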
49 changes: 49 additions & 0 deletions networks/networks.py
@@ -289,6 +289,55 @@ def forward(self, inputs):
     def feature_size(self, input_shape):
         return self.conv3(self.conv2(self.conv1(torch.zeros(1, *input_shape)))).view(1, -1).size(1)

     def layer_init(self, module, weight_init, bias_init, gain=1):
         weight_init(module.weight.data, gain=gain)
         bias_init(module.bias.data)
         return module
+
+
+class ActorCriticER(nn.Module):
+    def __init__(self, input_shape, num_actions):
+        super(ActorCriticER, self).__init__()
+
+        init_ = lambda m: self.layer_init(m, nn.init.orthogonal_,
+                                          lambda x: nn.init.constant_(x, 0),
+                                          nn.init.calculate_gain('relu'))
+
+        self.conv1 = init_(nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4))
+        self.conv2 = init_(nn.Conv2d(32, 64, kernel_size=4, stride=2))
+        self.conv3 = init_(nn.Conv2d(64, 32, kernel_size=3, stride=1))
+        self.fc1 = init_(nn.Linear(self.feature_size(input_shape), 512))
+
+        init_ = lambda m: self.layer_init(m, nn.init.orthogonal_,
+                                          lambda x: nn.init.constant_(x, 0))
+
+        self.critic_linear = init_(nn.Linear(512, num_actions))
+
+        init_ = lambda m: self.layer_init(m, nn.init.orthogonal_,
+                                          lambda x: nn.init.constant_(x, 0), gain=0.01)
+
+        self.actor_linear = init_(nn.Linear(512, num_actions))
+
+        self.train()
+
+    def forward(self, inputs):
+        x = F.relu(self.conv1(inputs/255.0))
+        x = F.relu(self.conv2(x))
+        x = F.relu(self.conv3(x))
+        x = x.view(x.size(0), -1)
+
+        x = F.relu(self.fc1(x))
+
+        q_value = self.critic_linear(x)
+        logits = self.actor_linear(x)
+        policy = F.softmax(logits, dim=1)
+        value = (policy * q_value).sum(-1, keepdim=True)
+
+        return logits, policy, value, q_value
+
+    def feature_size(self, input_shape):
+        return self.conv3(self.conv2(self.conv1(torch.zeros(1, *input_shape)))).view(1, -1).size(1)
+
+    def layer_init(self, module, weight_init, bias_init, gain=1):
+        weight_init(module.weight.data, gain=gain)
+        bias_init(module.bias.data)
+        return module
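The new ActorCriticER module returns four tensors: the raw actor logits, the softmax policy, the critic's per-action Q-values, and a state value computed as the policy-weighted sum of the Q-values, V(s) = sum_a pi(a|s) * Q(s, a). That estimator, plus the ER suffix, suggests an actor-critic method with experience replay in the style of ACER. A usage sketch; the import path follows the diff header, while the input shape and action count are illustrative:

    import torch
    from networks.networks import ActorCriticER  # module path per the diff above

    net = ActorCriticER(input_shape=(4, 84, 84), num_actions=6)
    frames = torch.zeros(2, 4, 84, 84)  # batch of two stacked-frame observations

    logits, policy, value, q_value = net(frames)
    print(policy.shape, q_value.shape, value.shape)
    # torch.Size([2, 6]) torch.Size([2, 6]) torch.Size([2, 1])

    # value is exactly the expectation of q_value under the policy:
    assert torch.allclose(value, (policy * q_value).sum(-1, keepdim=True))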
Binary file modified (not shown): saved_agents/model.dump
Binary file modified (not shown): saved_agents/optim.dump
