Commit

Improved stat collection. At every timestep, collect: average temporal difference, action selection frequency, and sigma parameter magnitude. Plot all results. Only Q-Learning-based methods support this improved plotting and stat collection for now.
qfettes committed Feb 14, 2019
1 parent a4cba58 commit 2fe6e39
Showing 19 changed files with 330 additions and 56 deletions.
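
A minimal reader-side sketch of how the three CSV logs introduced by this commit can be loaded for inspection. The file names and row layouts ((timestep, value) pairs for td.csv and sig_param_mag.csv, a timestep plus one frequency per action for action_log.csv) come from the agents/BaseAgent.py diff below; the loading code itself is illustrative and not part of the commit.

import csv
import os

log_dir = '/tmp/gym'   # default log_dir used throughout this commit

def read_pairs(path):
    # td.csv and sig_param_mag.csv hold headerless (timestep, value) rows
    with open(path) as f:
        return [(int(t), float(v)) for t, v in csv.reader(f)]

td = read_pairs(os.path.join(log_dir, 'td.csv'))
sigma = read_pairs(os.path.join(log_dir, 'sig_param_mag.csv'))

# action_log.csv rows are [timestep, freq_action_0, freq_action_1, ...]
with open(os.path.join(log_dir, 'action_log.csv')) as f:
    action_freqs = [[float(x) for x in row] for row in csv.reader(f)]
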
8 changes: 4 additions & 4 deletions 01.DQN.ipynb
@@ -169,8 +169,8 @@
"outputs": [],
"source": [
"class Model(BaseAgent):\n",
" def __init__(self, static_policy=False, env=None, config=None):\n",
" super(Model, self).__init__()\n",
" def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):\n",
" super(Model, self).__init__(config=config, env=env, log_dir=log_dir)\n",
" self.device = config.device\n",
"\n",
" self.gamma = config.GAMMA\n",
@@ -279,8 +279,8 @@
" self.optimizer.step()\n",
"\n",
" self.update_target_model()\n",
" self.save_loss(loss.item())\n",
" self.save_sigma_param_magnitudes()\n",
" self.save_loss(loss.item(), frame)\n",
" self.save_sigma_param_magnitudes(frame)\n",
"\n",
" def get_action(self, s, eps=0.1):\n",
" with torch.no_grad():\n",
8 changes: 4 additions & 4 deletions 02.NStep_DQN.ipynb
@@ -98,8 +98,8 @@
"outputs": [],
"source": [
"class Model(BaseAgent):\n",
" def __init__(self, static_policy=False, env=None, config=None):\n",
" super(Model, self).__init__()\n",
" def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):\n",
" super(Model, self).__init__(config=config, env=env, log_dir=log_dir)\n",
" self.device = config.device\n",
"\n",
" self.gamma = config.GAMMA\n",
@@ -219,8 +219,8 @@
" self.optimizer.step()\n",
"\n",
" self.update_target_model()\n",
" self.save_loss(loss.item())\n",
" self.save_sigma_param_magnitudes()\n",
" self.save_loss(loss.item(), frame)\n",
" self.save_sigma_param_magnitudes(frame)\n",
"\n",
" def get_action(self, s, eps=0.1):\n",
" with torch.no_grad():\n",
10 changes: 5 additions & 5 deletions 12.A2C.ipynb
@@ -208,8 +208,8 @@
"outputs": [],
"source": [
"class Model(BaseAgent):\n",
" def __init__(self, static_policy=False, env=None, config=None):\n",
" super(Model, self).__init__()\n",
" def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):\n",
" super(Model, self).__init__(config=config, env=env, log_dir=log_dir)\n",
" self.device = config.device\n",
"\n",
" self.noisy=config.USE_NOISY_NETS\n",
@@ -316,16 +316,16 @@
" torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm_max)\n",
" self.optimizer.step()\n",
"\n",
" self.save_loss(loss.item(), action_loss.item(), value_loss.item(), dist_entropy.item())\n",
" #self.save_loss(loss.item(), action_loss.item(), value_loss.item(), dist_entropy.item())\n",
" #self.save_sigma_param_magnitudes()\n",
"\n",
" return value_loss.item(), action_loss.item(), dist_entropy.item()\n",
"\n",
" def save_loss(self, loss, policy_loss, value_loss, entropy_loss):\n",
" '''def save_loss(self, loss, policy_loss, value_loss, entropy_loss):\n",
" super(Model, self).save_loss(loss)\n",
" self.policy_losses.append(policy_loss)\n",
" self.value_losses.append(value_loss)\n",
" self.entropy_losses.append(entropy_loss)"
" self.entropy_losses.append(entropy_loss)'''"
]
},
{
2 changes: 1 addition & 1 deletion 14.PPO.ipynb
@@ -250,7 +250,7 @@
" dist_entropy_epoch /= (self.ppo_epoch * self.num_mini_batch)\n",
" total_loss = value_loss_epoch + action_loss_epoch + dist_entropy_epoch\n",
"\n",
" self.save_loss(total_loss, action_loss_epoch, value_loss_epoch, dist_entropy_epoch)\n",
" #self.save_loss(total_loss, action_loss_epoch, value_loss_epoch, dist_entropy_epoch)\n",
"\n",
" return action_loss_epoch, value_loss_epoch, dist_entropy_epoch"
]
12 changes: 6 additions & 6 deletions agents/A2C.py
@@ -12,8 +12,8 @@
from timeit import default_timer as timer

class Model(BaseAgent):
def __init__(self, static_policy=False, env=None, config=None):
super(Model, self).__init__()
def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
super(Model, self).__init__(config=config, env=env, log_dir=log_dir)
self.device = config.device

self.noisy=config.USE_NOISY_NETS
@@ -120,13 +120,13 @@ def update(self, rollout):
torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm_max)
self.optimizer.step()

self.save_loss(loss.item(), action_loss.item(), value_loss.item(), dist_entropy.item())
#self.save_loss(loss.item(), action_loss.item(), value_loss.item(), dist_entropy.item())
#self.save_sigma_param_magnitudes()

return value_loss.item(), action_loss.item(), dist_entropy.item()

def save_loss(self, loss, policy_loss, value_loss, entropy_loss):
super(Model, self).save_loss(loss)
'''def save_loss(self, loss, policy_loss, value_loss, entropy_loss):
super(Model, self).save_td(loss)
self.policy_losses.append(policy_loss)
self.value_losses.append(value_loss)
self.entropy_losses.append(entropy_loss)
self.entropy_losses.append(entropy_loss)'''
43 changes: 35 additions & 8 deletions agents/BaseAgent.py
@@ -1,19 +1,35 @@
import numpy as np
import pickle
import os.path
import csv

import torch
import torch.optim as optim


class BaseAgent(object):
def __init__(self):
def __init__(self, config, env, log_dir='/tmp/gym'):
self.model=None
self.target_model=None
self.optimizer = None
self.losses = []

self.td_file = open(os.path.join(log_dir, 'td.csv'), 'a')
self.td = csv.writer(self.td_file)

self.sigma_parameter_mag_file = open(os.path.join(log_dir, 'sig_param_mag.csv'), 'a')
self.sigma_parameter_mag = csv.writer(self.sigma_parameter_mag_file)

self.rewards = []
self.sigma_parameter_mag=[]

self.action_log_frequency = config.ACTION_SELECTION_COUNT_FREQUENCY
self.action_selections = [0 for _ in range(env.action_space.n)]
self.action_log_file = open(os.path.join(log_dir, 'action_log.csv'), 'a')
self.action_log = csv.writer(self.action_log_file)

def __del__(self):
self.td_file.close()
self.sigma_parameter_mag_file.close()
self.action_log_file.close()

def huber(self, x):
cond = (x.abs() < 1.0).float().detach()
@@ -45,7 +61,7 @@ def load_replay(self):
if os.path.isfile(fname):
self.memory = pickle.load(open(fname, 'rb'))

def save_sigma_param_magnitudes(self):
def save_sigma_param_magnitudes(self, tstep):
with torch.no_grad():
sum_, count = 0.0, 0.0
for name, param in self.model.named_parameters():
@@ -54,10 +70,21 @@ def save_sigma_param_magnitudes(self):
count += np.prod(param.shape)

if count > 0:
self.sigma_parameter_mag.append(sum_/count)
self.sigma_parameter_mag.writerow((tstep, sum_/count))

def save_loss(self, loss):
self.losses.append(loss)
def save_td(self, td, tstep):
self.td.writerow((tstep, td))

def save_reward(self, reward):
self.rewards.append(reward)
self.rewards.append(reward)

def save_action(self, action, tstep):
self.action_selections[int(action)] += 1.0/self.action_log_frequency
if (tstep+1) % self.action_log_frequency == 0:
self.action_log.writerow(list([tstep]+self.action_selections))
self.action_selections = [0 for _ in range(len(self.action_selections))]

def flush_data(self):
self.action_log_file.flush()
self.sigma_parameter_mag_file.flush()
self.td_file.flush()
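
A condensed sketch of how an agent exercises these new logging hooks from the training loop, mirroring the agents/DQN.py and dqn_devel.py diffs further down; the setup of env, config, observation, and log_dir is assumed to be as in dqn_devel.py, and episode bookkeeping is simplified.

model = Model(env=env, config=config, log_dir=log_dir)

observation = env.reset()
for frame_idx in range(1, config.MAX_FRAMES + 1):
    epsilon = config.epsilon_by_frame(frame_idx)

    action = model.get_action(observation, epsilon)
    model.save_action(action, frame_idx)  # counts selections; one row goes to action_log.csv every ACTION_SELECTION_COUNT_FREQUENCY frames

    prev_observation = observation
    observation, reward, done, _ = env.step(action)
    observation = None if done else observation

    # Model.update() now logs its own per-step stats:
    #   self.save_td(loss.item(), frame)         -> td.csv
    #   self.save_sigma_param_magnitudes(frame)  -> sig_param_mag.csv
    model.update(prev_observation, action, reward, observation, frame_idx)

    if done:
        observation = env.reset()  # episode/reward bookkeeping elided here

    if frame_idx % 10000 == 0:
        model.flush_data()  # push buffered rows to disk before plotting
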
4 changes: 2 additions & 2 deletions agents/Categorical_DQN.py
@@ -8,14 +8,14 @@


class Model(DQN_Agent):
def __init__(self, static_policy=False, env=None, config=None):
def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
self.atoms = config.ATOMS
self.v_max = config.V_MAX
self.v_min = config.V_MIN
self.supports = torch.linspace(self.v_min, self.v_max, self.atoms).view(1, 1, self.atoms).to(config.device)
self.delta = (self.v_max - self.v_min) / (self.atoms - 1)

super(Model, self).__init__(static_policy, env, config)
super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)

def declare_networks(self):
self.model = CategoricalDQN(self.env.observation_space.shape, self.env.action_space.n, noisy=self.noisy, sigma_init=self.sigma_init, atoms=self.atoms)
8 changes: 4 additions & 4 deletions agents/DQN.py
@@ -11,8 +11,8 @@
from timeit import default_timer as timer

class Model(BaseAgent):
def __init__(self, static_policy=False, env=None, config=None):
super(Model, self).__init__()
def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
super(Model, self).__init__(config=config, env=env, log_dir=log_dir)
self.device = config.device

self.noisy=config.USE_NOISY_NETS
@@ -145,8 +145,8 @@ def update(self, s, a, r, s_, frame=0):
self.optimizer.step()

self.update_target_model()
self.save_loss(loss.item())
self.save_sigma_param_magnitudes()
self.save_td(loss.item(), frame)
self.save_sigma_param_magnitudes(frame)

def get_action(self, s, eps=0.1): #faster
with torch.no_grad():
4 changes: 2 additions & 2 deletions agents/DRQN.py
@@ -10,10 +10,10 @@
from networks.network_bodies import AtariBody, SimpleBody

class Model(DQN_Agent):
def __init__(self, static_policy=False, env=None, config=None):
def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
self.sequence_length=config.SEQUENCE_LENGTH

super(Model, self).__init__(static_policy, env, config)
super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)

self.reset_hx()

4 changes: 2 additions & 2 deletions agents/Double_DQN.py
@@ -5,8 +5,8 @@
from agents.DQN import Model as DQN_Agent

class Model(DQN_Agent):
def __init__(self, static_policy=False, env=None, config=None):
super(Model, self).__init__(static_policy, env, config)
def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)

def get_max_next_state_action(self, next_states):
return self.model(next_states).max(dim=1)[1].view(-1, 1)
4 changes: 2 additions & 2 deletions agents/Dueling_DQN.py
@@ -6,8 +6,8 @@
from networks.networks import DuelingDQN

class Model(DQN_Agent):
def __init__(self, static_policy=False, env=None, config=None):
super(Model, self).__init__(static_policy, env, config)
def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)

def declare_networks(self):
self.model = DuelingDQN(self.env.observation_space.shape, self.env.action_space.n, noisy=self.noisy, sigma_init=self.sigma_init)
2 changes: 1 addition & 1 deletion agents/PPO.py
@@ -71,7 +71,7 @@ def update(self, rollout):
dist_entropy_epoch /= (self.ppo_epoch * self.num_mini_batch)
total_loss = value_loss_epoch + action_loss_epoch + dist_entropy_epoch

self.save_loss(total_loss, action_loss_epoch, value_loss_epoch, dist_entropy_epoch)
#self.save_loss(total_loss, action_loss_epoch, value_loss_epoch, dist_entropy_epoch)
#self.save_sigma_param_magnitudes()

return action_loss_epoch, value_loss_epoch, dist_entropy_epoch
4 changes: 2 additions & 2 deletions agents/QuantileRegression_DQN.py
@@ -7,12 +7,12 @@
from networks.networks import QRDQN

class Model(DQN_Agent):
def __init__(self, static_policy=False, env=None, config=None):
def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
self.num_quantiles = config.QUANTILES
self.cumulative_density = torch.tensor((2 * np.arange(self.num_quantiles) + 1) / (2.0 * self.num_quantiles), device=config.device, dtype=torch.float)
self.quantile_weight = 1.0 / self.num_quantiles

super(Model, self).__init__(static_policy, env, config)
super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)


def declare_networks(self):
4 changes: 2 additions & 2 deletions agents/Quantile_Rainbow.py
@@ -8,12 +8,12 @@
from utils.ReplayMemory import PrioritizedReplayMemory

class Model(DQN_Agent):
def __init__(self, static_policy=False, env=None, config=None):
def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
self.num_quantiles = config.QUANTILES
self.cumulative_density = torch.tensor((2 * np.arange(self.num_quantiles) + 1) / (2.0 * self.num_quantiles), device=config.device, dtype=torch.float)
self.quantile_weight = 1.0 / self.num_quantiles

super(Model, self).__init__(static_policy, env, config)
super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)

self.nsteps=max(self.nsteps, 3)

4 changes: 2 additions & 2 deletions agents/Rainbow.py
@@ -7,14 +7,14 @@
from utils.ReplayMemory import PrioritizedReplayMemory

class Model(DQN_Agent):
def __init__(self, static_policy=False, env=None, config=None):
def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
self.atoms=config.ATOMS
self.v_max=config.V_MAX
self.v_min=config.V_MIN
self.supports = torch.linspace(self.v_min, self.v_max, self.atoms).view(1, 1, self.atoms).to(config.device)
self.delta = (self.v_max - self.v_min) / (self.atoms - 1)

super(Model, self).__init__(static_policy, env, config)
super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)

self.nsteps=max(self.nsteps,3)

17 changes: 13 additions & 4 deletions dqn_devel.py
@@ -9,7 +9,7 @@
from utils.wrappers import *
from utils.hyperparameters import Config
from agents.DQN import Model
from utils.plot import plot_reward
from utils.plot import plot_all_data

config = Config()

@@ -58,14 +58,20 @@
#DRQN Parameters
config.SEQUENCE_LENGTH = 8

#data logging parameters
config.ACTION_SELECTION_COUNT_FREQUENCY = 1000

if __name__=='__main__':
start=timer()

log_dir = "/tmp/gym/"
try:
os.makedirs(log_dir)
except OSError:
files = glob.glob(os.path.join(log_dir, '*.monitor.csv'))
files = glob.glob(os.path.join(log_dir, '*.monitor.csv')) \
+ glob.glob(os.path.join(log_dir, '*td.csv')) \
+ glob.glob(os.path.join(log_dir, '*sig_param_mag.csv')) \
+ glob.glob(os.path.join(log_dir, '*action_log.csv'))
for f in files:
os.remove(f)

@@ -74,7 +80,7 @@
env = bench.Monitor(env, os.path.join(log_dir, env_id))
env = wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=True)
env = WrapPyTorch(env)
model = Model(env=env, config=config)
model = Model(env=env, config=config, log_dir=log_dir)

episode_reward = 0

@@ -83,6 +89,8 @@
epsilon = config.epsilon_by_frame(frame_idx)

action = model.get_action(observation, epsilon)
model.save_action(action, frame_idx) #log action selection

prev_observation=observation
observation, reward, done, _ = env.step(action)
observation = None if done else observation
@@ -100,7 +108,8 @@
if frame_idx % 10000 == 0:
try:
print('frame %s. time: %s' % (frame_idx, timedelta(seconds=int(timer()-start))))
plot_reward(log_dir, env_id, 'DRQN', config.MAX_FRAMES, bin_size=10, smooth=1, time=timedelta(seconds=int(timer()-start)), ipynb=False)
model.flush_data() #make sure all data is flushed to files
plot_all_data(log_dir, env_id, 'DRQN', config.MAX_FRAMES, bin_size=(10, 100, 100, 1), smooth=1, time=timedelta(seconds=int(timer()-start)), ipynb=False)
except IOError:
pass
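
For reference, a standalone sketch of invoking the new plotting entry point once the CSVs exist. The argument list is copied from the dqn_devel.py call above; reading bin_size as one bin width per logged stream (reward, temporal difference, sigma magnitude, action frequency) is only an assumption here, since the utils/plot.py changes are collapsed in this view, and env_id/max_frames are placeholder values.

from datetime import timedelta
from utils.plot import plot_all_data

log_dir = '/tmp/gym/'
env_id = 'PongNoFrameskip-v4'     # illustrative; use the monitored env id
max_frames = 1000000              # stands in for config.MAX_FRAMES

plot_all_data(log_dir, env_id, 'DRQN', max_frames,
              bin_size=(10, 100, 100, 1), smooth=1,
              time=timedelta(seconds=0), ipynb=False)
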

Binary file modified results.png
3 changes: 3 additions & 0 deletions utils/hyperparameters.py
@@ -64,6 +64,9 @@ def __init__(self):
#DRQN Parameters
self.SEQUENCE_LENGTH=8

#data logging parameters
self.ACTION_SELECTION_COUNT_FREQUENCY = 1000


'''
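
Below is a small worked example (not from the repository) of the bookkeeping this new setting drives in BaseAgent.save_action, with the frequency shrunk to 4 so the arithmetic is easy to follow.

# With ACTION_SELECTION_COUNT_FREQUENCY shrunk to 4 for illustration (the
# commit defaults to 1000): each selection adds 1/4 to its action's slot, and
# a row is emitted every 4 timesteps, so each logged row holds the selection
# frequencies over that window and sums to roughly 1.
freq = 4
action_selections = [0.0, 0.0]            # a two-action env, for the example
for tstep, action in enumerate([0, 0, 1, 0]):
    action_selections[int(action)] += 1.0 / freq
    if (tstep + 1) % freq == 0:
        print([tstep] + action_selections)        # -> [3, 0.75, 0.25]
        action_selections = [0.0 for _ in action_selections]
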
