diff --git a/01.DQN.ipynb b/01.DQN.ipynb
index 6ac7b40..dbff71e 100644
--- a/01.DQN.ipynb
+++ b/01.DQN.ipynb
@@ -169,8 +169,8 @@
    "outputs": [],
    "source": [
     "class Model(BaseAgent):\n",
-    "    def __init__(self, static_policy=False, env=None, config=None):\n",
-    "        super(Model, self).__init__()\n",
+    "    def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):\n",
+    "        super(Model, self).__init__(config=config, env=env, log_dir=log_dir)\n",
     "        self.device = config.device\n",
     "\n",
     "        self.gamma = config.GAMMA\n",
@@ -279,8 +279,8 @@
     "        self.optimizer.step()\n",
     "\n",
     "        self.update_target_model()\n",
-    "        self.save_loss(loss.item())\n",
-    "        self.save_sigma_param_magnitudes()\n",
+    "        self.save_loss(loss.item(), frame)\n",
+    "        self.save_sigma_param_magnitudes(frame)\n",
     "\n",
     "    def get_action(self, s, eps=0.1):\n",
     "        with torch.no_grad():\n",
diff --git a/02.NStep_DQN.ipynb b/02.NStep_DQN.ipynb
index 55a4bc0..bf5ed34 100644
--- a/02.NStep_DQN.ipynb
+++ b/02.NStep_DQN.ipynb
@@ -98,8 +98,8 @@
    "outputs": [],
    "source": [
     "class Model(BaseAgent):\n",
-    "    def __init__(self, static_policy=False, env=None, config=None):\n",
-    "        super(Model, self).__init__()\n",
+    "    def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):\n",
+    "        super(Model, self).__init__(config=config, env=env, log_dir=log_dir)\n",
     "        self.device = config.device\n",
     "\n",
     "        self.gamma = config.GAMMA\n",
@@ -219,8 +219,8 @@
     "        self.optimizer.step()\n",
     "\n",
     "        self.update_target_model()\n",
-    "        self.save_loss(loss.item())\n",
-    "        self.save_sigma_param_magnitudes()\n",
+    "        self.save_loss(loss.item(), frame)\n",
+    "        self.save_sigma_param_magnitudes(frame)\n",
     "\n",
     "    def get_action(self, s, eps=0.1):\n",
     "        with torch.no_grad():\n",
diff --git a/12.A2C.ipynb b/12.A2C.ipynb
index aa6e8d1..8ef8c49 100644
--- a/12.A2C.ipynb
+++ b/12.A2C.ipynb
@@ -208,8 +208,8 @@
    "outputs": [],
    "source": [
     "class Model(BaseAgent):\n",
-    "    def __init__(self, static_policy=False, env=None, config=None):\n",
-    "        super(Model, self).__init__()\n",
+    "    def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):\n",
+    "        super(Model, self).__init__(config=config, env=env, log_dir=log_dir)\n",
     "        self.device = config.device\n",
     "\n",
     "        self.noisy=config.USE_NOISY_NETS\n",
@@ -316,16 +316,16 @@
     "        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm_max)\n",
     "        self.optimizer.step()\n",
     "\n",
-    "        self.save_loss(loss.item(), action_loss.item(), value_loss.item(), dist_entropy.item())\n",
+    "        #self.save_loss(loss.item(), action_loss.item(), value_loss.item(), dist_entropy.item())\n",
     "        #self.save_sigma_param_magnitudes()\n",
     "\n",
     "        return value_loss.item(), action_loss.item(), dist_entropy.item()\n",
     "\n",
-    "    def save_loss(self, loss, policy_loss, value_loss, entropy_loss):\n",
+    "    '''def save_loss(self, loss, policy_loss, value_loss, entropy_loss):\n",
     "        super(Model, self).save_loss(loss)\n",
     "        self.policy_losses.append(policy_loss)\n",
     "        self.value_losses.append(value_loss)\n",
-    "        self.entropy_losses.append(entropy_loss)"
+    "        self.entropy_losses.append(entropy_loss)'''"
    ]
   },
   {
diff --git a/14.PPO.ipynb b/14.PPO.ipynb
index 2e46f17..f4377f3 100644
--- a/14.PPO.ipynb
+++ b/14.PPO.ipynb
@@ -250,7 +250,7 @@
     "        dist_entropy_epoch /= (self.ppo_epoch * self.num_mini_batch)\n",
     "        total_loss = value_loss_epoch + action_loss_epoch + dist_entropy_epoch\n",
     "\n",
-    "        self.save_loss(total_loss, action_loss_epoch, value_loss_epoch, dist_entropy_epoch)\n",
+    "        #self.save_loss(total_loss, action_loss_epoch, value_loss_epoch, dist_entropy_epoch)\n",
     "\n",
     "        return action_loss_epoch, value_loss_epoch, dist_entropy_epoch"
    ]
   }
diff --git a/agents/A2C.py b/agents/A2C.py
index 58c440d..52e3c0d 100644
--- a/agents/A2C.py
+++ b/agents/A2C.py
@@ -12,8 +12,8 @@ from timeit import default_timer as timer
 
 
 class Model(BaseAgent):
-    def __init__(self, static_policy=False, env=None, config=None):
-        super(Model, self).__init__()
+    def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
+        super(Model, self).__init__(config=config, env=env, log_dir=log_dir)
         self.device = config.device
 
         self.noisy=config.USE_NOISY_NETS
@@ -120,13 +120,13 @@ def update(self, rollout):
         torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm_max)
         self.optimizer.step()
 
-        self.save_loss(loss.item(), action_loss.item(), value_loss.item(), dist_entropy.item())
+        #self.save_loss(loss.item(), action_loss.item(), value_loss.item(), dist_entropy.item())
         #self.save_sigma_param_magnitudes()
 
         return value_loss.item(), action_loss.item(), dist_entropy.item()
 
-    def save_loss(self, loss, policy_loss, value_loss, entropy_loss):
-        super(Model, self).save_loss(loss)
+    '''def save_loss(self, loss, policy_loss, value_loss, entropy_loss):
+        super(Model, self).save_td(loss)
         self.policy_losses.append(policy_loss)
         self.value_losses.append(value_loss)
-        self.entropy_losses.append(entropy_loss)
+        self.entropy_losses.append(entropy_loss)'''
diff --git a/agents/BaseAgent.py b/agents/BaseAgent.py
index 019426d..324a707 100644
--- a/agents/BaseAgent.py
+++ b/agents/BaseAgent.py
@@ -1,19 +1,35 @@
 import numpy as np
 import pickle
 import os.path
+import csv
 
 import torch
 import torch.optim as optim
 
 
 class BaseAgent(object):
-    def __init__(self):
+    def __init__(self, config, env, log_dir='/tmp/gym'):
         self.model=None
         self.target_model=None
         self.optimizer = None
-        self.losses = []
+
+        self.td_file = open(os.path.join(log_dir, 'td.csv'), 'a')
+        self.td = csv.writer(self.td_file)
+
+        self.sigma_parameter_mag_file = open(os.path.join(log_dir, 'sig_param_mag.csv'), 'a')
+        self.sigma_parameter_mag = csv.writer(self.sigma_parameter_mag_file)
+
         self.rewards = []
-        self.sigma_parameter_mag=[]
+
+        self.action_log_frequency = config.ACTION_SELECTION_COUNT_FREQUENCY
+        self.action_selections = [0 for _ in range(env.action_space.n)]
+        self.action_log_file = open(os.path.join(log_dir, 'action_log.csv'), 'a')
+        self.action_log = csv.writer(self.action_log_file)
+
+    def __del__(self):
+        self.td_file.close()
+        self.sigma_parameter_mag_file.close()
+        self.action_log_file.close()
 
     def huber(self, x):
         cond = (x.abs() < 1.0).float().detach()
@@ -45,7 +61,7 @@ def load_replay(self):
         if os.path.isfile(fname):
             self.memory = pickle.load(open(fname, 'rb'))
 
-    def save_sigma_param_magnitudes(self):
+    def save_sigma_param_magnitudes(self, tstep):
         with torch.no_grad():
             sum_, count = 0.0, 0.0
             for name, param in self.model.named_parameters():
@@ -54,10 +70,21 @@
                     count += np.prod(param.shape)
 
             if count > 0:
-                self.sigma_parameter_mag.append(sum_/count)
+                self.sigma_parameter_mag.writerow((tstep, sum_/count))
 
-    def save_loss(self, loss):
-        self.losses.append(loss)
+    def save_td(self, td, tstep):
+        self.td.writerow((tstep, td))
 
     def save_reward(self, reward):
-        self.rewards.append(reward)
\ No newline at end of file
+        self.rewards.append(reward)
+
+    def save_action(self, action, tstep):
+        self.action_selections[int(action)] += 1.0/self.action_log_frequency
+        if (tstep+1) % self.action_log_frequency == 0:
+            self.action_log.writerow(list([tstep]+self.action_selections))
+            self.action_selections = [0 for _ in range(len(self.action_selections))]
+
+    def flush_data(self):
+        self.action_log_file.flush()
+        self.sigma_parameter_mag_file.flush()
+        self.td_file.flush()
diff --git a/agents/Categorical_DQN.py b/agents/Categorical_DQN.py
index d6487e8..4d5058b 100644
--- a/agents/Categorical_DQN.py
+++ b/agents/Categorical_DQN.py
@@ -8,14 +8,14 @@
 
 
 class Model(DQN_Agent):
-    def __init__(self, static_policy=False, env=None, config=None):
+    def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
         self.atoms = config.ATOMS
         self.v_max = config.V_MAX
         self.v_min = config.V_MIN
         self.supports = torch.linspace(self.v_min, self.v_max, self.atoms).view(1, 1, self.atoms).to(config.device)
         self.delta = (self.v_max - self.v_min) / (self.atoms - 1)
 
-        super(Model, self).__init__(static_policy, env, config)
+        super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)
 
     def declare_networks(self):
         self.model = CategoricalDQN(self.env.observation_space.shape, self.env.action_space.n, noisy=self.noisy, sigma_init=self.sigma_init, atoms=self.atoms)
diff --git a/agents/DQN.py b/agents/DQN.py
index 911b7bb..6f0fead 100644
--- a/agents/DQN.py
+++ b/agents/DQN.py
@@ -11,8 +11,8 @@ from timeit import default_timer as timer
 
 
 class Model(BaseAgent):
-    def __init__(self, static_policy=False, env=None, config=None):
-        super(Model, self).__init__()
+    def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
+        super(Model, self).__init__(config=config, env=env, log_dir=log_dir)
         self.device = config.device
 
         self.noisy=config.USE_NOISY_NETS
@@ -145,8 +145,8 @@ def update(self, s, a, r, s_, frame=0):
         self.optimizer.step()
 
         self.update_target_model()
-        self.save_loss(loss.item())
-        self.save_sigma_param_magnitudes()
+        self.save_td(loss.item(), frame)
+        self.save_sigma_param_magnitudes(frame)
 
     def get_action(self, s, eps=0.1): #faster
         with torch.no_grad():
diff --git a/agents/DRQN.py b/agents/DRQN.py
index cbc3e46..3f029af 100644
--- a/agents/DRQN.py
+++ b/agents/DRQN.py
@@ -10,10 +10,10 @@ from networks.network_bodies import AtariBody, SimpleBody
 
 
 class Model(DQN_Agent):
-    def __init__(self, static_policy=False, env=None, config=None):
+    def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
         self.sequence_length=config.SEQUENCE_LENGTH
 
-        super(Model, self).__init__(static_policy, env, config)
+        super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)
 
         self.reset_hx()
 
diff --git a/agents/Double_DQN.py b/agents/Double_DQN.py
index 9a0b692..2c6e96b 100644
--- a/agents/Double_DQN.py
+++ b/agents/Double_DQN.py
@@ -5,8 +5,8 @@ from agents.DQN import Model as DQN_Agent
 
 
 class Model(DQN_Agent):
-    def __init__(self, static_policy=False, env=None, config=None):
-        super(Model, self).__init__(static_policy, env, config)
+    def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
+        super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)
 
     def get_max_next_state_action(self, next_states):
         return self.model(next_states).max(dim=1)[1].view(-1, 1)
diff --git a/agents/Dueling_DQN.py b/agents/Dueling_DQN.py
index a348d94..d26fc13 100644
--- a/agents/Dueling_DQN.py
+++ b/agents/Dueling_DQN.py
@@ -6,8 +6,8 @@ from networks.networks import DuelingDQN
 
 
 class Model(DQN_Agent):
-    def __init__(self, static_policy=False, env=None, config=None):
-        super(Model, self).__init__(static_policy, env, config)
+    def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
+        super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)
 
     def declare_networks(self):
         self.model = DuelingDQN(self.env.observation_space.shape, self.env.action_space.n, noisy=self.noisy, sigma_init=self.sigma_init)
diff --git a/agents/PPO.py b/agents/PPO.py
index 90d3e01..96eba4a 100644
--- a/agents/PPO.py
+++ b/agents/PPO.py
@@ -71,7 +71,7 @@ def update(self, rollout):
         dist_entropy_epoch /= (self.ppo_epoch * self.num_mini_batch)
         total_loss = value_loss_epoch + action_loss_epoch + dist_entropy_epoch
 
-        self.save_loss(total_loss, action_loss_epoch, value_loss_epoch, dist_entropy_epoch)
+        #self.save_loss(total_loss, action_loss_epoch, value_loss_epoch, dist_entropy_epoch)
         #self.save_sigma_param_magnitudes()
 
         return action_loss_epoch, value_loss_epoch, dist_entropy_epoch
\ No newline at end of file
diff --git a/agents/QuantileRegression_DQN.py b/agents/QuantileRegression_DQN.py
index c2e7013..1cfd975 100644
--- a/agents/QuantileRegression_DQN.py
+++ b/agents/QuantileRegression_DQN.py
@@ -7,12 +7,12 @@ from networks.networks import QRDQN
 
 
 class Model(DQN_Agent):
-    def __init__(self, static_policy=False, env=None, config=None):
+    def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
        self.num_quantiles = config.QUANTILES
        self.cumulative_density = torch.tensor((2 * np.arange(self.num_quantiles) + 1) / (2.0 * self.num_quantiles), device=config.device, dtype=torch.float)
        self.quantile_weight = 1.0 / self.num_quantiles
 
-        super(Model, self).__init__(static_policy, env, config)
+        super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)
 
     def declare_networks(self):
diff --git a/agents/Quantile_Rainbow.py b/agents/Quantile_Rainbow.py
index f784c43..624b98c 100644
--- a/agents/Quantile_Rainbow.py
+++ b/agents/Quantile_Rainbow.py
@@ -8,12 +8,12 @@ from utils.ReplayMemory import PrioritizedReplayMemory
 
 
 class Model(DQN_Agent):
-    def __init__(self, static_policy=False, env=None, config=None):
+    def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
        self.num_quantiles = config.QUANTILES
        self.cumulative_density = torch.tensor((2 * np.arange(self.num_quantiles) + 1) / (2.0 * self.num_quantiles), device=config.device, dtype=torch.float)
        self.quantile_weight = 1.0 / self.num_quantiles
 
-        super(Model, self).__init__(static_policy, env, config)
+        super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)
 
        self.nsteps=max(self.nsteps, 3)
diff --git a/agents/Rainbow.py b/agents/Rainbow.py
index 295904d..3cb055e 100644
--- a/agents/Rainbow.py
+++ b/agents/Rainbow.py
@@ -7,14 +7,14 @@ from utils.ReplayMemory import PrioritizedReplayMemory
 
 
 class Model(DQN_Agent):
-    def __init__(self, static_policy=False, env=None, config=None):
+    def __init__(self, static_policy=False, env=None, config=None, log_dir='/tmp/gym'):
        self.atoms=config.ATOMS
        self.v_max=config.V_MAX
        self.v_min=config.V_MIN
        self.supports = torch.linspace(self.v_min, self.v_max, self.atoms).view(1, 1, self.atoms).to(config.device)
        self.delta = (self.v_max - self.v_min) / (self.atoms - 1)
 
-        super(Model, self).__init__(static_policy, env, config)
+        super(Model, self).__init__(static_policy, env, config, log_dir=log_dir)
 
        self.nsteps=max(self.nsteps,3)
diff --git a/dqn_devel.py b/dqn_devel.py
index cbde06d..ef38fd4 100644
--- a/dqn_devel.py
+++ b/dqn_devel.py
@@ -9,7 +9,7 @@ from utils.wrappers import *
 from utils.hyperparameters import Config
 from agents.DQN import Model
 
-from utils.plot import plot_reward
+from utils.plot import plot_all_data
 
 config = Config()
 
@@ -58,6 +58,9 @@
 #DRQN Parameters
 config.SEQUENCE_LENGTH = 8
 
+#data logging parameters
+config.ACTION_SELECTION_COUNT_FREQUENCY = 1000
+
 if __name__=='__main__':
     start=timer()
 
@@ -65,7 +68,10 @@
     try:
         os.makedirs(log_dir)
     except OSError:
-        files = glob.glob(os.path.join(log_dir, '*.monitor.csv'))
+        files = glob.glob(os.path.join(log_dir, '*.monitor.csv')) \
+            + glob.glob(os.path.join(log_dir, '*td.csv')) \
+            + glob.glob(os.path.join(log_dir, '*sig_param_mag.csv')) \
+            + glob.glob(os.path.join(log_dir, '*action_log.csv'))
         for f in files:
             os.remove(f)
 
@@ -74,7 +80,7 @@
     env = bench.Monitor(env, os.path.join(log_dir, env_id))
     env = wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=True)
     env = WrapPyTorch(env)
-    model = Model(env=env, config=config)
+    model = Model(env=env, config=config, log_dir=log_dir)
 
     episode_reward = 0
 
@@ -83,6 +89,8 @@
         epsilon = config.epsilon_by_frame(frame_idx)
         action = model.get_action(observation, epsilon)
 
+        model.save_action(action, frame_idx) #log action selection
+
         prev_observation=observation
         observation, reward, done, _ = env.step(action)
         observation = None if done else observation
@@ -100,7 +108,8 @@
         if frame_idx % 10000 == 0:
             try:
                 print('frame %s. time: %s' % (frame_idx, timedelta(seconds=int(timer()-start))))
-                plot_reward(log_dir, env_id, 'DRQN', config.MAX_FRAMES, bin_size=10, smooth=1, time=timedelta(seconds=int(timer()-start)), ipynb=False)
+                model.flush_data() #make sure all data is flushed to files
+                plot_all_data(log_dir, env_id, 'DRQN', config.MAX_FRAMES, bin_size=(10, 100, 100, 1), smooth=1, time=timedelta(seconds=int(timer()-start)), ipynb=False)
             except IOError:
                 pass
 
diff --git a/results.png b/results.png
index 0d08c8f..a8be87f 100644
Binary files a/results.png and b/results.png differ
diff --git a/utils/hyperparameters.py b/utils/hyperparameters.py
index e0b818a..5757ff1 100644
--- a/utils/hyperparameters.py
+++ b/utils/hyperparameters.py
@@ -64,6 +64,9 @@ def __init__(self):
         #DRQN Parameters
         self.SEQUENCE_LENGTH=8
 
+        #data logging parameters
+        self.ACTION_SELECTION_COUNT_FREQUENCY = 1000
+
 
 '''
diff --git a/utils/plot.py b/utils/plot.py
index 48de7a5..ae836d3 100644
--- a/utils/plot.py
+++ b/utils/plot.py
@@ -43,7 +43,7 @@ def fix_point(x, y, interval):
     return fx, fy
 
 
-def load_data(indir, smooth, bin_size):
+def load_reward_data(indir, smooth, bin_size):
     datas = []
     infiles = glob.glob(os.path.join(indir, '*.monitor.csv'))
 
@@ -78,9 +78,70 @@ def load_data(indir, smooth, bin_size):
     x, y = fix_point(x, y, bin_size)
     return [x, y]
 
+#TODO: only works for Experience Replay style training for now
+def load_custom_data(indir, stat_file, smooth, bin_size):
+    datas = []
+    infiles = glob.glob(os.path.join(indir, stat_file))
+
+    for inf in infiles: #should be 1
+        with open(inf, 'r') as f:
+            for line in f:
+                tmp = line.split(',')
+                tmp = [int(tmp[0]), float(tmp[1])]
+                datas.append(tmp)
+
+    datas = sorted(datas, key=lambda d_entry: d_entry[0])
+    result = []
+    for i in range(len(datas)):
+        result.append([datas[i][0], datas[i][1]])
+
+    if len(result) < bin_size:
+        return [None, None]
+
+    x, y = np.array(result)[:, 0], np.array(result)[:, 1]
+
+    if smooth == 1:
+        x, y = smooth_reward_curve(x, y)
+
+    if smooth == 2:
+        y = medfilt(y, kernel_size=9)
+
+    x, y = fix_point(x, y, bin_size)
+    return [x, y]
+
+#TODO: only works for Experience Replay style training for now
+def load_action_data(indir, smooth, bin_size):
+    datas = []
+    infiles = glob.glob(os.path.join(indir, 'action_log.csv'))
+
+    for inf in infiles: #should be 1
+        with open(inf, 'r') as f:
+            for line in f:
+                tmp = line.split(',')
+                tmp = [int(tmp[0])] + [float(tmp[i]) for i in range(1, len(tmp))]
+                datas.append(tmp)
+
+    datas = sorted(datas, key=lambda d_entry: d_entry[0])
+    result = datas
+    #for i in range(len(datas)):
+    #    result.append([datas[i][0], datas[i][1]])
+
+    if len(result) < bin_size:
+        return [None, None]
+
+    x, y = np.array(result)[:, 0], np.array(result)[:, 1:]
+
+    '''if smooth == 1:
+        x, y = smooth_reward_curve(x, y)
+
+    if smooth == 2:
+        y = medfilt(y, kernel_size=9)
+
+    x, y = fix_point(x, y, bin_size)'''
+    return [x, np.transpose(y)]
 
 def visdom_plot(viz, win, folder, game, name, num_steps, bin_size=100, smooth=1):
-    tx, ty = load_data(folder, smooth, bin_size)
+    tx, ty = load_reward_data(folder, smooth, bin_size)
 
     if tx is None or ty is None:
         return win
@@ -113,7 +174,7 @@ def visdom_plot(viz, win, folder, game, name, num_steps, bin_size=100, smooth=1)
 
 def plot(folder, game, name, num_steps, bin_size=100, smooth=1):
     matplotlib.rcParams.update({'font.size': 20})
-    tx, ty = load_data(folder, smooth, bin_size)
+    tx, ty = load_reward_data(folder, smooth, bin_size)
 
     if tx is None or ty is None:
         return
@@ -134,9 +195,117 @@ def plot(folder, game, name, num_steps, bin_size=100, smooth=1):
     plt.legend(loc=4)
     plt.show()
 
+def make_patch_spines_invisible(ax):
+    ax.set_frame_on(True)
+    ax.patch.set_visible(False)
+    for sp in ax.spines.values():
+        sp.set_visible(False)
+
+def plot_all_data(folder, game, name, num_steps, bin_size=(10, 100, 100, 1), smooth=1, time=None, save_filename='results.png', ipynb=False):
+    matplotlib.rcParams.update({'font.size': 20})
+    params = {
+        'xtick.labelsize': 20,
+        'ytick.labelsize': 15,
+        'legend.fontsize': 15
+    }
+    plt.rcParams.update(params)
+
+    tx, ty = load_reward_data(folder, smooth, bin_size[0])
+
+    if tx is None or ty is None:
+        return
+
+    if time is not None:
+        title = 'Avg. Last 10 Rewards: ' + str(np.round(np.mean(ty[-10:]))) + ' || ' + game + ' || Elapsed Time: ' + str(time)
+    else:
+        title = 'Avg. Last 10 Rewards: ' + str(np.round(np.mean(ty[-10:]))) + ' || ' + game
+
+    tick_fractions = np.array([0.1, 0.2, 0.4, 0.6, 0.8, 1.0])
+    ticks = tick_fractions * num_steps
+    tick_names = ["{:.0e}".format(tick) for tick in ticks]
+
+    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(20, 15), subplot_kw = dict(xticks=ticks, xlim=(0, num_steps*1.01), xlabel='Timestep', title=title))
+    ax1.set_xticklabels(tick_names)
+    ax2.set_xticklabels(tick_names)
+    ax3.set_xticklabels(tick_names)
+
+    ax1.set_ylabel('Reward')
+
+    p1, = ax1.plot(tx, ty, label="Reward")
+    #lines = [p1]
+
+    ax1.yaxis.label.set_color(p1.get_color())
+    ax1.tick_params(axis='y', colors=p1.get_color())
+
+    ax1.legend([p1], [p1.get_label()], loc=4)
+
+
+    #Load td data if it exists
+    tx, ty = load_custom_data(folder, 'td.csv', smooth, bin_size[1])
+
+    ax2.set_title('Loss vs Timestep')
+
+    if tx is not None or ty is not None:
+        ax2.set_ylabel('Avg. Temporal Difference')
+        p2, = ax2.plot(tx, ty, 'r-', label='Avg. TD')
+        g2_lines = [p2]
+
+        ax2.yaxis.label.set_color(p2.get_color())
+        ax2.tick_params(axis='y', colors=p2.get_color())
+
+        ax2.legend(g2_lines, [l.get_label() for l in g2_lines], loc=4)
+
+    #Load Sigma Parameter Data if it exists
+    tx, ty = load_custom_data(folder, 'sig_param_mag.csv', smooth, bin_size[2])
+
+    if tx is not None or ty is not None:
+        #need to update g2 title if sig data will be included
+        ax2.set_title('Loss/Avg. Sigma Parameter Magnitude vs Timestep')
+
+        ax4 = ax2.twinx()
+
+        ax4.set_ylabel('Avg. Sigma Parameter Mag.')
+        p4, = ax4.plot(tx, ty, 'g-', label='Avg. Sigma Mag.')
+        g2_lines += [p4]
+
+        ax4.yaxis.label.set_color(p4.get_color())
+        ax4.tick_params(axis='y', colors=p4.get_color())
+
+        #ax4.spines["right"].set_position(("axes", 1.05))
+        #make_patch_spines_invisible(ax4)
+        #ax4.spines["right"].set_visible(True)
+
+        ax2.legend(g2_lines, [l.get_label() for l in g2_lines], loc=4) #remake g2 legend because we have a new line
+
+    #Load action selection data if it exists
+    tx, ty = load_action_data(folder, smooth, bin_size[3])
+
+    ax3.set_title('Action Selection Frequency(%) vs Timestep')
+
+    if tx is not None or ty is not None:
+        ax3.set_ylabel('Action Selection Frequency(%)')
+        labels = ['Action {}'.format(i) for i in range(ty.shape[0])]
+        p3 = ax3.stackplot(tx, ty, labels=labels)
+
+        #ax3.yaxis.label.set_color(p3.get_color())
+        #ax3.tick_params(axis='y', colors=p3.get_color())
+
+        ax3.legend(loc=4)
+
+    plt.tight_layout() # prevent label cutoff
+
+    if ipynb:
+        plt.show()
+    else:
+        plt.savefig(save_filename)
+    plt.clf()
+    plt.close()
+
+    #return np.round(np.mean(ty[-10:]))
+
 def plot_reward(folder, game, name, num_steps, bin_size=10, smooth=1, time=None, save_filename='results.png', ipynb=False):
     matplotlib.rcParams.update({'font.size': 20})
-    tx, ty = load_data(folder, smooth, bin_size)
+    tx, ty = load_reward_data(folder, smooth, bin_size)
 
     if tx is None or ty is None:
         return
@@ -165,4 +334,70 @@ def plot_reward(folder, game, name, num_steps, bin_size=10, smooth=1, time=None,
     plt.clf()
     plt.close()
 
-    return np.round(np.mean(ty[-10]))
\ No newline at end of file
+    return np.round(np.mean(ty[-10]))
+
+'''def plot_td(folder, game, name, num_steps, bin_size=10, smooth=1, time=None, save_filename='td.png', ipynb=False):
+    matplotlib.rcParams.update({'font.size': 20})
+    tx, ty = load_custom_data(folder, 'td.csv', smooth, bin_size)
+
+    if tx is None or ty is None:
+        return
+
+    fig = plt.figure(figsize=(20,5))
+    plt.plot(tx, ty, label="{}".format(name))
+
+    tick_fractions = np.array([0.1, 0.2, 0.4, 0.6, 0.8, 1.0])
+    ticks = tick_fractions * num_steps
+    tick_names = ["{:.0e}".format(tick) for tick in ticks]
+    plt.xticks(ticks, tick_names)
+    plt.xlim(0, num_steps * 1.01)
+
+    plt.xlabel('Number of Timesteps')
+    plt.ylabel('Rewards')
+
+    if time is not None:
+        plt.title(game + ' || Last 10: ' + str(np.round(np.mean(ty[-1]))) + ' || Elapsed Time: ' + str(time))
+    else:
+        plt.title(game + ' || Last 10: ' + str(np.round(np.mean(ty[-1]))))
+    plt.legend(loc=4)
+    if ipynb:
+        plt.show()
+    else:
+        plt.savefig(save_filename)
+    plt.clf()
+    plt.close()
+
+    return np.round(np.mean(ty[-1]))
+
+def plot_sig(folder, game, name, num_steps, bin_size=10, smooth=1, time=None, save_filename='sig.png', ipynb=False):
+    matplotlib.rcParams.update({'font.size': 20})
+    tx, ty = load_custom_data(folder, 'sig_param_mag.csv', smooth, bin_size)
+
+    if tx is None or ty is None:
+        return
+
+    fig = plt.figure(figsize=(20,5))
+    plt.plot(tx, ty, label="{}".format(name))
+
+    tick_fractions = np.array([0.1, 0.2, 0.4, 0.6, 0.8, 1.0])
+    ticks = tick_fractions * num_steps
+    tick_names = ["{:.0e}".format(tick) for tick in ticks]
+    plt.xticks(ticks, tick_names)
+    plt.xlim(0, num_steps * 1.01)
+
+    plt.xlabel('Number of Timesteps')
+    plt.ylabel('Rewards')
+
+    if time is not None:
+        plt.title(game + ' || Last 10: ' + str(np.round(np.mean(ty[-1]))) + ' || Elapsed Time: ' + str(time))
+    else:
+        plt.title(game + ' || Last 10: ' + str(np.round(np.mean(ty[-1]))))
+    plt.legend(loc=4)
+    if ipynb:
+        plt.show()
+    else:
+        plt.savefig(save_filename)
+    plt.clf()
+    plt.close()
+
+    return np.round(np.mean(ty[-1]))'''
\ No newline at end of file
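
Usage note (not part of the patch): a minimal sketch of how the new CSV-based logging in agents/BaseAgent.py is meant to be driven. The SimpleNamespace stand-ins for Config and the wrapped gym environment are hypothetical and only keep the snippet self-contained; dqn_devel.py uses the real Config and the wrap_deepmind environment instead.

import os
from types import SimpleNamespace

from agents.BaseAgent import BaseAgent

# Hypothetical stand-ins: BaseAgent only reads config.ACTION_SELECTION_COUNT_FREQUENCY
# and env.action_space.n when it opens td.csv, sig_param_mag.csv, and action_log.csv.
config = SimpleNamespace(ACTION_SELECTION_COUNT_FREQUENCY=1000)
env = SimpleNamespace(action_space=SimpleNamespace(n=4))

log_dir = '/tmp/gym'
os.makedirs(log_dir, exist_ok=True)

agent = BaseAgent(config=config, env=env, log_dir=log_dir)

for frame_idx in range(1, 2001):
    action = frame_idx % env.action_space.n      # placeholder policy, illustration only
    agent.save_action(action, frame_idx)         # accumulates counts; writes a row to action_log.csv every 1000 frames
    agent.save_td(1.0 / frame_idx, frame_idx)    # appends (tstep, td) to td.csv

# save_sigma_param_magnitudes(frame_idx) would also be called here, but it needs
# self.model to be declared first (it scans named_parameters for noisy-net sigma values).

agent.flush_data()  # flush the csv buffers before plotting, as dqn_devel.py does

In dqn_devel.py the same calls sit inside the real training loop, and every 10000 frames model.flush_data() is followed by plot_all_data(log_dir, env_id, 'DRQN', config.MAX_FRAMES, bin_size=(10, 100, 100, 1), smooth=1, ...) to rebuild results.png from the monitor and CSV logs.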