From 4c4c0a4752579c8f1e0394eeba26ce7c351220aa Mon Sep 17 00:00:00 2001 From: Luis Pineda Date: Mon, 8 Mar 2021 11:53:12 -0500 Subject: [PATCH 1/5] changed abstract Model to have is_ensemble and is_determinsitic as properties relying on abstract methods that must be implemented by subclasses. --- mbrl/models/base_models.py | 30 +++++++++++++++++++++--------- mbrl/models/gaussian_mlp.py | 30 +++++++++++++++++------------- mbrl/models/model_env.py | 2 +- tests/core/test_models.py | 31 ++++++++++++++++++++++++++++++- 4 files changed, 69 insertions(+), 24 deletions(-) diff --git a/mbrl/models/base_models.py b/mbrl/models/base_models.py index 54a1db51..39e5ab2c 100644 --- a/mbrl/models/base_models.py +++ b/mbrl/models/base_models.py @@ -111,7 +111,6 @@ def __init__( self.in_size = in_size self.out_size = out_size self.device = torch.device(device) - self.is_ensemble = False self.to(device) def forward(self, x: torch.Tensor, **kwargs) -> Tuple[torch.Tensor, torch.Tensor]: @@ -141,7 +140,6 @@ def loss( Returns: (tensor): a loss tensor. """ - pass @abc.abstractmethod def eval_score(self, model_in: torch.Tensor, target: torch.Tensor) -> torch.Tensor: @@ -166,23 +164,35 @@ def eval_score(self, model_in: torch.Tensor, target: torch.Tensor) -> torch.Tens Returns: (tensor): a non-reduced tensor score. """ - pass @abc.abstractmethod def save(self, path: str): """Saves the model to the given path. 
""" - pass @abc.abstractmethod def load(self, path: str): """Loads the model from the given path.""" + + @abc.abstractmethod + def _is_deterministic_impl(self): + # Subclasses must specify if model is _deterministic or not pass @abc.abstractmethod - def is_deterministic(self): - """Whether the model produces logvar predictions or not.""" + def _is_ensemble_impl(self): + # Subclasses must specify if they are ensembles or not pass + @property + def is_deterministic(self): + """Whether the model is deterministic or not.""" + return self._is_deterministic_impl() + + @property + def is_ensemble(self): + """Whether the model is an ensemble or not.""" + return self._is_ensemble_impl() + def update( self, model_in: torch.Tensor, @@ -277,7 +287,6 @@ def __init__( ): super().__init__(in_size, out_size, device) self.members = [] - self.is_ensemble = True for i in range(ensemble_size): model = hydra.utils.instantiate(member_cfg) self.members.append(model) @@ -483,8 +492,11 @@ def load(self, path: str): state_dict = torch.load(path) self.load_state_dict(state_dict) - def is_deterministic(self): - return self.members[0].is_deterministic() + def _is_ensemble_impl(self): + return True + + def _is_deterministic_impl(self): + return self.members[0].is_deterministic def sample_propagation_indices( self, batch_size: int, rng: torch.Generator diff --git a/mbrl/models/gaussian_mlp.py b/mbrl/models/gaussian_mlp.py index 3dc9e643..96a05e88 100644 --- a/mbrl/models/gaussian_mlp.py +++ b/mbrl/models/gaussian_mlp.py @@ -55,8 +55,9 @@ def __init__( activation_cls = nn.SiLU if use_silu else nn.ReLU self.num_members = None + self._is_ensemble = False if ensemble_size > 1: - self.is_ensemble = True + self._is_ensemble = True self.num_members = ensemble_size def create_linear_layer(l_in, l_out): @@ -77,13 +78,13 @@ def create_linear_layer(l_in, l_out): ) self.hidden_layers = nn.Sequential(*hidden_layers) - self.deterministic = deterministic + self._deterministic = deterministic if deterministic: 
self.mean_and_logvar = create_linear_layer(hid_size, out_size) else: self.mean_and_logvar = create_linear_layer(hid_size, 2 * out_size) logvar_shape = ( - (self.num_members, 1, out_size) if self.is_ensemble else (1, out_size) + (self.num_members, 1, out_size) if self._is_ensemble else (1, out_size) ) self.min_logvar = nn.Parameter( -10 * torch.ones(logvar_shape, requires_grad=True) @@ -116,12 +117,12 @@ def _default_forward( x = self.hidden_layers(x) mean_and_logvar = self.mean_and_logvar(x) self._maybe_toggle_layers_use_only_elite(only_elite) - if self.deterministic: + if self._deterministic: return mean_and_logvar, None else: mean = mean_and_logvar[..., : self.out_size] logvar = mean_and_logvar[..., self.out_size :] - if self.is_ensemble and self.elite_models is not None: + if self._is_ensemble and self.elite_models is not None: model_idx = self.elite_models if only_elite else range(self.num_members) assert not only_elite or (len(model_idx) != self.num_members), ( "If elite size == self.num_members, it's better " @@ -202,7 +203,7 @@ def forward( # type: ignore ) -> Tuple[torch.Tensor, torch.Tensor]: """Computes mean and logvar predictions for the given input. - When ``self.is_ensemble = True``, the model supports uncertainty propagation options + When ``self._is_ensemble = True``, the model supports uncertainty propagation options that can be used to aggregate the outputs of the different models in the ensemble. Valid propagation options are: @@ -250,7 +251,7 @@ def forward( # type: ignore the output to :func:`mbrl.math.propagate`. 
""" - if self.is_ensemble: + if self._is_ensemble: return self._forward_ensemble( x, propagation=propagation, @@ -261,7 +262,7 @@ def forward( # type: ignore def _mse_loss(self, model_in: torch.Tensor, target: torch.Tensor) -> torch.Tensor: pred_mean, _ = self.forward(model_in) - if self.is_ensemble: + if self._is_ensemble: assert model_in.ndim == 3 and target.ndim == 3 total_loss: torch.Tensor = 0.0 for i in range(self.num_members): @@ -274,7 +275,7 @@ def _mse_loss(self, model_in: torch.Tensor, target: torch.Tensor) -> torch.Tenso def _nll_loss(self, model_in: torch.Tensor, target: torch.Tensor) -> torch.Tensor: pred_mean, pred_logvar = self.forward(model_in) - if self.is_ensemble: + if self._is_ensemble: assert model_in.ndim == 3 and target.ndim == 3 nll: torch.Tensor = 0.0 for i in range(self.num_members): @@ -310,7 +311,7 @@ def loss(self, model_in: torch.Tensor, target: torch.Tensor) -> torch.Tensor: the model over the given input/target. If the model is an ensemble, returns the average over all models. 
""" - if self.deterministic: + if self._deterministic: return self._mse_loss(model_in, target) else: return self._nll_loss(model_in, target) @@ -334,7 +335,7 @@ def eval_score(self, model_in: torch.Tensor, target: torch.Tensor) -> torch.Tens assert model_in.ndim == 2 and target.ndim == 2 with torch.no_grad(): pred_mean, _ = self.forward(model_in) - if self.is_ensemble: + if self._is_ensemble: target = target.repeat((self.num_members, 1, 1)) return F.mse_loss(pred_mean, target, reduction="none") @@ -344,8 +345,11 @@ def save(self, path: str): def load(self, path: str): self.load_state_dict(torch.load(path)) - def is_deterministic(self): - return self.deterministic + def _is_deterministic_impl(self): + return self._deterministic + + def _is_ensemble_impl(self): + return self._is_ensemble def __len__(self): return self.num_members diff --git a/mbrl/models/model_env.py b/mbrl/models/model_env.py index bb8457f6..acfb9da3 100644 --- a/mbrl/models/model_env.py +++ b/mbrl/models/model_env.py @@ -186,7 +186,7 @@ def evaluate_action_sequences( actions_for_step, num_particles, dim=0 ) _, rewards, _, _ = self.step( - action_batch, sample=not self.dynamics_model.model.is_deterministic() + action_batch, sample=not self.dynamics_model.model.is_deterministic ) total_rewards += rewards diff --git a/tests/core/test_models.py b/tests/core/test_models.py index f72eb3a5..3b716ad2 100644 --- a/tests/core/test_models.py +++ b/tests/core/test_models.py @@ -9,6 +9,35 @@ import mbrl.models +def test_gaussian_mlp_and_basic_ensemble_properties(): + model_in_size = 2 + model_out_size = 2 + for det in [True, False]: + member_cfg = omegaconf.OmegaConf.create( + { + "_target_": "mbrl.models.GaussianMLP", + "device": "cpu", + "in_size": model_in_size, + "out_size": model_out_size, + "deterministic": det, + "ensemble_size": 1, + } + ) + ensemble = mbrl.models.BasicEnsemble( + 2, model_in_size, model_out_size, torch.device("cpu"), member_cfg + ) + + assert ensemble.is_deterministic == det + assert 
ensemble.is_ensemble + assert not ensemble.members[0].is_ensemble + + member_cfg["ensemble_size"] = 2 + ensemble = mbrl.models.BasicEnsemble( + 1, model_in_size, model_out_size, torch.device("cpu"), member_cfg + ) + assert ensemble.members[0].is_ensemble + + def test_basic_ensemble_gaussian_forward(): model_in_size = 2 model_out_size = 2 @@ -59,7 +88,7 @@ def _create_gaussian_ensemble_mock(ensemble_size, as_float=False): ) # With this we can use the output value to identify which model produced the output - def mock_fwd(_x): + def mock_fwd(_x, only_elite=False): output = _x.clone() if output.shape[0] == 1: output = output.repeat(ensemble_size, 1, 1) From ab4d820770e7a89de317621a7e017f9b0fb83fd7 Mon Sep 17 00:00:00 2001 From: Luis Pineda Date: Mon, 8 Mar 2021 11:57:29 -0500 Subject: [PATCH 2/5] created tests for common utils. So far, added test for create_dynamics_model. --- mbrl/util/common.py | 5 ++- tests/core/test_common_utils.py | 79 +++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 tests/core/test_common_utils.py diff --git a/mbrl/util/common.py b/mbrl/util/common.py index 446a1bfd..ec094fdd 100644 --- a/mbrl/util/common.py +++ b/mbrl/util/common.py @@ -14,8 +14,8 @@ def create_dynamics_model( cfg: Union[omegaconf.ListConfig, omegaconf.DictConfig], - obs_shape: Tuple[int], - act_shape: Tuple[int], + obs_shape: Tuple[int, ...], + act_shape: Tuple[int, ...], model_dir: Optional[Union[str, pathlib.Path]] = None, ): """Creates a dynamics model from a given configuration. @@ -41,6 +41,7 @@ def create_dynamics_model( -overrides -no_delta_list (list[int], optional): to be passed to the dynamics model wrapper -obs_process_fn (str, optional): a Python function to pre-process observations + -num_elites (int, optional): number of elite members for ensembles If ``cfg.dynamics_model.model.in_size`` is not provided, it will be automatically set to `obs_shape[0] + act_shape[0]`. 
If ``cfg.dynamics_model.model.out_size`` is not provided, diff --git a/tests/core/test_common_utils.py b/tests/core/test_common_utils.py new file mode 100644 index 00000000..e2e9c713 --- /dev/null +++ b/tests/core/test_common_utils.py @@ -0,0 +1,79 @@ +import omegaconf +import pytest + +import mbrl.models as models +import mbrl.util.common as utils + + +class MockModel(models.Model): + def __init__(self, x, y, in_size, out_size): + super().__init__(in_size, out_size, "cpu") + self.x = x + self.y = y + + def _is_deterministic_impl(self): + return True + + def _is_ensemble_impl(self): + return False + + def load(self, path): + pass + + def save(self, paht): + pass + + def loss(self, model_in, target): + pass + + def eval_score(self, model_in, target): + pass + + +def mock_obs_func(): + pass + + +def test_create_dynamics_model(): + cfg_dict = { + "dynamics_model": { + "model": { + "_target_": "tests.core.test_common_utils.MockModel", + "x": 1, + "y": 2, + } + }, + "algorithm": { + "learned_rewards": "true", + "terget_is_delta": "true", + "normalize": "true", + }, + "overrides": {}, + } + obs_shape = (10,) + act_shape = (1,) + + cfg = omegaconf.OmegaConf.create(cfg_dict) + dynamics_model = utils.create_dynamics_model(cfg, obs_shape, act_shape) + + assert isinstance(dynamics_model.model, MockModel) + assert dynamics_model.model.in_size == obs_shape[0] + act_shape[0] + assert dynamics_model.model.out_size == obs_shape[0] + 1 + assert dynamics_model.model.x == 1 and dynamics_model.model.y == 2 + assert dynamics_model.num_elites is None + assert dynamics_model.no_delta_list == [] + + # Check given input/output sizes, overrides active, and no learned rewards option + cfg.dynamics_model.model.in_size = 11 + cfg.dynamics_model.model.out_size = 7 + cfg.algorithm.learned_rewards = False + cfg.overrides.no_delta_list = [0] + cfg.overrides.num_elites = 8 + cfg.overrides.obs_process_fn = "tests.core.test_common_utils.mock_obs_func" + dynamics_model = 
utils.create_dynamics_model(cfg, obs_shape, act_shape) + + assert dynamics_model.model.in_size == 11 + assert dynamics_model.model.out_size == 7 + assert dynamics_model.num_elites == 8 + assert dynamics_model.no_delta_list == [0] + assert dynamics_model.obs_process_fn == mock_obs_func From 7a0f3dd994a394bd071dcfbaa94e53727829e95f Mon Sep 17 00:00:00 2001 From: Luis Pineda Date: Mon, 8 Mar 2021 13:13:28 -0500 Subject: [PATCH 3/5] added test for create_replay_buffers --- tests/core/test_common_utils.py | 53 +++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tests/core/test_common_utils.py b/tests/core/test_common_utils.py index e2e9c713..2d1aff0c 100644 --- a/tests/core/test_common_utils.py +++ b/tests/core/test_common_utils.py @@ -2,6 +2,7 @@ import pytest import mbrl.models as models +import mbrl.replay_buffer as replay_buffer import mbrl.util.common as utils @@ -77,3 +78,55 @@ def test_create_dynamics_model(): assert dynamics_model.num_elites == 8 assert dynamics_model.no_delta_list == [0] assert dynamics_model.obs_process_fn == mock_obs_func + + +def test_create_replay_buffers(): + trial_length = 20 + num_trials = 10 + val_ratio = 0.1 + cfg_dict = { + "dynamics_model": {"model": {"ensemble_size": 1}}, + "algorithm": {}, + "overrides": { + "trial_length": trial_length, + "num_trials": num_trials, + "model_batch_size": 64, + "validation_ratio": val_ratio, + }, + } + cfg = omegaconf.OmegaConf.create(cfg_dict) + obs_shape = (6,) + act_shape = (4,) + + def _check_shapes(train_cap): + val_cap = int(val_ratio * train_cap) + assert train.obs.shape == (train_cap, obs_shape[0]) + assert val.obs.shape == (val_cap, obs_shape[0]) + assert train.next_obs.shape == (train_cap, obs_shape[0]) + assert val.next_obs.shape == (val_cap, obs_shape[0]) + assert train.action.shape == (train_cap, act_shape[0]) + assert val.action.shape == (val_cap, act_shape[0]) + assert train.reward.shape == (train_cap,) + assert val.reward.shape == (val_cap,) + assert 
train.done.shape == (train_cap,) + assert val.done.shape == (val_cap,) + + # Test reading from the above configuration and no bootstrap replay buffer + train, val = utils.create_replay_buffers( + cfg, obs_shape, act_shape, train_is_bootstrap=False + ) + assert isinstance(train, replay_buffer.IterableReplayBuffer) + assert isinstance(val, replay_buffer.IterableReplayBuffer) + + _check_shapes(num_trials * trial_length) + + # Now add a training bootstrap and override the dataset size + cfg_dict["algorithm"]["dataset_size"] = 1500 + cfg = omegaconf.OmegaConf.create(cfg_dict) + train, val = utils.create_replay_buffers( + cfg, obs_shape, act_shape, train_is_bootstrap=True + ) + assert isinstance(train, replay_buffer.BootstrapReplayBuffer) + assert isinstance(val, replay_buffer.IterableReplayBuffer) + + _check_shapes(1500) From 52c1da4cde9a256e5b3f4e776cf4efacd3c71b8b Mon Sep 17 00:00:00 2001 From: Luis Pineda Date: Mon, 8 Mar 2021 15:24:19 -0500 Subject: [PATCH 4/5] added test for rollout_model_env --- tests/core/test_common_utils.py | 56 +++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tests/core/test_common_utils.py b/tests/core/test_common_utils.py index 2d1aff0c..e4355fc5 100644 --- a/tests/core/test_common_utils.py +++ b/tests/core/test_common_utils.py @@ -1,3 +1,4 @@ +import numpy as np import omegaconf import pytest @@ -130,3 +131,58 @@ def _check_shapes(train_cap): assert isinstance(val, replay_buffer.IterableReplayBuffer) _check_shapes(1500) + + +class MockModelEnv: + def __init__(self): + self.obs = None + + def reset(self, obs0, propagation_method=None, return_as_np=None): + self.obs = obs0 + return obs0 + + def step(self, action, sample=None): + next_obs = self.obs + action[:, :1] + reward = np.ones(next_obs.shape[0]) + done = np.zeros(next_obs.shape[0]) + self.obs = next_obs + return next_obs, reward, done, {} + + +class MockAgent: + def __init__(self, length): + self.actions = np.ones((length, 1)) + + def plan(self, obs): + 
return self.actions + + +def test_rollout_model_env(): + obs_size = 10 + plan_length = 20 + num_samples = 5 + model_env = MockModelEnv() + obs0 = np.zeros(obs_size) + agent = MockAgent(plan_length) + plan = 0 * agent.plan(obs0) # this should be ignored + + # Check rolling out with an agent + obs, rewards, actions = utils.rollout_model_env( + model_env, obs0, plan, agent, num_samples=num_samples + ) + + assert obs.shape == (plan_length + 1, num_samples, obs_size) + assert rewards.shape == (plan_length, num_samples) + assert actions.shape == (plan_length, 1) + + for i, o in enumerate(obs): + assert o.min() == i + + # Check rolling out with a given plan + plan = 2 * agent.plan(obs0) + obs, rewards, actions = utils.rollout_model_env( + model_env, obs0, plan, None, num_samples=num_samples + ) + + for i, o in enumerate(obs): + assert o.min() == 2 * i From 3a30b1e45aeec5baaa2bad53eda49eaa860da037 Mon Sep 17 00:00:00 2001 From: Luis Pineda Date: Mon, 8 Mar 2021 16:40:09 -0500 Subject: [PATCH 5/5] updated docs with more information about deterministic models --- docs/models.rst | 10 ++++++++++ notebooks/pets_example.ipynb | 34 ++++++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/docs/models.rst b/docs/models.rst index 1c9cbc19..75842e08 100644 --- a/docs/models.rst +++ b/docs/models.rst @@ -1,5 +1,15 @@ Models module ============= +This module provides implementations of common model architectures used in model-based RL, +including probabilistic and deterministic ensembles. All models in the library derive from +class :class:`mbrl.models.Model`. We provide a generic ensemble implementation, +:class:`mbrl.models.BasicEnsemble`, that can be used to produce epistemic uncertainty estimates +for any subclass of `Model`. For efficiency considerations, some specific model implementations +also provide their own ensemble implementations, without having to rely on BasicEnsemble. 
+One such model is :class:`mbrl.models.GaussianMLP`, which can be used as a single model or as +an ensemble. Additionally, it can be used as a deterministic model +trained with MSE loss, or a parameterized Gaussian with mean and log variance outputs, trained +with negative log-likelihood. .. automodule:: mbrl.models :members: diff --git a/notebooks/pets_example.ipynb b/notebooks/pets_example.ipynb index 4b8a8fc0..e057c804 100644 --- a/notebooks/pets_example.ipynb +++ b/notebooks/pets_example.ipynb @@ -13,7 +13,7 @@ "1. Gather data using an exploration policy\n", "2. Repeat:
\n", " 2.1. Train the dynamics model using all available data.
\n", - " 2.2. Do a trajectory on the environment, choosing actions using the planner over the dynamics model.\n", + " 2.2. Do a trajectory on the environment, choosing actions with the planner, using the dynamics model to simulate environment transitions.\n", " \n", "The ensemble model is trained to predict the environment's dynamics, and the planner tries to find high-reward trajectories over the model dynamics. \n", "\n", @@ -53,7 +53,7 @@ "source": [ "# Creating the environment\n", "\n", - "First we instantiate the environment and specify which reward function and termination function to use with the gym-like model wrapper, along with some utility objects." + "First we instantiate the environment and specify which reward function and termination function to use with the gym-like model wrapper, along with some utility objects. The termination function tells the wrapper if an observation should cause an episode to end or not, and it is an input used in some algorithms, like [MBPO](https://github.com/JannerM/mbpo/blob/master/mbpo/static/halfcheetah.py). The reward function is used to compute the value of the reward given an observation, and it's used by some algorithms, like [PETS](https://github.com/kchua/handful-of-trials/blob/77fd8802cc30b7683f0227c90527b5414c0df34c/dmbrl/controllers/MPC.py#L65)." ] }, { @@ -91,7 +91,7 @@ "source": [ "# Hydra configuration\n", "\n", - "MBRL-Lib uses [Hydra](https://github.com/facebookresearch/hydra) to manage configurations. For the purpose of this example, you can think of the configuration object as a dictionary with key/value pairs--and equivalent attributes--that specifying the model and algorithmic options. Our toolbox expects the configuration object to be organized as follows:" + "MBRL-Lib uses [Hydra](https://github.com/facebookresearch/hydra) to manage configurations. 
For the purpose of this example, you can think of the configuration object as a dictionary with key/value pairs--and equivalent attributes--that specify the model and algorithmic options. Our toolbox expects the configuration object to be organized as follows:" ] }, { @@ -101,7 +101,7 @@ "outputs": [], "source": [ "trial_length = 200\n", - "num_trials = 5\n", + "num_trials = 10\n", "device = \"cuda:0\"\n", "\n", "# Everything with \"???\" indicates an option with a missing value.\n", @@ -119,6 +119,7 @@ " \"use_silu\": True,\n", " \"in_size\": \"???\",\n", " \"out_size\": \"???\",\n", + " \"deterministic\": False\n", " }\n", " },\n", " # options for training the dynamics model\n", @@ -138,6 +139,13 @@ "cfg = omegaconf.OmegaConf.create(cfg_dict)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
Note: This example uses a probabilistic ensemble. You can also use a fully deterministic model with class GaussianMLP by setting ensemble_size=1 and deterministic=True.
" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -333,7 +341,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -410,7 +418,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -432,6 +440,20 @@ "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Where to learn more about MBRL?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To learn about the other features of the library, please check out our [documentation](https://luisenp.github.io/mbrl-lib/). Also take a look at our provided implementations of [PETS](https://github.com/luisenp/mbrl-lib/blob/master/mbrl/algorithms/pets.py) and [MBPO](https://github.com/luisenp/mbrl-lib/blob/master/mbrl/algorithms/mbpo.py), and their configuration [files](https://github.com/luisenp/mbrl-lib/tree/master/conf)." + ] + }, { "cell_type": "code", "execution_count": null,